netvsc.c 29.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
14
 * this program; if not, see <http://www.gnu.org/licenses/>.
15 16
 *
 * Authors:
17
 *   Haiyang Zhang <haiyangz@microsoft.com>
18 19
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
20 21
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

22
#include <linux/kernel.h>
23 24
#include <linux/sched.h>
#include <linux/wait.h>
25
#include <linux/mm.h>
26
#include <linux/delay.h>
27
#include <linux/io.h>
28
#include <linux/slab.h>
29
#include <linux/netdevice.h>
30
#include <linux/if_ether.h>
31
#include <asm/sync_bitops.h>
32

33
#include "hyperv_net.h"
34 35


36
static struct netvsc_device *alloc_net_device(struct hv_device *device)
37
{
38
	struct netvsc_device *net_device;
39
	struct net_device *ndev = hv_get_drvdata(device);
40

41 42
	net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
	if (!net_device)
43 44
		return NULL;

45 46 47 48 49 50
	net_device->cb_buffer = kzalloc(NETVSC_PACKET_SIZE, GFP_KERNEL);
	if (!net_device->cb_buffer) {
		kfree(net_device);
		return NULL;
	}

51
	init_waitqueue_head(&net_device->wait_drain);
52
	net_device->start_remove = false;
53
	net_device->destroy = false;
54
	net_device->dev = device;
55
	net_device->ndev = ndev;
56

57
	hv_set_drvdata(device, net_device);
58
	return net_device;
59 60
}

61 62 63 64 65 66
/* Release a netvsc_device and its per-channel callback buffer. */
static void free_netvsc_device(struct netvsc_device *nvdev)
{
	/* The cb_buffer must be freed before its owning structure. */
	kfree(nvdev->cb_buffer);
	kfree(nvdev);
}

67
static struct netvsc_device *get_outbound_net_device(struct hv_device *device)
68
{
69
	struct netvsc_device *net_device;
70

71
	net_device = hv_get_drvdata(device);
72
	if (net_device && net_device->destroy)
73
		net_device = NULL;
74

75
	return net_device;
76 77
}

78
static struct netvsc_device *get_inbound_net_device(struct hv_device *device)
79
{
80
	struct netvsc_device *net_device;
81

82
	net_device = hv_get_drvdata(device);
83 84 85 86 87 88

	if (!net_device)
		goto get_in_err;

	if (net_device->destroy &&
		atomic_read(&net_device->num_outstanding_sends) == 0)
89
		net_device = NULL;
90

91
get_in_err:
92
	return net_device;
93 94 95
}


96
static int netvsc_destroy_buf(struct netvsc_device *net_device)
97 98 99
{
	struct nvsp_message *revoke_packet;
	int ret = 0;
100
	struct net_device *ndev = net_device->ndev;
101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127

	/*
	 * If we got a section count, it means we received a
	 * SendReceiveBufferComplete msg (ie sent
	 * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
	 * to send a revoke msg here
	 */
	if (net_device->recv_section_cnt) {
		/* Send the revoke receive buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
		revoke_packet->msg.v1_msg.
		revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;

		ret = vmbus_sendpacket(net_device->dev->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       (unsigned long)revoke_packet,
				       VM_PKT_DATA_INBAND, 0);
		/*
		 * If we failed here, we might as well return and
		 * have a leak rather than continue and a bugchk
		 */
		if (ret != 0) {
128
			netdev_err(ndev, "unable to send "
129
				"revoke receive buffer to netvsp\n");
130
			return ret;
131 132 133 134 135 136 137 138 139 140 141 142
		}
	}

	/* Teardown the gpadl on the vsp end */
	if (net_device->recv_buf_gpadl_handle) {
		ret = vmbus_teardown_gpadl(net_device->dev->channel,
			   net_device->recv_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
143
			netdev_err(ndev,
144
				   "unable to teardown receive buffer's gpadl\n");
145
			return ret;
146 147 148 149 150 151
		}
		net_device->recv_buf_gpadl_handle = 0;
	}

	if (net_device->recv_buf) {
		/* Free up the receive buffer */
152
		vfree(net_device->recv_buf);
153 154 155 156 157 158 159 160 161
		net_device->recv_buf = NULL;
	}

	if (net_device->recv_section) {
		net_device->recv_section_cnt = 0;
		kfree(net_device->recv_section);
		net_device->recv_section = NULL;
	}

162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
	/* Deal with the send buffer we may have setup.
	 * If we got a  send section size, it means we received a
	 * SendsendBufferComplete msg (ie sent
	 * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
	 * to send a revoke msg here
	 */
	if (net_device->send_section_size) {
		/* Send the revoke receive buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
		revoke_packet->msg.v1_msg.revoke_recv_buf.id = 0;

		ret = vmbus_sendpacket(net_device->dev->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       (unsigned long)revoke_packet,
				       VM_PKT_DATA_INBAND, 0);
		/* If we failed here, we might as well return and
		 * have a leak rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev, "unable to send "
				   "revoke send buffer to netvsp\n");
			return ret;
		}
	}
	/* Teardown the gpadl on the vsp end */
	if (net_device->send_buf_gpadl_handle) {
		ret = vmbus_teardown_gpadl(net_device->dev->channel,
					   net_device->send_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev,
				   "unable to teardown send buffer's gpadl\n");
			return ret;
		}
204
		net_device->send_buf_gpadl_handle = 0;
205 206 207
	}
	if (net_device->send_buf) {
		/* Free up the receive buffer */
208
		vfree(net_device->send_buf);
209 210 211 212
		net_device->send_buf = NULL;
	}
	kfree(net_device->send_section_map);

213 214 215
	return ret;
}

216
static int netvsc_init_buf(struct hv_device *device)
217
{
218
	int ret = 0;
219
	int t;
220 221
	struct netvsc_device *net_device;
	struct nvsp_message *init_packet;
222
	struct net_device *ndev;
223

224
	net_device = get_outbound_net_device(device);
225
	if (!net_device)
226
		return -ENODEV;
227
	ndev = net_device->ndev;
228

229
	net_device->recv_buf = vzalloc(net_device->recv_buf_size);
230
	if (!net_device->recv_buf) {
231
		netdev_err(ndev, "unable to allocate receive "
232
			"buffer of size %d\n", net_device->recv_buf_size);
233
		ret = -ENOMEM;
234
		goto cleanup;
235 236
	}

237 238 239 240 241
	/*
	 * Establish the gpadl handle for this buffer on this
	 * channel.  Note: This call uses the vmbus connection rather
	 * than the channel to establish the gpadl handle.
	 */
242 243 244
	ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
				    net_device->recv_buf_size,
				    &net_device->recv_buf_gpadl_handle);
245
	if (ret != 0) {
246
		netdev_err(ndev,
247
			"unable to establish receive buffer's gpadl\n");
248
		goto cleanup;
249 250 251
	}


252
	/* Notify the NetVsp of the gpadl handle */
253
	init_packet = &net_device->channel_init_pkt;
254

255
	memset(init_packet, 0, sizeof(struct nvsp_message));
256

257 258 259 260 261
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
	init_packet->msg.v1_msg.send_recv_buf.
		gpadl_handle = net_device->recv_buf_gpadl_handle;
	init_packet->msg.v1_msg.
		send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
262

263
	/* Send the gpadl notification request */
264
	ret = vmbus_sendpacket(device->channel, init_packet,
265
			       sizeof(struct nvsp_message),
266
			       (unsigned long)init_packet,
267
			       VM_PKT_DATA_INBAND,
268
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
269
	if (ret != 0) {
270
		netdev_err(ndev,
271
			"unable to send receive buffer's gpadl to netvsp\n");
272
		goto cleanup;
273 274
	}

275
	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
276
	BUG_ON(t == 0);
277

278

279
	/* Check the response */
280 281
	if (init_packet->msg.v1_msg.
	    send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
282
		netdev_err(ndev, "Unable to complete receive buffer "
283
			   "initialization with NetVsp - status %d\n",
284 285
			   init_packet->msg.v1_msg.
			   send_recv_buf_complete.status);
286
		ret = -EINVAL;
287
		goto cleanup;
288 289
	}

290
	/* Parse the response */
291

292 293
	net_device->recv_section_cnt = init_packet->msg.
		v1_msg.send_recv_buf_complete.num_sections;
294

295 296 297 298 299
	net_device->recv_section = kmemdup(
		init_packet->msg.v1_msg.send_recv_buf_complete.sections,
		net_device->recv_section_cnt *
		sizeof(struct nvsp_1_receive_buffer_section),
		GFP_KERNEL);
300
	if (net_device->recv_section == NULL) {
301
		ret = -EINVAL;
302
		goto cleanup;
303 304
	}

305 306 307 308
	/*
	 * For 1st release, there should only be 1 section that represents the
	 * entire receive buffer
	 */
309 310
	if (net_device->recv_section_cnt != 1 ||
	    net_device->recv_section->offset != 0) {
311
		ret = -EINVAL;
312
		goto cleanup;
313 314
	}

315 316
	/* Now setup the send buffer.
	 */
317
	net_device->send_buf = vzalloc(net_device->send_buf_size);
318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389
	if (!net_device->send_buf) {
		netdev_err(ndev, "unable to allocate send "
			   "buffer of size %d\n", net_device->send_buf_size);
		ret = -ENOMEM;
		goto cleanup;
	}

	/* Establish the gpadl handle for this buffer on this
	 * channel.  Note: This call uses the vmbus connection rather
	 * than the channel to establish the gpadl handle.
	 */
	ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
				    net_device->send_buf_size,
				    &net_device->send_buf_gpadl_handle);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to establish send buffer's gpadl\n");
		goto cleanup;
	}

	/* Notify the NetVsp of the gpadl handle */
	init_packet = &net_device->channel_init_pkt;
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
	init_packet->msg.v1_msg.send_recv_buf.gpadl_handle =
		net_device->send_buf_gpadl_handle;
	init_packet->msg.v1_msg.send_recv_buf.id = 0;

	/* Send the gpadl notification request */
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to send send buffer's gpadl to netvsp\n");
		goto cleanup;
	}

	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
	BUG_ON(t == 0);

	/* Check the response */
	if (init_packet->msg.v1_msg.
	    send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
		netdev_err(ndev, "Unable to complete send buffer "
			   "initialization with NetVsp - status %d\n",
			   init_packet->msg.v1_msg.
			   send_recv_buf_complete.status);
		ret = -EINVAL;
		goto cleanup;
	}

	/* Parse the response */
	net_device->send_section_size = init_packet->msg.
				v1_msg.send_send_buf_complete.section_size;

	/* Section count is simply the size divided by the section size.
	 */
	net_device->send_section_cnt =
		net_device->send_buf_size/net_device->send_section_size;

	dev_info(&device->device, "Send section size: %d, Section count:%d\n",
		 net_device->send_section_size, net_device->send_section_cnt);

	/* Setup state for managing the send buffer. */
	net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
					     BITS_PER_LONG);

	net_device->send_section_map =
		kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
390 391
	if (net_device->send_section_map == NULL) {
		ret = -ENOMEM;
392
		goto cleanup;
393
	}
394

395
	goto exit;
396

397
cleanup:
398
	netvsc_destroy_buf(net_device);
399

400
exit:
401 402 403 404
	return ret;
}


405 406 407 408 409
/* Negotiate NVSP protocol version.
 *
 * Offers exactly one version (nvsp_ver is used as both the min and max
 * protocol version) and waits up to 5s for the host's INIT_COMPLETE.
 * For NVSPv2 and later, also sends the NDIS configuration (MTU and
 * 802.1q capability).  Returns 0 on success, -ETIMEDOUT if the host
 * does not answer, -EINVAL if it rejects the version, or a vmbus error.
 */
static int negotiate_nvsp_ver(struct hv_device *device,
			      struct netvsc_device *net_device,
			      struct nvsp_message *init_packet,
			      u32 nvsp_ver)
{
	int ret, t;

	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
	/* Offer a single version: min == max == nvsp_ver. */
	init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
	init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;

	/* Send the init request */
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if (ret != 0)
		return ret;

	/* Completed by netvsc_send_completion() when INIT_COMPLETE arrives. */
	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);

	if (t == 0)
		return -ETIMEDOUT;

	if (init_packet->msg.init_msg.init_complete.status !=
	    NVSP_STAT_SUCCESS)
		return -EINVAL;

	if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
		return 0;

	/* NVSPv2 only: Send NDIS config */
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
	init_packet->msg.v2_msg.send_ndis_config.mtu = net_device->ndev->mtu +
						       ETH_HLEN;
	init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;

	/* No completion is requested for the NDIS config message. */
	ret = vmbus_sendpacket(device->channel, init_packet,
				sizeof(struct nvsp_message),
				(unsigned long)init_packet,
				VM_PKT_DATA_INBAND, 0);

	return ret;
}

static int netvsc_connect_vsp(struct hv_device *device)
{
	int ret;
	struct netvsc_device *net_device;
	struct nvsp_message *init_packet;
	int ndis_version;
	struct net_device *ndev;
462 463 464
	u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
		NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 };
	int i, num_ver = 4; /* number of different NVSP versions */
465 466 467 468 469 470 471 472 473

	net_device = get_outbound_net_device(device);
	if (!net_device)
		return -ENODEV;
	ndev = net_device->ndev;

	init_packet = &net_device->channel_init_pkt;

	/* Negotiate the latest NVSP protocol supported */
474 475 476 477 478 479 480 481
	for (i = num_ver - 1; i >= 0; i--)
		if (negotiate_nvsp_ver(device, net_device, init_packet,
				       ver_list[i])  == 0) {
			net_device->nvsp_version = ver_list[i];
			break;
		}

	if (i < 0) {
482
		ret = -EPROTO;
483
		goto cleanup;
484
	}
485 486 487

	pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);

488
	/* Send the ndis version */
489
	memset(init_packet, 0, sizeof(struct nvsp_message));
490

491
	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
492
		ndis_version = 0x00060001;
493 494
	else
		ndis_version = 0x0006001e;
495

496 497 498
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
	init_packet->msg.v1_msg.
		send_ndis_ver.ndis_major_ver =
499
				(ndis_version & 0xFFFF0000) >> 16;
500 501
	init_packet->msg.v1_msg.
		send_ndis_ver.ndis_minor_ver =
502
				ndis_version & 0xFFFF;
503

504
	/* Send the init request */
505
	ret = vmbus_sendpacket(device->channel, init_packet,
506 507 508
				sizeof(struct nvsp_message),
				(unsigned long)init_packet,
				VM_PKT_DATA_INBAND, 0);
509
	if (ret != 0)
510
		goto cleanup;
511 512

	/* Post the big receive buffer to NetVSP */
513 514 515 516
	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
		net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
	else
		net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
517
	net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
518

519
	ret = netvsc_init_buf(device);
520

521
cleanup:
522 523 524
	return ret;
}

525
/* Disconnect from the NetVSP by revoking and freeing the shared
 * receive/send buffers (thin wrapper over netvsc_destroy_buf()).
 */
static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
{
	netvsc_destroy_buf(net_device);
}

530
/*
531
 * netvsc_device_remove - Callback when the root bus device is removed
532
 */
533
int netvsc_device_remove(struct hv_device *device)
534
{
535
	struct netvsc_device *net_device;
536
	unsigned long flags;
537

538
	net_device = hv_get_drvdata(device);
539

540
	netvsc_disconnect_vsp(net_device);
541

542
	/*
543 544 545 546 547
	 * Since we have already drained, we don't need to busy wait
	 * as was done in final_release_stor_device()
	 * Note that we cannot set the ext pointer to NULL until
	 * we have drained - to drain the outgoing packets, we need to
	 * allow incoming packets.
548
	 */
549 550

	spin_lock_irqsave(&device->channel->inbound_lock, flags);
551
	hv_set_drvdata(device, NULL);
552
	spin_unlock_irqrestore(&device->channel->inbound_lock, flags);
553

554 555 556 557
	/*
	 * At this point, no one should be accessing net_device
	 * except in here
	 */
558
	dev_notice(&device->device, "net device safe to remove\n");
559

560
	/* Now, we can close the channel safely */
561
	vmbus_close(device->channel);
562

563
	/* Release all resources */
564 565 566
	if (net_device->sub_cb_buf)
		vfree(net_device->sub_cb_buf);

567
	free_netvsc_device(net_device);
568
	return 0;
569 570
}

571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588

#define RING_AVAIL_PERCENT_HIWATER 20
#define RING_AVAIL_PERCENT_LOWATER 10

/*
 * Get the percentage of available bytes to write in the ring.
 * The return value is in range from 0 to 100.
 */
static inline u32 hv_ringbuf_avail_percent(
		struct hv_ring_buffer_info *ring_info)
{
	u32 r_avail;
	u32 w_avail;

	hv_get_ringbuffer_availbytes(ring_info, &r_avail, &w_avail);

	return w_avail * 100 / ring_info->ring_datasize;
}

589 590 591 592 593 594
/* Return a send-buffer section to the free pool by atomically clearing
 * its bit in send_section_map (pairs with the atomic set done when the
 * section was claimed).
 */
static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
					 u32 index)
{
	sync_change_bit(index, net_device->send_section_map);
}

595 596
/* Handle a VM_PKT_COMP packet from the host.
 *
 * Channel-setup completions (INIT, recv/send buffer, subchannel) are
 * copied into channel_init_pkt and signal channel_init_wait, unblocking
 * the initialization paths.  RNDIS packet completions release the send
 * slot, invoke the upper layer's completion callback, update the
 * outstanding-send counters, and wake a stopped TX queue when the ring
 * has drained.
 */
static void netvsc_send_completion(struct netvsc_device *net_device,
				   struct hv_device *device,
				   struct vmpacket_descriptor *packet)
{
	struct nvsp_message *nvsp_packet;
	struct hv_netvsc_packet *nvsc_packet;
	struct net_device *ndev;
	u32 send_index;

	ndev = net_device->ndev;

	/* The nvsp message follows the vmpacket header (offset8 is in
	 * 8-byte units).
	 */
	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
			(packet->offset8 << 3));

	if ((nvsp_packet->hdr.msg_type == NVSP_MSG_TYPE_INIT_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG5_TYPE_SUBCHANNEL)) {
		/* Copy the response back */
		memcpy(&net_device->channel_init_pkt, nvsp_packet,
		       sizeof(struct nvsp_message));
		complete(&net_device->channel_init_wait);
	} else if (nvsp_packet->hdr.msg_type ==
		   NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
		int num_outstanding_sends;
		u16 q_idx = 0;
		struct vmbus_channel *channel = device->channel;
		int queue_sends;

		/* Get the send context (stashed in trans_id by netvsc_send) */
		nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
			packet->trans_id;

		/* Notify the layer above us */
		if (nvsc_packet) {
			/* Release the send-buffer section, if one was used. */
			send_index = nvsc_packet->send_buf_index;
			if (send_index != NETVSC_INVALID_INDEX)
				netvsc_free_send_slot(net_device, send_index);
			q_idx = nvsc_packet->q_idx;
			channel = nvsc_packet->channel;
			nvsc_packet->send_completion(nvsc_packet->
						     send_completion_ctx);
		}

		num_outstanding_sends =
			atomic_dec_return(&net_device->num_outstanding_sends);
		queue_sends = atomic_dec_return(&net_device->
						queue_sends[q_idx]);

		/* Unblock netvsc_device_remove() once fully drained. */
		if (net_device->destroy && num_outstanding_sends == 0)
			wake_up(&net_device->wait_drain);

		/* Wake the TX queue if it was stopped for backpressure and
		 * the ring is now above the high watermark (or empty).
		 */
		if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
		    !net_device->start_remove &&
		    (hv_ringbuf_avail_percent(&channel->outbound) >
		     RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
				netif_tx_wake_queue(netdev_get_tx_queue(
						    ndev, q_idx));
	} else {
		netdev_err(ndev, "Unknown send completion packet type- "
			   "%d received!!\n", nvsp_packet->hdr.msg_type);
	}

}

663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708
/*
 * Claim a free send-buffer section: scan the bitmap one word at a time,
 * atomically set the first clear bit found, and return its index.
 * Returns NETVSC_INVALID_INDEX if no section is available, if the race
 * for a bit is lost in every candidate word, or if the only free bit
 * lies past send_section_cnt (padding bits in the last word).
 */
static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
{
	unsigned long *map = (unsigned long *)net_device->send_section_map;
	u32 nr_words = net_device->map_words;
	u32 nr_sections = net_device->send_section_cnt;
	int word;

	for (word = 0; word < nr_words; word++) {
		unsigned long bit;

		/* Skip words with every bit already taken. */
		if (!~(map[word]))
			continue;
		bit = ffz(map[word]);
		/* Lost the race for this bit: move on to the next word. */
		if (sync_test_and_set_bit(bit, &map[word]))
			continue;
		/* A bit beyond the section count is padding, not a section. */
		if ((bit + (word * BITS_PER_LONG)) >= nr_sections)
			break;
		return bit + (word * BITS_PER_LONG);
	}

	return NETVSC_INVALID_INDEX;
}

/*
 * Copy a packet's page-buffer fragments, back to back, into the given
 * section of the pre-shared send buffer.  Returns the total number of
 * bytes copied.
 */
u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
			    unsigned int section_index,
			    struct hv_netvsc_packet *packet)
{
	char *base = net_device->send_buf;
	char *dest = base + (section_index * net_device->send_section_size);
	u32 copied = 0;
	int i;

	for (i = 0; i < packet->page_buf_cnt; i++) {
		char *src = phys_to_virt(packet->page_buf[i].pfn << PAGE_SHIFT);
		u32 off = packet->page_buf[i].offset;
		u32 len = packet->page_buf[i].len;

		memcpy(dest + copied, src + off, len);
		copied += len;
	}

	return copied;
}

709
/* Transmit one RNDIS packet to the host.
 *
 * Small packets (smaller than one send-buffer section) are copied into
 * a pre-shared send-buffer section so the skb can be freed immediately;
 * larger packets go out by reference via a page-buffer send.  On
 * success the outstanding-send counters are bumped and the TX queue is
 * stopped if the outbound ring drops below the low watermark; -EAGAIN
 * from vmbus is converted into queue backpressure (and -ENOSPC when the
 * queue is already empty).
 */
int netvsc_send(struct hv_device *device,
			struct hv_netvsc_packet *packet)
{
	struct netvsc_device *net_device;
	int ret = 0;
	struct nvsp_message sendMessage;
	struct net_device *ndev;
	struct vmbus_channel *out_channel = NULL;
	u64 req_id;
	unsigned int section_index = NETVSC_INVALID_INDEX;
	u32 msg_size = 0;
	struct sk_buff *skb;
	u16 q_idx = packet->q_idx;


	net_device = get_outbound_net_device(device);
	if (!net_device)
		return -ENODEV;
	ndev = net_device->ndev;

	sendMessage.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
	if (packet->is_data_pkt) {
		/* 0 is RMC_DATA; */
		sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 0;
	} else {
		/* 1 is RMC_CONTROL; */
		sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
	}

	/* Attempt to send via sendbuf */
	if (packet->total_data_buflen < net_device->send_section_size) {
		section_index = netvsc_get_next_send_section(net_device);
		if (section_index != NETVSC_INVALID_INDEX) {
			msg_size = netvsc_copy_to_send_buf(net_device,
							   section_index,
							   packet);
			/* Data now lives in the send buffer; the skb can
			 * be released and the page buffers dropped.
			 */
			skb = (struct sk_buff *)
			      (unsigned long)packet->send_completion_tid;
			if (skb)
				dev_kfree_skb_any(skb);
			packet->page_buf_cnt = 0;
		}
	}
	packet->send_buf_index = section_index;


	sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
		section_index;
	sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size;

	/* req_id doubles as the completion context; 0 means "no
	 * completion requested".
	 */
	if (packet->send_completion)
		req_id = (ulong)packet;
	else
		req_id = 0;

	/* Pick the subchannel for this queue, falling back to the
	 * primary channel before subchannels are set up.
	 */
	out_channel = net_device->chn_table[packet->q_idx];
	if (out_channel == NULL)
		out_channel = device->channel;
	packet->channel = out_channel;

	if (packet->page_buf_cnt) {
		ret = vmbus_sendpacket_pagebuffer(out_channel,
						  packet->page_buf,
						  packet->page_buf_cnt,
						  &sendMessage,
						  sizeof(struct nvsp_message),
						  req_id);
	} else {
		ret = vmbus_sendpacket(out_channel, &sendMessage,
				sizeof(struct nvsp_message),
				req_id,
				VM_PKT_DATA_INBAND,
				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	}

	if (ret == 0) {
		atomic_inc(&net_device->num_outstanding_sends);
		atomic_inc(&net_device->queue_sends[q_idx]);

		/* Stop the queue when the ring runs low; re-wake at once
		 * if all sends already completed (nothing left to wake us).
		 */
		if (hv_ringbuf_avail_percent(&out_channel->outbound) <
			RING_AVAIL_PERCENT_LOWATER) {
			netif_tx_stop_queue(netdev_get_tx_queue(
					    ndev, q_idx));

			if (atomic_read(&net_device->
				queue_sends[q_idx]) < 1)
				netif_tx_wake_queue(netdev_get_tx_queue(
						    ndev, q_idx));
		}
	} else if (ret == -EAGAIN) {
		/* Ring full: apply backpressure and let a completion
		 * wake the queue; if none are outstanding, fail hard.
		 */
		netif_tx_stop_queue(netdev_get_tx_queue(
				    ndev, q_idx));
		if (atomic_read(&net_device->queue_sends[q_idx]) < 1) {
			netif_tx_wake_queue(netdev_get_tx_queue(
					    ndev, q_idx));
			ret = -ENOSPC;
		}
	} else {
		netdev_err(ndev, "Unable to send packet %p ret %d\n",
			   packet, ret);
	}

	return ret;
}

814
/* Send an RNDIS receive-completion back to the host for the given
 * transfer-page transaction.  If the ring is momentarily full
 * (-EAGAIN), retry up to 3 times with a 100us delay before giving up.
 */
static void netvsc_send_recv_completion(struct hv_device *device,
					struct vmbus_channel *channel,
					struct netvsc_device *net_device,
					u64 transaction_id, u32 status)
{
	struct nvsp_message recvcompMessage;
	int retries = 0;
	int ret;
	struct net_device *ndev;

	ndev = net_device->ndev;

	recvcompMessage.hdr.msg_type =
				NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;

	recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;

retry_send_cmplt:
	/* Send the completion */
	ret = vmbus_sendpacket(channel, &recvcompMessage,
			       sizeof(struct nvsp_message), transaction_id,
			       VM_PKT_COMP, 0);
	if (ret == 0) {
		/* success */
		/* no-op */
	} else if (ret == -EAGAIN) {
		/* no more room...wait a bit and attempt to retry 3 times */
		retries++;
		netdev_err(ndev, "unable to send receive completion pkt"
			" (tid %llx)...retrying %d\n", transaction_id, retries);

		if (retries < 4) {
			udelay(100);
			goto retry_send_cmplt;
		} else {
			netdev_err(ndev, "unable to send receive "
				"completion pkt (tid %llx)...give up retrying\n",
				transaction_id);
		}
	} else {
		netdev_err(ndev, "unable to send receive "
			"completion pkt - %llx\n", transaction_id);
	}
}

859
/* Process an inbound transfer-page packet from the host.
 *
 * Validates that the packet is a transfer-page RNDIS packet aimed at
 * our receive buffer, then hands each contained range (one RNDIS packet
 * / ethernet frame) up to the RNDIS filter.  Finally acknowledges the
 * whole transaction to the host with an aggregate status (FAIL if any
 * range failed).
 */
static void netvsc_receive(struct netvsc_device *net_device,
			struct vmbus_channel *channel,
			struct hv_device *device,
			struct vmpacket_descriptor *packet)
{
	struct vmtransfer_page_packet_header *vmxferpage_packet;
	struct nvsp_message *nvsp_packet;
	/* Stack-allocated scratch packet reused for every range below. */
	struct hv_netvsc_packet nv_pkt;
	struct hv_netvsc_packet *netvsc_packet = &nv_pkt;
	u32 status = NVSP_STAT_SUCCESS;
	int i;
	int count = 0;
	struct net_device *ndev;

	ndev = net_device->ndev;

	/*
	 * All inbound packets other than send completion should be xfer page
	 * packet
	 */
	if (packet->type != VM_PKT_DATA_USING_XFER_PAGES) {
		netdev_err(ndev, "Unknown packet type received - %d\n",
			   packet->type);
		return;
	}

	/* The nvsp message follows the vmpacket header (offset8 is in
	 * 8-byte units).
	 */
	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
			(packet->offset8 << 3));

	/* Make sure this is a valid nvsp packet */
	if (nvsp_packet->hdr.msg_type !=
	    NVSP_MSG1_TYPE_SEND_RNDIS_PKT) {
		netdev_err(ndev, "Unknown nvsp packet type received-"
			" %d\n", nvsp_packet->hdr.msg_type);
		return;
	}

	vmxferpage_packet = (struct vmtransfer_page_packet_header *)packet;

	if (vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID) {
		netdev_err(ndev, "Invalid xfer page set id - "
			   "expecting %x got %x\n", NETVSC_RECEIVE_BUFFER_ID,
			   vmxferpage_packet->xfer_pageset_id);
		return;
	}

	count = vmxferpage_packet->range_cnt;
	netvsc_packet->device = device;
	netvsc_packet->channel = channel;

	/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
	for (i = 0; i < count; i++) {
		/* Initialize the netvsc packet */
		netvsc_packet->status = NVSP_STAT_SUCCESS;
		/* Data lives inside the pre-shared receive buffer at the
		 * offset given by the range descriptor.
		 */
		netvsc_packet->data = (void *)((unsigned long)net_device->
			recv_buf + vmxferpage_packet->ranges[i].byte_offset);
		netvsc_packet->total_data_buflen =
					vmxferpage_packet->ranges[i].byte_count;

		/* Pass it to the upper layer */
		rndis_filter_receive(device, netvsc_packet);

		/* One failed range fails the whole transaction status. */
		if (netvsc_packet->status != NVSP_STAT_SUCCESS)
			status = NVSP_STAT_FAIL;
	}

	netvsc_send_recv_completion(device, channel, net_device,
				    vmxferpage_packet->d.trans_id, status);
}

929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963

/*
 * Handle an NVSP5 send-indirection-table message from the host: validate
 * the entry count and copy the table into the netvsc_device, where it is
 * used to pick a TX channel per hash bucket.
 */
static void netvsc_send_table(struct hv_device *hdev,
			      struct vmpacket_descriptor *vmpkt)
{
	struct netvsc_device *nvscdev;
	struct net_device *ndev;
	struct nvsp_message *nvmsg;
	u32 *tab;
	u32 count;
	int i;

	nvscdev = get_outbound_net_device(hdev);
	if (!nvscdev)
		return;
	ndev = nvscdev->ndev;

	nvmsg = (struct nvsp_message *)((unsigned long)vmpkt +
					(vmpkt->offset8 << 3));

	/* Only the indirection-table message is handled here. */
	if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE)
		return;

	count = nvmsg->msg.v5_msg.send_table.count;
	if (count != VRSS_SEND_TAB_SIZE) {
		netdev_err(ndev, "Received wrong send-table size:%u\n", count);
		return;
	}

	/* The table entries follow the message at the given offset. */
	tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
		      nvmsg->msg.v5_msg.send_table.offset);

	for (i = 0; i < count; i++)
		nvscdev->send_table[i] = tab[i];
}

/* Channel interrupt callback: drain all pending packets from the given
 * VMBus channel (primary or subchannel) and dispatch them by type.
 *
 * Starts with the device's pre-allocated per-channel buffer; if a packet
 * is larger (-ENOBUFS from vmbus_recvpacket_raw), a temporary larger
 * buffer is kmalloc'd with GFP_ATOMIC and freed again on exit.
 */
void netvsc_channel_cb(void *context)
{
	int ret;
	struct vmbus_channel *channel = (struct vmbus_channel *)context;
	struct hv_device *device;
	struct netvsc_device *net_device;
	u32 bytes_recvd;
	u64 request_id;
	struct vmpacket_descriptor *desc;
	unsigned char *buffer;
	int bufferlen = NETVSC_PACKET_SIZE;
	struct net_device *ndev;

	/* A subchannel reaches the hv_device through its primary channel. */
	if (channel->primary_channel != NULL)
		device = channel->primary_channel->device_obj;
	else
		device = channel->device_obj;

	net_device = get_inbound_net_device(device);
	if (!net_device)
		return;
	ndev = net_device->ndev;
	/* Per-channel scratch buffer set up in netvsc_device_add(). */
	buffer = get_per_channel_state(channel);

	do {
		ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
					   &bytes_recvd, &request_id);
		if (ret == 0) {
			if (bytes_recvd > 0) {
				desc = (struct vmpacket_descriptor *)buffer;
				switch (desc->type) {
				case VM_PKT_COMP:
					netvsc_send_completion(net_device,
								device, desc);
					break;

				case VM_PKT_DATA_USING_XFER_PAGES:
					netvsc_receive(net_device, channel,
						       device, desc);
					break;

				case VM_PKT_DATA_INBAND:
					netvsc_send_table(device, desc);
					break;

				default:
					netdev_err(ndev,
						   "unhandled packet type %d, "
						   "tid %llx len %d\n",
						   desc->type, request_id,
						   bytes_recvd);
					break;
				}

			} else {
				/*
				 * We are done for this pass.
				 */
				break;
			}

		} else if (ret == -ENOBUFS) {
			/* Free a previously-grown buffer, never the
			 * per-channel one (which stays NETVSC_PACKET_SIZE).
			 */
			if (bufferlen > NETVSC_PACKET_SIZE)
				kfree(buffer);
			/* Handle large packet */
			buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
			if (buffer == NULL) {
				/* Try again next time around */
				netdev_err(ndev,
					   "unable to allocate buffer of size "
					   "(%d)!!\n", bytes_recvd);
				break;
			}

			bufferlen = bytes_recvd;
		}
	} while (1);

	/* Release any temporary oversize buffer before returning. */
	if (bufferlen > NETVSC_PACKET_SIZE)
		kfree(buffer);
	return;
}
1045

1046 1047 1048 1049
/*
 * netvsc_device_add - Callback when the device belonging to this
 * driver is added
 */
1050
/* netvsc_device_add - Callback when the device belonging to this driver
 * is added.
 *
 * Allocates the netvsc_device, attaches the per-channel receive buffer
 * (must happen before vmbus_open(), since the channel callback reads it),
 * opens the channel, and connects to the NetVSP.  additional_info is a
 * struct netvsc_device_info * carrying the requested ring size.
 */
int netvsc_device_add(struct hv_device *device, void *additional_info)
{
	int ret = 0;
	int ring_size =
	((struct netvsc_device_info *)additional_info)->ring_size;
	struct netvsc_device *net_device;
	struct net_device *ndev;

	net_device = alloc_net_device(device);
	if (!net_device)
		return -ENOMEM;

	net_device->ring_size = ring_size;

	/*
	 * Coming into this function, struct net_device * is
	 * registered as the driver private data.
	 * In alloc_net_device(), we register struct netvsc_device *
	 * as the driver private data and stash away struct net_device *
	 * in struct netvsc_device *.
	 */
	ndev = net_device->ndev;

	/* Initialize the NetVSC channel extension */
	init_completion(&net_device->channel_init_wait);

	/* Must precede vmbus_open(): the callback uses this buffer. */
	set_per_channel_state(device->channel, net_device->cb_buffer);

	/* Open the channel */
	ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
			 ring_size * PAGE_SIZE, NULL, 0,
			 netvsc_channel_cb, device->channel);

	if (ret != 0) {
		netdev_err(ndev, "unable to open channel: %d\n", ret);
		goto cleanup;
	}

	/* Channel is opened */
	pr_info("hv_netvsc channel opened successfully\n");

	/* Queue 0 always maps to the primary channel. */
	net_device->chn_table[0] = device->channel;

	/* Connect with the NetVsp */
	ret = netvsc_connect_vsp(device);
	if (ret != 0) {
		netdev_err(ndev,
			"unable to connect to NetVSP - %d\n", ret);
		goto close;
	}

	return ret;

close:
	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

cleanup:
	free_netvsc_device(net_device);

	return ret;
}