netvsc.c 29.0 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
20 21
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

22
#include <linux/kernel.h>
23 24
#include <linux/sched.h>
#include <linux/wait.h>
25
#include <linux/mm.h>
26
#include <linux/delay.h>
27
#include <linux/io.h>
28
#include <linux/slab.h>
29
#include <linux/netdevice.h>
30
#include <linux/if_ether.h>
31
#include <asm/sync_bitops.h>
32

33
#include "hyperv_net.h"
34 35


36
static struct netvsc_device *alloc_net_device(struct hv_device *device)
37
{
38
	struct netvsc_device *net_device;
39
	struct net_device *ndev = hv_get_drvdata(device);
40

41 42
	net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
	if (!net_device)
43 44
		return NULL;

45
	init_waitqueue_head(&net_device->wait_drain);
46
	net_device->start_remove = false;
47
	net_device->destroy = false;
48
	net_device->dev = device;
49
	net_device->ndev = ndev;
50

51
	hv_set_drvdata(device, net_device);
52
	return net_device;
53 54
}

55
static struct netvsc_device *get_outbound_net_device(struct hv_device *device)
56
{
57
	struct netvsc_device *net_device;
58

59
	net_device = hv_get_drvdata(device);
60
	if (net_device && net_device->destroy)
61
		net_device = NULL;
62

63
	return net_device;
64 65
}

66
static struct netvsc_device *get_inbound_net_device(struct hv_device *device)
67
{
68
	struct netvsc_device *net_device;
69

70
	net_device = hv_get_drvdata(device);
71 72 73 74 75 76

	if (!net_device)
		goto get_in_err;

	if (net_device->destroy &&
		atomic_read(&net_device->num_outstanding_sends) == 0)
77
		net_device = NULL;
78

79
get_in_err:
80
	return net_device;
81 82 83
}


84
static int netvsc_destroy_buf(struct netvsc_device *net_device)
85 86 87
{
	struct nvsp_message *revoke_packet;
	int ret = 0;
88
	struct net_device *ndev = net_device->ndev;
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115

	/*
	 * If we got a section count, it means we received a
	 * SendReceiveBufferComplete msg (ie sent
	 * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
	 * to send a revoke msg here
	 */
	if (net_device->recv_section_cnt) {
		/* Send the revoke receive buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
		revoke_packet->msg.v1_msg.
		revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;

		ret = vmbus_sendpacket(net_device->dev->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       (unsigned long)revoke_packet,
				       VM_PKT_DATA_INBAND, 0);
		/*
		 * If we failed here, we might as well return and
		 * have a leak rather than continue and a bugchk
		 */
		if (ret != 0) {
116
			netdev_err(ndev, "unable to send "
117
				"revoke receive buffer to netvsp\n");
118
			return ret;
119 120 121 122 123 124 125 126 127 128 129 130
		}
	}

	/* Teardown the gpadl on the vsp end */
	if (net_device->recv_buf_gpadl_handle) {
		ret = vmbus_teardown_gpadl(net_device->dev->channel,
			   net_device->recv_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
131
			netdev_err(ndev,
132
				   "unable to teardown receive buffer's gpadl\n");
133
			return ret;
134 135 136 137 138 139
		}
		net_device->recv_buf_gpadl_handle = 0;
	}

	if (net_device->recv_buf) {
		/* Free up the receive buffer */
140
		vfree(net_device->recv_buf);
141 142 143 144 145 146 147 148 149
		net_device->recv_buf = NULL;
	}

	if (net_device->recv_section) {
		net_device->recv_section_cnt = 0;
		kfree(net_device->recv_section);
		net_device->recv_section = NULL;
	}

150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191
	/* Deal with the send buffer we may have setup.
	 * If we got a  send section size, it means we received a
	 * SendsendBufferComplete msg (ie sent
	 * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
	 * to send a revoke msg here
	 */
	if (net_device->send_section_size) {
		/* Send the revoke receive buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
		revoke_packet->msg.v1_msg.revoke_recv_buf.id = 0;

		ret = vmbus_sendpacket(net_device->dev->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       (unsigned long)revoke_packet,
				       VM_PKT_DATA_INBAND, 0);
		/* If we failed here, we might as well return and
		 * have a leak rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev, "unable to send "
				   "revoke send buffer to netvsp\n");
			return ret;
		}
	}
	/* Teardown the gpadl on the vsp end */
	if (net_device->send_buf_gpadl_handle) {
		ret = vmbus_teardown_gpadl(net_device->dev->channel,
					   net_device->send_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev,
				   "unable to teardown send buffer's gpadl\n");
			return ret;
		}
192
		net_device->send_buf_gpadl_handle = 0;
193 194 195
	}
	if (net_device->send_buf) {
		/* Free up the receive buffer */
196
		vfree(net_device->send_buf);
197 198 199 200
		net_device->send_buf = NULL;
	}
	kfree(net_device->send_section_map);

201 202 203
	return ret;
}

204
static int netvsc_init_buf(struct hv_device *device)
205
{
206
	int ret = 0;
207
	int t;
208 209
	struct netvsc_device *net_device;
	struct nvsp_message *init_packet;
210
	struct net_device *ndev;
211

212
	net_device = get_outbound_net_device(device);
213
	if (!net_device)
214
		return -ENODEV;
215
	ndev = net_device->ndev;
216

217
	net_device->recv_buf = vzalloc(net_device->recv_buf_size);
218
	if (!net_device->recv_buf) {
219
		netdev_err(ndev, "unable to allocate receive "
220
			"buffer of size %d\n", net_device->recv_buf_size);
221
		ret = -ENOMEM;
222
		goto cleanup;
223 224
	}

225 226 227 228 229
	/*
	 * Establish the gpadl handle for this buffer on this
	 * channel.  Note: This call uses the vmbus connection rather
	 * than the channel to establish the gpadl handle.
	 */
230 231 232
	ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
				    net_device->recv_buf_size,
				    &net_device->recv_buf_gpadl_handle);
233
	if (ret != 0) {
234
		netdev_err(ndev,
235
			"unable to establish receive buffer's gpadl\n");
236
		goto cleanup;
237 238 239
	}


240
	/* Notify the NetVsp of the gpadl handle */
241
	init_packet = &net_device->channel_init_pkt;
242

243
	memset(init_packet, 0, sizeof(struct nvsp_message));
244

245 246 247 248 249
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
	init_packet->msg.v1_msg.send_recv_buf.
		gpadl_handle = net_device->recv_buf_gpadl_handle;
	init_packet->msg.v1_msg.
		send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
250

251
	/* Send the gpadl notification request */
252
	ret = vmbus_sendpacket(device->channel, init_packet,
253
			       sizeof(struct nvsp_message),
254
			       (unsigned long)init_packet,
255
			       VM_PKT_DATA_INBAND,
256
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
257
	if (ret != 0) {
258
		netdev_err(ndev,
259
			"unable to send receive buffer's gpadl to netvsp\n");
260
		goto cleanup;
261 262
	}

263
	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
264
	BUG_ON(t == 0);
265

266

267
	/* Check the response */
268 269
	if (init_packet->msg.v1_msg.
	    send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
270
		netdev_err(ndev, "Unable to complete receive buffer "
271
			   "initialization with NetVsp - status %d\n",
272 273
			   init_packet->msg.v1_msg.
			   send_recv_buf_complete.status);
274
		ret = -EINVAL;
275
		goto cleanup;
276 277
	}

278
	/* Parse the response */
279

280 281
	net_device->recv_section_cnt = init_packet->msg.
		v1_msg.send_recv_buf_complete.num_sections;
282

283 284 285 286 287
	net_device->recv_section = kmemdup(
		init_packet->msg.v1_msg.send_recv_buf_complete.sections,
		net_device->recv_section_cnt *
		sizeof(struct nvsp_1_receive_buffer_section),
		GFP_KERNEL);
288
	if (net_device->recv_section == NULL) {
289
		ret = -EINVAL;
290
		goto cleanup;
291 292
	}

293 294 295 296
	/*
	 * For 1st release, there should only be 1 section that represents the
	 * entire receive buffer
	 */
297 298
	if (net_device->recv_section_cnt != 1 ||
	    net_device->recv_section->offset != 0) {
299
		ret = -EINVAL;
300
		goto cleanup;
301 302
	}

303 304
	/* Now setup the send buffer.
	 */
305
	net_device->send_buf = vzalloc(net_device->send_buf_size);
306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377
	if (!net_device->send_buf) {
		netdev_err(ndev, "unable to allocate send "
			   "buffer of size %d\n", net_device->send_buf_size);
		ret = -ENOMEM;
		goto cleanup;
	}

	/* Establish the gpadl handle for this buffer on this
	 * channel.  Note: This call uses the vmbus connection rather
	 * than the channel to establish the gpadl handle.
	 */
	ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
				    net_device->send_buf_size,
				    &net_device->send_buf_gpadl_handle);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to establish send buffer's gpadl\n");
		goto cleanup;
	}

	/* Notify the NetVsp of the gpadl handle */
	init_packet = &net_device->channel_init_pkt;
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
	init_packet->msg.v1_msg.send_recv_buf.gpadl_handle =
		net_device->send_buf_gpadl_handle;
	init_packet->msg.v1_msg.send_recv_buf.id = 0;

	/* Send the gpadl notification request */
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to send send buffer's gpadl to netvsp\n");
		goto cleanup;
	}

	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
	BUG_ON(t == 0);

	/* Check the response */
	if (init_packet->msg.v1_msg.
	    send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
		netdev_err(ndev, "Unable to complete send buffer "
			   "initialization with NetVsp - status %d\n",
			   init_packet->msg.v1_msg.
			   send_recv_buf_complete.status);
		ret = -EINVAL;
		goto cleanup;
	}

	/* Parse the response */
	net_device->send_section_size = init_packet->msg.
				v1_msg.send_send_buf_complete.section_size;

	/* Section count is simply the size divided by the section size.
	 */
	net_device->send_section_cnt =
		net_device->send_buf_size/net_device->send_section_size;

	dev_info(&device->device, "Send section size: %d, Section count:%d\n",
		 net_device->send_section_size, net_device->send_section_cnt);

	/* Setup state for managing the send buffer. */
	net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
					     BITS_PER_LONG);

	net_device->send_section_map =
		kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
378 379
	if (net_device->send_section_map == NULL) {
		ret = -ENOMEM;
380
		goto cleanup;
381
	}
382

383
	goto exit;
384

385
cleanup:
386
	netvsc_destroy_buf(net_device);
387

388
exit:
389 390 391 392
	return ret;
}


393 394 395 396 397
/*
 * negotiate_nvsp_ver - offer exactly one NVSP protocol version to the host
 * @device: hv device whose channel carries the messages
 * @net_device: netvsc device owning the init completion
 * @init_packet: message buffer; it is overwritten by this function
 * @nvsp_ver: the single version offered as both minimum and maximum
 *
 * Sends an INIT message and waits up to five seconds for the completion.
 * For any version other than NVSP_PROTOCOL_VERSION_1 an NDIS config message
 * carrying the MTU and the ieee8021q capability is sent afterwards.
 *
 * Returns 0 on success, -ETIMEDOUT when no completion arrived in time,
 * -EINVAL when the host did not report success, or the error of a failing
 * vmbus_sendpacket() call.
 */
static int negotiate_nvsp_ver(struct hv_device *device,
			      struct netvsc_device *net_device,
			      struct nvsp_message *init_packet,
			      u32 nvsp_ver)
{
	int rc;
	int left;

	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
	init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
	init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;

	/* Send the init request and ask for a completion */
	rc = vmbus_sendpacket(device->channel, init_packet,
			      sizeof(struct nvsp_message),
			      (unsigned long)init_packet,
			      VM_PKT_DATA_INBAND,
			      VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (rc != 0)
		return rc;

	left = wait_for_completion_timeout(&net_device->channel_init_wait,
					   5*HZ);
	if (left == 0)
		return -ETIMEDOUT;

	if (init_packet->msg.init_msg.init_complete.status !=
	    NVSP_STAT_SUCCESS)
		return -EINVAL;

	/* Version 1 has no NDIS config step */
	if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
		return 0;

	/* NVSPv2 only: Send NDIS config */
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
	init_packet->msg.v2_msg.send_ndis_config.mtu = net_device->ndev->mtu;
	init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;

	return vmbus_sendpacket(device->channel, init_packet,
				sizeof(struct nvsp_message),
				(unsigned long)init_packet,
				VM_PKT_DATA_INBAND, 0);
}

static int netvsc_connect_vsp(struct hv_device *device)
{
	int ret;
	struct netvsc_device *net_device;
	struct nvsp_message *init_packet;
	int ndis_version;
	struct net_device *ndev;
449 450 451
	u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
		NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 };
	int i, num_ver = 4; /* number of different NVSP versions */
452 453 454 455 456 457 458 459 460

	net_device = get_outbound_net_device(device);
	if (!net_device)
		return -ENODEV;
	ndev = net_device->ndev;

	init_packet = &net_device->channel_init_pkt;

	/* Negotiate the latest NVSP protocol supported */
461 462 463 464 465 466 467 468
	for (i = num_ver - 1; i >= 0; i--)
		if (negotiate_nvsp_ver(device, net_device, init_packet,
				       ver_list[i])  == 0) {
			net_device->nvsp_version = ver_list[i];
			break;
		}

	if (i < 0) {
469
		ret = -EPROTO;
470
		goto cleanup;
471
	}
472 473 474

	pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);

475
	/* Send the ndis version */
476
	memset(init_packet, 0, sizeof(struct nvsp_message));
477

478
	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
479
		ndis_version = 0x00060001;
480 481
	else
		ndis_version = 0x0006001e;
482

483 484 485
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
	init_packet->msg.v1_msg.
		send_ndis_ver.ndis_major_ver =
486
				(ndis_version & 0xFFFF0000) >> 16;
487 488
	init_packet->msg.v1_msg.
		send_ndis_ver.ndis_minor_ver =
489
				ndis_version & 0xFFFF;
490

491
	/* Send the init request */
492
	ret = vmbus_sendpacket(device->channel, init_packet,
493 494 495
				sizeof(struct nvsp_message),
				(unsigned long)init_packet,
				VM_PKT_DATA_INBAND, 0);
496
	if (ret != 0)
497
		goto cleanup;
498 499

	/* Post the big receive buffer to NetVSP */
500 501 502 503
	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
		net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
	else
		net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
504
	net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
505

506
	ret = netvsc_init_buf(device);
507

508
cleanup:
509 510 511
	return ret;
}

512
/*
 * netvsc_disconnect_vsp - undo the buffer setup for @net_device
 *
 * Calls netvsc_destroy_buf(); its result is not reported to the caller.
 */
static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
{
	(void)netvsc_destroy_buf(net_device);
}

517
/*
518
 * netvsc_device_remove - Callback when the root bus device is removed
519
 */
520
int netvsc_device_remove(struct hv_device *device)
521
{
522
	struct netvsc_device *net_device;
523
	unsigned long flags;
524

525
	net_device = hv_get_drvdata(device);
526

527
	netvsc_disconnect_vsp(net_device);
528

529
	/*
530 531 532 533 534
	 * Since we have already drained, we don't need to busy wait
	 * as was done in final_release_stor_device()
	 * Note that we cannot set the ext pointer to NULL until
	 * we have drained - to drain the outgoing packets, we need to
	 * allow incoming packets.
535
	 */
536 537

	spin_lock_irqsave(&device->channel->inbound_lock, flags);
538
	hv_set_drvdata(device, NULL);
539
	spin_unlock_irqrestore(&device->channel->inbound_lock, flags);
540

541 542 543 544
	/*
	 * At this point, no one should be accessing net_device
	 * except in here
	 */
545
	dev_notice(&device->device, "net device safe to remove\n");
546

547
	/* Now, we can close the channel safely */
548
	vmbus_close(device->channel);
549

550
	/* Release all resources */
551 552 553
	if (net_device->sub_cb_buf)
		vfree(net_device->sub_cb_buf);

554
	kfree(net_device);
555
	return 0;
556 557
}

558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575

/*
 * Thresholds, in percent of free outbound ring space, that the send and
 * send-completion paths compare hv_ringbuf_avail_percent() against.
 */
#define RING_AVAIL_PERCENT_HIWATER 20
#define RING_AVAIL_PERCENT_LOWATER 10

/*
 * Report how many bytes of the ring are available for writing, expressed
 * as a percentage of the ring's data size.
 * The return value is in range from 0 to 100.
 */
static inline u32 hv_ringbuf_avail_percent(
		struct hv_ring_buffer_info *ring_info)
{
	u32 readable;
	u32 writable;

	/* only the writable amount is needed; readable is discarded */
	hv_get_ringbuffer_availbytes(ring_info, &readable, &writable);

	writable *= 100;
	return writable / ring_info->ring_datasize;
}

576 577 578 579 580 581
/*
 * netvsc_free_send_slot - give a send-buffer section back
 * @net_device: netvsc device owning the send section bitmap
 * @index: section index whose bit is flipped
 *
 * Flips bit @index in send_section_map.  The bit is toggled rather than
 * cleared, so this must only be called for a section that is currently
 * marked as in use.
 */
static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
					 u32 index)
{
	sync_change_bit(index, net_device->send_section_map);
}

582 583
static void netvsc_send_completion(struct netvsc_device *net_device,
				   struct hv_device *device,
584
				   struct vmpacket_descriptor *packet)
585
{
586 587
	struct nvsp_message *nvsp_packet;
	struct hv_netvsc_packet *nvsc_packet;
588
	struct net_device *ndev;
589
	u32 send_index;
590

591
	ndev = net_device->ndev;
592

593
	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
594
			(packet->offset8 << 3));
595

596 597 598 599
	if ((nvsp_packet->hdr.msg_type == NVSP_MSG_TYPE_INIT_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
600 601 602
	     NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG5_TYPE_SUBCHANNEL)) {
603
		/* Copy the response back */
604
		memcpy(&net_device->channel_init_pkt, nvsp_packet,
605
		       sizeof(struct nvsp_message));
606
		complete(&net_device->channel_init_wait);
607 608
	} else if (nvsp_packet->hdr.msg_type ==
		   NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
609
		int num_outstanding_sends;
610 611 612
		u16 q_idx = 0;
		struct vmbus_channel *channel = device->channel;
		int queue_sends;
613

614
		/* Get the send context */
615
		nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
616
			packet->trans_id;
617

618
		/* Notify the layer above us */
619
		if (nvsc_packet) {
620 621 622
			send_index = nvsc_packet->send_buf_index;
			if (send_index != NETVSC_INVALID_INDEX)
				netvsc_free_send_slot(net_device, send_index);
623 624
			q_idx = nvsc_packet->q_idx;
			channel = nvsc_packet->channel;
625 626
			nvsc_packet->send_completion(nvsc_packet->
						     send_completion_ctx);
627
		}
628

629 630
		num_outstanding_sends =
			atomic_dec_return(&net_device->num_outstanding_sends);
631 632
		queue_sends = atomic_dec_return(&net_device->
						queue_sends[q_idx]);
633

634 635 636
		if (net_device->destroy && num_outstanding_sends == 0)
			wake_up(&net_device->wait_drain);

637 638 639 640 641 642
		if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
		    !net_device->start_remove &&
		    (hv_ringbuf_avail_percent(&channel->outbound) >
		     RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
				netif_tx_wake_queue(netdev_get_tx_queue(
						    ndev, q_idx));
643
	} else {
644
		netdev_err(ndev, "Unknown send completion packet type- "
645
			   "%d received!!\n", nvsp_packet->hdr.msg_type);
646 647 648 649
	}

}

650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695
/*
 * netvsc_get_next_send_section - claim a free send-buffer section
 * @net_device: netvsc device owning the send section bitmap
 *
 * Walks the bitmap word by word, skipping full words, and tries to set the
 * first zero bit of a word.  When the bit was already set by the time of
 * the test-and-set, the search moves on to the next word.
 *
 * Returns the claimed section index, or NETVSC_INVALID_INDEX when no
 * section below send_section_cnt could be claimed.
 */
static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
{
	unsigned long *map = (unsigned long *)net_device->send_section_map;
	u32 nr_words = net_device->map_words;
	u32 nr_sections = net_device->send_section_cnt;
	unsigned long bit;
	unsigned long slot;
	int word;

	for (word = 0; word < nr_words; word++) {
		/* every bit set: nothing free in this word */
		if (map[word] == ~0UL)
			continue;

		bit = ffz(map[word]);
		if (sync_test_and_set_bit(bit, &map[word]))
			continue;

		slot = bit + (word * BITS_PER_LONG);

		/* bits at or beyond the section count are padding */
		if (slot >= nr_sections)
			break;

		return slot;
	}

	return NETVSC_INVALID_INDEX;
}

/*
 * netvsc_copy_to_send_buf - copy a packet's page buffers into a send section
 * @net_device: netvsc device owning the send buffer
 * @section_index: section of the send buffer to copy into
 * @packet: packet whose page_buf entries are copied back to back
 *
 * Returns the total number of bytes copied.  No bound on the section size
 * is checked here.
 */
u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
			    unsigned int section_index,
			    struct hv_netvsc_packet *packet)
{
	char *base = net_device->send_buf;
	char *out = base + (section_index * net_device->send_section_size);
	u32 copied = 0;
	int idx;

	for (idx = 0; idx < packet->page_buf_cnt; idx++) {
		u32 len = packet->page_buf[idx].len;
		u32 offset = packet->page_buf[idx].offset;
		char *page = phys_to_virt(packet->page_buf[idx].pfn <<
					  PAGE_SHIFT);

		memcpy(out, page + offset, len);
		out += len;
		copied += len;
	}

	return copied;
}

696
int netvsc_send(struct hv_device *device,
697
			struct hv_netvsc_packet *packet)
698
{
699
	struct netvsc_device *net_device;
700
	int ret = 0;
701
	struct nvsp_message sendMessage;
702
	struct net_device *ndev;
703
	struct vmbus_channel *out_channel = NULL;
704
	u64 req_id;
705 706 707 708
	unsigned int section_index = NETVSC_INVALID_INDEX;
	u32 msg_size = 0;
	struct sk_buff *skb;

709

710
	net_device = get_outbound_net_device(device);
711
	if (!net_device)
712
		return -ENODEV;
713
	ndev = net_device->ndev;
714

715
	sendMessage.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
716
	if (packet->is_data_pkt) {
717
		/* 0 is RMC_DATA; */
718
		sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 0;
719 720
	} else {
		/* 1 is RMC_CONTROL; */
721
		sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
722
	}
723

724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740
	/* Attempt to send via sendbuf */
	if (packet->total_data_buflen < net_device->send_section_size) {
		section_index = netvsc_get_next_send_section(net_device);
		if (section_index != NETVSC_INVALID_INDEX) {
			msg_size = netvsc_copy_to_send_buf(net_device,
							   section_index,
							   packet);
			skb = (struct sk_buff *)
			      (unsigned long)packet->send_completion_tid;
			if (skb)
				dev_kfree_skb_any(skb);
			packet->page_buf_cnt = 0;
		}
	}
	packet->send_buf_index = section_index;


741
	sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
742 743
		section_index;
	sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = msg_size;
744

745
	if (packet->send_completion)
746
		req_id = (ulong)packet;
747 748 749
	else
		req_id = 0;

750 751 752 753 754
	out_channel = net_device->chn_table[packet->q_idx];
	if (out_channel == NULL)
		out_channel = device->channel;
	packet->channel = out_channel;

755
	if (packet->page_buf_cnt) {
756
		ret = vmbus_sendpacket_pagebuffer(out_channel,
757 758
						  packet->page_buf,
						  packet->page_buf_cnt,
759 760
						  &sendMessage,
						  sizeof(struct nvsp_message),
761
						  req_id);
762
	} else {
763
		ret = vmbus_sendpacket(out_channel, &sendMessage,
764
				sizeof(struct nvsp_message),
765
				req_id,
766 767
				VM_PKT_DATA_INBAND,
				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
768 769
	}

770 771
	if (ret == 0) {
		atomic_inc(&net_device->num_outstanding_sends);
772 773 774
		atomic_inc(&net_device->queue_sends[packet->q_idx]);

		if (hv_ringbuf_avail_percent(&out_channel->outbound) <
775
			RING_AVAIL_PERCENT_LOWATER) {
776 777 778
			netif_tx_stop_queue(netdev_get_tx_queue(
					    ndev, packet->q_idx));

779
			if (atomic_read(&net_device->
780 781 782
				queue_sends[packet->q_idx]) < 1)
				netif_tx_wake_queue(netdev_get_tx_queue(
						    ndev, packet->q_idx));
783
		}
784
	} else if (ret == -EAGAIN) {
785 786 787 788 789
		netif_tx_stop_queue(netdev_get_tx_queue(
				    ndev, packet->q_idx));
		if (atomic_read(&net_device->queue_sends[packet->q_idx]) < 1) {
			netif_tx_wake_queue(netdev_get_tx_queue(
					    ndev, packet->q_idx));
790 791
			ret = -ENOSPC;
		}
792
	} else {
793
		netdev_err(ndev, "Unable to send packet %p ret %d\n",
794
			   packet, ret);
795
	}
796 797 798 799

	return ret;
}

800
static void netvsc_send_recv_completion(struct hv_device *device,
801
					struct vmbus_channel *channel,
802
					struct netvsc_device *net_device,
803
					u64 transaction_id, u32 status)
804 805 806 807
{
	struct nvsp_message recvcompMessage;
	int retries = 0;
	int ret;
808 809 810
	struct net_device *ndev;

	ndev = net_device->ndev;
811 812 813 814

	recvcompMessage.hdr.msg_type =
				NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;

815
	recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;
816 817 818

retry_send_cmplt:
	/* Send the completion */
819
	ret = vmbus_sendpacket(channel, &recvcompMessage,
820 821 822 823 824
			       sizeof(struct nvsp_message), transaction_id,
			       VM_PKT_COMP, 0);
	if (ret == 0) {
		/* success */
		/* no-op */
825
	} else if (ret == -EAGAIN) {
826 827
		/* no more room...wait a bit and attempt to retry 3 times */
		retries++;
828
		netdev_err(ndev, "unable to send receive completion pkt"
829
			" (tid %llx)...retrying %d\n", transaction_id, retries);
830 831 832 833 834

		if (retries < 4) {
			udelay(100);
			goto retry_send_cmplt;
		} else {
835
			netdev_err(ndev, "unable to send receive "
836
				"completion pkt (tid %llx)...give up retrying\n",
837 838 839
				transaction_id);
		}
	} else {
840
		netdev_err(ndev, "unable to send receive "
841
			"completion pkt - %llx\n", transaction_id);
842 843 844
	}
}

845
static void netvsc_receive(struct netvsc_device *net_device,
846
			struct vmbus_channel *channel,
847 848
			struct hv_device *device,
			struct vmpacket_descriptor *packet)
849
{
850 851
	struct vmtransfer_page_packet_header *vmxferpage_packet;
	struct nvsp_message *nvsp_packet;
852 853 854
	struct hv_netvsc_packet nv_pkt;
	struct hv_netvsc_packet *netvsc_packet = &nv_pkt;
	u32 status = NVSP_STAT_SUCCESS;
855 856
	int i;
	int count = 0;
857
	struct net_device *ndev;
858

859
	ndev = net_device->ndev;
860

861 862 863 864
	/*
	 * All inbound packets other than send completion should be xfer page
	 * packet
	 */
865
	if (packet->type != VM_PKT_DATA_USING_XFER_PAGES) {
866
		netdev_err(ndev, "Unknown packet type received - %d\n",
867
			   packet->type);
868 869 870
		return;
	}

871
	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
872
			(packet->offset8 << 3));
873

874
	/* Make sure this is a valid nvsp packet */
875 876
	if (nvsp_packet->hdr.msg_type !=
	    NVSP_MSG1_TYPE_SEND_RNDIS_PKT) {
877
		netdev_err(ndev, "Unknown nvsp packet type received-"
878
			" %d\n", nvsp_packet->hdr.msg_type);
879 880 881
		return;
	}

882
	vmxferpage_packet = (struct vmtransfer_page_packet_header *)packet;
883

884
	if (vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID) {
885
		netdev_err(ndev, "Invalid xfer page set id - "
886
			   "expecting %x got %x\n", NETVSC_RECEIVE_BUFFER_ID,
887
			   vmxferpage_packet->xfer_pageset_id);
888 889 890
		return;
	}

891 892 893
	count = vmxferpage_packet->range_cnt;
	netvsc_packet->device = device;
	netvsc_packet->channel = channel;
894

895
	/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
896
	for (i = 0; i < count; i++) {
897
		/* Initialize the netvsc packet */
898
		netvsc_packet->status = NVSP_STAT_SUCCESS;
899 900
		netvsc_packet->data = (void *)((unsigned long)net_device->
			recv_buf + vmxferpage_packet->ranges[i].byte_offset);
901
		netvsc_packet->total_data_buflen =
902
					vmxferpage_packet->ranges[i].byte_count;
903

904
		/* Pass it to the upper layer */
905
		rndis_filter_receive(device, netvsc_packet);
906

907 908
		if (netvsc_packet->status != NVSP_STAT_SUCCESS)
			status = NVSP_STAT_FAIL;
909 910
	}

911 912
	netvsc_send_recv_completion(device, channel, net_device,
				    vmxferpage_packet->d.trans_id, status);
913 914
}

915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949

static void netvsc_send_table(struct hv_device *hdev,
			      struct vmpacket_descriptor *vmpkt)
{
	struct netvsc_device *nvscdev;
	struct net_device *ndev;
	struct nvsp_message *nvmsg;
	int i;
	u32 count, *tab;

	nvscdev = get_outbound_net_device(hdev);
	if (!nvscdev)
		return;
	ndev = nvscdev->ndev;

	nvmsg = (struct nvsp_message *)((unsigned long)vmpkt +
					(vmpkt->offset8 << 3));

	if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE)
		return;

	count = nvmsg->msg.v5_msg.send_table.count;
	if (count != VRSS_SEND_TAB_SIZE) {
		netdev_err(ndev, "Received wrong send-table size:%u\n", count);
		return;
	}

	tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
		      nvmsg->msg.v5_msg.send_table.offset);

	for (i = 0; i < count; i++)
		nvscdev->send_table[i] = tab[i];
}

void netvsc_channel_cb(void *context)
950
{
951
	int ret;
952 953
	struct vmbus_channel *channel = (struct vmbus_channel *)context;
	struct hv_device *device;
954 955 956
	struct netvsc_device *net_device;
	u32 bytes_recvd;
	u64 request_id;
957
	struct vmpacket_descriptor *desc;
958 959
	unsigned char *buffer;
	int bufferlen = NETVSC_PACKET_SIZE;
960
	struct net_device *ndev;
961

962 963 964 965 966
	if (channel->primary_channel != NULL)
		device = channel->primary_channel->device_obj;
	else
		device = channel->device_obj;

967
	net_device = get_inbound_net_device(device);
968
	if (!net_device)
969
		return;
970
	ndev = net_device->ndev;
971
	buffer = get_per_channel_state(channel);
972

973
	do {
974
		ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
975
					   &bytes_recvd, &request_id);
976
		if (ret == 0) {
977
			if (bytes_recvd > 0) {
978
				desc = (struct vmpacket_descriptor *)buffer;
979 980
				switch (desc->type) {
				case VM_PKT_COMP:
981 982
					netvsc_send_completion(net_device,
								device, desc);
983 984
					break;

985
				case VM_PKT_DATA_USING_XFER_PAGES:
986 987 988 989 990 991
					netvsc_receive(net_device, channel,
						       device, desc);
					break;

				case VM_PKT_DATA_INBAND:
					netvsc_send_table(device, desc);
992 993 994
					break;

				default:
995
					netdev_err(ndev,
996 997
						   "unhandled packet type %d, "
						   "tid %llx len %d\n",
998
						   desc->type, request_id,
999
						   bytes_recvd);
1000
					break;
1001 1002
				}

1003
			} else {
1004 1005 1006
				/*
				 * We are done for this pass.
				 */
1007 1008
				break;
			}
1009

1010
		} else if (ret == -ENOBUFS) {
1011 1012
			if (bufferlen > NETVSC_PACKET_SIZE)
				kfree(buffer);
1013
			/* Handle large packet */
1014
			buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
1015
			if (buffer == NULL) {
1016
				/* Try again next time around */
1017
				netdev_err(ndev,
1018
					   "unable to allocate buffer of size "
1019
					   "(%d)!!\n", bytes_recvd);
1020 1021 1022
				break;
			}

1023
			bufferlen = bytes_recvd;
1024 1025 1026
		}
	} while (1);

1027 1028
	if (bufferlen > NETVSC_PACKET_SIZE)
		kfree(buffer);
1029 1030
	return;
}
1031

1032 1033 1034 1035
/*
 * netvsc_device_add - Callback when the device belonging to this
 * driver is added
 */
1036
int netvsc_device_add(struct hv_device *device, void *additional_info)
1037 1038
{
	int ret = 0;
1039 1040
	int ring_size =
	((struct netvsc_device_info *)additional_info)->ring_size;
1041
	struct netvsc_device *net_device;
1042
	struct net_device *ndev;
1043 1044 1045

	net_device = alloc_net_device(device);
	if (!net_device) {
1046
		ret = -ENOMEM;
1047 1048 1049
		goto cleanup;
	}

1050 1051
	net_device->ring_size = ring_size;

1052 1053 1054 1055 1056 1057 1058 1059 1060
	/*
	 * Coming into this function, struct net_device * is
	 * registered as the driver private data.
	 * In alloc_net_device(), we register struct netvsc_device *
	 * as the driver private data and stash away struct net_device *
	 * in struct netvsc_device *.
	 */
	ndev = net_device->ndev;

1061
	/* Initialize the NetVSC channel extension */
1062
	init_completion(&net_device->channel_init_wait);
1063

1064 1065
	set_per_channel_state(device->channel, net_device->cb_buffer);

1066
	/* Open the channel */
1067 1068
	ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
			 ring_size * PAGE_SIZE, NULL, 0,
1069
			 netvsc_channel_cb, device->channel);
1070 1071

	if (ret != 0) {
1072
		netdev_err(ndev, "unable to open channel: %d\n", ret);
1073 1074 1075 1076
		goto cleanup;
	}

	/* Channel is opened */
1077
	pr_info("hv_netvsc channel opened successfully\n");
1078

1079 1080
	net_device->chn_table[0] = device->channel;

1081 1082 1083
	/* Connect with the NetVsp */
	ret = netvsc_connect_vsp(device);
	if (ret != 0) {
1084
		netdev_err(ndev,
1085
			"unable to connect to NetVSP - %d\n", ret);
1086 1087 1088 1089 1090 1091 1092 1093 1094 1095
		goto close;
	}

	return ret;

close:
	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

cleanup:
1096
	kfree(net_device);
1097 1098 1099

	return ret;
}