/* netvsc.c - Hyper-V network VSC (virtual service client) driver */
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
21 22
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>

#include "hyperv_net.h"
36
static struct netvsc_device *alloc_net_device(struct hv_device *device)
37
{
38
	struct netvsc_device *net_device;
39
	struct net_device *ndev = hv_get_drvdata(device);
40

41 42
	net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
	if (!net_device)
43 44
		return NULL;

45
	init_waitqueue_head(&net_device->wait_drain);
46
	net_device->start_remove = false;
47
	net_device->destroy = false;
48
	net_device->dev = device;
49
	net_device->ndev = ndev;
50

51
	hv_set_drvdata(device, net_device);
52
	return net_device;
53 54
}

55
static struct netvsc_device *get_outbound_net_device(struct hv_device *device)
56
{
57
	struct netvsc_device *net_device;
58

59
	net_device = hv_get_drvdata(device);
60
	if (net_device && net_device->destroy)
61
		net_device = NULL;
62

63
	return net_device;
64 65
}

66
static struct netvsc_device *get_inbound_net_device(struct hv_device *device)
67
{
68
	struct netvsc_device *net_device;
69

70
	net_device = hv_get_drvdata(device);
71 72 73 74 75 76

	if (!net_device)
		goto get_in_err;

	if (net_device->destroy &&
		atomic_read(&net_device->num_outstanding_sends) == 0)
77
		net_device = NULL;
78

79
get_in_err:
80
	return net_device;
81 82 83
}


84 85 86 87
static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
{
	struct nvsp_message *revoke_packet;
	int ret = 0;
88
	struct net_device *ndev = net_device->ndev;
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115

	/*
	 * If we got a section count, it means we received a
	 * SendReceiveBufferComplete msg (ie sent
	 * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
	 * to send a revoke msg here
	 */
	if (net_device->recv_section_cnt) {
		/* Send the revoke receive buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
		revoke_packet->msg.v1_msg.
		revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;

		ret = vmbus_sendpacket(net_device->dev->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       (unsigned long)revoke_packet,
				       VM_PKT_DATA_INBAND, 0);
		/*
		 * If we failed here, we might as well return and
		 * have a leak rather than continue and a bugchk
		 */
		if (ret != 0) {
116
			netdev_err(ndev, "unable to send "
117
				"revoke receive buffer to netvsp\n");
118
			return ret;
119 120 121 122 123 124 125 126 127 128 129 130
		}
	}

	/* Teardown the gpadl on the vsp end */
	if (net_device->recv_buf_gpadl_handle) {
		ret = vmbus_teardown_gpadl(net_device->dev->channel,
			   net_device->recv_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
131
			netdev_err(ndev,
132
				   "unable to teardown receive buffer's gpadl\n");
133
			return ret;
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153
		}
		net_device->recv_buf_gpadl_handle = 0;
	}

	if (net_device->recv_buf) {
		/* Free up the receive buffer */
		free_pages((unsigned long)net_device->recv_buf,
			get_order(net_device->recv_buf_size));
		net_device->recv_buf = NULL;
	}

	if (net_device->recv_section) {
		net_device->recv_section_cnt = 0;
		kfree(net_device->recv_section);
		net_device->recv_section = NULL;
	}

	return ret;
}

154
static int netvsc_init_recv_buf(struct hv_device *device)
155
{
156
	int ret = 0;
157
	int t;
158 159
	struct netvsc_device *net_device;
	struct nvsp_message *init_packet;
160
	struct net_device *ndev;
161

162
	net_device = get_outbound_net_device(device);
163
	if (!net_device)
164
		return -ENODEV;
165
	ndev = net_device->ndev;
166

167
	net_device->recv_buf =
168 169
		(void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
				get_order(net_device->recv_buf_size));
170
	if (!net_device->recv_buf) {
171
		netdev_err(ndev, "unable to allocate receive "
172
			"buffer of size %d\n", net_device->recv_buf_size);
173
		ret = -ENOMEM;
174
		goto cleanup;
175 176
	}

177 178 179 180 181
	/*
	 * Establish the gpadl handle for this buffer on this
	 * channel.  Note: This call uses the vmbus connection rather
	 * than the channel to establish the gpadl handle.
	 */
182 183 184
	ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
				    net_device->recv_buf_size,
				    &net_device->recv_buf_gpadl_handle);
185
	if (ret != 0) {
186
		netdev_err(ndev,
187
			"unable to establish receive buffer's gpadl\n");
188
		goto cleanup;
189 190 191
	}


192
	/* Notify the NetVsp of the gpadl handle */
193
	init_packet = &net_device->channel_init_pkt;
194

195
	memset(init_packet, 0, sizeof(struct nvsp_message));
196

197 198 199 200 201
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
	init_packet->msg.v1_msg.send_recv_buf.
		gpadl_handle = net_device->recv_buf_gpadl_handle;
	init_packet->msg.v1_msg.
		send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
202

203
	/* Send the gpadl notification request */
204
	ret = vmbus_sendpacket(device->channel, init_packet,
205
			       sizeof(struct nvsp_message),
206
			       (unsigned long)init_packet,
207
			       VM_PKT_DATA_INBAND,
208
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
209
	if (ret != 0) {
210
		netdev_err(ndev,
211
			"unable to send receive buffer's gpadl to netvsp\n");
212
		goto cleanup;
213 214
	}

215
	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
216
	BUG_ON(t == 0);
217

218

219
	/* Check the response */
220 221
	if (init_packet->msg.v1_msg.
	    send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
222
		netdev_err(ndev, "Unable to complete receive buffer "
223
			   "initialization with NetVsp - status %d\n",
224 225
			   init_packet->msg.v1_msg.
			   send_recv_buf_complete.status);
226
		ret = -EINVAL;
227
		goto cleanup;
228 229
	}

230
	/* Parse the response */
231

232 233
	net_device->recv_section_cnt = init_packet->msg.
		v1_msg.send_recv_buf_complete.num_sections;
234

235 236 237 238 239
	net_device->recv_section = kmemdup(
		init_packet->msg.v1_msg.send_recv_buf_complete.sections,
		net_device->recv_section_cnt *
		sizeof(struct nvsp_1_receive_buffer_section),
		GFP_KERNEL);
240
	if (net_device->recv_section == NULL) {
241
		ret = -EINVAL;
242
		goto cleanup;
243 244
	}

245 246 247 248
	/*
	 * For 1st release, there should only be 1 section that represents the
	 * entire receive buffer
	 */
249 250
	if (net_device->recv_section_cnt != 1 ||
	    net_device->recv_section->offset != 0) {
251
		ret = -EINVAL;
252
		goto cleanup;
253 254
	}

255
	goto exit;
256

257
cleanup:
258
	netvsc_destroy_recv_buf(net_device);
259

260
exit:
261 262 263 264
	return ret;
}


265 266 267 268 269
/* Negotiate NVSP protocol version */
static int negotiate_nvsp_ver(struct hv_device *device,
			      struct netvsc_device *net_device,
			      struct nvsp_message *init_packet,
			      u32 nvsp_ver)
270
{
271
	int ret, t;
272

273
	memset(init_packet, 0, sizeof(struct nvsp_message));
274
	init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
275 276
	init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
	init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;
277

278
	/* Send the init request */
279
	ret = vmbus_sendpacket(device->channel, init_packet,
280
			       sizeof(struct nvsp_message),
281
			       (unsigned long)init_packet,
282
			       VM_PKT_DATA_INBAND,
283
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
284

285
	if (ret != 0)
286
		return ret;
287

288
	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
289

290 291
	if (t == 0)
		return -ETIMEDOUT;
292

293
	if (init_packet->msg.init_msg.init_complete.status !=
294 295
	    NVSP_STAT_SUCCESS)
		return -EINVAL;
296

297 298 299 300 301 302
	if (nvsp_ver != NVSP_PROTOCOL_VERSION_2)
		return 0;

	/* NVSPv2 only: Send NDIS config */
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
303
	init_packet->msg.v2_msg.send_ndis_config.mtu = net_device->ndev->mtu;
304
	init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;
305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336

	ret = vmbus_sendpacket(device->channel, init_packet,
				sizeof(struct nvsp_message),
				(unsigned long)init_packet,
				VM_PKT_DATA_INBAND, 0);

	return ret;
}

static int netvsc_connect_vsp(struct hv_device *device)
{
	int ret;
	struct netvsc_device *net_device;
	struct nvsp_message *init_packet;
	int ndis_version;
	struct net_device *ndev;

	net_device = get_outbound_net_device(device);
	if (!net_device)
		return -ENODEV;
	ndev = net_device->ndev;

	init_packet = &net_device->channel_init_pkt;

	/* Negotiate the latest NVSP protocol supported */
	if (negotiate_nvsp_ver(device, net_device, init_packet,
			       NVSP_PROTOCOL_VERSION_2) == 0) {
		net_device->nvsp_version = NVSP_PROTOCOL_VERSION_2;
	} else if (negotiate_nvsp_ver(device, net_device, init_packet,
				    NVSP_PROTOCOL_VERSION_1) == 0) {
		net_device->nvsp_version = NVSP_PROTOCOL_VERSION_1;
	} else {
337
		ret = -EPROTO;
338
		goto cleanup;
339
	}
340 341 342

	pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);

343
	/* Send the ndis version */
344
	memset(init_packet, 0, sizeof(struct nvsp_message));
345

346
	ndis_version = 0x00050001;
347

348 349 350
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
	init_packet->msg.v1_msg.
		send_ndis_ver.ndis_major_ver =
351
				(ndis_version & 0xFFFF0000) >> 16;
352 353
	init_packet->msg.v1_msg.
		send_ndis_ver.ndis_minor_ver =
354
				ndis_version & 0xFFFF;
355

356
	/* Send the init request */
357
	ret = vmbus_sendpacket(device->channel, init_packet,
358 359 360
				sizeof(struct nvsp_message),
				(unsigned long)init_packet,
				VM_PKT_DATA_INBAND, 0);
361
	if (ret != 0)
362
		goto cleanup;
363 364

	/* Post the big receive buffer to NetVSP */
365
	ret = netvsc_init_recv_buf(device);
366

367
cleanup:
368 369 370
	return ret;
}

371
static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
372
{
373
	netvsc_destroy_recv_buf(net_device);
374 375
}

376
/*
377
 * netvsc_device_remove - Callback when the root bus device is removed
378
 */
379
int netvsc_device_remove(struct hv_device *device)
380
{
381 382
	struct netvsc_device *net_device;
	struct hv_netvsc_packet *netvsc_packet, *pos;
383
	unsigned long flags;
384

385
	net_device = hv_get_drvdata(device);
386

387
	netvsc_disconnect_vsp(net_device);
388

389
	/*
390 391 392 393 394
	 * Since we have already drained, we don't need to busy wait
	 * as was done in final_release_stor_device()
	 * Note that we cannot set the ext pointer to NULL until
	 * we have drained - to drain the outgoing packets, we need to
	 * allow incoming packets.
395
	 */
396 397

	spin_lock_irqsave(&device->channel->inbound_lock, flags);
398
	hv_set_drvdata(device, NULL);
399
	spin_unlock_irqrestore(&device->channel->inbound_lock, flags);
400

401 402 403 404
	/*
	 * At this point, no one should be accessing net_device
	 * except in here
	 */
405
	dev_notice(&device->device, "net device safe to remove\n");
406

407
	/* Now, we can close the channel safely */
408
	vmbus_close(device->channel);
409

410
	/* Release all resources */
411
	list_for_each_entry_safe(netvsc_packet, pos,
412
				 &net_device->recv_pkt_list, list_ent) {
413
		list_del(&netvsc_packet->list_ent);
414
		kfree(netvsc_packet);
415 416
	}

417
	kfree(net_device);
418
	return 0;
419 420
}

421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438

#define RING_AVAIL_PERCENT_HIWATER 20
#define RING_AVAIL_PERCENT_LOWATER 10

/*
 * Get the percentage of available bytes to write in the ring.
 * The return value is in range from 0 to 100.
 */
static inline u32 hv_ringbuf_avail_percent(
		struct hv_ring_buffer_info *ring_info)
{
	u32 avail_read, avail_write;

	hv_get_ringbuffer_availbytes(ring_info, &avail_read, &avail_write);

	return avail_write * 100 / ring_info->ring_datasize;
}

439
static void netvsc_send_completion(struct hv_device *device,
440
				   struct vmpacket_descriptor *packet)
441
{
442 443 444
	struct netvsc_device *net_device;
	struct nvsp_message *nvsp_packet;
	struct hv_netvsc_packet *nvsc_packet;
445
	struct net_device *ndev;
446

447
	net_device = get_inbound_net_device(device);
448
	if (!net_device)
449
		return;
450
	ndev = net_device->ndev;
451

452
	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
453
			(packet->offset8 << 3));
454

455 456 457 458 459
	if ((nvsp_packet->hdr.msg_type == NVSP_MSG_TYPE_INIT_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE)) {
460
		/* Copy the response back */
461
		memcpy(&net_device->channel_init_pkt, nvsp_packet,
462
		       sizeof(struct nvsp_message));
463
		complete(&net_device->channel_init_wait);
464 465
	} else if (nvsp_packet->hdr.msg_type ==
		   NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
466 467
		int num_outstanding_sends;

468
		/* Get the send context */
469
		nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
470
			packet->trans_id;
471

472
		/* Notify the layer above us */
473 474
		nvsc_packet->completion.send.send_completion(
			nvsc_packet->completion.send.send_completion_ctx);
475

476 477
		num_outstanding_sends =
			atomic_dec_return(&net_device->num_outstanding_sends);
478

479 480 481
		if (net_device->destroy && num_outstanding_sends == 0)
			wake_up(&net_device->wait_drain);

482 483 484 485 486
		if (netif_queue_stopped(ndev) && !net_device->start_remove &&
			(hv_ringbuf_avail_percent(&device->channel->outbound)
			> RING_AVAIL_PERCENT_HIWATER ||
			num_outstanding_sends < 1))
				netif_wake_queue(ndev);
487
	} else {
488
		netdev_err(ndev, "Unknown send completion packet type- "
489
			   "%d received!!\n", nvsp_packet->hdr.msg_type);
490 491 492 493
	}

}

494
int netvsc_send(struct hv_device *device,
495
			struct hv_netvsc_packet *packet)
496
{
497
	struct netvsc_device *net_device;
498
	int ret = 0;
499
	struct nvsp_message sendMessage;
500
	struct net_device *ndev;
501

502
	net_device = get_outbound_net_device(device);
503
	if (!net_device)
504
		return -ENODEV;
505
	ndev = net_device->ndev;
506

507
	sendMessage.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
508
	if (packet->is_data_pkt) {
509
		/* 0 is RMC_DATA; */
510
		sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 0;
511 512
	} else {
		/* 1 is RMC_CONTROL; */
513
		sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
514
	}
515

516
	/* Not using send buffer section */
517 518 519
	sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
		0xFFFFFFFF;
	sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
520

521
	if (packet->page_buf_cnt) {
522
		ret = vmbus_sendpacket_pagebuffer(device->channel,
523 524
						  packet->page_buf,
						  packet->page_buf_cnt,
525 526
						  &sendMessage,
						  sizeof(struct nvsp_message),
527
						  (unsigned long)packet);
528
	} else {
529
		ret = vmbus_sendpacket(device->channel, &sendMessage,
530 531 532 533
				sizeof(struct nvsp_message),
				(unsigned long)packet,
				VM_PKT_DATA_INBAND,
				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
534 535 536

	}

537 538
	if (ret == 0) {
		atomic_inc(&net_device->num_outstanding_sends);
539 540 541 542 543 544 545
		if (hv_ringbuf_avail_percent(&device->channel->outbound) <
			RING_AVAIL_PERCENT_LOWATER) {
			netif_stop_queue(ndev);
			if (atomic_read(&net_device->
				num_outstanding_sends) < 1)
				netif_wake_queue(ndev);
		}
546 547
	} else if (ret == -EAGAIN) {
		netif_stop_queue(ndev);
548
		if (atomic_read(&net_device->num_outstanding_sends) < 1) {
549
			netif_wake_queue(ndev);
550 551
			ret = -ENOSPC;
		}
552
	} else {
553
		netdev_err(ndev, "Unable to send packet %p ret %d\n",
554
			   packet, ret);
555
	}
556 557 558 559

	return ret;
}

560 561 562 563 564 565
static void netvsc_send_recv_completion(struct hv_device *device,
					u64 transaction_id)
{
	struct nvsp_message recvcompMessage;
	int retries = 0;
	int ret;
566 567 568 569
	struct net_device *ndev;
	struct netvsc_device *net_device = hv_get_drvdata(device);

	ndev = net_device->ndev;
570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585

	recvcompMessage.hdr.msg_type =
				NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;

	/* FIXME: Pass in the status */
	recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status =
		NVSP_STAT_SUCCESS;

retry_send_cmplt:
	/* Send the completion */
	ret = vmbus_sendpacket(device->channel, &recvcompMessage,
			       sizeof(struct nvsp_message), transaction_id,
			       VM_PKT_COMP, 0);
	if (ret == 0) {
		/* success */
		/* no-op */
586
	} else if (ret == -EAGAIN) {
587 588
		/* no more room...wait a bit and attempt to retry 3 times */
		retries++;
589
		netdev_err(ndev, "unable to send receive completion pkt"
590
			" (tid %llx)...retrying %d\n", transaction_id, retries);
591 592 593 594 595

		if (retries < 4) {
			udelay(100);
			goto retry_send_cmplt;
		} else {
596
			netdev_err(ndev, "unable to send receive "
597
				"completion pkt (tid %llx)...give up retrying\n",
598 599 600
				transaction_id);
		}
	} else {
601
		netdev_err(ndev, "unable to send receive "
602
			"completion pkt - %llx\n", transaction_id);
603 604 605
	}
}

606 607 608 609
/* Send a receive completion packet to RNDIS device (ie NetVsp) */
static void netvsc_receive_completion(void *context)
{
	struct hv_netvsc_packet *packet = context;
610
	struct hv_device *device = packet->device;
611 612 613 614
	struct netvsc_device *net_device;
	u64 transaction_id = 0;
	bool fsend_receive_comp = false;
	unsigned long flags;
615
	struct net_device *ndev;
616 617 618 619 620 621 622

	/*
	 * Even though it seems logical to do a GetOutboundNetDevice() here to
	 * send out receive completion, we are using GetInboundNetDevice()
	 * since we may have disable outbound traffic already.
	 */
	net_device = get_inbound_net_device(device);
623
	if (!net_device)
624
		return;
625
	ndev = net_device->ndev;
626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653

	/* Overloading use of the lock. */
	spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);

	packet->xfer_page_pkt->count--;

	/*
	 * Last one in the line that represent 1 xfer page packet.
	 * Return the xfer page packet itself to the freelist
	 */
	if (packet->xfer_page_pkt->count == 0) {
		fsend_receive_comp = true;
		transaction_id = packet->completion.recv.recv_completion_tid;
		list_add_tail(&packet->xfer_page_pkt->list_ent,
			      &net_device->recv_pkt_list);

	}

	/* Put the packet back */
	list_add_tail(&packet->list_ent, &net_device->recv_pkt_list);
	spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags);

	/* Send a receive completion for the xfer page packet */
	if (fsend_receive_comp)
		netvsc_send_recv_completion(device, transaction_id);

}

654
static void netvsc_receive(struct hv_device *device,
655
			    struct vmpacket_descriptor *packet)
656
{
657 658 659 660
	struct netvsc_device *net_device;
	struct vmtransfer_page_packet_header *vmxferpage_packet;
	struct nvsp_message *nvsp_packet;
	struct hv_netvsc_packet *netvsc_packet = NULL;
661
	/* struct netvsc_driver *netvscDriver; */
662
	struct xferpage_packet *xferpage_packet = NULL;
663 664
	int i;
	int count = 0;
665
	unsigned long flags;
666
	struct net_device *ndev;
667

668
	LIST_HEAD(listHead);
669

670
	net_device = get_inbound_net_device(device);
671
	if (!net_device)
672
		return;
673
	ndev = net_device->ndev;
674

675 676 677 678
	/*
	 * All inbound packets other than send completion should be xfer page
	 * packet
	 */
679
	if (packet->type != VM_PKT_DATA_USING_XFER_PAGES) {
680
		netdev_err(ndev, "Unknown packet type received - %d\n",
681
			   packet->type);
682 683 684
		return;
	}

685
	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
686
			(packet->offset8 << 3));
687

688
	/* Make sure this is a valid nvsp packet */
689 690
	if (nvsp_packet->hdr.msg_type !=
	    NVSP_MSG1_TYPE_SEND_RNDIS_PKT) {
691
		netdev_err(ndev, "Unknown nvsp packet type received-"
692
			" %d\n", nvsp_packet->hdr.msg_type);
693 694 695
		return;
	}

696
	vmxferpage_packet = (struct vmtransfer_page_packet_header *)packet;
697

698
	if (vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID) {
699
		netdev_err(ndev, "Invalid xfer page set id - "
700
			   "expecting %x got %x\n", NETVSC_RECEIVE_BUFFER_ID,
701
			   vmxferpage_packet->xfer_pageset_id);
702 703 704
		return;
	}

705 706 707 708 709 710
	/*
	 * Grab free packets (range count + 1) to represent this xfer
	 * page packet. +1 to represent the xfer page packet itself.
	 * We grab it here so that we know exactly how many we can
	 * fulfil
	 */
711 712 713
	spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
	while (!list_empty(&net_device->recv_pkt_list)) {
		list_move_tail(net_device->recv_pkt_list.next, &listHead);
714
		if (++count == vmxferpage_packet->range_cnt + 1)
715 716
			break;
	}
717
	spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags);
718

719 720 721 722 723
	/*
	 * We need at least 2 netvsc pkts (1 to represent the xfer
	 * page and at least 1 for the range) i.e. we can handled
	 * some of the xfer page packet ranges...
	 */
724
	if (count < 2) {
725
		netdev_err(ndev, "Got only %d netvsc pkt...needed "
726
			"%d pkts. Dropping this xfer page packet completely!\n",
727
			count, vmxferpage_packet->range_cnt + 1);
728

729
		/* Return it to the freelist */
730
		spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
731
		for (i = count; i != 0; i--) {
732
			list_move_tail(listHead.next,
733
				       &net_device->recv_pkt_list);
734
		}
735
		spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
736
				       flags);
737

738
		netvsc_send_recv_completion(device,
739
					    vmxferpage_packet->d.trans_id);
740 741 742 743

		return;
	}

744
	/* Remove the 1st packet to represent the xfer page packet itself */
745
	xferpage_packet = (struct xferpage_packet *)listHead.next;
746
	list_del(&xferpage_packet->list_ent);
747

748
	/* This is how much we can satisfy */
749
	xferpage_packet->count = count - 1;
750

751
	if (xferpage_packet->count != vmxferpage_packet->range_cnt) {
752
		netdev_err(ndev, "Needed %d netvsc pkts to satisfy "
753
			"this xfer page...got %d\n",
754
			vmxferpage_packet->range_cnt, xferpage_packet->count);
755 756
	}

757
	/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
758
	for (i = 0; i < (count - 1); i++) {
759
		netvsc_packet = (struct hv_netvsc_packet *)listHead.next;
760
		list_del(&netvsc_packet->list_ent);
761

762
		/* Initialize the netvsc packet */
763 764
		netvsc_packet->xfer_page_pkt = xferpage_packet;
		netvsc_packet->completion.recv.recv_completion =
765
					netvsc_receive_completion;
766
		netvsc_packet->completion.recv.recv_completion_ctx =
767
					netvsc_packet;
768
		netvsc_packet->device = device;
769
		/* Save this so that we can send it back */
770
		netvsc_packet->completion.recv.recv_completion_tid =
771
					vmxferpage_packet->d.trans_id;
772

773 774
		netvsc_packet->data = (void *)((unsigned long)net_device->
			recv_buf + vmxferpage_packet->ranges[i].byte_offset);
775
		netvsc_packet->total_data_buflen =
776
					vmxferpage_packet->ranges[i].byte_count;
777

778
		/* Pass it to the upper layer */
779
		rndis_filter_receive(device, netvsc_packet);
780

781
		netvsc_receive_completion(netvsc_packet->
782
				completion.recv.recv_completion_ctx);
783 784 785 786
	}

}

787
static void netvsc_channel_cb(void *context)
788
{
789
	int ret;
790 791 792 793
	struct hv_device *device = context;
	struct netvsc_device *net_device;
	u32 bytes_recvd;
	u64 request_id;
794
	unsigned char *packet;
795
	struct vmpacket_descriptor *desc;
796 797
	unsigned char *buffer;
	int bufferlen = NETVSC_PACKET_SIZE;
798
	struct net_device *ndev;
799

800
	packet = kzalloc(NETVSC_PACKET_SIZE * sizeof(unsigned char),
801
			 GFP_ATOMIC);
802 803 804 805
	if (!packet)
		return;
	buffer = packet;

806
	net_device = get_inbound_net_device(device);
807
	if (!net_device)
808
		goto out;
809
	ndev = net_device->ndev;
810

811
	do {
812
		ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen,
813
					   &bytes_recvd, &request_id);
814
		if (ret == 0) {
815
			if (bytes_recvd > 0) {
816
				desc = (struct vmpacket_descriptor *)buffer;
817 818
				switch (desc->type) {
				case VM_PKT_COMP:
819
					netvsc_send_completion(device, desc);
820 821
					break;

822
				case VM_PKT_DATA_USING_XFER_PAGES:
823
					netvsc_receive(device, desc);
824 825 826
					break;

				default:
827
					netdev_err(ndev,
828 829
						   "unhandled packet type %d, "
						   "tid %llx len %d\n",
830
						   desc->type, request_id,
831
						   bytes_recvd);
832
					break;
833 834
				}

835
				/* reset */
836
				if (bufferlen > NETVSC_PACKET_SIZE) {
837
					kfree(buffer);
838
					buffer = packet;
839
					bufferlen = NETVSC_PACKET_SIZE;
840
				}
841
			} else {
842
				/* reset */
843
				if (bufferlen > NETVSC_PACKET_SIZE) {
844
					kfree(buffer);
845
					buffer = packet;
846
					bufferlen = NETVSC_PACKET_SIZE;
847 848 849 850
				}

				break;
			}
851
		} else if (ret == -ENOBUFS) {
852
			/* Handle large packet */
853
			buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
854
			if (buffer == NULL) {
855
				/* Try again next time around */
856
				netdev_err(ndev,
857
					   "unable to allocate buffer of size "
858
					   "(%d)!!\n", bytes_recvd);
859 860 861
				break;
			}

862
			bufferlen = bytes_recvd;
863 864 865
		}
	} while (1);

866 867
out:
	kfree(buffer);
868 869
	return;
}
870

871 872 873 874
/*
 * netvsc_device_add - Callback when the device belonging to this
 * driver is added
 */
875
int netvsc_device_add(struct hv_device *device, void *additional_info)
876 877 878
{
	int ret = 0;
	int i;
879 880
	int ring_size =
	((struct netvsc_device_info *)additional_info)->ring_size;
881 882
	struct netvsc_device *net_device;
	struct hv_netvsc_packet *packet, *pos;
883
	struct net_device *ndev;
884 885 886

	net_device = alloc_net_device(device);
	if (!net_device) {
887
		ret = -ENOMEM;
888 889 890
		goto cleanup;
	}

891 892 893 894 895 896 897 898 899
	/*
	 * Coming into this function, struct net_device * is
	 * registered as the driver private data.
	 * In alloc_net_device(), we register struct netvsc_device *
	 * as the driver private data and stash away struct net_device *
	 * in struct netvsc_device *.
	 */
	ndev = net_device->ndev;

900 901 902 903 904 905 906
	/* Initialize the NetVSC channel extension */
	net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
	spin_lock_init(&net_device->recv_pkt_list_lock);

	INIT_LIST_HEAD(&net_device->recv_pkt_list);

	for (i = 0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) {
907
		packet = kzalloc(sizeof(struct hv_netvsc_packet), GFP_KERNEL);
908 909 910 911 912 913
		if (!packet)
			break;

		list_add_tail(&packet->list_ent,
			      &net_device->recv_pkt_list);
	}
914
	init_completion(&net_device->channel_init_wait);
915 916

	/* Open the channel */
917 918
	ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
			 ring_size * PAGE_SIZE, NULL, 0,
919 920 921
			 netvsc_channel_cb, device);

	if (ret != 0) {
922
		netdev_err(ndev, "unable to open channel: %d\n", ret);
923 924 925 926
		goto cleanup;
	}

	/* Channel is opened */
927
	pr_info("hv_netvsc channel opened successfully\n");
928 929 930 931

	/* Connect with the NetVsp */
	ret = netvsc_connect_vsp(device);
	if (ret != 0) {
932
		netdev_err(ndev,
933
			"unable to connect to NetVSP - %d\n", ret);
934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952
		goto close;
	}

	return ret;

close:
	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

cleanup:

	if (net_device) {
		list_for_each_entry_safe(packet, pos,
					 &net_device->recv_pkt_list,
					 list_ent) {
			list_del(&packet->list_ent);
			kfree(packet);
		}

953
		kfree(net_device);
954 955 956 957
	}

	return ret;
}