netvsc.c 25.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
18
 *   Haiyang Zhang <haiyangz@microsoft.com>
19 20
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
21 22
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

23
#include <linux/kernel.h>
24 25
#include <linux/sched.h>
#include <linux/wait.h>
26
#include <linux/mm.h>
27
#include <linux/delay.h>
28
#include <linux/io.h>
29
#include <linux/slab.h>
30
#include <linux/netdevice.h>
31
#include <linux/if_ether.h>
32

33
#include "hyperv_net.h"
34 35


36
static struct netvsc_device *alloc_net_device(struct hv_device *device)
37
{
38
	struct netvsc_device *net_device;
39
	struct net_device *ndev = hv_get_drvdata(device);
40

41 42
	net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
	if (!net_device)
43 44
		return NULL;

45
	init_waitqueue_head(&net_device->wait_drain);
46
	net_device->start_remove = false;
47
	net_device->destroy = false;
48
	net_device->dev = device;
49
	net_device->ndev = ndev;
50

51
	hv_set_drvdata(device, net_device);
52
	return net_device;
53 54
}

55
static struct netvsc_device *get_outbound_net_device(struct hv_device *device)
56
{
57
	struct netvsc_device *net_device;
58

59
	net_device = hv_get_drvdata(device);
60
	if (net_device && net_device->destroy)
61
		net_device = NULL;
62

63
	return net_device;
64 65
}

66
static struct netvsc_device *get_inbound_net_device(struct hv_device *device)
67
{
68
	struct netvsc_device *net_device;
69

70
	net_device = hv_get_drvdata(device);
71 72 73 74 75 76

	if (!net_device)
		goto get_in_err;

	if (net_device->destroy &&
		atomic_read(&net_device->num_outstanding_sends) == 0)
77
		net_device = NULL;
78

79
get_in_err:
80
	return net_device;
81 82 83
}


84 85 86 87
/*
 * Tear down the receive buffer shared with the NetVSP, in the order the
 * host requires: revoke the buffer on the host, tear down its gpadl,
 * then free the local pages and the section table. Safe to call on a
 * partially initialized device (each step is guarded), so it doubles as
 * the error-unwind path for netvsc_init_recv_buf(). Returns 0 on
 * success or the first vmbus error; on error the remaining resources
 * are intentionally leaked rather than risking a host-side fault.
 */
static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
{
	struct nvsp_message *revoke_packet;
	int ret = 0;
	struct net_device *ndev = net_device->ndev;

	/*
	 * If we got a section count, it means we received a
	 * SendReceiveBufferComplete msg (ie sent
	 * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
	 * to send a revoke msg here
	 */
	if (net_device->recv_section_cnt) {
		/* Send the revoke receive buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
		revoke_packet->msg.v1_msg.
		revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;

		ret = vmbus_sendpacket(net_device->dev->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       (unsigned long)revoke_packet,
				       VM_PKT_DATA_INBAND, 0);
		/*
		 * If we failed here, we might as well return and
		 * have a leak rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev, "unable to send "
				"revoke receive buffer to netvsp\n");
			return ret;
		}
	}

	/* Teardown the gpadl on the vsp end */
	if (net_device->recv_buf_gpadl_handle) {
		ret = vmbus_teardown_gpadl(net_device->dev->channel,
			   net_device->recv_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev,
				   "unable to teardown receive buffer's gpadl\n");
			return ret;
		}
		/* handle 0 marks "no gpadl" for any later teardown call */
		net_device->recv_buf_gpadl_handle = 0;
	}

	if (net_device->recv_buf) {
		/* Free up the receive buffer */
		free_pages((unsigned long)net_device->recv_buf,
			get_order(net_device->recv_buf_size));
		net_device->recv_buf = NULL;
	}

	if (net_device->recv_section) {
		/* A count of 0 marks the buffer as no longer established,
		 * so the revoke step above is skipped on a re-entry. */
		net_device->recv_section_cnt = 0;
		kfree(net_device->recv_section);
		net_device->recv_section = NULL;
	}

	return ret;
}

154
static int netvsc_init_recv_buf(struct hv_device *device)
155
{
156
	int ret = 0;
157
	int t;
158 159
	struct netvsc_device *net_device;
	struct nvsp_message *init_packet;
160
	struct net_device *ndev;
161

162
	net_device = get_outbound_net_device(device);
163
	if (!net_device)
164
		return -ENODEV;
165
	ndev = net_device->ndev;
166

167
	net_device->recv_buf =
168 169
		(void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
				get_order(net_device->recv_buf_size));
170
	if (!net_device->recv_buf) {
171
		netdev_err(ndev, "unable to allocate receive "
172
			"buffer of size %d\n", net_device->recv_buf_size);
173
		ret = -ENOMEM;
174
		goto cleanup;
175 176
	}

177 178 179 180 181
	/*
	 * Establish the gpadl handle for this buffer on this
	 * channel.  Note: This call uses the vmbus connection rather
	 * than the channel to establish the gpadl handle.
	 */
182 183 184
	ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
				    net_device->recv_buf_size,
				    &net_device->recv_buf_gpadl_handle);
185
	if (ret != 0) {
186
		netdev_err(ndev,
187
			"unable to establish receive buffer's gpadl\n");
188
		goto cleanup;
189 190 191
	}


192
	/* Notify the NetVsp of the gpadl handle */
193
	init_packet = &net_device->channel_init_pkt;
194

195
	memset(init_packet, 0, sizeof(struct nvsp_message));
196

197 198 199 200 201
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
	init_packet->msg.v1_msg.send_recv_buf.
		gpadl_handle = net_device->recv_buf_gpadl_handle;
	init_packet->msg.v1_msg.
		send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
202

203
	/* Send the gpadl notification request */
204
	ret = vmbus_sendpacket(device->channel, init_packet,
205
			       sizeof(struct nvsp_message),
206
			       (unsigned long)init_packet,
207
			       VM_PKT_DATA_INBAND,
208
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
209
	if (ret != 0) {
210
		netdev_err(ndev,
211
			"unable to send receive buffer's gpadl to netvsp\n");
212
		goto cleanup;
213 214
	}

215
	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
216
	BUG_ON(t == 0);
217

218

219
	/* Check the response */
220 221
	if (init_packet->msg.v1_msg.
	    send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
222
		netdev_err(ndev, "Unable to complete receive buffer "
223
			   "initialization with NetVsp - status %d\n",
224 225
			   init_packet->msg.v1_msg.
			   send_recv_buf_complete.status);
226
		ret = -EINVAL;
227
		goto cleanup;
228 229
	}

230
	/* Parse the response */
231

232 233
	net_device->recv_section_cnt = init_packet->msg.
		v1_msg.send_recv_buf_complete.num_sections;
234

235 236 237 238 239
	net_device->recv_section = kmemdup(
		init_packet->msg.v1_msg.send_recv_buf_complete.sections,
		net_device->recv_section_cnt *
		sizeof(struct nvsp_1_receive_buffer_section),
		GFP_KERNEL);
240
	if (net_device->recv_section == NULL) {
241
		ret = -EINVAL;
242
		goto cleanup;
243 244
	}

245 246 247 248
	/*
	 * For 1st release, there should only be 1 section that represents the
	 * entire receive buffer
	 */
249 250
	if (net_device->recv_section_cnt != 1 ||
	    net_device->recv_section->offset != 0) {
251
		ret = -EINVAL;
252
		goto cleanup;
253 254
	}

255
	goto exit;
256

257
cleanup:
258
	netvsc_destroy_recv_buf(net_device);
259

260
exit:
261 262 263 264
	return ret;
}


265 266 267 268 269
/*
 * Negotiate NVSP protocol version.
 *
 * Offers exactly @nvsp_ver (min == max) to the host via an INIT
 * message and waits up to 5 seconds for the reply, which arrives in
 * @init_packet through netvsc_send_completion(). For NVSPv2 a
 * SEND_NDIS_CONFIG message (MTU + 802.1Q capability) is sent as well.
 *
 * Returns 0 on success, -ETIMEDOUT if the host never replies,
 * -EINVAL if the host rejects the version, or a vmbus send error.
 */
static int negotiate_nvsp_ver(struct hv_device *device,
			      struct netvsc_device *net_device,
			      struct nvsp_message *init_packet,
			      u32 nvsp_ver)
{
	int ret, t;

	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
	/* min == max: propose exactly one version per attempt */
	init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
	init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;

	/* Send the init request */
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if (ret != 0)
		return ret;

	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);

	if (t == 0)
		return -ETIMEDOUT;

	if (init_packet->msg.init_msg.init_complete.status !=
	    NVSP_STAT_SUCCESS)
		return -EINVAL;

	if (nvsp_ver != NVSP_PROTOCOL_VERSION_2)
		return 0;

	/* NVSPv2 only: Send NDIS config */
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
	init_packet->msg.v2_msg.send_ndis_config.mtu = net_device->ndev->mtu;
	init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;

	/* Fire-and-forget: no completion is requested for this message */
	ret = vmbus_sendpacket(device->channel, init_packet,
				sizeof(struct nvsp_message),
				(unsigned long)init_packet,
				VM_PKT_DATA_INBAND, 0);

	return ret;
}

/*
 * Bring up the NVSP connection with the host: negotiate the protocol
 * version (trying v2 first, then falling back to v1), report the NDIS
 * version we implement, and post the receive buffer to the NetVSP.
 * Returns 0 on success or a negative errno.
 */
static int netvsc_connect_vsp(struct hv_device *device)
{
	int ret;
	struct netvsc_device *net_device;
	struct nvsp_message *init_packet;
	int ndis_version;
	struct net_device *ndev;

	net_device = get_outbound_net_device(device);
	if (!net_device)
		return -ENODEV;
	ndev = net_device->ndev;

	init_packet = &net_device->channel_init_pkt;

	/* Negotiate the latest NVSP protocol supported */
	if (negotiate_nvsp_ver(device, net_device, init_packet,
			       NVSP_PROTOCOL_VERSION_2) == 0) {
		net_device->nvsp_version = NVSP_PROTOCOL_VERSION_2;
	} else if (negotiate_nvsp_ver(device, net_device, init_packet,
				    NVSP_PROTOCOL_VERSION_1) == 0) {
		net_device->nvsp_version = NVSP_PROTOCOL_VERSION_1;
	} else {
		/* Host accepted neither version we speak */
		ret = -EPROTO;
		goto cleanup;
	}

	pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);

	/* Send the ndis version */
	memset(init_packet, 0, sizeof(struct nvsp_message));

	/* 0x00050001 encodes NDIS 5.1: major in the high 16 bits,
	 * minor in the low 16 bits (split out below). */
	ndis_version = 0x00050001;

	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
	init_packet->msg.v1_msg.
		send_ndis_ver.ndis_major_ver =
				(ndis_version & 0xFFFF0000) >> 16;
	init_packet->msg.v1_msg.
		send_ndis_ver.ndis_minor_ver =
				ndis_version & 0xFFFF;

	/* Send the init request */
	ret = vmbus_sendpacket(device->channel, init_packet,
				sizeof(struct nvsp_message),
				(unsigned long)init_packet,
				VM_PKT_DATA_INBAND, 0);
	if (ret != 0)
		goto cleanup;

	/* Post the big receive buffer to NetVSP */
	ret = netvsc_init_recv_buf(device);

cleanup:
	return ret;
}

371
/*
 * Undo netvsc_connect_vsp(): release the receive buffer shared with
 * the host. The vmbus channel itself is closed by the caller.
 */
static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
{
	netvsc_destroy_recv_buf(net_device);
}

376
/*
 * netvsc_device_remove - Callback when the root bus device is removed
 *
 * Disconnects from the NetVSP, detaches the driver data under the
 * channel's inbound lock (so the channel callback can no longer find
 * it), closes the channel, and frees the preallocated receive packet
 * list and the netvsc_device itself. Always returns 0.
 */
int netvsc_device_remove(struct hv_device *device)
{
	struct netvsc_device *net_device;
	struct hv_netvsc_packet *netvsc_packet, *pos;
	unsigned long flags;

	net_device = hv_get_drvdata(device);

	netvsc_disconnect_vsp(net_device);

	/*
	 * Since we have already drained, we don't need to busy wait
	 * as was done in final_release_stor_device()
	 * Note that we cannot set the ext pointer to NULL until
	 * we have drained - to drain the outgoing packets, we need to
	 * allow incoming packets.
	 */

	/* Clearing drvdata under inbound_lock excludes the channel
	 * callback, which looks the device up through the same pointer. */
	spin_lock_irqsave(&device->channel->inbound_lock, flags);
	hv_set_drvdata(device, NULL);
	spin_unlock_irqrestore(&device->channel->inbound_lock, flags);

	/*
	 * At this point, no one should be accessing net_device
	 * except in here
	 */
	dev_notice(&device->device, "net device safe to remove\n");

	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

	/* Release all resources */
	list_for_each_entry_safe(netvsc_packet, pos,
				 &net_device->recv_pkt_list, list_ent) {
		list_del(&netvsc_packet->list_ent);
		kfree(netvsc_packet);
	}

	kfree(net_device);
	return 0;
}

421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438

#define RING_AVAIL_PERCENT_HIWATER 20
#define RING_AVAIL_PERCENT_LOWATER 10

/*
 * Percentage (0-100) of the ring buffer still free for writing.
 */
static inline u32 hv_ringbuf_avail_percent(
		struct hv_ring_buffer_info *ring_info)
{
	u32 bytes_readable;
	u32 bytes_writable;

	hv_get_ringbuffer_availbytes(ring_info, &bytes_readable,
				     &bytes_writable);

	return bytes_writable * 100 / ring_info->ring_datasize;
}

439
/*
 * Handle a VM_PKT_COMP packet from the host. Channel-init responses
 * (INIT / SEND_RECV_BUF / SEND_SEND_BUF completes) are copied into
 * channel_init_pkt and signalled via channel_init_wait; RNDIS send
 * completions invoke the sender's callback, decrement the outstanding
 * send count, and manage queue wake-up and teardown draining.
 */
static void netvsc_send_completion(struct hv_device *device,
				   struct vmpacket_descriptor *packet)
{
	struct netvsc_device *net_device;
	struct nvsp_message *nvsp_packet;
	struct hv_netvsc_packet *nvsc_packet;
	struct net_device *ndev;

	net_device = get_inbound_net_device(device);
	if (!net_device)
		return;
	ndev = net_device->ndev;

	/* The NVSP payload follows the descriptor header; offset8 is in
	 * units of 8 bytes. */
	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
			(packet->offset8 << 3));

	if ((nvsp_packet->hdr.msg_type == NVSP_MSG_TYPE_INIT_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE)) {
		/* Copy the response back */
		memcpy(&net_device->channel_init_pkt, nvsp_packet,
		       sizeof(struct nvsp_message));
		complete(&net_device->channel_init_wait);
	} else if (nvsp_packet->hdr.msg_type ==
		   NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
		int num_outstanding_sends;

		/* Get the send context */
		nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
			packet->trans_id;

		/* Notify the layer above us */
		nvsc_packet->completion.send.send_completion(
			nvsc_packet->completion.send.send_completion_ctx);

		num_outstanding_sends =
			atomic_dec_return(&net_device->num_outstanding_sends);

		/* Wake anyone waiting in device teardown for the sends
		 * to drain. */
		if (net_device->destroy && num_outstanding_sends == 0)
			wake_up(&net_device->wait_drain);

		/* Restart a stopped queue once the ring recovers past the
		 * high watermark, or immediately if nothing is in flight. */
		if (netif_queue_stopped(ndev) && !net_device->start_remove &&
			(hv_ringbuf_avail_percent(&device->channel->outbound)
			> RING_AVAIL_PERCENT_HIWATER ||
			num_outstanding_sends < 1))
				netif_wake_queue(ndev);
	} else {
		netdev_err(ndev, "Unknown send completion packet type- "
			   "%d received!!\n", nvsp_packet->hdr.msg_type);
	}

}

494
/*
 * Queue an RNDIS packet to the host over the vmbus channel.
 * Data goes out as a page-buffer packet when page_buf_cnt is set,
 * otherwise inband. Stops the transmit queue when ring space drops
 * below the low watermark; netvsc_send_completion() wakes it again.
 * Returns 0 on success, -ENOSPC when the ring is full with nothing in
 * flight, or another negative errno.
 */
int netvsc_send(struct hv_device *device,
			struct hv_netvsc_packet *packet)
{
	struct netvsc_device *net_device;
	int ret = 0;
	struct nvsp_message sendMessage;
	struct net_device *ndev;

	net_device = get_outbound_net_device(device);
	if (!net_device)
		return -ENODEV;
	ndev = net_device->ndev;

	sendMessage.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
	if (packet->is_data_pkt) {
		/* 0 is RMC_DATA; */
		sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 0;
	} else {
		/* 1 is RMC_CONTROL; */
		sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
	}

	/* Not using send buffer section */
	sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
		0xFFFFFFFF;
	sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;

	if (packet->page_buf_cnt) {
		ret = vmbus_sendpacket_pagebuffer(device->channel,
						  packet->page_buf,
						  packet->page_buf_cnt,
						  &sendMessage,
						  sizeof(struct nvsp_message),
						  (unsigned long)packet);
	} else {
		ret = vmbus_sendpacket(device->channel, &sendMessage,
				sizeof(struct nvsp_message),
				(unsigned long)packet,
				VM_PKT_DATA_INBAND,
				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	}

	if (ret == 0) {
		atomic_inc(&net_device->num_outstanding_sends);
		/* Throttle: stop the queue when the ring drops below the
		 * low watermark. If every send already completed, wake it
		 * right back up so the queue cannot stall with no
		 * completion pending to restart it. */
		if (hv_ringbuf_avail_percent(&device->channel->outbound) <
			RING_AVAIL_PERCENT_LOWATER) {
			netif_stop_queue(ndev);
			if (atomic_read(&net_device->
				num_outstanding_sends) < 1)
				netif_wake_queue(ndev);
		}
	} else if (ret == -EAGAIN) {
		/* Ring full: stop the queue. With nothing in flight there
		 * is no completion coming to wake it, so wake it now and
		 * report -ENOSPC to the caller. */
		netif_stop_queue(ndev);
		if (atomic_read(&net_device->num_outstanding_sends) < 1) {
			netif_wake_queue(ndev);
			ret = -ENOSPC;
		}
	} else {
		netdev_err(ndev, "Unable to send packet %p ret %d\n",
			   packet, ret);
	}

	return ret;
}

560
/*
 * Send a receive-completion (VM_PKT_COMP) for transaction
 * @transaction_id back to the host with the given NVSP @status.
 * If the ring is temporarily full (-EAGAIN) the send is retried up to
 * 3 times with a 100us delay; other failures are only logged.
 */
static void netvsc_send_recv_completion(struct hv_device *device,
					u64 transaction_id, u32 status)
{
	struct nvsp_message recvcompMessage;
	int retries = 0;
	int ret;
	struct net_device *ndev;
	struct netvsc_device *net_device = hv_get_drvdata(device);

	ndev = net_device->ndev;

	recvcompMessage.hdr.msg_type =
				NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;

	recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;

retry_send_cmplt:
	/* Send the completion */
	ret = vmbus_sendpacket(device->channel, &recvcompMessage,
			       sizeof(struct nvsp_message), transaction_id,
			       VM_PKT_COMP, 0);
	if (ret == 0) {
		/* success */
		/* no-op */
	} else if (ret == -EAGAIN) {
		/* no more room...wait a bit and attempt to retry 3 times */
		retries++;
		netdev_err(ndev, "unable to send receive completion pkt"
			" (tid %llx)...retrying %d\n", transaction_id, retries);

		if (retries < 4) {
			udelay(100);
			goto retry_send_cmplt;
		} else {
			netdev_err(ndev, "unable to send receive "
				"completion pkt (tid %llx)...give up retrying\n",
				transaction_id);
		}
	} else {
		netdev_err(ndev, "unable to send receive "
			"completion pkt - %llx\n", transaction_id);
	}
}

604 605 606 607
/* Send a receive completion packet to RNDIS device (ie NetVsp) */
/*
 * Per-sub-packet completion callback: each hv_netvsc_packet carved out
 * of one xfer page packet lands here when the upper layer is done with
 * it. The packet is returned to the freelist; when the last sub-packet
 * of the group completes, its parent xferpage_packet is returned too
 * and a single receive completion is sent to the host.
 */
static void netvsc_receive_completion(void *context)
{
	struct hv_netvsc_packet *packet = context;
	struct hv_device *device = packet->device;
	struct netvsc_device *net_device;
	u64 transaction_id = 0;
	bool fsend_receive_comp = false;
	unsigned long flags;
	struct net_device *ndev;
	u32 status = NVSP_STAT_NONE;

	/*
	 * Even though it seems logical to do a GetOutboundNetDevice() here to
	 * send out receive completion, we are using GetInboundNetDevice()
	 * since we may have disable outbound traffic already.
	 */
	net_device = get_inbound_net_device(device);
	if (!net_device)
		return;
	ndev = net_device->ndev;

	/* Overloading use of the lock. */
	spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);

	/* Any failed sub-packet fails the whole xfer page packet */
	if (packet->status != NVSP_STAT_SUCCESS)
		packet->xfer_page_pkt->status = NVSP_STAT_FAIL;

	packet->xfer_page_pkt->count--;

	/*
	 * Last one in the line that represent 1 xfer page packet.
	 * Return the xfer page packet itself to the freelist
	 */
	if (packet->xfer_page_pkt->count == 0) {
		fsend_receive_comp = true;
		transaction_id = packet->completion.recv.recv_completion_tid;
		status = packet->xfer_page_pkt->status;
		list_add_tail(&packet->xfer_page_pkt->list_ent,
			      &net_device->recv_pkt_list);

	}

	/* Put the packet back */
	list_add_tail(&packet->list_ent, &net_device->recv_pkt_list);
	spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags);

	/* Send a receive completion for the xfer page packet */
	if (fsend_receive_comp)
		netvsc_send_recv_completion(device, transaction_id, status);

}

657
/*
 * Handle an inbound VM_PKT_DATA_USING_XFER_PAGES packet: validate it,
 * grab range_cnt+1 packets from the preallocated freelist (one for the
 * xfer page packet itself, one per range), map each range into the
 * shared receive buffer, and hand the resulting RNDIS packets to
 * rndis_filter_receive(). Completions are funneled through
 * netvsc_receive_completion(), which sends the host completion when
 * the last range is done. If the freelist cannot even cover one range,
 * the whole packet is dropped with NVSP_STAT_FAIL.
 */
static void netvsc_receive(struct hv_device *device,
			    struct vmpacket_descriptor *packet)
{
	struct netvsc_device *net_device;
	struct vmtransfer_page_packet_header *vmxferpage_packet;
	struct nvsp_message *nvsp_packet;
	struct hv_netvsc_packet *netvsc_packet = NULL;
	/* struct netvsc_driver *netvscDriver; */
	struct xferpage_packet *xferpage_packet = NULL;
	int i;
	int count = 0;
	unsigned long flags;
	struct net_device *ndev;

	LIST_HEAD(listHead);

	net_device = get_inbound_net_device(device);
	if (!net_device)
		return;
	ndev = net_device->ndev;

	/*
	 * All inbound packets other than send completion should be xfer page
	 * packet
	 */
	if (packet->type != VM_PKT_DATA_USING_XFER_PAGES) {
		netdev_err(ndev, "Unknown packet type received - %d\n",
			   packet->type);
		return;
	}

	/* NVSP payload follows the descriptor; offset8 is in 8-byte units */
	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
			(packet->offset8 << 3));

	/* Make sure this is a valid nvsp packet */
	if (nvsp_packet->hdr.msg_type !=
	    NVSP_MSG1_TYPE_SEND_RNDIS_PKT) {
		netdev_err(ndev, "Unknown nvsp packet type received-"
			" %d\n", nvsp_packet->hdr.msg_type);
		return;
	}

	vmxferpage_packet = (struct vmtransfer_page_packet_header *)packet;

	if (vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID) {
		netdev_err(ndev, "Invalid xfer page set id - "
			   "expecting %x got %x\n", NETVSC_RECEIVE_BUFFER_ID,
			   vmxferpage_packet->xfer_pageset_id);
		return;
	}

	/*
	 * Grab free packets (range count + 1) to represent this xfer
	 * page packet. +1 to represent the xfer page packet itself.
	 * We grab it here so that we know exactly how many we can
	 * fulfil
	 */
	spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
	while (!list_empty(&net_device->recv_pkt_list)) {
		list_move_tail(net_device->recv_pkt_list.next, &listHead);
		if (++count == vmxferpage_packet->range_cnt + 1)
			break;
	}
	spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags);

	/*
	 * We need at least 2 netvsc pkts (1 to represent the xfer
	 * page and at least 1 for the range) i.e. we can handled
	 * some of the xfer page packet ranges...
	 */
	if (count < 2) {
		netdev_err(ndev, "Got only %d netvsc pkt...needed "
			"%d pkts. Dropping this xfer page packet completely!\n",
			count, vmxferpage_packet->range_cnt + 1);

		/* Return it to the freelist */
		spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
		for (i = count; i != 0; i--) {
			list_move_tail(listHead.next,
				       &net_device->recv_pkt_list);
		}
		spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
				       flags);

		netvsc_send_recv_completion(device,
					    vmxferpage_packet->d.trans_id,
					    NVSP_STAT_FAIL);

		return;
	}

	/* Remove the 1st packet to represent the xfer page packet itself */
	xferpage_packet = (struct xferpage_packet *)listHead.next;
	list_del(&xferpage_packet->list_ent);
	xferpage_packet->status = NVSP_STAT_SUCCESS;

	/* This is how much we can satisfy */
	xferpage_packet->count = count - 1;

	if (xferpage_packet->count != vmxferpage_packet->range_cnt) {
		netdev_err(ndev, "Needed %d netvsc pkts to satisfy "
			"this xfer page...got %d\n",
			vmxferpage_packet->range_cnt, xferpage_packet->count);
	}

	/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
	for (i = 0; i < (count - 1); i++) {
		netvsc_packet = (struct hv_netvsc_packet *)listHead.next;
		list_del(&netvsc_packet->list_ent);

		/* Initialize the netvsc packet */
		netvsc_packet->status = NVSP_STAT_SUCCESS;
		netvsc_packet->xfer_page_pkt = xferpage_packet;
		netvsc_packet->completion.recv.recv_completion =
					netvsc_receive_completion;
		netvsc_packet->completion.recv.recv_completion_ctx =
					netvsc_packet;
		netvsc_packet->device = device;
		/* Save this so that we can send it back */
		netvsc_packet->completion.recv.recv_completion_tid =
					vmxferpage_packet->d.trans_id;

		/* Point at the range's data inside the shared recv buffer */
		netvsc_packet->data = (void *)((unsigned long)net_device->
			recv_buf + vmxferpage_packet->ranges[i].byte_offset);
		netvsc_packet->total_data_buflen =
					vmxferpage_packet->ranges[i].byte_count;

		/* Pass it to the upper layer */
		rndis_filter_receive(device, netvsc_packet);

		netvsc_receive_completion(netvsc_packet->
				completion.recv.recv_completion_ctx);
	}

}

793
static void netvsc_channel_cb(void *context)
794
{
795
	int ret;
796 797 798 799
	struct hv_device *device = context;
	struct netvsc_device *net_device;
	u32 bytes_recvd;
	u64 request_id;
800
	unsigned char *packet;
801
	struct vmpacket_descriptor *desc;
802 803
	unsigned char *buffer;
	int bufferlen = NETVSC_PACKET_SIZE;
804
	struct net_device *ndev;
805

806
	packet = kzalloc(NETVSC_PACKET_SIZE * sizeof(unsigned char),
807
			 GFP_ATOMIC);
808 809 810 811
	if (!packet)
		return;
	buffer = packet;

812
	net_device = get_inbound_net_device(device);
813
	if (!net_device)
814
		goto out;
815
	ndev = net_device->ndev;
816

817
	do {
818
		ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen,
819
					   &bytes_recvd, &request_id);
820
		if (ret == 0) {
821
			if (bytes_recvd > 0) {
822
				desc = (struct vmpacket_descriptor *)buffer;
823 824
				switch (desc->type) {
				case VM_PKT_COMP:
825
					netvsc_send_completion(device, desc);
826 827
					break;

828
				case VM_PKT_DATA_USING_XFER_PAGES:
829
					netvsc_receive(device, desc);
830 831 832
					break;

				default:
833
					netdev_err(ndev,
834 835
						   "unhandled packet type %d, "
						   "tid %llx len %d\n",
836
						   desc->type, request_id,
837
						   bytes_recvd);
838
					break;
839 840
				}

841
				/* reset */
842
				if (bufferlen > NETVSC_PACKET_SIZE) {
843
					kfree(buffer);
844
					buffer = packet;
845
					bufferlen = NETVSC_PACKET_SIZE;
846
				}
847
			} else {
848
				/* reset */
849
				if (bufferlen > NETVSC_PACKET_SIZE) {
850
					kfree(buffer);
851
					buffer = packet;
852
					bufferlen = NETVSC_PACKET_SIZE;
853 854 855 856
				}

				break;
			}
857
		} else if (ret == -ENOBUFS) {
858
			/* Handle large packet */
859
			buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
860
			if (buffer == NULL) {
861
				/* Try again next time around */
862
				netdev_err(ndev,
863
					   "unable to allocate buffer of size "
864
					   "(%d)!!\n", bytes_recvd);
865 866 867
				break;
			}

868
			bufferlen = bytes_recvd;
869 870 871
		}
	} while (1);

872 873
out:
	kfree(buffer);
874 875
	return;
}
876

877 878 879 880
/*
 * netvsc_device_add - Callback when the device belonging to this
 * driver is added
 *
 * Allocates the netvsc_device, preallocates the receive packet
 * freelist, opens the vmbus channel (ring size taken from
 * @additional_info, a struct netvsc_device_info *), and connects to
 * the NetVSP. On failure the channel is closed (if opened) and all
 * allocations are released. Returns 0 on success or a negative errno.
 */
int netvsc_device_add(struct hv_device *device, void *additional_info)
{
	int ret = 0;
	int i;
	int ring_size =
	((struct netvsc_device_info *)additional_info)->ring_size;
	struct netvsc_device *net_device;
	struct hv_netvsc_packet *packet, *pos;
	struct net_device *ndev;

	net_device = alloc_net_device(device);
	if (!net_device) {
		ret = -ENOMEM;
		goto cleanup;
	}

	/*
	 * Coming into this function, struct net_device * is
	 * registered as the driver private data.
	 * In alloc_net_device(), we register struct netvsc_device *
	 * as the driver private data and stash away struct net_device *
	 * in struct netvsc_device *.
	 */
	ndev = net_device->ndev;

	/* Initialize the NetVSC channel extension */
	net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
	spin_lock_init(&net_device->recv_pkt_list_lock);

	INIT_LIST_HEAD(&net_device->recv_pkt_list);

	/* Prefill the receive packet freelist; a partial fill (alloc
	 * failure mid-loop) is tolerated — the list just ends up shorter. */
	for (i = 0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) {
		packet = kzalloc(sizeof(struct hv_netvsc_packet), GFP_KERNEL);
		if (!packet)
			break;

		list_add_tail(&packet->list_ent,
			      &net_device->recv_pkt_list);
	}
	init_completion(&net_device->channel_init_wait);

	/* Open the channel */
	ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
			 ring_size * PAGE_SIZE, NULL, 0,
			 netvsc_channel_cb, device);

	if (ret != 0) {
		netdev_err(ndev, "unable to open channel: %d\n", ret);
		goto cleanup;
	}

	/* Channel is opened */
	pr_info("hv_netvsc channel opened successfully\n");

	/* Connect with the NetVsp */
	ret = netvsc_connect_vsp(device);
	if (ret != 0) {
		netdev_err(ndev,
			"unable to connect to NetVSP - %d\n", ret);
		goto close;
	}

	return ret;

close:
	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

cleanup:

	if (net_device) {
		/* Drain and free the preallocated receive packet list */
		list_for_each_entry_safe(packet, pos,
					 &net_device->recv_pkt_list,
					 list_ent) {
			list_del(&packet->list_ent);
			kfree(packet);
		}

		kfree(net_device);
	}

	return ret;
}