/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/delay.h>
#include <linux/io.h>
#include <linux/slab.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>

#include "hyperv_net.h"


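/*
 * Allocate the per-device NetVSC state.  On entry the hv_device's
 * driver data is the struct net_device *; stash it in net_device->ndev
 * and install the new struct netvsc_device * as the driver data
 * instead (see the comment in netvsc_device_add()).
 */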
static struct netvsc_device *alloc_net_device(struct hv_device *device)
{
	struct netvsc_device *net_device;
	struct net_device *ndev = hv_get_drvdata(device);

	net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
	if (!net_device)
		return NULL;

	net_device->start_remove = false;
	net_device->destroy = false;
	net_device->dev = device;
	net_device->ndev = ndev;

	hv_set_drvdata(device, net_device);
	return net_device;
}

static struct netvsc_device *get_outbound_net_device(struct hv_device *device)
{
	struct netvsc_device *net_device;

	net_device = hv_get_drvdata(device);
	if (net_device && net_device->destroy)
		net_device = NULL;

	return net_device;
}

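/*
 * Inbound lookup: unlike get_outbound_net_device(), keep returning the
 * device after destroy is set for as long as sends are outstanding, so
 * completions for in-flight packets can still be drained.
 */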
static struct netvsc_device *get_inbound_net_device(struct hv_device *device)
{
	struct netvsc_device *net_device;

	net_device = hv_get_drvdata(device);

	if (!net_device)
		goto get_in_err;

	if (net_device->destroy &&
		atomic_read(&net_device->num_outstanding_sends) == 0)
		net_device = NULL;

get_in_err:
	return net_device;
}


static int netvsc_destroy_recv_buf(struct netvsc_device *net_device)
{
	struct nvsp_message *revoke_packet;
	int ret = 0;
	struct net_device *ndev = net_device->ndev;

	/*
	 * If we got a section count, it means we received a
	 * SendReceiveBufferComplete msg (ie sent
	 * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
	 * to send a revoke msg here
	 */
	if (net_device->recv_section_cnt) {
		/* Send the revoke receive buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
		revoke_packet->msg.v1_msg.
		revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;

		ret = vmbus_sendpacket(net_device->dev->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       (unsigned long)revoke_packet,
				       VM_PKT_DATA_INBAND, 0);
		/*
		 * If we failed here, we might as well return and
		 * have a leak rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev, "unable to send "
				"revoke receive buffer to netvsp\n");
			return ret;
		}
	}

	/* Teardown the gpadl on the vsp end */
	if (net_device->recv_buf_gpadl_handle) {
		ret = vmbus_teardown_gpadl(net_device->dev->channel,
			   net_device->recv_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev,
				   "unable to teardown receive buffer's gpadl\n");
			return ret;
		}
		net_device->recv_buf_gpadl_handle = 0;
	}

	if (net_device->recv_buf) {
		/* Free up the receive buffer */
		free_pages((unsigned long)net_device->recv_buf,
			get_order(net_device->recv_buf_size));
		net_device->recv_buf = NULL;
	}

	if (net_device->recv_section) {
		net_device->recv_section_cnt = 0;
		kfree(net_device->recv_section);
		net_device->recv_section = NULL;
	}

	return ret;
}

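/*
 * Set up the receive buffer: allocate it, expose it to the host through
 * a GPADL, announce the GPADL via NVSP_MSG1_TYPE_SEND_RECV_BUF, then
 * wait for the host to reply with the buffer's section layout.
 */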
static int netvsc_init_recv_buf(struct hv_device *device)
{
	int ret = 0;
	int t;
	struct netvsc_device *net_device;
	struct nvsp_message *init_packet;
	struct net_device *ndev;

	net_device = get_outbound_net_device(device);
	if (!net_device)
		return -ENODEV;
	ndev = net_device->ndev;

	net_device->recv_buf =
		(void *)__get_free_pages(GFP_KERNEL|__GFP_ZERO,
				get_order(net_device->recv_buf_size));
	if (!net_device->recv_buf) {
		netdev_err(ndev, "unable to allocate receive "
			"buffer of size %d\n", net_device->recv_buf_size);
		ret = -ENOMEM;
		goto cleanup;
	}

	/*
	 * Establish the gpadl handle for this buffer on this
	 * channel.  Note: This call uses the vmbus connection rather
	 * than the channel to establish the gpadl handle.
	 */
	ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
				    net_device->recv_buf_size,
				    &net_device->recv_buf_gpadl_handle);
	if (ret != 0) {
		netdev_err(ndev,
			"unable to establish receive buffer's gpadl\n");
		goto cleanup;
	}

	/* Notify the NetVsp of the gpadl handle */
	init_packet = &net_device->channel_init_pkt;

	memset(init_packet, 0, sizeof(struct nvsp_message));

	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
	init_packet->msg.v1_msg.send_recv_buf.
		gpadl_handle = net_device->recv_buf_gpadl_handle;
	init_packet->msg.v1_msg.
		send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;

	/* Send the gpadl notification request */
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0) {
		netdev_err(ndev,
			"unable to send receive buffer's gpadl to netvsp\n");
		goto cleanup;
	}

	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
	if (t == 0) {
		netdev_err(ndev, "timeout waiting for receive buffer setup\n");
		ret = -ETIMEDOUT;
		goto cleanup;
	}

	/* Check the response */
	if (init_packet->msg.v1_msg.
	    send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
		netdev_err(ndev, "Unable to complete receive buffer "
			   "initialization with NetVsp - status %d\n",
			   init_packet->msg.v1_msg.
			   send_recv_buf_complete.status);
		ret = -EINVAL;
		goto cleanup;
	}

	/* Parse the response */

	net_device->recv_section_cnt = init_packet->msg.
		v1_msg.send_recv_buf_complete.num_sections;

	net_device->recv_section = kmemdup(
		init_packet->msg.v1_msg.send_recv_buf_complete.sections,
		net_device->recv_section_cnt *
		sizeof(struct nvsp_1_receive_buffer_section),
		GFP_KERNEL);
	if (net_device->recv_section == NULL) {
		ret = -ENOMEM;
		goto cleanup;
	}

	/*
	 * For 1st release, there should only be 1 section that represents the
	 * entire receive buffer
	 */
	if (net_device->recv_section_cnt != 1 ||
	    net_device->recv_section->offset != 0) {
		ret = -EINVAL;
		goto cleanup;
	}

	goto exit;

cleanup:
	netvsc_destroy_recv_buf(net_device);

exit:
	return ret;
}


/* Negotiate NVSP protocol version */
static int negotiate_nvsp_ver(struct hv_device *device,
			      struct netvsc_device *net_device,
			      struct nvsp_message *init_packet,
			      u32 nvsp_ver)
{
	int ret, t;

	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
	init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
	init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;

	/* Send the init request */
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);

	if (ret != 0)
		return ret;

	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);

	if (t == 0)
		return -ETIMEDOUT;

	if (init_packet->msg.init_msg.init_complete.status !=
	    NVSP_STAT_SUCCESS)
		return -EINVAL;

	if (nvsp_ver != NVSP_PROTOCOL_VERSION_2)
		return 0;

	/* NVSPv2 only: Send NDIS config */
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
	init_packet->msg.v2_msg.send_ndis_config.mtu = net_device->ndev->mtu;
	init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;

	ret = vmbus_sendpacket(device->channel, init_packet,
				sizeof(struct nvsp_message),
				(unsigned long)init_packet,
				VM_PKT_DATA_INBAND, 0);

	return ret;
}

static int netvsc_connect_vsp(struct hv_device *device)
{
	int ret;
	struct netvsc_device *net_device;
	struct nvsp_message *init_packet;
	int ndis_version;
	struct net_device *ndev;

	net_device = get_outbound_net_device(device);
	if (!net_device)
		return -ENODEV;
	ndev = net_device->ndev;

	init_packet = &net_device->channel_init_pkt;

	/* Negotiate the latest NVSP protocol supported */
	if (negotiate_nvsp_ver(device, net_device, init_packet,
			       NVSP_PROTOCOL_VERSION_2) == 0) {
		net_device->nvsp_version = NVSP_PROTOCOL_VERSION_2;
	} else if (negotiate_nvsp_ver(device, net_device, init_packet,
				    NVSP_PROTOCOL_VERSION_1) == 0) {
		net_device->nvsp_version = NVSP_PROTOCOL_VERSION_1;
	} else {
		ret = -EPROTO;
		goto cleanup;
	}

	pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);

	/* Send the ndis version */
	memset(init_packet, 0, sizeof(struct nvsp_message));

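	/* NDIS 5.1: major version in the high 16 bits, minor in the low 16 */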
	ndis_version = 0x00050001;

	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
	init_packet->msg.v1_msg.
		send_ndis_ver.ndis_major_ver =
				(ndis_version & 0xFFFF0000) >> 16;
	init_packet->msg.v1_msg.
		send_ndis_ver.ndis_minor_ver =
				ndis_version & 0xFFFF;

	/* Send the init request */
	ret = vmbus_sendpacket(device->channel, init_packet,
				sizeof(struct nvsp_message),
				(unsigned long)init_packet,
				VM_PKT_DATA_INBAND, 0);
	if (ret != 0)
		goto cleanup;

	/* Post the big receive buffer to NetVSP */
	ret = netvsc_init_recv_buf(device);

cleanup:
	return ret;
}

static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
{
	netvsc_destroy_recv_buf(net_device);
}

/*
 * netvsc_device_remove - Callback when the root bus device is removed
 */
int netvsc_device_remove(struct hv_device *device)
{
	struct netvsc_device *net_device;
	struct hv_netvsc_packet *netvsc_packet, *pos;
	unsigned long flags;

	net_device = hv_get_drvdata(device);
	spin_lock_irqsave(&device->channel->inbound_lock, flags);
	net_device->destroy = true;
	spin_unlock_irqrestore(&device->channel->inbound_lock, flags);

	/* Wait for all send completions */
	while (atomic_read(&net_device->num_outstanding_sends)) {
		dev_info(&device->device,
			"waiting for %d requests to complete...\n",
			atomic_read(&net_device->num_outstanding_sends));
		udelay(100);
	}

	netvsc_disconnect_vsp(net_device);

	/*
	 * Since we have already drained, we don't need to busy wait
	 * as was done in final_release_stor_device()
	 * Note that we cannot set the ext pointer to NULL until
	 * we have drained - to drain the outgoing packets, we need to
	 * allow incoming packets.
	 */

	spin_lock_irqsave(&device->channel->inbound_lock, flags);
	hv_set_drvdata(device, NULL);
	spin_unlock_irqrestore(&device->channel->inbound_lock, flags);

	/*
	 * At this point, no one should be accessing net_device
	 * except in here
	 */
	dev_notice(&device->device, "net device safe to remove\n");

	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

	/* Release all resources */
	list_for_each_entry_safe(netvsc_packet, pos,
				 &net_device->recv_pkt_list, list_ent) {
		list_del(&netvsc_packet->list_ent);
		kfree(netvsc_packet);
	}

	kfree(net_device);
	return 0;
}

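/*
 * Transmit flow control: the queue is stopped once available ring space
 * falls below RING_AVAIL_PERCENT_LOWATER and woken again when it rises
 * above RING_AVAIL_PERCENT_HIWATER (or when no sends remain outstanding).
 */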
#define RING_AVAIL_PERCENT_HIWATER 20
#define RING_AVAIL_PERCENT_LOWATER 10

/*
 * Get the percentage of available bytes to write in the ring.
 * The return value is in range from 0 to 100.
 */
static inline u32 hv_ringbuf_avail_percent(
		struct hv_ring_buffer_info *ring_info)
{
	u32 avail_read, avail_write;

	hv_get_ringbuffer_availbytes(ring_info, &avail_read, &avail_write);

	return avail_write * 100 / ring_info->ring_datasize;
}

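/*
 * Handle completions for packets we sent: handshake completions are
 * copied into channel_init_pkt and signalled via channel_init_wait,
 * while RNDIS send completions run the per-packet callback and may
 * restart a stopped transmit queue (see the watermarks above).
 */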
static void netvsc_send_completion(struct hv_device *device,
				   struct vmpacket_descriptor *packet)
{
	struct netvsc_device *net_device;
	struct nvsp_message *nvsp_packet;
	struct hv_netvsc_packet *nvsc_packet;
	struct net_device *ndev;

	net_device = get_inbound_net_device(device);
	if (!net_device)
		return;
	ndev = net_device->ndev;

	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
			(packet->offset8 << 3));

	if ((nvsp_packet->hdr.msg_type == NVSP_MSG_TYPE_INIT_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE)) {
		/* Copy the response back */
		memcpy(&net_device->channel_init_pkt, nvsp_packet,
		       sizeof(struct nvsp_message));
		complete(&net_device->channel_init_wait);
	} else if (nvsp_packet->hdr.msg_type ==
		   NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
		int num_outstanding_sends;

		/* Get the send context */
		nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
			packet->trans_id;

		/* Notify the layer above us */
		nvsc_packet->completion.send.send_completion(
			nvsc_packet->completion.send.send_completion_ctx);

		num_outstanding_sends =
			atomic_dec_return(&net_device->num_outstanding_sends);

		if (netif_queue_stopped(ndev) && !net_device->start_remove &&
			(hv_ringbuf_avail_percent(&device->channel->outbound)
			> RING_AVAIL_PERCENT_HIWATER ||
			num_outstanding_sends < 1))
				netif_wake_queue(ndev);
	} else {
		netdev_err(ndev, "Unknown send completion packet type- "
			   "%d received!!\n", nvsp_packet->hdr.msg_type);
	}
}

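/*
 * Queue one RNDIS packet to the host.  Packets carrying page buffers go
 * out via vmbus_sendpacket_pagebuffer(); control messages are sent as a
 * plain inband packet.  On success the outstanding-send count is bumped
 * and the queue may be stopped per the low watermark above.
 */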
int netvsc_send(struct hv_device *device,
			struct hv_netvsc_packet *packet)
{
	struct netvsc_device *net_device;
	int ret = 0;
	struct nvsp_message sendMessage;
	struct net_device *ndev;

	net_device = get_outbound_net_device(device);
	if (!net_device)
		return -ENODEV;
	ndev = net_device->ndev;

	sendMessage.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
	if (packet->is_data_pkt) {
		/* 0 is RMC_DATA; */
		sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 0;
	} else {
		/* 1 is RMC_CONTROL; */
		sendMessage.msg.v1_msg.send_rndis_pkt.channel_type = 1;
	}

	/* Not using send buffer section */
	sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
		0xFFFFFFFF;
	sendMessage.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;

	if (packet->page_buf_cnt) {
		ret = vmbus_sendpacket_pagebuffer(device->channel,
						  packet->page_buf,
						  packet->page_buf_cnt,
						  &sendMessage,
						  sizeof(struct nvsp_message),
						  (unsigned long)packet);
	} else {
		ret = vmbus_sendpacket(device->channel, &sendMessage,
				sizeof(struct nvsp_message),
				(unsigned long)packet,
				VM_PKT_DATA_INBAND,
				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	}

	if (ret == 0) {
		atomic_inc(&net_device->num_outstanding_sends);
		if (hv_ringbuf_avail_percent(&device->channel->outbound) <
			RING_AVAIL_PERCENT_LOWATER) {
			netif_stop_queue(ndev);
			if (atomic_read(&net_device->
				num_outstanding_sends) < 1)
				netif_wake_queue(ndev);
		}
	} else if (ret == -EAGAIN) {
		netif_stop_queue(ndev);
		if (atomic_read(&net_device->num_outstanding_sends) < 1) {
			netif_wake_queue(ndev);
			ret = -ENOSPC;
		}
	} else {
		netdev_err(ndev, "Unable to send packet %p ret %d\n",
			   packet, ret);
	}

	return ret;
}

static void netvsc_send_recv_completion(struct hv_device *device,
					u64 transaction_id)
{
	struct nvsp_message recvcompMessage;
	int retries = 0;
	int ret;
	struct net_device *ndev;
	struct netvsc_device *net_device = hv_get_drvdata(device);

	ndev = net_device->ndev;

	recvcompMessage.hdr.msg_type =
				NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;

	/* FIXME: Pass in the status */
	recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status =
		NVSP_STAT_SUCCESS;

retry_send_cmplt:
	/* Send the completion */
	ret = vmbus_sendpacket(device->channel, &recvcompMessage,
			       sizeof(struct nvsp_message), transaction_id,
			       VM_PKT_COMP, 0);
	if (ret == 0) {
		/* success */
		/* no-op */
	} else if (ret == -EAGAIN) {
		/* no more room...wait a bit and attempt to retry 3 times */
		retries++;
		netdev_err(ndev, "unable to send receive completion pkt"
			" (tid %llx)...retrying %d\n", transaction_id, retries);

		if (retries < 4) {
			udelay(100);
			goto retry_send_cmplt;
		} else {
			netdev_err(ndev, "unable to send receive "
				"completion pkt (tid %llx)...give up retrying\n",
				transaction_id);
		}
	} else {
		netdev_err(ndev, "unable to send receive "
			"completion pkt - %llx\n", transaction_id);
	}
}

/* Send a receive completion packet to RNDIS device (i.e. NetVsp) */
static void netvsc_receive_completion(void *context)
{
	struct hv_netvsc_packet *packet = context;
	struct hv_device *device = (struct hv_device *)packet->device;
	struct netvsc_device *net_device;
	u64 transaction_id = 0;
	bool fsend_receive_comp = false;
	unsigned long flags;
	struct net_device *ndev;

	/*
	 * Even though it seems logical to do a GetOutboundNetDevice() here to
	 * send out receive completion, we are using GetInboundNetDevice()
	 * since we may have disabled outbound traffic already.
	 */
	net_device = get_inbound_net_device(device);
	if (!net_device)
		return;
	ndev = net_device->ndev;

	/* Overloading use of the lock. */
	spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);

	packet->xfer_page_pkt->count--;

	/*
	 * Last one in the line that represents 1 xfer page packet.
	 * Return the xfer page packet itself to the freelist
	 */
	if (packet->xfer_page_pkt->count == 0) {
		fsend_receive_comp = true;
		transaction_id = packet->completion.recv.recv_completion_tid;
		list_add_tail(&packet->xfer_page_pkt->list_ent,
			      &net_device->recv_pkt_list);
	}

	/* Put the packet back */
	list_add_tail(&packet->list_ent, &net_device->recv_pkt_list);
	spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags);

	/* Send a receive completion for the xfer page packet */
	if (fsend_receive_comp)
		netvsc_send_recv_completion(device, transaction_id);
}

static void netvsc_receive(struct hv_device *device,
			    struct vmpacket_descriptor *packet)
{
	struct netvsc_device *net_device;
	struct vmtransfer_page_packet_header *vmxferpage_packet;
	struct nvsp_message *nvsp_packet;
	struct hv_netvsc_packet *netvsc_packet = NULL;
	struct xferpage_packet *xferpage_packet = NULL;
	int i;
	int count = 0;
	unsigned long flags;
	struct net_device *ndev;

	LIST_HEAD(listHead);

	net_device = get_inbound_net_device(device);
	if (!net_device)
		return;
	ndev = net_device->ndev;

	/*
	 * All inbound packets other than send completion should be xfer page
	 * packet
	 */
	if (packet->type != VM_PKT_DATA_USING_XFER_PAGES) {
		netdev_err(ndev, "Unknown packet type received - %d\n",
			   packet->type);
		return;
	}

	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
			(packet->offset8 << 3));

	/* Make sure this is a valid nvsp packet */
	if (nvsp_packet->hdr.msg_type !=
	    NVSP_MSG1_TYPE_SEND_RNDIS_PKT) {
		netdev_err(ndev, "Unknown nvsp packet type received-"
			" %d\n", nvsp_packet->hdr.msg_type);
		return;
	}

	vmxferpage_packet = (struct vmtransfer_page_packet_header *)packet;

	if (vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID) {
		netdev_err(ndev, "Invalid xfer page set id - "
			   "expecting %x got %x\n", NETVSC_RECEIVE_BUFFER_ID,
			   vmxferpage_packet->xfer_pageset_id);
		return;
	}

	/*
	 * Grab free packets (range count + 1) to represent this xfer
	 * page packet. +1 to represent the xfer page packet itself.
	 * We grab it here so that we know exactly how many we can
	 * fulfil
	 */
	spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
	while (!list_empty(&net_device->recv_pkt_list)) {
		list_move_tail(net_device->recv_pkt_list.next, &listHead);
		if (++count == vmxferpage_packet->range_cnt + 1)
			break;
	}
	spin_unlock_irqrestore(&net_device->recv_pkt_list_lock, flags);

	/*
	 * We need at least 2 netvsc pkts (1 to represent the xfer
	 * page and at least 1 for the range) i.e. we can handle
	 * some of the xfer page packet ranges...
	 */
	if (count < 2) {
		netdev_err(ndev, "Got only %d netvsc pkt...needed "
			"%d pkts. Dropping this xfer page packet completely!\n",
			count, vmxferpage_packet->range_cnt + 1);

		/* Return it to the freelist */
		spin_lock_irqsave(&net_device->recv_pkt_list_lock, flags);
		for (i = count; i != 0; i--) {
			list_move_tail(listHead.next,
				       &net_device->recv_pkt_list);
		}
		spin_unlock_irqrestore(&net_device->recv_pkt_list_lock,
				       flags);

		netvsc_send_recv_completion(device,
					    vmxferpage_packet->d.trans_id);

		return;
	}

	/* Remove the 1st packet to represent the xfer page packet itself */
	xferpage_packet = (struct xferpage_packet *)listHead.next;
	list_del(&xferpage_packet->list_ent);

	/* This is how much we can satisfy */
	xferpage_packet->count = count - 1;

	if (xferpage_packet->count != vmxferpage_packet->range_cnt) {
		netdev_err(ndev, "Needed %d netvsc pkts to satisfy "
			"this xfer page...got %d\n",
			vmxferpage_packet->range_cnt, xferpage_packet->count);
	}

	/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
	for (i = 0; i < (count - 1); i++) {
		netvsc_packet = (struct hv_netvsc_packet *)listHead.next;
		list_del(&netvsc_packet->list_ent);

		/* Initialize the netvsc packet */
		netvsc_packet->xfer_page_pkt = xferpage_packet;
		netvsc_packet->completion.recv.recv_completion =
					netvsc_receive_completion;
		netvsc_packet->completion.recv.recv_completion_ctx =
					netvsc_packet;
		netvsc_packet->device = device;
		/* Save this so that we can send it back */
		netvsc_packet->completion.recv.recv_completion_tid =
					vmxferpage_packet->d.trans_id;

		netvsc_packet->data = (void *)((unsigned long)net_device->
			recv_buf + vmxferpage_packet->ranges[i].byte_offset);
		netvsc_packet->total_data_buflen =
					vmxferpage_packet->ranges[i].byte_count;

		/* Pass it to the upper layer */
		rndis_filter_receive(device, netvsc_packet);

		netvsc_receive_completion(netvsc_packet->
				completion.recv.recv_completion_ctx);
	}
}

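/*
 * Channel callback: drain the VMBus ring.  Packets are received into a
 * preallocated NETVSC_PACKET_SIZE buffer; when vmbus_recvpacket_raw()
 * reports -ENOBUFS, a larger buffer is allocated for that packet and
 * released once it has been handled.
 */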
static void netvsc_channel_cb(void *context)
{
	int ret;
	struct hv_device *device = context;
	struct netvsc_device *net_device;
	u32 bytes_recvd;
	u64 request_id;
	unsigned char *packet;
	struct vmpacket_descriptor *desc;
	unsigned char *buffer;
	int bufferlen = NETVSC_PACKET_SIZE;
	struct net_device *ndev;

	packet = kzalloc(NETVSC_PACKET_SIZE * sizeof(unsigned char),
			 GFP_ATOMIC);
	if (!packet)
		return;
	buffer = packet;

	net_device = get_inbound_net_device(device);
	if (!net_device)
		goto out;
	ndev = net_device->ndev;

	do {
		ret = vmbus_recvpacket_raw(device->channel, buffer, bufferlen,
					   &bytes_recvd, &request_id);
		if (ret == 0) {
			if (bytes_recvd > 0) {
				desc = (struct vmpacket_descriptor *)buffer;
				switch (desc->type) {
				case VM_PKT_COMP:
					netvsc_send_completion(device, desc);
					break;

				case VM_PKT_DATA_USING_XFER_PAGES:
					netvsc_receive(device, desc);
					break;

				default:
					netdev_err(ndev,
						   "unhandled packet type %d, "
						   "tid %llx len %d\n",
						   desc->type, request_id,
						   bytes_recvd);
					break;
				}

				/* reset */
				if (bufferlen > NETVSC_PACKET_SIZE) {
					kfree(buffer);
					buffer = packet;
					bufferlen = NETVSC_PACKET_SIZE;
				}
			} else {
				/* reset */
				if (bufferlen > NETVSC_PACKET_SIZE) {
					kfree(buffer);
					buffer = packet;
					bufferlen = NETVSC_PACKET_SIZE;
				}

				break;
			}
		} else if (ret == -ENOBUFS) {
			unsigned char *newbuf;

			/* Handle large packet */
			newbuf = kmalloc(bytes_recvd, GFP_ATOMIC);
			if (newbuf == NULL) {
				/* Try again next time around */
				netdev_err(ndev,
					   "unable to allocate buffer of size "
					   "(%d)!!\n", bytes_recvd);
				break;
			}

			/* Don't leak a previously enlarged buffer */
			if (buffer != packet)
				kfree(buffer);
			buffer = newbuf;
			bufferlen = bytes_recvd;
		}
	} while (1);

out:
	/* Free the enlarged buffer (if any) and the preallocated one */
	if (buffer != packet)
		kfree(buffer);
	kfree(packet);
	return;
}
/*
 * netvsc_device_add - Callback when the device belonging to this
 * driver is added
 */
int netvsc_device_add(struct hv_device *device, void *additional_info)
{
	int ret = 0;
	int i;
	int ring_size =
		((struct netvsc_device_info *)additional_info)->ring_size;
	struct netvsc_device *net_device;
	struct hv_netvsc_packet *packet, *pos;
	struct net_device *ndev;

	net_device = alloc_net_device(device);
	if (!net_device) {
		ret = -ENOMEM;
		goto cleanup;
	}

	/*
	 * Coming into this function, struct net_device * is
	 * registered as the driver private data.
	 * In alloc_net_device(), we register struct netvsc_device *
	 * as the driver private data and stash away struct net_device *
	 * in struct netvsc_device *.
	 */
	ndev = net_device->ndev;

	/* Initialize the NetVSC channel extension */
	net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
	spin_lock_init(&net_device->recv_pkt_list_lock);

	INIT_LIST_HEAD(&net_device->recv_pkt_list);

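	/*
	 * Preallocate the receive packet pool; each entry reserves room
	 * for NETVSC_RECEIVE_SG_COUNT page buffers.
	 */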
	for (i = 0; i < NETVSC_RECEIVE_PACKETLIST_COUNT; i++) {
		packet = kzalloc(sizeof(struct hv_netvsc_packet) +
				 (NETVSC_RECEIVE_SG_COUNT *
				  sizeof(struct hv_page_buffer)), GFP_KERNEL);
		if (!packet)
			break;

		list_add_tail(&packet->list_ent,
			      &net_device->recv_pkt_list);
	}
	init_completion(&net_device->channel_init_wait);

	/* Open the channel */
	ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
			 ring_size * PAGE_SIZE, NULL, 0,
			 netvsc_channel_cb, device);

	if (ret != 0) {
		netdev_err(ndev, "unable to open channel: %d\n", ret);
		goto cleanup;
	}

	/* Channel is opened */
	pr_info("hv_netvsc channel opened successfully\n");

	/* Connect with the NetVsp */
	ret = netvsc_connect_vsp(device);
	if (ret != 0) {
		netdev_err(ndev,
			"unable to connect to NetVSP - %d\n", ret);
		goto close;
	}

	return ret;

close:
	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

cleanup:

	if (net_device) {
		list_for_each_entry_safe(packet, pos,
					 &net_device->recv_pkt_list,
					 list_ent) {
			list_del(&packet->list_ent);
			kfree(packet);
		}

		kfree(net_device);
	}

	return ret;
}