netvsc.c 31.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
20 21
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

22
#include <linux/kernel.h>
23 24
#include <linux/sched.h>
#include <linux/wait.h>
25
#include <linux/mm.h>
26
#include <linux/delay.h>
27
#include <linux/io.h>
28
#include <linux/slab.h>
29
#include <linux/netdevice.h>
30
#include <linux/if_ether.h>
31
#include <asm/sync_bitops.h>
32

33
#include "hyperv_net.h"
34 35


36
static struct netvsc_device *alloc_net_device(struct hv_device *device)
37
{
38
	struct netvsc_device *net_device;
39
	struct net_device *ndev = hv_get_drvdata(device);
40
	int i;
41

42 43
	net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
	if (!net_device)
44 45
		return NULL;

46 47 48 49 50 51
	net_device->cb_buffer = kzalloc(NETVSC_PACKET_SIZE, GFP_KERNEL);
	if (!net_device->cb_buffer) {
		kfree(net_device);
		return NULL;
	}

52
	init_waitqueue_head(&net_device->wait_drain);
53
	net_device->start_remove = false;
54
	net_device->destroy = false;
55
	net_device->dev = device;
56
	net_device->ndev = ndev;
57 58 59 60 61
	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;

	for (i = 0; i < num_online_cpus(); i++)
		spin_lock_init(&net_device->msd[i].lock);
62

63
	hv_set_drvdata(device, net_device);
64
	return net_device;
65 66
}

67 68 69 70 71 72
/*
 * free_netvsc_device - release a netvsc_device and its cb_buffer
 * @nvdev: object previously returned by alloc_net_device()
 */
static void free_netvsc_device(struct netvsc_device *nvdev)
{
	void *cb_buf = nvdev->cb_buffer;

	kfree(cb_buf);
	kfree(nvdev);
}

73
static struct netvsc_device *get_outbound_net_device(struct hv_device *device)
74
{
75
	struct netvsc_device *net_device;
76

77
	net_device = hv_get_drvdata(device);
78
	if (net_device && net_device->destroy)
79
		net_device = NULL;
80

81
	return net_device;
82 83
}

84
static struct netvsc_device *get_inbound_net_device(struct hv_device *device)
85
{
86
	struct netvsc_device *net_device;
87

88
	net_device = hv_get_drvdata(device);
89 90 91 92 93 94

	if (!net_device)
		goto get_in_err;

	if (net_device->destroy &&
		atomic_read(&net_device->num_outstanding_sends) == 0)
95
		net_device = NULL;
96

97
get_in_err:
98
	return net_device;
99 100 101
}


102
static int netvsc_destroy_buf(struct netvsc_device *net_device)
103 104 105
{
	struct nvsp_message *revoke_packet;
	int ret = 0;
106
	struct net_device *ndev = net_device->ndev;
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133

	/*
	 * If we got a section count, it means we received a
	 * SendReceiveBufferComplete msg (ie sent
	 * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
	 * to send a revoke msg here
	 */
	if (net_device->recv_section_cnt) {
		/* Send the revoke receive buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
		revoke_packet->msg.v1_msg.
		revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;

		ret = vmbus_sendpacket(net_device->dev->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       (unsigned long)revoke_packet,
				       VM_PKT_DATA_INBAND, 0);
		/*
		 * If we failed here, we might as well return and
		 * have a leak rather than continue and a bugchk
		 */
		if (ret != 0) {
134
			netdev_err(ndev, "unable to send "
135
				"revoke receive buffer to netvsp\n");
136
			return ret;
137 138 139 140 141 142 143 144 145 146 147 148
		}
	}

	/* Teardown the gpadl on the vsp end */
	if (net_device->recv_buf_gpadl_handle) {
		ret = vmbus_teardown_gpadl(net_device->dev->channel,
			   net_device->recv_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
149
			netdev_err(ndev,
150
				   "unable to teardown receive buffer's gpadl\n");
151
			return ret;
152 153 154 155 156 157
		}
		net_device->recv_buf_gpadl_handle = 0;
	}

	if (net_device->recv_buf) {
		/* Free up the receive buffer */
158
		vfree(net_device->recv_buf);
159 160 161 162 163 164 165 166 167
		net_device->recv_buf = NULL;
	}

	if (net_device->recv_section) {
		net_device->recv_section_cnt = 0;
		kfree(net_device->recv_section);
		net_device->recv_section = NULL;
	}

168 169
	/* Deal with the send buffer we may have setup.
	 * If we got a  send section size, it means we received a
170 171
	 * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
	 * NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need
172 173 174 175 176 177 178 179 180
	 * to send a revoke msg here
	 */
	if (net_device->send_section_size) {
		/* Send the revoke receive buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
181 182
		revoke_packet->msg.v1_msg.revoke_send_buf.id =
			NETVSC_SEND_BUFFER_ID;
183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210

		ret = vmbus_sendpacket(net_device->dev->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       (unsigned long)revoke_packet,
				       VM_PKT_DATA_INBAND, 0);
		/* If we failed here, we might as well return and
		 * have a leak rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev, "unable to send "
				   "revoke send buffer to netvsp\n");
			return ret;
		}
	}
	/* Teardown the gpadl on the vsp end */
	if (net_device->send_buf_gpadl_handle) {
		ret = vmbus_teardown_gpadl(net_device->dev->channel,
					   net_device->send_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev,
				   "unable to teardown send buffer's gpadl\n");
			return ret;
		}
211
		net_device->send_buf_gpadl_handle = 0;
212 213
	}
	if (net_device->send_buf) {
214
		/* Free up the send buffer */
215
		vfree(net_device->send_buf);
216 217 218 219
		net_device->send_buf = NULL;
	}
	kfree(net_device->send_section_map);

220 221 222
	return ret;
}

223
static int netvsc_init_buf(struct hv_device *device)
224
{
225
	int ret = 0;
226
	unsigned long t;
227 228
	struct netvsc_device *net_device;
	struct nvsp_message *init_packet;
229
	struct net_device *ndev;
230

231
	net_device = get_outbound_net_device(device);
232
	if (!net_device)
233
		return -ENODEV;
234
	ndev = net_device->ndev;
235

236
	net_device->recv_buf = vzalloc(net_device->recv_buf_size);
237
	if (!net_device->recv_buf) {
238
		netdev_err(ndev, "unable to allocate receive "
239
			"buffer of size %d\n", net_device->recv_buf_size);
240
		ret = -ENOMEM;
241
		goto cleanup;
242 243
	}

244 245 246 247 248
	/*
	 * Establish the gpadl handle for this buffer on this
	 * channel.  Note: This call uses the vmbus connection rather
	 * than the channel to establish the gpadl handle.
	 */
249 250 251
	ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
				    net_device->recv_buf_size,
				    &net_device->recv_buf_gpadl_handle);
252
	if (ret != 0) {
253
		netdev_err(ndev,
254
			"unable to establish receive buffer's gpadl\n");
255
		goto cleanup;
256 257 258
	}


259
	/* Notify the NetVsp of the gpadl handle */
260
	init_packet = &net_device->channel_init_pkt;
261

262
	memset(init_packet, 0, sizeof(struct nvsp_message));
263

264 265 266 267 268
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
	init_packet->msg.v1_msg.send_recv_buf.
		gpadl_handle = net_device->recv_buf_gpadl_handle;
	init_packet->msg.v1_msg.
		send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
269

270
	/* Send the gpadl notification request */
271
	ret = vmbus_sendpacket(device->channel, init_packet,
272
			       sizeof(struct nvsp_message),
273
			       (unsigned long)init_packet,
274
			       VM_PKT_DATA_INBAND,
275
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
276
	if (ret != 0) {
277
		netdev_err(ndev,
278
			"unable to send receive buffer's gpadl to netvsp\n");
279
		goto cleanup;
280 281
	}

282
	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
283
	BUG_ON(t == 0);
284

285

286
	/* Check the response */
287 288
	if (init_packet->msg.v1_msg.
	    send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
289
		netdev_err(ndev, "Unable to complete receive buffer "
290
			   "initialization with NetVsp - status %d\n",
291 292
			   init_packet->msg.v1_msg.
			   send_recv_buf_complete.status);
293
		ret = -EINVAL;
294
		goto cleanup;
295 296
	}

297
	/* Parse the response */
298

299 300
	net_device->recv_section_cnt = init_packet->msg.
		v1_msg.send_recv_buf_complete.num_sections;
301

302 303 304 305 306
	net_device->recv_section = kmemdup(
		init_packet->msg.v1_msg.send_recv_buf_complete.sections,
		net_device->recv_section_cnt *
		sizeof(struct nvsp_1_receive_buffer_section),
		GFP_KERNEL);
307
	if (net_device->recv_section == NULL) {
308
		ret = -EINVAL;
309
		goto cleanup;
310 311
	}

312 313 314 315
	/*
	 * For 1st release, there should only be 1 section that represents the
	 * entire receive buffer
	 */
316 317
	if (net_device->recv_section_cnt != 1 ||
	    net_device->recv_section->offset != 0) {
318
		ret = -EINVAL;
319
		goto cleanup;
320 321
	}

322 323
	/* Now setup the send buffer.
	 */
324
	net_device->send_buf = vzalloc(net_device->send_buf_size);
325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348
	if (!net_device->send_buf) {
		netdev_err(ndev, "unable to allocate send "
			   "buffer of size %d\n", net_device->send_buf_size);
		ret = -ENOMEM;
		goto cleanup;
	}

	/* Establish the gpadl handle for this buffer on this
	 * channel.  Note: This call uses the vmbus connection rather
	 * than the channel to establish the gpadl handle.
	 */
	ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
				    net_device->send_buf_size,
				    &net_device->send_buf_gpadl_handle);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to establish send buffer's gpadl\n");
		goto cleanup;
	}

	/* Notify the NetVsp of the gpadl handle */
	init_packet = &net_device->channel_init_pkt;
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
349
	init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
350
		net_device->send_buf_gpadl_handle;
351
	init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373

	/* Send the gpadl notification request */
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to send send buffer's gpadl to netvsp\n");
		goto cleanup;
	}

	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
	BUG_ON(t == 0);

	/* Check the response */
	if (init_packet->msg.v1_msg.
	    send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
		netdev_err(ndev, "Unable to complete send buffer "
			   "initialization with NetVsp - status %d\n",
			   init_packet->msg.v1_msg.
374
			   send_send_buf_complete.status);
375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
		ret = -EINVAL;
		goto cleanup;
	}

	/* Parse the response */
	net_device->send_section_size = init_packet->msg.
				v1_msg.send_send_buf_complete.section_size;

	/* Section count is simply the size divided by the section size.
	 */
	net_device->send_section_cnt =
		net_device->send_buf_size/net_device->send_section_size;

	dev_info(&device->device, "Send section size: %d, Section count:%d\n",
		 net_device->send_section_size, net_device->send_section_cnt);

	/* Setup state for managing the send buffer. */
	net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
					     BITS_PER_LONG);

	net_device->send_section_map =
		kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
397 398
	if (net_device->send_section_map == NULL) {
		ret = -ENOMEM;
399
		goto cleanup;
400
	}
401

402
	goto exit;
403

404
cleanup:
405
	netvsc_destroy_buf(net_device);
406

407
exit:
408 409 410 411
	return ret;
}


412 413 414 415 416
/*
 * negotiate_nvsp_ver - offer exactly one NVSP protocol version to the host
 * @device:      vmbus device whose channel carries the request
 * @net_device:  netvsc state; supplies the completion and the MTU
 * @init_packet: scratch message, overwritten by this function
 * @nvsp_ver:    version offered as both minimum and maximum
 *
 * For versions other than NVSP_PROTOCOL_VERSION_1 an NDIS configuration
 * message (MTU, 802.1q capability) is sent after a successful handshake.
 *
 * Returns 0 on success, -ETIMEDOUT if the host does not answer within
 * 5 seconds, -EINVAL if the host rejects the version, or the error from
 * vmbus_sendpacket().
 */
static int negotiate_nvsp_ver(struct hv_device *device,
			      struct netvsc_device *net_device,
			      struct nvsp_message *init_packet,
			      u32 nvsp_ver)
{
	unsigned long time_left;
	int err;

	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
	init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
	init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;

	/* Send the init request and ask for a completion */
	err = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (err)
		return err;

	time_left = wait_for_completion_timeout(&net_device->channel_init_wait,
						5*HZ);
	if (!time_left)
		return -ETIMEDOUT;

	if (init_packet->msg.init_msg.init_complete.status != NVSP_STAT_SUCCESS)
		return -EINVAL;

	/* Version 1 has no NDIS config step */
	if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
		return 0;

	/* NVSPv2 only: Send NDIS config */
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
	init_packet->msg.v2_msg.send_ndis_config.mtu =
		net_device->ndev->mtu + ETH_HLEN;
	init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;

	return vmbus_sendpacket(device->channel, init_packet,
				sizeof(struct nvsp_message),
				(unsigned long)init_packet,
				VM_PKT_DATA_INBAND, 0);
}

static int netvsc_connect_vsp(struct hv_device *device)
{
	int ret;
	struct netvsc_device *net_device;
	struct nvsp_message *init_packet;
	int ndis_version;
	struct net_device *ndev;
470 471 472
	u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
		NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 };
	int i, num_ver = 4; /* number of different NVSP versions */
473 474 475 476 477 478 479 480 481

	net_device = get_outbound_net_device(device);
	if (!net_device)
		return -ENODEV;
	ndev = net_device->ndev;

	init_packet = &net_device->channel_init_pkt;

	/* Negotiate the latest NVSP protocol supported */
482 483 484 485 486 487 488 489
	for (i = num_ver - 1; i >= 0; i--)
		if (negotiate_nvsp_ver(device, net_device, init_packet,
				       ver_list[i])  == 0) {
			net_device->nvsp_version = ver_list[i];
			break;
		}

	if (i < 0) {
490
		ret = -EPROTO;
491
		goto cleanup;
492
	}
493 494 495

	pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);

496
	/* Send the ndis version */
497
	memset(init_packet, 0, sizeof(struct nvsp_message));
498

499
	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
500
		ndis_version = 0x00060001;
501 502
	else
		ndis_version = 0x0006001e;
503

504 505 506
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
	init_packet->msg.v1_msg.
		send_ndis_ver.ndis_major_ver =
507
				(ndis_version & 0xFFFF0000) >> 16;
508 509
	init_packet->msg.v1_msg.
		send_ndis_ver.ndis_minor_ver =
510
				ndis_version & 0xFFFF;
511

512
	/* Send the init request */
513
	ret = vmbus_sendpacket(device->channel, init_packet,
514 515 516
				sizeof(struct nvsp_message),
				(unsigned long)init_packet,
				VM_PKT_DATA_INBAND, 0);
517
	if (ret != 0)
518
		goto cleanup;
519 520

	/* Post the big receive buffer to NetVSP */
521 522 523 524
	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
		net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
	else
		net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
525
	net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
526

527
	ret = netvsc_init_buf(device);
528

529
cleanup:
530 531 532
	return ret;
}

533
/*
 * netvsc_disconnect_vsp - undo netvsc_connect_vsp()
 * @net_device: device to disconnect
 *
 * Only the buffers need tearing down; the result of netvsc_destroy_buf()
 * is not propagated.
 */
static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
{
	netvsc_destroy_buf(net_device);
}

538
/*
539
 * netvsc_device_remove - Callback when the root bus device is removed
540
 */
541
int netvsc_device_remove(struct hv_device *device)
542
{
543
	struct netvsc_device *net_device;
544
	unsigned long flags;
545

546
	net_device = hv_get_drvdata(device);
547

548
	netvsc_disconnect_vsp(net_device);
549

550
	/*
551 552 553 554 555
	 * Since we have already drained, we don't need to busy wait
	 * as was done in final_release_stor_device()
	 * Note that we cannot set the ext pointer to NULL until
	 * we have drained - to drain the outgoing packets, we need to
	 * allow incoming packets.
556
	 */
557 558

	spin_lock_irqsave(&device->channel->inbound_lock, flags);
559
	hv_set_drvdata(device, NULL);
560
	spin_unlock_irqrestore(&device->channel->inbound_lock, flags);
561

562 563 564 565
	/*
	 * At this point, no one should be accessing net_device
	 * except in here
	 */
566
	dev_notice(&device->device, "net device safe to remove\n");
567

568
	/* Now, we can close the channel safely */
569
	vmbus_close(device->channel);
570

571
	/* Release all resources */
572
	vfree(net_device->sub_cb_buf);
573
	free_netvsc_device(net_device);
574
	return 0;
575 576
}

577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594

#define RING_AVAIL_PERCENT_HIWATER 20
#define RING_AVAIL_PERCENT_LOWATER 10

/*
 * hv_ringbuf_avail_percent - share of the ring that can still be written
 * @ring_info: ring buffer to inspect
 *
 * Returns the writable byte count as a percentage of the ring's data size,
 * i.e. a value from 0 to 100.
 */
static inline u32 hv_ringbuf_avail_percent(
		struct hv_ring_buffer_info *ring_info)
{
	u32 readable;
	u32 writable;
	u32 scaled;

	hv_get_ringbuffer_availbytes(ring_info, &readable, &writable);

	/* only the writable side matters here; 'readable' is unused */
	scaled = writable * 100;

	return scaled / ring_info->ring_datasize;
}

595 596 597 598 599 600
/*
 * netvsc_free_send_slot - give a send-buffer section back to the bitmap
 * @net_device: device owning the send section map
 * @index:      section number to release
 *
 * The bit is toggled rather than cleared, so the caller must own the
 * section when calling this.
 */
static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
					 u32 index)
{
	sync_change_bit(index, net_device->send_section_map);
}

601 602
static void netvsc_send_completion(struct netvsc_device *net_device,
				   struct hv_device *device,
603
				   struct vmpacket_descriptor *packet)
604
{
605 606
	struct nvsp_message *nvsp_packet;
	struct hv_netvsc_packet *nvsc_packet;
607
	struct net_device *ndev;
608
	u32 send_index;
609

610
	ndev = net_device->ndev;
611

612
	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
613
			(packet->offset8 << 3));
614

615 616 617 618
	if ((nvsp_packet->hdr.msg_type == NVSP_MSG_TYPE_INIT_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
619 620 621
	     NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG5_TYPE_SUBCHANNEL)) {
622
		/* Copy the response back */
623
		memcpy(&net_device->channel_init_pkt, nvsp_packet,
624
		       sizeof(struct nvsp_message));
625
		complete(&net_device->channel_init_wait);
626 627
	} else if (nvsp_packet->hdr.msg_type ==
		   NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
628
		int num_outstanding_sends;
629 630 631
		u16 q_idx = 0;
		struct vmbus_channel *channel = device->channel;
		int queue_sends;
632

633
		/* Get the send context */
634
		nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
635
			packet->trans_id;
636

637
		/* Notify the layer above us */
638
		if (nvsc_packet) {
639 640 641
			send_index = nvsc_packet->send_buf_index;
			if (send_index != NETVSC_INVALID_INDEX)
				netvsc_free_send_slot(net_device, send_index);
642 643
			q_idx = nvsc_packet->q_idx;
			channel = nvsc_packet->channel;
644 645
			nvsc_packet->send_completion(nvsc_packet->
						     send_completion_ctx);
646
		}
647

648 649
		num_outstanding_sends =
			atomic_dec_return(&net_device->num_outstanding_sends);
650 651
		queue_sends = atomic_dec_return(&net_device->
						queue_sends[q_idx]);
652

653 654 655
		if (net_device->destroy && num_outstanding_sends == 0)
			wake_up(&net_device->wait_drain);

656 657 658 659 660 661
		if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
		    !net_device->start_remove &&
		    (hv_ringbuf_avail_percent(&channel->outbound) >
		     RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
				netif_tx_wake_queue(netdev_get_tx_queue(
						    ndev, q_idx));
662
	} else {
663
		netdev_err(ndev, "Unknown send completion packet type- "
664
			   "%d received!!\n", nvsp_packet->hdr.msg_type);
665 666 667 668
	}

}

669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693
/*
 * netvsc_get_next_send_section - claim a free section of the send buffer
 * @net_device: device owning the send section map
 *
 * Scans the bitmap one word at a time, takes the first zero bit of the
 * first word that is not full and tries to set it atomically.  If another
 * context set that bit first, the scan moves on to the next word.
 *
 * Returns the claimed section number, or NETVSC_INVALID_INDEX if no
 * section below send_section_cnt could be claimed.
 */
static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
{
	unsigned long *map = (unsigned long *)net_device->send_section_map;
	u32 words = net_device->map_words;
	u32 sections = net_device->send_section_cnt;
	unsigned long bit;
	unsigned long slot;
	u32 w;

	for (w = 0; w < words; w++) {
		/* every bit set: nothing free in this word */
		if (map[w] == ~0UL)
			continue;

		bit = ffz(map[w]);
		if (sync_test_and_set_bit(bit, &map[w]))
			continue;

		slot = bit + (unsigned long)w * BITS_PER_LONG;
		if (slot >= sections)
			return NETVSC_INVALID_INDEX;

		return slot;
	}

	return NETVSC_INVALID_INDEX;
}

L
Lad, Prabhakar 已提交
694 695
/*
 * netvsc_copy_to_send_buf - copy a packet's page buffers into a section
 * @net_device:    device owning the send buffer
 * @section_index: section of the send buffer to write into
 * @pend_size:     bytes already occupied at the start of that section
 * @packet:        packet whose page buffers are copied
 *
 * For a data packet with xmit_more set whose length is not a multiple of
 * pkt_align, the packet is padded with zeroes up to the next multiple and
 * both rndis_msg->msg_len and total_data_buflen are increased accordingly.
 *
 * Returns the number of bytes written, padding included.
 */
static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
				   unsigned int section_index,
				   u32 pend_size,
				   struct hv_netvsc_packet *packet)
{
	char *base = net_device->send_buf;
	char *cursor = base + (section_index * net_device->send_section_size)
		       + pend_size;
	u32 tail = packet->total_data_buflen % net_device->pkt_align;
	u32 pad = 0;
	u32 written = 0;
	int idx;

	/* Add padding */
	if (packet->is_data_pkt && packet->xmit_more && tail) {
		pad = net_device->pkt_align - tail;
		packet->rndis_msg->msg_len += pad;
		packet->total_data_buflen += pad;
	}

	for (idx = 0; idx < packet->page_buf_cnt; idx++) {
		char *page = phys_to_virt(packet->page_buf[idx].pfn <<
					  PAGE_SHIFT);
		u32 chunk = packet->page_buf[idx].len;

		memcpy(cursor, page + packet->page_buf[idx].offset, chunk);
		cursor += chunk;
		written += chunk;
	}

	if (pad) {
		memset(cursor, 0, pad);
		written += pad;
	}

	return written;
}

732 733 734
static inline int netvsc_send_pkt(
	struct hv_netvsc_packet *packet,
	struct netvsc_device *net_device)
735
{
736 737
	struct nvsp_message nvmsg;
	struct vmbus_channel *out_channel = packet->channel;
K
KY Srinivasan 已提交
738
	u16 q_idx = packet->q_idx;
739 740 741
	struct net_device *ndev = net_device->ndev;
	u64 req_id;
	int ret;
742

743
	nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
744
	if (packet->is_data_pkt) {
745
		/* 0 is RMC_DATA; */
746
		nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 0;
747 748
	} else {
		/* 1 is RMC_CONTROL; */
749
		nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 1;
750
	}
751

752 753 754 755 756 757 758
	nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
		packet->send_buf_index;
	if (packet->send_buf_index == NETVSC_INVALID_INDEX)
		nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
	else
		nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size =
			packet->total_data_buflen;
759

760
	if (packet->send_completion)
761
		req_id = (ulong)packet;
762 763 764
	else
		req_id = 0;

765 766 767
	if (out_channel->rescind)
		return -ENODEV;

768
	if (packet->page_buf_cnt) {
769
		ret = vmbus_sendpacket_pagebuffer(out_channel,
770 771
						  packet->page_buf,
						  packet->page_buf_cnt,
772
						  &nvmsg,
773
						  sizeof(struct nvsp_message),
774
						  req_id);
775
	} else {
776 777
		ret = vmbus_sendpacket(
				out_channel, &nvmsg,
778
				sizeof(struct nvsp_message),
779
				req_id,
780 781
				VM_PKT_DATA_INBAND,
				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
782 783
	}

784 785
	if (ret == 0) {
		atomic_inc(&net_device->num_outstanding_sends);
K
KY Srinivasan 已提交
786
		atomic_inc(&net_device->queue_sends[q_idx]);
787 788

		if (hv_ringbuf_avail_percent(&out_channel->outbound) <
789
			RING_AVAIL_PERCENT_LOWATER) {
790
			netif_tx_stop_queue(netdev_get_tx_queue(
K
KY Srinivasan 已提交
791
					    ndev, q_idx));
792

793
			if (atomic_read(&net_device->
K
KY Srinivasan 已提交
794
				queue_sends[q_idx]) < 1)
795
				netif_tx_wake_queue(netdev_get_tx_queue(
K
KY Srinivasan 已提交
796
						    ndev, q_idx));
797
		}
798
	} else if (ret == -EAGAIN) {
799
		netif_tx_stop_queue(netdev_get_tx_queue(
K
KY Srinivasan 已提交
800 801
				    ndev, q_idx));
		if (atomic_read(&net_device->queue_sends[q_idx]) < 1) {
802
			netif_tx_wake_queue(netdev_get_tx_queue(
K
KY Srinivasan 已提交
803
					    ndev, q_idx));
804 805
			ret = -ENOSPC;
		}
806
	} else {
807
		netdev_err(ndev, "Unable to send packet %p ret %d\n",
808
			   packet, ret);
809
	}
810

811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868
	return ret;
}

/*
 * netvsc_send - queue a packet for transmission, batching where possible
 * @device: vmbus device to send on
 * @packet: packet to send; ->q_idx selects the channel
 *
 * Small data packets are copied into a section of the send buffer.  A
 * packet may be appended to the section of the packet already pending on
 * the same queue, or get a fresh section, in which case the pending packet
 * is flushed first.  A copied packet with xmit_more set becomes the new
 * pending packet instead of being sent immediately.  Packets that are not
 * copied flush the pending packet and are sent with their page buffers.
 *
 * Returns 0 on success, -ENODEV if the device is gone, or the error from
 * sending @packet itself.  A failure to send the flushed pending packet is
 * handled internally and not reported.
 */
int netvsc_send(struct hv_device *device,
		struct hv_netvsc_packet *packet)
{
	struct netvsc_device *nvdev;
	struct vmbus_channel *chan;
	struct multi_send_data *batch;
	struct hv_netvsc_packet *flush_pkt = NULL;
	struct hv_netvsc_packet *send_now = NULL;
	struct sk_buff *copied_skb = NULL;
	unsigned int slot = NETVSC_INVALID_INDEX;
	unsigned long irq_flags;
	u32 new_len = packet->total_data_buflen;
	u32 pending_len = 0;
	u16 q_idx = packet->q_idx;
	int flush_err = 0;
	int err = 0;

	nvdev = get_outbound_net_device(device);
	if (!nvdev)
		return -ENODEV;

	/* Fall back to the primary channel if the queue has none */
	chan = nvdev->chn_table[q_idx];
	if (!chan) {
		chan = device->channel;
		q_idx = 0;
		packet->q_idx = 0;
	}
	packet->channel = chan;
	packet->send_buf_index = NETVSC_INVALID_INDEX;

	batch = &nvdev->msd[q_idx];

	/* batch packets in send buffer if possible */
	spin_lock_irqsave(&batch->lock, irq_flags);

	if (batch->pkt)
		pending_len = batch->pkt->total_data_buflen;

	if (packet->is_data_pkt) {
		if (pending_len > 0 &&
		    batch->count < nvdev->max_pkt &&
		    pending_len + new_len + nvdev->pkt_align <
		    nvdev->send_section_size) {
			/* append to the section of the pending packet */
			slot = batch->pkt->send_buf_index;
		} else if (new_len + nvdev->pkt_align <
			   nvdev->send_section_size) {
			/* start a new section; flush what was pending */
			slot = netvsc_get_next_send_section(nvdev);
			if (slot != NETVSC_INVALID_INDEX) {
				flush_pkt = batch->pkt;
				batch->pkt = NULL;
				batch->count = 0;
				pending_len = 0;
			}
		}
	}

	if (slot == NETVSC_INVALID_INDEX) {
		/* No copy: flush the pending packet, then send this one */
		flush_pkt = batch->pkt;
		batch->pkt = NULL;
		batch->count = 0;
		send_now = packet;
	} else {
		netvsc_copy_to_send_buf(nvdev, slot, pending_len, packet);

		/* the data now lives in the send buffer */
		if (!packet->part_of_skb) {
			copied_skb = (struct sk_buff *)(unsigned long)
				     packet->send_completion_tid;
			packet->send_completion_tid = 0;
		}

		packet->page_buf_cnt = 0;
		packet->send_buf_index = slot;
		packet->total_data_buflen += pending_len;

		/* this packet now covers the previously pending one */
		if (batch->pkt)
			netvsc_xmit_completion(batch->pkt);

		if (packet->xmit_more) {
			batch->pkt = packet;
			batch->count++;
		} else {
			send_now = packet;
			batch->pkt = NULL;
			batch->count = 0;
		}
	}

	spin_unlock_irqrestore(&batch->lock, irq_flags);

	if (flush_pkt) {
		flush_err = netvsc_send_pkt(flush_pkt, nvdev);
		if (flush_err != 0) {
			netvsc_free_send_slot(nvdev,
					      flush_pkt->send_buf_index);
			netvsc_xmit_completion(flush_pkt);
		}
	}

	if (send_now)
		err = netvsc_send_pkt(send_now, nvdev);

	if (err != 0) {
		if (slot != NETVSC_INVALID_INDEX)
			netvsc_free_send_slot(nvdev, slot);
	} else if (copied_skb) {
		dev_kfree_skb_any(copied_skb);
	}

	return err;
}

924
static void netvsc_send_recv_completion(struct hv_device *device,
925
					struct vmbus_channel *channel,
926
					struct netvsc_device *net_device,
927
					u64 transaction_id, u32 status)
928 929 930 931
{
	struct nvsp_message recvcompMessage;
	int retries = 0;
	int ret;
932 933 934
	struct net_device *ndev;

	ndev = net_device->ndev;
935 936 937 938

	recvcompMessage.hdr.msg_type =
				NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;

939
	recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;
940 941 942

retry_send_cmplt:
	/* Send the completion */
943
	ret = vmbus_sendpacket(channel, &recvcompMessage,
944 945 946 947 948
			       sizeof(struct nvsp_message), transaction_id,
			       VM_PKT_COMP, 0);
	if (ret == 0) {
		/* success */
		/* no-op */
949
	} else if (ret == -EAGAIN) {
950 951
		/* no more room...wait a bit and attempt to retry 3 times */
		retries++;
952
		netdev_err(ndev, "unable to send receive completion pkt"
953
			" (tid %llx)...retrying %d\n", transaction_id, retries);
954 955 956 957 958

		if (retries < 4) {
			udelay(100);
			goto retry_send_cmplt;
		} else {
959
			netdev_err(ndev, "unable to send receive "
960
				"completion pkt (tid %llx)...give up retrying\n",
961 962 963
				transaction_id);
		}
	} else {
964
		netdev_err(ndev, "unable to send receive "
965
			"completion pkt - %llx\n", transaction_id);
966 967 968
	}
}

969
static void netvsc_receive(struct netvsc_device *net_device,
970
			struct vmbus_channel *channel,
971 972
			struct hv_device *device,
			struct vmpacket_descriptor *packet)
973
{
974 975
	struct vmtransfer_page_packet_header *vmxferpage_packet;
	struct nvsp_message *nvsp_packet;
976 977 978
	struct hv_netvsc_packet nv_pkt;
	struct hv_netvsc_packet *netvsc_packet = &nv_pkt;
	u32 status = NVSP_STAT_SUCCESS;
979 980
	int i;
	int count = 0;
981
	struct net_device *ndev;
982

983
	ndev = net_device->ndev;
984

985 986 987 988
	/*
	 * All inbound packets other than send completion should be xfer page
	 * packet
	 */
989
	if (packet->type != VM_PKT_DATA_USING_XFER_PAGES) {
990
		netdev_err(ndev, "Unknown packet type received - %d\n",
991
			   packet->type);
992 993 994
		return;
	}

995
	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
996
			(packet->offset8 << 3));
997

998
	/* Make sure this is a valid nvsp packet */
999 1000
	if (nvsp_packet->hdr.msg_type !=
	    NVSP_MSG1_TYPE_SEND_RNDIS_PKT) {
1001
		netdev_err(ndev, "Unknown nvsp packet type received-"
1002
			" %d\n", nvsp_packet->hdr.msg_type);
1003 1004 1005
		return;
	}

1006
	vmxferpage_packet = (struct vmtransfer_page_packet_header *)packet;
1007

1008
	if (vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID) {
1009
		netdev_err(ndev, "Invalid xfer page set id - "
1010
			   "expecting %x got %x\n", NETVSC_RECEIVE_BUFFER_ID,
1011
			   vmxferpage_packet->xfer_pageset_id);
1012 1013 1014
		return;
	}

1015 1016
	count = vmxferpage_packet->range_cnt;
	netvsc_packet->channel = channel;
1017

1018
	/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
1019
	for (i = 0; i < count; i++) {
1020
		/* Initialize the netvsc packet */
1021
		netvsc_packet->status = NVSP_STAT_SUCCESS;
1022 1023
		netvsc_packet->data = (void *)((unsigned long)net_device->
			recv_buf + vmxferpage_packet->ranges[i].byte_offset);
1024
		netvsc_packet->total_data_buflen =
1025
					vmxferpage_packet->ranges[i].byte_count;
1026

1027
		/* Pass it to the upper layer */
1028
		rndis_filter_receive(device, netvsc_packet);
1029

1030 1031
		if (netvsc_packet->status != NVSP_STAT_SUCCESS)
			status = NVSP_STAT_FAIL;
1032 1033
	}

1034 1035
	netvsc_send_recv_completion(device, channel, net_device,
				    vmxferpage_packet->d.trans_id, status);
1036 1037
}

1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072

static void netvsc_send_table(struct hv_device *hdev,
			      struct vmpacket_descriptor *vmpkt)
{
	struct netvsc_device *nvscdev;
	struct net_device *ndev;
	struct nvsp_message *nvmsg;
	int i;
	u32 count, *tab;

	nvscdev = get_outbound_net_device(hdev);
	if (!nvscdev)
		return;
	ndev = nvscdev->ndev;

	nvmsg = (struct nvsp_message *)((unsigned long)vmpkt +
					(vmpkt->offset8 << 3));

	if (nvmsg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE)
		return;

	count = nvmsg->msg.v5_msg.send_table.count;
	if (count != VRSS_SEND_TAB_SIZE) {
		netdev_err(ndev, "Received wrong send-table size:%u\n", count);
		return;
	}

	tab = (u32 *)((unsigned long)&nvmsg->msg.v5_msg.send_table +
		      nvmsg->msg.v5_msg.send_table.offset);

	for (i = 0; i < count; i++)
		nvscdev->send_table[i] = tab[i];
}

void netvsc_channel_cb(void *context)
1073
{
1074
	int ret;
1075 1076
	struct vmbus_channel *channel = (struct vmbus_channel *)context;
	struct hv_device *device;
1077 1078 1079
	struct netvsc_device *net_device;
	u32 bytes_recvd;
	u64 request_id;
1080
	struct vmpacket_descriptor *desc;
1081 1082
	unsigned char *buffer;
	int bufferlen = NETVSC_PACKET_SIZE;
1083
	struct net_device *ndev;
1084

1085 1086 1087 1088 1089
	if (channel->primary_channel != NULL)
		device = channel->primary_channel->device_obj;
	else
		device = channel->device_obj;

1090
	net_device = get_inbound_net_device(device);
1091
	if (!net_device)
1092
		return;
1093
	ndev = net_device->ndev;
1094
	buffer = get_per_channel_state(channel);
1095

1096
	do {
1097
		ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
1098
					   &bytes_recvd, &request_id);
1099
		if (ret == 0) {
1100
			if (bytes_recvd > 0) {
1101
				desc = (struct vmpacket_descriptor *)buffer;
1102 1103
				switch (desc->type) {
				case VM_PKT_COMP:
1104 1105
					netvsc_send_completion(net_device,
								device, desc);
1106 1107
					break;

1108
				case VM_PKT_DATA_USING_XFER_PAGES:
1109 1110 1111 1112 1113 1114
					netvsc_receive(net_device, channel,
						       device, desc);
					break;

				case VM_PKT_DATA_INBAND:
					netvsc_send_table(device, desc);
1115 1116 1117
					break;

				default:
1118
					netdev_err(ndev,
1119 1120
						   "unhandled packet type %d, "
						   "tid %llx len %d\n",
1121
						   desc->type, request_id,
1122
						   bytes_recvd);
1123
					break;
1124 1125
				}

1126
			} else {
1127 1128 1129
				/*
				 * We are done for this pass.
				 */
1130 1131
				break;
			}
1132

1133
		} else if (ret == -ENOBUFS) {
1134 1135
			if (bufferlen > NETVSC_PACKET_SIZE)
				kfree(buffer);
1136
			/* Handle large packet */
1137
			buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
1138
			if (buffer == NULL) {
1139
				/* Try again next time around */
1140
				netdev_err(ndev,
1141
					   "unable to allocate buffer of size "
1142
					   "(%d)!!\n", bytes_recvd);
1143 1144 1145
				break;
			}

1146
			bufferlen = bytes_recvd;
1147 1148 1149
		}
	} while (1);

1150 1151
	if (bufferlen > NETVSC_PACKET_SIZE)
		kfree(buffer);
1152 1153
	return;
}
1154

1155 1156 1157 1158
/*
 * netvsc_device_add - Callback when the device belonging to this
 * driver is added
 */
1159
int netvsc_device_add(struct hv_device *device, void *additional_info)
1160 1161
{
	int ret = 0;
1162 1163
	int ring_size =
	((struct netvsc_device_info *)additional_info)->ring_size;
1164
	struct netvsc_device *net_device;
1165
	struct net_device *ndev;
1166 1167

	net_device = alloc_net_device(device);
1168 1169
	if (!net_device)
		return -ENOMEM;
1170

1171 1172
	net_device->ring_size = ring_size;

1173 1174 1175 1176 1177 1178 1179 1180 1181
	/*
	 * Coming into this function, struct net_device * is
	 * registered as the driver private data.
	 * In alloc_net_device(), we register struct netvsc_device *
	 * as the driver private data and stash away struct net_device *
	 * in struct netvsc_device *.
	 */
	ndev = net_device->ndev;

1182
	/* Initialize the NetVSC channel extension */
1183
	init_completion(&net_device->channel_init_wait);
1184

1185 1186
	set_per_channel_state(device->channel, net_device->cb_buffer);

1187
	/* Open the channel */
1188 1189
	ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
			 ring_size * PAGE_SIZE, NULL, 0,
1190
			 netvsc_channel_cb, device->channel);
1191 1192

	if (ret != 0) {
1193
		netdev_err(ndev, "unable to open channel: %d\n", ret);
1194 1195 1196 1197
		goto cleanup;
	}

	/* Channel is opened */
1198
	pr_info("hv_netvsc channel opened successfully\n");
1199

1200 1201
	net_device->chn_table[0] = device->channel;

1202 1203 1204
	/* Connect with the NetVsp */
	ret = netvsc_connect_vsp(device);
	if (ret != 0) {
1205
		netdev_err(ndev,
1206
			"unable to connect to NetVSP - %d\n", ret);
1207 1208 1209 1210 1211 1212 1213 1214 1215 1216
		goto close;
	}

	return ret;

close:
	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

cleanup:
1217
	free_netvsc_device(net_device);
1218 1219 1220

	return ret;
}