netvsc.c 32.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, see <http://www.gnu.org/licenses/>.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
20 21
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

22
#include <linux/kernel.h>
23 24
#include <linux/sched.h>
#include <linux/wait.h>
25
#include <linux/mm.h>
26
#include <linux/delay.h>
27
#include <linux/io.h>
28
#include <linux/slab.h>
29
#include <linux/netdevice.h>
30
#include <linux/if_ether.h>
31
#include <asm/sync_bitops.h>
32

33
#include "hyperv_net.h"
34 35


36
static struct netvsc_device *alloc_net_device(struct hv_device *device)
37
{
38
	struct netvsc_device *net_device;
39
	struct net_device *ndev = hv_get_drvdata(device);
40
	int i;
41

42 43
	net_device = kzalloc(sizeof(struct netvsc_device), GFP_KERNEL);
	if (!net_device)
44 45
		return NULL;

46 47 48 49 50 51
	net_device->cb_buffer = kzalloc(NETVSC_PACKET_SIZE, GFP_KERNEL);
	if (!net_device->cb_buffer) {
		kfree(net_device);
		return NULL;
	}

52
	init_waitqueue_head(&net_device->wait_drain);
53
	net_device->start_remove = false;
54
	net_device->destroy = false;
55
	net_device->dev = device;
56
	net_device->ndev = ndev;
57 58 59 60 61
	net_device->max_pkt = RNDIS_MAX_PKT_DEFAULT;
	net_device->pkt_align = RNDIS_PKT_ALIGN_DEFAULT;

	for (i = 0; i < num_online_cpus(); i++)
		spin_lock_init(&net_device->msd[i].lock);
62

63
	hv_set_drvdata(device, net_device);
64
	return net_device;
65 66
}

67 68 69 70 71 72
/*
 * free_netvsc_device - release a netvsc_device and its callback buffer
 *
 * The callback buffer is freed first because it hangs off @nvdev.
 */
static void free_netvsc_device(struct netvsc_device *nvdev)
{
	void *cb = nvdev->cb_buffer;

	kfree(cb);
	kfree(nvdev);
}

73
static struct netvsc_device *get_outbound_net_device(struct hv_device *device)
74
{
75
	struct netvsc_device *net_device;
76

77
	net_device = hv_get_drvdata(device);
78
	if (net_device && net_device->destroy)
79
		net_device = NULL;
80

81
	return net_device;
82 83
}

84
static struct netvsc_device *get_inbound_net_device(struct hv_device *device)
85
{
86
	struct netvsc_device *net_device;
87

88
	net_device = hv_get_drvdata(device);
89 90 91 92 93 94

	if (!net_device)
		goto get_in_err;

	if (net_device->destroy &&
		atomic_read(&net_device->num_outstanding_sends) == 0)
95
		net_device = NULL;
96

97
get_in_err:
98
	return net_device;
99 100 101
}


102
static int netvsc_destroy_buf(struct netvsc_device *net_device)
103 104 105
{
	struct nvsp_message *revoke_packet;
	int ret = 0;
106
	struct net_device *ndev = net_device->ndev;
107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133

	/*
	 * If we got a section count, it means we received a
	 * SendReceiveBufferComplete msg (ie sent
	 * NvspMessage1TypeSendReceiveBuffer msg) therefore, we need
	 * to send a revoke msg here
	 */
	if (net_device->recv_section_cnt) {
		/* Send the revoke receive buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_RECV_BUF;
		revoke_packet->msg.v1_msg.
		revoke_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;

		ret = vmbus_sendpacket(net_device->dev->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       (unsigned long)revoke_packet,
				       VM_PKT_DATA_INBAND, 0);
		/*
		 * If we failed here, we might as well return and
		 * have a leak rather than continue and a bugchk
		 */
		if (ret != 0) {
134
			netdev_err(ndev, "unable to send "
135
				"revoke receive buffer to netvsp\n");
136
			return ret;
137 138 139 140 141 142 143 144 145 146 147 148
		}
	}

	/* Teardown the gpadl on the vsp end */
	if (net_device->recv_buf_gpadl_handle) {
		ret = vmbus_teardown_gpadl(net_device->dev->channel,
			   net_device->recv_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
149
			netdev_err(ndev,
150
				   "unable to teardown receive buffer's gpadl\n");
151
			return ret;
152 153 154 155 156 157
		}
		net_device->recv_buf_gpadl_handle = 0;
	}

	if (net_device->recv_buf) {
		/* Free up the receive buffer */
158
		vfree(net_device->recv_buf);
159 160 161 162 163 164 165 166 167
		net_device->recv_buf = NULL;
	}

	if (net_device->recv_section) {
		net_device->recv_section_cnt = 0;
		kfree(net_device->recv_section);
		net_device->recv_section = NULL;
	}

168 169
	/* Deal with the send buffer we may have setup.
	 * If we got a  send section size, it means we received a
170 171
	 * NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE msg (ie sent
	 * NVSP_MSG1_TYPE_SEND_SEND_BUF msg) therefore, we need
172 173 174 175 176 177 178 179 180
	 * to send a revoke msg here
	 */
	if (net_device->send_section_size) {
		/* Send the revoke receive buffer */
		revoke_packet = &net_device->revoke_packet;
		memset(revoke_packet, 0, sizeof(struct nvsp_message));

		revoke_packet->hdr.msg_type =
			NVSP_MSG1_TYPE_REVOKE_SEND_BUF;
181 182
		revoke_packet->msg.v1_msg.revoke_send_buf.id =
			NETVSC_SEND_BUFFER_ID;
183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210

		ret = vmbus_sendpacket(net_device->dev->channel,
				       revoke_packet,
				       sizeof(struct nvsp_message),
				       (unsigned long)revoke_packet,
				       VM_PKT_DATA_INBAND, 0);
		/* If we failed here, we might as well return and
		 * have a leak rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev, "unable to send "
				   "revoke send buffer to netvsp\n");
			return ret;
		}
	}
	/* Teardown the gpadl on the vsp end */
	if (net_device->send_buf_gpadl_handle) {
		ret = vmbus_teardown_gpadl(net_device->dev->channel,
					   net_device->send_buf_gpadl_handle);

		/* If we failed here, we might as well return and have a leak
		 * rather than continue and a bugchk
		 */
		if (ret != 0) {
			netdev_err(ndev,
				   "unable to teardown send buffer's gpadl\n");
			return ret;
		}
211
		net_device->send_buf_gpadl_handle = 0;
212 213
	}
	if (net_device->send_buf) {
214
		/* Free up the send buffer */
215
		vfree(net_device->send_buf);
216 217 218 219
		net_device->send_buf = NULL;
	}
	kfree(net_device->send_section_map);

220 221 222
	return ret;
}

223
static int netvsc_init_buf(struct hv_device *device)
224
{
225
	int ret = 0;
226
	unsigned long t;
227 228
	struct netvsc_device *net_device;
	struct nvsp_message *init_packet;
229
	struct net_device *ndev;
230

231
	net_device = get_outbound_net_device(device);
232
	if (!net_device)
233
		return -ENODEV;
234
	ndev = net_device->ndev;
235

236
	net_device->recv_buf = vzalloc(net_device->recv_buf_size);
237
	if (!net_device->recv_buf) {
238
		netdev_err(ndev, "unable to allocate receive "
239
			"buffer of size %d\n", net_device->recv_buf_size);
240
		ret = -ENOMEM;
241
		goto cleanup;
242 243
	}

244 245 246 247 248
	/*
	 * Establish the gpadl handle for this buffer on this
	 * channel.  Note: This call uses the vmbus connection rather
	 * than the channel to establish the gpadl handle.
	 */
249 250 251
	ret = vmbus_establish_gpadl(device->channel, net_device->recv_buf,
				    net_device->recv_buf_size,
				    &net_device->recv_buf_gpadl_handle);
252
	if (ret != 0) {
253
		netdev_err(ndev,
254
			"unable to establish receive buffer's gpadl\n");
255
		goto cleanup;
256 257 258
	}


259
	/* Notify the NetVsp of the gpadl handle */
260
	init_packet = &net_device->channel_init_pkt;
261

262
	memset(init_packet, 0, sizeof(struct nvsp_message));
263

264 265 266 267 268
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_RECV_BUF;
	init_packet->msg.v1_msg.send_recv_buf.
		gpadl_handle = net_device->recv_buf_gpadl_handle;
	init_packet->msg.v1_msg.
		send_recv_buf.id = NETVSC_RECEIVE_BUFFER_ID;
269

270
	/* Send the gpadl notification request */
271
	ret = vmbus_sendpacket(device->channel, init_packet,
272
			       sizeof(struct nvsp_message),
273
			       (unsigned long)init_packet,
274
			       VM_PKT_DATA_INBAND,
275
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
276
	if (ret != 0) {
277
		netdev_err(ndev,
278
			"unable to send receive buffer's gpadl to netvsp\n");
279
		goto cleanup;
280 281
	}

282
	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
283
	BUG_ON(t == 0);
284

285

286
	/* Check the response */
287 288
	if (init_packet->msg.v1_msg.
	    send_recv_buf_complete.status != NVSP_STAT_SUCCESS) {
289
		netdev_err(ndev, "Unable to complete receive buffer "
290
			   "initialization with NetVsp - status %d\n",
291 292
			   init_packet->msg.v1_msg.
			   send_recv_buf_complete.status);
293
		ret = -EINVAL;
294
		goto cleanup;
295 296
	}

297
	/* Parse the response */
298

299 300
	net_device->recv_section_cnt = init_packet->msg.
		v1_msg.send_recv_buf_complete.num_sections;
301

302 303 304 305 306
	net_device->recv_section = kmemdup(
		init_packet->msg.v1_msg.send_recv_buf_complete.sections,
		net_device->recv_section_cnt *
		sizeof(struct nvsp_1_receive_buffer_section),
		GFP_KERNEL);
307
	if (net_device->recv_section == NULL) {
308
		ret = -EINVAL;
309
		goto cleanup;
310 311
	}

312 313 314 315
	/*
	 * For 1st release, there should only be 1 section that represents the
	 * entire receive buffer
	 */
316 317
	if (net_device->recv_section_cnt != 1 ||
	    net_device->recv_section->offset != 0) {
318
		ret = -EINVAL;
319
		goto cleanup;
320 321
	}

322 323
	/* Now setup the send buffer.
	 */
324
	net_device->send_buf = vzalloc(net_device->send_buf_size);
325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348
	if (!net_device->send_buf) {
		netdev_err(ndev, "unable to allocate send "
			   "buffer of size %d\n", net_device->send_buf_size);
		ret = -ENOMEM;
		goto cleanup;
	}

	/* Establish the gpadl handle for this buffer on this
	 * channel.  Note: This call uses the vmbus connection rather
	 * than the channel to establish the gpadl handle.
	 */
	ret = vmbus_establish_gpadl(device->channel, net_device->send_buf,
				    net_device->send_buf_size,
				    &net_device->send_buf_gpadl_handle);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to establish send buffer's gpadl\n");
		goto cleanup;
	}

	/* Notify the NetVsp of the gpadl handle */
	init_packet = &net_device->channel_init_pkt;
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_SEND_BUF;
349
	init_packet->msg.v1_msg.send_send_buf.gpadl_handle =
350
		net_device->send_buf_gpadl_handle;
351
	init_packet->msg.v1_msg.send_send_buf.id = NETVSC_SEND_BUFFER_ID;
352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373

	/* Send the gpadl notification request */
	ret = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (ret != 0) {
		netdev_err(ndev,
			   "unable to send send buffer's gpadl to netvsp\n");
		goto cleanup;
	}

	t = wait_for_completion_timeout(&net_device->channel_init_wait, 5*HZ);
	BUG_ON(t == 0);

	/* Check the response */
	if (init_packet->msg.v1_msg.
	    send_send_buf_complete.status != NVSP_STAT_SUCCESS) {
		netdev_err(ndev, "Unable to complete send buffer "
			   "initialization with NetVsp - status %d\n",
			   init_packet->msg.v1_msg.
374
			   send_send_buf_complete.status);
375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396
		ret = -EINVAL;
		goto cleanup;
	}

	/* Parse the response */
	net_device->send_section_size = init_packet->msg.
				v1_msg.send_send_buf_complete.section_size;

	/* Section count is simply the size divided by the section size.
	 */
	net_device->send_section_cnt =
		net_device->send_buf_size/net_device->send_section_size;

	dev_info(&device->device, "Send section size: %d, Section count:%d\n",
		 net_device->send_section_size, net_device->send_section_cnt);

	/* Setup state for managing the send buffer. */
	net_device->map_words = DIV_ROUND_UP(net_device->send_section_cnt,
					     BITS_PER_LONG);

	net_device->send_section_map =
		kzalloc(net_device->map_words * sizeof(ulong), GFP_KERNEL);
397 398
	if (net_device->send_section_map == NULL) {
		ret = -ENOMEM;
399
		goto cleanup;
400
	}
401

402
	goto exit;
403

404
cleanup:
405
	netvsc_destroy_buf(net_device);
406

407
exit:
408 409 410 411
	return ret;
}


412 413 414 415 416
/*
 * negotiate_nvsp_ver - offer exactly one NVSP protocol version to the host
 * @device:      vmbus device whose channel carries the request
 * @net_device:  netvsc state; its channel_init_wait completion is waited on
 * @init_packet: message buffer, overwritten for every request and read
 *               back for the completion status
 * @nvsp_ver:    version offered as both the minimum and the maximum
 *
 * For every version other than NVSP_PROTOCOL_VERSION_1 a follow-up
 * SEND_NDIS_CONFIG message (MTU plus Ethernet header, 802.1q capability)
 * is sent without requesting a completion.
 *
 * Returns 0 on success, the vmbus_sendpacket() error if a send fails,
 * -ETIMEDOUT if no completion arrives within 5*HZ, or -EINVAL if the
 * completion status is not NVSP_STAT_SUCCESS.
 */
static int negotiate_nvsp_ver(struct hv_device *device,
			      struct netvsc_device *net_device,
			      struct nvsp_message *init_packet,
			      u32 nvsp_ver)
{
	int err;

	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG_TYPE_INIT;
	init_packet->msg.init_msg.init.min_protocol_ver = nvsp_ver;
	init_packet->msg.init_msg.init.max_protocol_ver = nvsp_ver;

	/* Send the init request and ask for a completion. */
	err = vmbus_sendpacket(device->channel, init_packet,
			       sizeof(struct nvsp_message),
			       (unsigned long)init_packet,
			       VM_PKT_DATA_INBAND,
			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
	if (err != 0)
		return err;

	if (wait_for_completion_timeout(&net_device->channel_init_wait,
					5*HZ) == 0)
		return -ETIMEDOUT;

	if (init_packet->msg.init_msg.init_complete.status !=
	    NVSP_STAT_SUCCESS)
		return -EINVAL;

	/* Version 1 has no NDIS config message. */
	if (nvsp_ver == NVSP_PROTOCOL_VERSION_1)
		return 0;

	/* Every later version: send the NDIS config. */
	memset(init_packet, 0, sizeof(struct nvsp_message));
	init_packet->hdr.msg_type = NVSP_MSG2_TYPE_SEND_NDIS_CONFIG;
	init_packet->msg.v2_msg.send_ndis_config.mtu =
		net_device->ndev->mtu + ETH_HLEN;
	init_packet->msg.v2_msg.send_ndis_config.capability.ieee8021q = 1;

	return vmbus_sendpacket(device->channel, init_packet,
				sizeof(struct nvsp_message),
				(unsigned long)init_packet,
				VM_PKT_DATA_INBAND, 0);
}

static int netvsc_connect_vsp(struct hv_device *device)
{
	int ret;
	struct netvsc_device *net_device;
	struct nvsp_message *init_packet;
	int ndis_version;
	struct net_device *ndev;
470 471 472
	u32 ver_list[] = { NVSP_PROTOCOL_VERSION_1, NVSP_PROTOCOL_VERSION_2,
		NVSP_PROTOCOL_VERSION_4, NVSP_PROTOCOL_VERSION_5 };
	int i, num_ver = 4; /* number of different NVSP versions */
473 474 475 476 477 478 479 480 481

	net_device = get_outbound_net_device(device);
	if (!net_device)
		return -ENODEV;
	ndev = net_device->ndev;

	init_packet = &net_device->channel_init_pkt;

	/* Negotiate the latest NVSP protocol supported */
482 483 484 485 486 487 488 489
	for (i = num_ver - 1; i >= 0; i--)
		if (negotiate_nvsp_ver(device, net_device, init_packet,
				       ver_list[i])  == 0) {
			net_device->nvsp_version = ver_list[i];
			break;
		}

	if (i < 0) {
490
		ret = -EPROTO;
491
		goto cleanup;
492
	}
493 494 495

	pr_debug("Negotiated NVSP version:%x\n", net_device->nvsp_version);

496
	/* Send the ndis version */
497
	memset(init_packet, 0, sizeof(struct nvsp_message));
498

499
	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_4)
500
		ndis_version = 0x00060001;
501 502
	else
		ndis_version = 0x0006001e;
503

504 505 506
	init_packet->hdr.msg_type = NVSP_MSG1_TYPE_SEND_NDIS_VER;
	init_packet->msg.v1_msg.
		send_ndis_ver.ndis_major_ver =
507
				(ndis_version & 0xFFFF0000) >> 16;
508 509
	init_packet->msg.v1_msg.
		send_ndis_ver.ndis_minor_ver =
510
				ndis_version & 0xFFFF;
511

512
	/* Send the init request */
513
	ret = vmbus_sendpacket(device->channel, init_packet,
514 515 516
				sizeof(struct nvsp_message),
				(unsigned long)init_packet,
				VM_PKT_DATA_INBAND, 0);
517
	if (ret != 0)
518
		goto cleanup;
519 520

	/* Post the big receive buffer to NetVSP */
521 522 523 524
	if (net_device->nvsp_version <= NVSP_PROTOCOL_VERSION_2)
		net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE_LEGACY;
	else
		net_device->recv_buf_size = NETVSC_RECEIVE_BUFFER_SIZE;
525
	net_device->send_buf_size = NETVSC_SEND_BUFFER_SIZE;
526

527
	ret = netvsc_init_buf(device);
528

529
cleanup:
530 531 532
	return ret;
}

533
/*
 * netvsc_disconnect_vsp - undo netvsc_connect_vsp()
 *
 * Releases the receive/send buffers.  The result of netvsc_destroy_buf()
 * is intentionally not reported to the caller.
 */
static void netvsc_disconnect_vsp(struct netvsc_device *net_device)
{
	(void)netvsc_destroy_buf(net_device);
}

538
/*
539
 * netvsc_device_remove - Callback when the root bus device is removed
540
 */
541
int netvsc_device_remove(struct hv_device *device)
542
{
543
	struct netvsc_device *net_device;
544
	unsigned long flags;
545

546
	net_device = hv_get_drvdata(device);
547

548
	netvsc_disconnect_vsp(net_device);
549

550
	/*
551 552 553 554 555
	 * Since we have already drained, we don't need to busy wait
	 * as was done in final_release_stor_device()
	 * Note that we cannot set the ext pointer to NULL until
	 * we have drained - to drain the outgoing packets, we need to
	 * allow incoming packets.
556
	 */
557 558

	spin_lock_irqsave(&device->channel->inbound_lock, flags);
559
	hv_set_drvdata(device, NULL);
560
	spin_unlock_irqrestore(&device->channel->inbound_lock, flags);
561

562 563 564 565
	/*
	 * At this point, no one should be accessing net_device
	 * except in here
	 */
566
	dev_notice(&device->device, "net device safe to remove\n");
567

568
	/* Now, we can close the channel safely */
569
	vmbus_close(device->channel);
570

571
	/* Release all resources */
572
	vfree(net_device->sub_cb_buf);
573
	free_netvsc_device(net_device);
574
	return 0;
575 576
}

577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594

/*
 * Ring-buffer watermarks, in percent of writable space: a stopped tx queue
 * is woken above HIWATER, a running one is stopped below LOWATER.
 */
#define RING_AVAIL_PERCENT_HIWATER 20
#define RING_AVAIL_PERCENT_LOWATER 10

/*
 * Get the percentage of available bytes to write in the ring.
 * The return value is in range from 0 to 100.
 */
static inline u32 hv_ringbuf_avail_percent(
		struct hv_ring_buffer_info *ring_info)
{
	u32 bytes_readable, bytes_writable;
	u32 scaled;

	hv_get_ringbuffer_availbytes(ring_info, &bytes_readable,
				     &bytes_writable);

	/* Only the writable side matters here; the read count is unused. */
	scaled = bytes_writable * 100;

	return scaled / ring_info->ring_datasize;
}

595 596 597 598 599 600
/*
 * netvsc_free_send_slot - give a send-buffer section back
 *
 * Flips bit @index of the send section bitmap with sync_change_bit(); the
 * caller must own the section, i.e. the bit must currently be set.
 */
static inline void netvsc_free_send_slot(struct netvsc_device *net_device,
					 u32 index)
{
	unsigned long *section_map =
		(unsigned long *)net_device->send_section_map;

	sync_change_bit(index, section_map);
}

601 602
static void netvsc_send_completion(struct netvsc_device *net_device,
				   struct hv_device *device,
603
				   struct vmpacket_descriptor *packet)
604
{
605 606
	struct nvsp_message *nvsp_packet;
	struct hv_netvsc_packet *nvsc_packet;
607
	struct net_device *ndev;
608
	u32 send_index;
609

610
	ndev = net_device->ndev;
611

612
	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
613
			(packet->offset8 << 3));
614

615 616 617 618
	if ((nvsp_packet->hdr.msg_type == NVSP_MSG_TYPE_INIT_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG1_TYPE_SEND_RECV_BUF_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
619 620 621
	     NVSP_MSG1_TYPE_SEND_SEND_BUF_COMPLETE) ||
	    (nvsp_packet->hdr.msg_type ==
	     NVSP_MSG5_TYPE_SUBCHANNEL)) {
622
		/* Copy the response back */
623
		memcpy(&net_device->channel_init_pkt, nvsp_packet,
624
		       sizeof(struct nvsp_message));
625
		complete(&net_device->channel_init_wait);
626 627
	} else if (nvsp_packet->hdr.msg_type ==
		   NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE) {
628
		int num_outstanding_sends;
629 630 631
		u16 q_idx = 0;
		struct vmbus_channel *channel = device->channel;
		int queue_sends;
632

633
		/* Get the send context */
634
		nvsc_packet = (struct hv_netvsc_packet *)(unsigned long)
635
			packet->trans_id;
636

637
		/* Notify the layer above us */
638
		if (nvsc_packet) {
639 640 641
			send_index = nvsc_packet->send_buf_index;
			if (send_index != NETVSC_INVALID_INDEX)
				netvsc_free_send_slot(net_device, send_index);
642 643
			q_idx = nvsc_packet->q_idx;
			channel = nvsc_packet->channel;
644 645
			nvsc_packet->send_completion(nvsc_packet->
						     send_completion_ctx);
646
		}
647

648 649
		num_outstanding_sends =
			atomic_dec_return(&net_device->num_outstanding_sends);
650 651
		queue_sends = atomic_dec_return(&net_device->
						queue_sends[q_idx]);
652

653 654 655
		if (net_device->destroy && num_outstanding_sends == 0)
			wake_up(&net_device->wait_drain);

656 657 658 659 660 661
		if (netif_tx_queue_stopped(netdev_get_tx_queue(ndev, q_idx)) &&
		    !net_device->start_remove &&
		    (hv_ringbuf_avail_percent(&channel->outbound) >
		     RING_AVAIL_PERCENT_HIWATER || queue_sends < 1))
				netif_tx_wake_queue(netdev_get_tx_queue(
						    ndev, q_idx));
662
	} else {
663
		netdev_err(ndev, "Unknown send completion packet type- "
664
			   "%d received!!\n", nvsp_packet->hdr.msg_type);
665 666 667 668
	}

}

669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693
/*
 * netvsc_get_next_send_section - claim a free send-buffer section
 *
 * Walks the section bitmap one word at a time.  In the first word that is
 * not completely set, the lowest clear bit is claimed with an atomic
 * test-and-set; if the bit turns out to have been taken meanwhile the walk
 * moves on to the next word.
 *
 * Returns the section index, or NETVSC_INVALID_INDEX when no section could
 * be claimed or the claimed bit lies at or beyond send_section_cnt.
 */
static u32 netvsc_get_next_send_section(struct netvsc_device *net_device)
{
	unsigned long *map = (unsigned long *)net_device->send_section_map;
	u32 nr_words = net_device->map_words;
	u32 nr_sections = net_device->send_section_cnt;
	u32 word;

	for (word = 0; word < nr_words; word++) {
		unsigned long bit;
		unsigned long section;

		/* Every bit set: nothing free in this word. */
		if (map[word] == ~0UL)
			continue;

		bit = ffz(map[word]);
		if (sync_test_and_set_bit(bit, &map[word]))
			continue;

		section = bit + word * BITS_PER_LONG;
		if (section >= nr_sections)
			return NETVSC_INVALID_INDEX;

		return section;
	}

	return NETVSC_INVALID_INDEX;
}

L
Lad, Prabhakar 已提交
694 695
/*
 * netvsc_copy_to_send_buf - copy a packet's pages into a send section
 * @net_device:    netvsc state owning the send buffer
 * @section_index: section of the send buffer to copy into
 * @pend_size:     bytes already occupied at the start of that section
 * @packet:        packet whose page buffers are copied
 *
 * When the packet is copied only partially (cp_partial) just the first
 * rmsg_pgcnt page buffers are copied, otherwise all page_buf_cnt of them.
 * A fully copied data packet with xmit_more set is padded with zeroes up
 * to a multiple of pkt_align; the padding is also added to the packet's
 * RNDIS msg_len and total_data_buflen.
 *
 * Returns the number of bytes written, padding included.
 */
static u32 netvsc_copy_to_send_buf(struct netvsc_device *net_device,
				   unsigned int section_index,
				   u32 pend_size,
				   struct hv_netvsc_packet *packet)
{
	char *dest = (char *)net_device->send_buf +
		     (section_index * net_device->send_section_size) +
		     pend_size;
	u32 remain = packet->total_data_buflen % net_device->pkt_align;
	u32 copied = 0;
	u32 pad = 0;
	u32 nr_pages;
	int pg;

	if (packet->cp_partial)
		nr_pages = packet->rmsg_pgcnt;
	else
		nr_pages = packet->page_buf_cnt;

	/* Add padding */
	if (packet->is_data_pkt && packet->xmit_more && remain &&
	    !packet->cp_partial) {
		pad = net_device->pkt_align - remain;
		packet->rndis_msg->msg_len += pad;
		packet->total_data_buflen += pad;
	}

	for (pg = 0; pg < nr_pages; pg++) {
		struct hv_page_buffer *pb = &packet->page_buf[pg];
		char *src = phys_to_virt(pb->pfn << PAGE_SHIFT);

		memcpy(dest, src + pb->offset, pb->len);
		copied += pb->len;
		dest += pb->len;
	}

	if (pad) {
		memset(dest, 0, pad);
		copied += pad;
	}

	return copied;
}

735 736 737
static inline int netvsc_send_pkt(
	struct hv_netvsc_packet *packet,
	struct netvsc_device *net_device)
738
{
739 740
	struct nvsp_message nvmsg;
	struct vmbus_channel *out_channel = packet->channel;
K
KY Srinivasan 已提交
741
	u16 q_idx = packet->q_idx;
742 743 744
	struct net_device *ndev = net_device->ndev;
	u64 req_id;
	int ret;
745
	struct hv_page_buffer *pgbuf;
746

747
	nvmsg.hdr.msg_type = NVSP_MSG1_TYPE_SEND_RNDIS_PKT;
748
	if (packet->is_data_pkt) {
749
		/* 0 is RMC_DATA; */
750
		nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 0;
751 752
	} else {
		/* 1 is RMC_CONTROL; */
753
		nvmsg.msg.v1_msg.send_rndis_pkt.channel_type = 1;
754
	}
755

756 757 758 759 760 761 762
	nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_index =
		packet->send_buf_index;
	if (packet->send_buf_index == NETVSC_INVALID_INDEX)
		nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size = 0;
	else
		nvmsg.msg.v1_msg.send_rndis_pkt.send_buf_section_size =
			packet->total_data_buflen;
763

764
	if (packet->send_completion)
765
		req_id = (ulong)packet;
766 767 768
	else
		req_id = 0;

769 770 771
	if (out_channel->rescind)
		return -ENODEV;

772
	if (packet->page_buf_cnt) {
773 774
		pgbuf = packet->cp_partial ? packet->page_buf +
			packet->rmsg_pgcnt : packet->page_buf;
775
		ret = vmbus_sendpacket_pagebuffer(out_channel,
776
						  pgbuf,
777
						  packet->page_buf_cnt,
778
						  &nvmsg,
779
						  sizeof(struct nvsp_message),
780
						  req_id);
781
	} else {
782 783
		ret = vmbus_sendpacket(
				out_channel, &nvmsg,
784
				sizeof(struct nvsp_message),
785
				req_id,
786 787
				VM_PKT_DATA_INBAND,
				VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
788 789
	}

790 791
	if (ret == 0) {
		atomic_inc(&net_device->num_outstanding_sends);
K
KY Srinivasan 已提交
792
		atomic_inc(&net_device->queue_sends[q_idx]);
793 794

		if (hv_ringbuf_avail_percent(&out_channel->outbound) <
795
			RING_AVAIL_PERCENT_LOWATER) {
796
			netif_tx_stop_queue(netdev_get_tx_queue(
K
KY Srinivasan 已提交
797
					    ndev, q_idx));
798

799
			if (atomic_read(&net_device->
K
KY Srinivasan 已提交
800
				queue_sends[q_idx]) < 1)
801
				netif_tx_wake_queue(netdev_get_tx_queue(
K
KY Srinivasan 已提交
802
						    ndev, q_idx));
803
		}
804
	} else if (ret == -EAGAIN) {
805
		netif_tx_stop_queue(netdev_get_tx_queue(
K
KY Srinivasan 已提交
806 807
				    ndev, q_idx));
		if (atomic_read(&net_device->queue_sends[q_idx]) < 1) {
808
			netif_tx_wake_queue(netdev_get_tx_queue(
K
KY Srinivasan 已提交
809
					    ndev, q_idx));
810 811
			ret = -ENOSPC;
		}
812
	} else {
813
		netdev_err(ndev, "Unable to send packet %p ret %d\n",
814
			   packet, ret);
815
	}
816

817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832
	return ret;
}

int netvsc_send(struct hv_device *device,
		struct hv_netvsc_packet *packet)
{
	struct netvsc_device *net_device;
	int ret = 0, m_ret = 0;
	struct vmbus_channel *out_channel;
	u16 q_idx = packet->q_idx;
	u32 pktlen = packet->total_data_buflen, msd_len = 0;
	unsigned int section_index = NETVSC_INVALID_INDEX;
	struct sk_buff *skb = NULL;
	unsigned long flag;
	struct multi_send_data *msdp;
	struct hv_netvsc_packet *msd_send = NULL, *cur_send = NULL;
833
	bool try_batch;
834 835 836 837 838 839 840 841 842 843 844 845 846

	net_device = get_outbound_net_device(device);
	if (!net_device)
		return -ENODEV;

	out_channel = net_device->chn_table[q_idx];
	if (!out_channel) {
		out_channel = device->channel;
		q_idx = 0;
		packet->q_idx = 0;
	}
	packet->channel = out_channel;
	packet->send_buf_index = NETVSC_INVALID_INDEX;
847
	packet->cp_partial = false;
848 849 850 851 852 853 854 855

	msdp = &net_device->msd[q_idx];

	/* batch packets in send buffer if possible */
	spin_lock_irqsave(&msdp->lock, flag);
	if (msdp->pkt)
		msd_len = msdp->pkt->total_data_buflen;

856 857 858 859
	try_batch = packet->is_data_pkt && msd_len > 0 && msdp->count <
		    net_device->max_pkt;

	if (try_batch && msd_len + pktlen + net_device->pkt_align <
860 861 862
	    net_device->send_section_size) {
		section_index = msdp->pkt->send_buf_index;

863 864 865 866 867
	} else if (try_batch && msd_len + packet->rmsg_size <
		   net_device->send_section_size) {
		section_index = msdp->pkt->send_buf_index;
		packet->cp_partial = true;

868 869 870 871 872 873 874 875 876 877 878 879 880 881 882
	} else if (packet->is_data_pkt && pktlen + net_device->pkt_align <
		   net_device->send_section_size) {
		section_index = netvsc_get_next_send_section(net_device);
		if (section_index != NETVSC_INVALID_INDEX) {
				msd_send = msdp->pkt;
				msdp->pkt = NULL;
				msdp->count = 0;
				msd_len = 0;
		}
	}

	if (section_index != NETVSC_INVALID_INDEX) {
		netvsc_copy_to_send_buf(net_device,
					section_index, msd_len,
					packet);
883

884
		packet->send_buf_index = section_index;
885 886 887 888 889 890 891 892 893 894 895 896 897

		if (packet->cp_partial) {
			packet->page_buf_cnt -= packet->rmsg_pgcnt;
			packet->total_data_buflen = msd_len + packet->rmsg_size;
		} else {
			packet->page_buf_cnt = 0;
			packet->total_data_buflen += msd_len;
			if (!packet->part_of_skb) {
				skb = (struct sk_buff *)(unsigned long)packet->
				       send_completion_tid;
				packet->send_completion_tid = 0;
			}
		}
898

899 900 901
		if (msdp->pkt)
			netvsc_xmit_completion(msdp->pkt);

902
		if (packet->xmit_more && !packet->cp_partial) {
903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924
			msdp->pkt = packet;
			msdp->count++;
		} else {
			cur_send = packet;
			msdp->pkt = NULL;
			msdp->count = 0;
		}
	} else {
		msd_send = msdp->pkt;
		msdp->pkt = NULL;
		msdp->count = 0;
		cur_send = packet;
	}

	spin_unlock_irqrestore(&msdp->lock, flag);

	if (msd_send) {
		m_ret = netvsc_send_pkt(msd_send, net_device);

		if (m_ret != 0) {
			netvsc_free_send_slot(net_device,
					      msd_send->send_buf_index);
925
			netvsc_xmit_completion(msd_send);
926 927 928 929 930 931
		}
	}

	if (cur_send)
		ret = netvsc_send_pkt(cur_send, net_device);

932 933 934 935 936 937 938
	if (ret != 0) {
		if (section_index != NETVSC_INVALID_INDEX)
			netvsc_free_send_slot(net_device, section_index);
	} else if (skb) {
		dev_kfree_skb_any(skb);
	}

939 940 941
	return ret;
}

942
static void netvsc_send_recv_completion(struct hv_device *device,
943
					struct vmbus_channel *channel,
944
					struct netvsc_device *net_device,
945
					u64 transaction_id, u32 status)
946 947 948 949
{
	struct nvsp_message recvcompMessage;
	int retries = 0;
	int ret;
950 951 952
	struct net_device *ndev;

	ndev = net_device->ndev;
953 954 955 956

	recvcompMessage.hdr.msg_type =
				NVSP_MSG1_TYPE_SEND_RNDIS_PKT_COMPLETE;

957
	recvcompMessage.msg.v1_msg.send_rndis_pkt_complete.status = status;
958 959 960

retry_send_cmplt:
	/* Send the completion */
961
	ret = vmbus_sendpacket(channel, &recvcompMessage,
962 963 964 965 966
			       sizeof(struct nvsp_message), transaction_id,
			       VM_PKT_COMP, 0);
	if (ret == 0) {
		/* success */
		/* no-op */
967
	} else if (ret == -EAGAIN) {
968 969
		/* no more room...wait a bit and attempt to retry 3 times */
		retries++;
970
		netdev_err(ndev, "unable to send receive completion pkt"
971
			" (tid %llx)...retrying %d\n", transaction_id, retries);
972 973 974 975 976

		if (retries < 4) {
			udelay(100);
			goto retry_send_cmplt;
		} else {
977
			netdev_err(ndev, "unable to send receive "
978
				"completion pkt (tid %llx)...give up retrying\n",
979 980 981
				transaction_id);
		}
	} else {
982
		netdev_err(ndev, "unable to send receive "
983
			"completion pkt - %llx\n", transaction_id);
984 985 986
	}
}

987
static void netvsc_receive(struct netvsc_device *net_device,
988
			struct vmbus_channel *channel,
989 990
			struct hv_device *device,
			struct vmpacket_descriptor *packet)
991
{
992 993
	struct vmtransfer_page_packet_header *vmxferpage_packet;
	struct nvsp_message *nvsp_packet;
994 995 996
	struct hv_netvsc_packet nv_pkt;
	struct hv_netvsc_packet *netvsc_packet = &nv_pkt;
	u32 status = NVSP_STAT_SUCCESS;
997 998
	int i;
	int count = 0;
999
	struct net_device *ndev;
1000

1001
	ndev = net_device->ndev;
1002

1003 1004 1005 1006
	/*
	 * All inbound packets other than send completion should be xfer page
	 * packet
	 */
1007
	if (packet->type != VM_PKT_DATA_USING_XFER_PAGES) {
1008
		netdev_err(ndev, "Unknown packet type received - %d\n",
1009
			   packet->type);
1010 1011 1012
		return;
	}

1013
	nvsp_packet = (struct nvsp_message *)((unsigned long)packet +
1014
			(packet->offset8 << 3));
1015

1016
	/* Make sure this is a valid nvsp packet */
1017 1018
	if (nvsp_packet->hdr.msg_type !=
	    NVSP_MSG1_TYPE_SEND_RNDIS_PKT) {
1019
		netdev_err(ndev, "Unknown nvsp packet type received-"
1020
			" %d\n", nvsp_packet->hdr.msg_type);
1021 1022 1023
		return;
	}

1024
	vmxferpage_packet = (struct vmtransfer_page_packet_header *)packet;
1025

1026
	if (vmxferpage_packet->xfer_pageset_id != NETVSC_RECEIVE_BUFFER_ID) {
1027
		netdev_err(ndev, "Invalid xfer page set id - "
1028
			   "expecting %x got %x\n", NETVSC_RECEIVE_BUFFER_ID,
1029
			   vmxferpage_packet->xfer_pageset_id);
1030 1031 1032
		return;
	}

1033 1034
	count = vmxferpage_packet->range_cnt;
	netvsc_packet->channel = channel;
1035

1036
	/* Each range represents 1 RNDIS pkt that contains 1 ethernet frame */
1037
	for (i = 0; i < count; i++) {
1038
		/* Initialize the netvsc packet */
1039
		netvsc_packet->status = NVSP_STAT_SUCCESS;
1040 1041
		netvsc_packet->data = (void *)((unsigned long)net_device->
			recv_buf + vmxferpage_packet->ranges[i].byte_offset);
1042
		netvsc_packet->total_data_buflen =
1043
					vmxferpage_packet->ranges[i].byte_count;
1044

1045
		/* Pass it to the upper layer */
1046
		rndis_filter_receive(device, netvsc_packet);
1047

1048 1049
		if (netvsc_packet->status != NVSP_STAT_SUCCESS)
			status = NVSP_STAT_FAIL;
1050 1051
	}

1052 1053
	netvsc_send_recv_completion(device, channel, net_device,
				    vmxferpage_packet->d.trans_id, status);
1054 1055
}

1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090

/*
 * netvsc_send_table - copy a send indirection table out of an inband
 * message into the netvsc device.
 *
 * @hdev:  device used to look up the outbound netvsc device
 * @vmpkt: descriptor of the received packet
 *
 * Silently returns when no outbound device is available or when the
 * message is not NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE.  A table whose
 * entry count differs from VRSS_SEND_TAB_SIZE is rejected with an error
 * log.  The table entries are read starting at send_table.offset bytes
 * past the start of the send_table member.
 */
static void netvsc_send_table(struct hv_device *hdev,
			      struct vmpacket_descriptor *vmpkt)
{
	struct netvsc_device *nvscdev = get_outbound_net_device(hdev);
	struct nvsp_message *msg;
	u32 entries;
	u32 slot;
	u32 *src;

	if (!nvscdev)
		return;

	/* offset8 counts 8-byte units from the start of the descriptor */
	msg = (struct nvsp_message *)((unsigned long)vmpkt +
				      (vmpkt->offset8 << 3));
	if (msg->hdr.msg_type != NVSP_MSG5_TYPE_SEND_INDIRECTION_TABLE)
		return;

	entries = msg->msg.v5_msg.send_table.count;
	if (entries != VRSS_SEND_TAB_SIZE) {
		netdev_err(nvscdev->ndev,
			   "Received wrong send-table size:%u\n", entries);
		return;
	}

	src = (u32 *)((unsigned long)&msg->msg.v5_msg.send_table +
		      msg->msg.v5_msg.send_table.offset);

	for (slot = 0; slot < entries; slot++)
		nvscdev->send_table[slot] = src[slot];
}

void netvsc_channel_cb(void *context)
1091
{
1092
	int ret;
1093 1094
	struct vmbus_channel *channel = (struct vmbus_channel *)context;
	struct hv_device *device;
1095 1096 1097
	struct netvsc_device *net_device;
	u32 bytes_recvd;
	u64 request_id;
1098
	struct vmpacket_descriptor *desc;
1099 1100
	unsigned char *buffer;
	int bufferlen = NETVSC_PACKET_SIZE;
1101
	struct net_device *ndev;
1102

1103 1104 1105 1106 1107
	if (channel->primary_channel != NULL)
		device = channel->primary_channel->device_obj;
	else
		device = channel->device_obj;

1108
	net_device = get_inbound_net_device(device);
1109
	if (!net_device)
1110
		return;
1111
	ndev = net_device->ndev;
1112
	buffer = get_per_channel_state(channel);
1113

1114
	do {
1115
		ret = vmbus_recvpacket_raw(channel, buffer, bufferlen,
1116
					   &bytes_recvd, &request_id);
1117
		if (ret == 0) {
1118
			if (bytes_recvd > 0) {
1119
				desc = (struct vmpacket_descriptor *)buffer;
1120 1121
				switch (desc->type) {
				case VM_PKT_COMP:
1122 1123
					netvsc_send_completion(net_device,
								device, desc);
1124 1125
					break;

1126
				case VM_PKT_DATA_USING_XFER_PAGES:
1127 1128 1129 1130 1131 1132
					netvsc_receive(net_device, channel,
						       device, desc);
					break;

				case VM_PKT_DATA_INBAND:
					netvsc_send_table(device, desc);
1133 1134 1135
					break;

				default:
1136
					netdev_err(ndev,
1137 1138
						   "unhandled packet type %d, "
						   "tid %llx len %d\n",
1139
						   desc->type, request_id,
1140
						   bytes_recvd);
1141
					break;
1142 1143
				}

1144
			} else {
1145 1146 1147
				/*
				 * We are done for this pass.
				 */
1148 1149
				break;
			}
1150

1151
		} else if (ret == -ENOBUFS) {
1152 1153
			if (bufferlen > NETVSC_PACKET_SIZE)
				kfree(buffer);
1154
			/* Handle large packet */
1155
			buffer = kmalloc(bytes_recvd, GFP_ATOMIC);
1156
			if (buffer == NULL) {
1157
				/* Try again next time around */
1158
				netdev_err(ndev,
1159
					   "unable to allocate buffer of size "
1160
					   "(%d)!!\n", bytes_recvd);
1161 1162 1163
				break;
			}

1164
			bufferlen = bytes_recvd;
1165 1166 1167
		}
	} while (1);

1168 1169
	if (bufferlen > NETVSC_PACKET_SIZE)
		kfree(buffer);
1170 1171
	return;
}
1172

1173 1174 1175 1176
/*
 * netvsc_device_add - Callback when the device belonging to this
 * driver is added
 */
1177
int netvsc_device_add(struct hv_device *device, void *additional_info)
1178 1179
{
	int ret = 0;
1180 1181
	int ring_size =
	((struct netvsc_device_info *)additional_info)->ring_size;
1182
	struct netvsc_device *net_device;
1183
	struct net_device *ndev;
1184 1185

	net_device = alloc_net_device(device);
1186 1187
	if (!net_device)
		return -ENOMEM;
1188

1189 1190
	net_device->ring_size = ring_size;

1191 1192 1193 1194 1195 1196 1197 1198 1199
	/*
	 * Coming into this function, struct net_device * is
	 * registered as the driver private data.
	 * In alloc_net_device(), we register struct netvsc_device *
	 * as the driver private data and stash away struct net_device *
	 * in struct netvsc_device *.
	 */
	ndev = net_device->ndev;

1200
	/* Initialize the NetVSC channel extension */
1201
	init_completion(&net_device->channel_init_wait);
1202

1203 1204
	set_per_channel_state(device->channel, net_device->cb_buffer);

1205
	/* Open the channel */
1206 1207
	ret = vmbus_open(device->channel, ring_size * PAGE_SIZE,
			 ring_size * PAGE_SIZE, NULL, 0,
1208
			 netvsc_channel_cb, device->channel);
1209 1210

	if (ret != 0) {
1211
		netdev_err(ndev, "unable to open channel: %d\n", ret);
1212 1213 1214 1215
		goto cleanup;
	}

	/* Channel is opened */
1216
	pr_info("hv_netvsc channel opened successfully\n");
1217

1218 1219
	net_device->chn_table[0] = device->channel;

1220 1221 1222
	/* Connect with the NetVsp */
	ret = netvsc_connect_vsp(device);
	if (ret != 0) {
1223
		netdev_err(ndev,
1224
			"unable to connect to NetVSP - %d\n", ret);
1225 1226 1227 1228 1229 1230 1231 1232 1233 1234
		goto close;
	}

	return ret;

close:
	/* Now, we can close the channel safely */
	vmbus_close(device->channel);

cleanup:
1235
	free_netvsc_device(net_device);
1236 1237 1238

	return ret;
}