/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/hyperv.h>

#include "hyperv_vmbus.h"

static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);

static const struct vmbus_device vmbus_devs[] = {
	/* IDE */
	{ .dev_type = HV_IDE,
	  HV_IDE_GUID,
	  .perf_device = true,
	},

	/* SCSI */
	{ .dev_type = HV_SCSI,
	  HV_SCSI_GUID,
	  .perf_device = true,
	},

	/* Fibre Channel */
	{ .dev_type = HV_FC,
	  HV_SYNTHFC_GUID,
	  .perf_device = true,
	},

	/* Synthetic NIC */
	{ .dev_type = HV_NIC,
	  HV_NIC_GUID,
	  .perf_device = true,
	},

	/* Network Direct */
	{ .dev_type = HV_ND,
	  HV_ND_GUID,
	  .perf_device = true,
	},

	/* PCIE */
	{ .dev_type = HV_PCIE,
	  HV_PCIE_GUID,
	  .perf_device = true,
	},

	/* Synthetic Frame Buffer */
	{ .dev_type = HV_FB,
	  HV_SYNTHVID_GUID,
	  .perf_device = false,
	},

	/* Synthetic Keyboard */
	{ .dev_type = HV_KBD,
	  HV_KBD_GUID,
	  .perf_device = false,
	},

	/* Synthetic MOUSE */
	{ .dev_type = HV_MOUSE,
	  HV_MOUSE_GUID,
	  .perf_device = false,
	},

	/* KVP */
	{ .dev_type = HV_KVP,
	  HV_KVP_GUID,
	  .perf_device = false,
	},

	/* Time Synch */
	{ .dev_type = HV_TS,
	  HV_TS_GUID,
	  .perf_device = false,
	},

	/* Heartbeat */
	{ .dev_type = HV_HB,
	  HV_HEART_BEAT_GUID,
	  .perf_device = false,
	},

	/* Shutdown */
	{ .dev_type = HV_SHUTDOWN,
	  HV_SHUTDOWN_GUID,
	  .perf_device = false,
	},

	/* File copy */
	{ .dev_type = HV_FCOPY,
	  HV_FCOPY_GUID,
	  .perf_device = false,
	},

	/* Backup */
	{ .dev_type = HV_BACKUP,
	  HV_VSS_GUID,
	  .perf_device = false,
	},

	/* Dynamic Memory */
	{ .dev_type = HV_DM,
	  HV_DM_GUID,
	  .perf_device = false,
	},

	/* Unknown GUID */
	{ .dev_type = HV_UNKNOWN,
	  .perf_device = false,
	},
};
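
/*
 * hv_get_dev_type() walks this table indexed by the HV_* device type
 * values, so the entries above must stay in enum order.
 */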

static const struct {
	uuid_le guid;
} vmbus_unsupported_devs[] = {
	{ HV_AVMA1_GUID },
	{ HV_AVMA2_GUID },
	{ HV_RDV_GUID	},
};

static bool is_unsupported_vmbus_devs(const uuid_le *guid)
{
	int i;

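	/* uuid_le_cmp() follows the memcmp() convention: 0 means equal. */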
	for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
		if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
			return true;
	return false;
}

static u16 hv_get_dev_type(const struct vmbus_channel *channel)
{
	const uuid_le *guid = &channel->offermsg.offer.if_type;
	u16 i;

	if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
		return HV_UNKNOWN;

	for (i = HV_IDE; i < HV_UNKNOWN; i++) {
		if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
			return i;
	}
	pr_info("Unknown GUID: %pUl\n", guid);
	return i;
}

/**
 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @negop: Pointer to negotiate message structure
 * @buf: Raw buffer channel data
 * @fw_version: The framework version we can support
 * @srv_version: The service version we can support
 *
 * @icmsghdrp is of type &struct icmsg_hdr.
 * @negop is of type &struct icmsg_negotiate.
 *
 * Set up and fill in the default negotiate response message.
 *
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
				struct icmsg_negotiate *negop, u8 *buf,
				int fw_version, int srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i;
	bool found_match = false;

	icmsghdrp->icmsgsize = 0x10;

	fw_major = (fw_version >> 16);
	fw_minor = (fw_version & 0xFFFF);

	srv_major = (srv_version >> 16);
	srv_minor = (srv_version & 0xFFFF);
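	/*
	 * Versions are packed as major.minor in a 32-bit word; e.g. a
	 * fw_version of 0x00030000 decodes to framework version 3.0.
	 */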

	negop = (struct icmsg_negotiate *)&buf[
		sizeof(struct vmbuspipe_hdr) +
		sizeof(struct icmsg_hdr)];

	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/*
	 * Select the framework version number we will
	 * support.
	 */

	for (i = 0; i < negop->icframe_vercnt; i++) {
		if ((negop->icversion_data[i].major == fw_major) &&
		   (negop->icversion_data[i].minor == fw_minor)) {
			icframe_major = negop->icversion_data[i].major;
			icframe_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	if (!found_match)
		goto fw_error;

	found_match = false;

	for (i = negop->icframe_vercnt;
		 (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
		if ((negop->icversion_data[i].major == srv_major) &&
		   (negop->icversion_data[i].minor == srv_minor)) {
			icmsg_major = negop->icversion_data[i].major;
			icmsg_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.
	 */

fw_error:
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
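
/*
 * Illustrative caller sketch (hypothetical names): an IC driver's
 * channel callback typically negotiates versions along these lines,
 * where MY_FW_VERSION and MY_SRV_VERSION stand for whatever framework
 * and service versions that driver supports:
 *
 *	if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE)
 *		vmbus_prep_negotiate_resp(icmsghdrp, negop, buf,
 *					  MY_FW_VERSION, MY_SRV_VERSION);
 */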
/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
	struct vmbus_channel *channel;

	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
	if (!channel)
		return NULL;

	channel->acquire_ring_lock = true;
	spin_lock_init(&channel->inbound_lock);
	spin_lock_init(&channel->lock);

	INIT_LIST_HEAD(&channel->sc_list);
	INIT_LIST_HEAD(&channel->percpu_list);

	return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
	kfree(channel);
}

static void percpu_channel_enq(void *arg)
{
	struct vmbus_channel *channel = arg;
	int cpu = smp_processor_id();

	list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
}
static void percpu_channel_deq(void *arg)
{
	struct vmbus_channel *channel = arg;

	list_del(&channel->percpu_list);
}
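
/*
 * percpu_channel_enq()/percpu_channel_deq() always run on the channel's
 * target CPU, either directly (when the caller is already on that CPU)
 * or via smp_call_function_single(), so the per-cpu channel list is
 * only ever touched from its owning CPU.
 */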

static void vmbus_release_relid(u32 relid)
{
	struct vmbus_channel_relid_released msg;

	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
}

void hv_event_tasklet_disable(struct vmbus_channel *channel)
{
	struct tasklet_struct *tasklet;

	tasklet = hv_context.event_dpc[channel->target_cpu];
	tasklet_disable(tasklet);
}

void hv_event_tasklet_enable(struct vmbus_channel *channel)
{
	struct tasklet_struct *tasklet;

	tasklet = hv_context.event_dpc[channel->target_cpu];
	tasklet_enable(tasklet);

	/* In case there is any pending event */
	tasklet_schedule(tasklet);
}
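
/*
 * Callers bracket percpu_channel_enq()/percpu_channel_deq() with these
 * helpers so that the event DPC on the target CPU cannot walk the
 * per-cpu channel list while it is being modified.
 */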

void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
	unsigned long flags;
	struct vmbus_channel *primary_channel;

	BUG_ON(!channel->rescind);
	BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));

	hv_event_tasklet_disable(channel);
	if (channel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(channel->target_cpu,
					 percpu_channel_deq, channel, true);
	} else {
		percpu_channel_deq(channel);
		put_cpu();
	}
	hv_event_tasklet_enable(channel);

	if (channel->primary_channel == NULL) {
		list_del(&channel->listentry);

		primary_channel = channel;
	} else {
		primary_channel = channel->primary_channel;
		spin_lock_irqsave(&primary_channel->lock, flags);
		list_del(&channel->sc_list);
		primary_channel->num_sc--;
		spin_unlock_irqrestore(&primary_channel->lock, flags);
	}

	/*
	 * Free the CPU bit so that init_vp_index() can reuse it for a new
	 * sub-channel when a driver like hv_netvsc is reloaded.
	 */
	if (channel->affinity_policy == HV_LOCALIZED)
		cpumask_clear_cpu(channel->target_cpu,
				  &primary_channel->alloced_cpus_in_node);

	vmbus_release_relid(relid);

	free_channel(channel);
}
void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	mutex_lock(&vmbus_connection.channel_mutex);
	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
		listentry) {
		/* hv_process_channel_removal() needs this */
		channel->rescind = true;

		vmbus_device_unregister(channel->device_obj);
	}
	mutex_unlock(&vmbus_connection.channel_mutex);
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	bool fnew = true;
	unsigned long flags;
	u16 dev_type;
	int ret;

	/* Make sure this is a new offer */
	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
			newchannel->offermsg.offer.if_type) &&
			!uuid_le_cmp(channel->offermsg.offer.if_instance,
				newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			break;
		}
	}

	if (fnew)
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);

	mutex_unlock(&vmbus_connection.channel_mutex);

	if (!fnew) {
		/*
		 * Check to see if this is a sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index != 0) {
			/*
			 * Process the sub-channel.
			 */
			newchannel->primary_channel = channel;
			spin_lock_irqsave(&channel->lock, flags);
			list_add_tail(&newchannel->sc_list, &channel->sc_list);
			channel->num_sc++;
			spin_unlock_irqrestore(&channel->lock, flags);
		} else
			goto err_free_chan;
	}

	dev_type = hv_get_dev_type(newchannel);

	init_vp_index(newchannel, dev_type);

	hv_event_tasklet_disable(newchannel);
	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_enq,
					 newchannel, true);
	} else {
		percpu_channel_enq(newchannel);
		put_cpu();
	}
	hv_event_tasklet_enable(newchannel);

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can clean up properly.
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	if (!fnew) {
		if (channel->sc_creation_callback != NULL)
			channel->sc_creation_callback(newchannel);
		return;
	}

	/*
	 * Start the process of binding this offer to the driver.
	 * We need to set the DeviceObject field before calling
	 * vmbus_device_register().
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	newchannel->device_obj->device_id = dev_type;
	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 */
	mutex_lock(&vmbus_connection.channel_mutex);
	ret = vmbus_device_register(newchannel->device_obj);
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (ret != 0) {
		pr_err("unable to add child device object (relid %d)\n",
			newchannel->offermsg.child_relid);
		kfree(newchannel->device_obj);
		goto err_deq_chan;
	}
	return;

err_deq_chan:
	mutex_lock(&vmbus_connection.channel_mutex);
	list_del(&newchannel->listentry);
	mutex_unlock(&vmbus_connection.channel_mutex);

	hv_event_tasklet_disable(newchannel);
	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_deq, newchannel, true);
	} else {
		percpu_channel_deq(newchannel);
		put_cpu();
	}
	hv_event_tasklet_enable(newchannel);

	vmbus_release_relid(newchannel->offermsg.child_relid);

err_free_chan:
	free_channel(newchannel);
}

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static int next_numa_node_id;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to a VCPU.
 * We do this in a hierarchical fashion:
 * First distribute the primary channels across available NUMA nodes
 * and then distribute the subchannels amongst the CPUs in the NUMA
 * node assigned to the primary channel.
 *
 * For pre-win8 hosts or non-performance critical channels we assign the
 * first CPU in the first NUMA node.
 */
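
/*
 * For example (hypothetical topology): on a guest spanning two NUMA
 * nodes, successive performance-critical primary channels alternate
 * between node 0 and node 1, and each sub-channel is then bound to the
 * next not-yet-used CPU of its primary's node, wrapping around once
 * every CPU in that node has been used.
 */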
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
{
	u32 cur_cpu;
	bool perf_chn = vmbus_devs[dev_type].perf_device;
	struct vmbus_channel *primary = channel->primary_channel;
	int next_node;
	struct cpumask available_mask;
	struct cpumask *alloced_mask;

	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
		/*
		 * Prior to win8, all channel interrupts are
		 * delivered on cpu 0.
		 * Also if the channel is not a performance critical
		 * channel, bind it to cpu 0.
		 */
		channel->numa_node = 0;
		channel->target_cpu = 0;
		channel->target_vp = hv_context.vp_index[0];
		return;
	}

	/*
	 * Based on the channel affinity policy, we will assign the NUMA
	 * nodes.
	 */

	if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
		while (true) {
			next_node = next_numa_node_id++;
			if (next_node == nr_node_ids) {
				next_node = next_numa_node_id = 0;
				continue;
			}
			if (cpumask_empty(cpumask_of_node(next_node)))
				continue;
			break;
		}
		channel->numa_node = next_node;
		primary = channel;
	}
	alloced_mask = &hv_context.hv_numa_map[primary->numa_node];

	if (cpumask_weight(alloced_mask) ==
	    cpumask_weight(cpumask_of_node(primary->numa_node))) {
		/*
		 * We have cycled through all the CPUs in the node;
		 * reset the alloced map.
		 */
		cpumask_clear(alloced_mask);
	}

	cpumask_xor(&available_mask, alloced_mask,
		    cpumask_of_node(primary->numa_node));

	cur_cpu = -1;

	if (primary->affinity_policy == HV_LOCALIZED) {
		/*
		 * Normally Hyper-V host doesn't create more subchannels
		 * than there are VCPUs on the node but it is possible when not
		 * all present VCPUs on the node are initialized by guest.
		 * Clear the alloced_cpus_in_node to start over.
		 */
		if (cpumask_equal(&primary->alloced_cpus_in_node,
				  cpumask_of_node(primary->numa_node)))
			cpumask_clear(&primary->alloced_cpus_in_node);
	}

	while (true) {
		cur_cpu = cpumask_next(cur_cpu, &available_mask);
		if (cur_cpu >= nr_cpu_ids) {
			cur_cpu = -1;
			cpumask_copy(&available_mask,
				     cpumask_of_node(primary->numa_node));
			continue;
		}

		if (primary->affinity_policy == HV_LOCALIZED) {
			/*
			 * NOTE: for a sub-channel, the CPU bit in
			 * primary->alloced_cpus_in_node is cleared in
			 * hv_process_channel_removal(), so when a driver
			 * like hv_netvsc is reloaded in an SMP guest we
			 * can re-allocate that bit here.
			 */
			if (!cpumask_test_cpu(cur_cpu,
					      &primary->alloced_cpus_in_node)) {
				cpumask_set_cpu(cur_cpu,
						&primary->alloced_cpus_in_node);
				cpumask_set_cpu(cur_cpu, alloced_mask);
				break;
			}
		} else {
			cpumask_set_cpu(cur_cpu, alloced_mask);
			break;
		}
	}

	channel->target_cpu = cur_cpu;
	channel->target_vp = hv_context.vp_index[cur_cpu];
}

static void vmbus_wait_for_unload(void)
{
	int cpu;
	void *page_addr;
	struct hv_message *msg;
	struct vmbus_channel_message_header *hdr;
	u32 message_type;

	/*
	 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
	 * used for initial contact or to CPU0 depending on host version. When
	 * we're crashing on a different CPU let's hope that the IRQ handler on
	 * the CPU which receives CHANNELMSG_UNLOAD_RESPONSE is still
	 * functional and vmbus_unload_response() will complete
	 * vmbus_connection.unload_event. If not, the last thing we can do is
	 * read message pages for all CPUs directly.
	 */
	while (1) {
		if (completion_done(&vmbus_connection.unload_event))
			break;

		for_each_online_cpu(cpu) {
			page_addr = hv_context.synic_message_page[cpu];
			msg = (struct hv_message *)page_addr +
				VMBUS_MESSAGE_SINT;

			message_type = READ_ONCE(msg->header.message_type);
			if (message_type == HVMSG_NONE)
				continue;

			hdr = (struct vmbus_channel_message_header *)
				msg->u.payload;

			if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
				complete(&vmbus_connection.unload_event);

			vmbus_signal_eom(msg, message_type);
		}

		mdelay(10);
	}

	/*
	 * We're crashing and already got the UNLOAD_RESPONSE; clean up any
	 * pending messages on all CPUs so that we can receive new messages
	 * after we reconnect.
	 */
	for_each_online_cpu(cpu) {
		page_addr = hv_context.synic_message_page[cpu];
		msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
		msg->header.message_type = HVMSG_NONE;
	}
}

/*
 * vmbus_unload_response - Handler for the unload response.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
	/*
	 * This is a global event; just wake up the waiting thread.
	 * Once we successfully unload, we can clean up the monitor state.
	 */
	complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(bool crash)
{
	struct vmbus_channel_message_header hdr;

	/* Pre-Win2012R2 hosts don't support reconnect */
	if (vmbus_proto_version < VERSION_WIN8_1)
		return;

	init_completion(&vmbus_connection.unload_event);
	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
	hdr.msgtype = CHANNELMSG_UNLOAD;
	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header));

	/*
	 * vmbus_initiate_unload() is also called on crash, and the crash can
	 * happen in an interrupt context, where scheduling is impossible.
	 */
	if (!crash)
		wait_for_completion(&vmbus_connection.unload_event);
	else
		vmbus_wait_for_unload();
}

/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_offer_channel *offer;
	struct vmbus_channel *newchannel;

	offer = (struct vmbus_channel_offer_channel *)hdr;

	/* Allocate the channel object and save this offer. */
	newchannel = alloc_channel();
	if (!newchannel) {
		pr_err("Unable to allocate channel object\n");
		return;
	}

	/*
	 * By default we set up state to enable batched
	 * reading. A specific service can choose to
	 * disable this prior to opening the channel.
	 */
	newchannel->batched_reading = true;

	/*
	 * Set up state for signalling the host.
	 */
	newchannel->sig_event = (struct hv_input_signal_event *)
				(ALIGN((unsigned long)
				&newchannel->sig_buf,
				HV_HYPERCALL_PARAM_ALIGN));

	newchannel->sig_event->connectionid.asu32 = 0;
	newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
	newchannel->sig_event->flag_number = 0;
	newchannel->sig_event->rsvdz = 0;

	if (vmbus_proto_version != VERSION_WS2008) {
		newchannel->is_dedicated_interrupt =
				(offer->is_dedicated_interrupt != 0);
		newchannel->sig_event->connectionid.u.id =
				offer->connection_id;
	}

	memcpy(&newchannel->offermsg, offer,
	       sizeof(struct vmbus_channel_offer_channel));
	newchannel->monitor_grp = (u8)offer->monitorid / 32;
	newchannel->monitor_bit = (u8)offer->monitorid % 32;

	vmbus_process_offer(newchannel);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_rescind_offer *rescind;
	struct vmbus_channel *channel;
	unsigned long flags;
	struct device *dev;

	rescind = (struct vmbus_channel_rescind_offer *)hdr;

	mutex_lock(&vmbus_connection.channel_mutex);
	channel = relid2channel(rescind->child_relid);

	if (channel == NULL) {
		/*
		 * This should never happen, because vmbus_process_offer()
		 * has already invoked vmbus_release_relid() on error.
		 */
		goto out;
	}

	spin_lock_irqsave(&channel->lock, flags);
	channel->rescind = true;
	spin_unlock_irqrestore(&channel->lock, flags);

	if (channel->device_obj) {
		if (channel->chn_rescind_callback) {
			channel->chn_rescind_callback(channel);
			goto out;
		}
		/*
		 * We will have to unregister this device from the
		 * driver core.
		 */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	} else {
		hv_process_channel_removal(channel,
			channel->offermsg.child_relid);
	}

out:
	mutex_unlock(&vmbus_connection.channel_mutex);
}

void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
{
	mutex_lock(&vmbus_connection.channel_mutex);

	BUG_ON(!is_hvsock_channel(channel));

	channel->rescind = true;
	vmbus_device_unregister(channel->device_obj);

	mutex_unlock(&vmbus_connection.channel_mutex);
}
EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);

/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we receive a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_open_result *result;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
	unsigned long flags;

	result = (struct vmbus_channel_open_result *)hdr;

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
			openmsg =
			(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
				       result,
				       sizeof(
					struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we receive a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
	unsigned long flags;

	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
			gpadlheader =
			(struct vmbus_channel_gpadl_header *)requestheader;

			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
				       gpadlcreated,
				       sizeof(
					struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we receive a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_torndown(
			struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
	unsigned long flags;

	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
			gpadl_teardown =
			(struct vmbus_channel_gpadl_teardown *)requestheader;

			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
				memcpy(&msginfo->response.gpadl_torndown,
				       gpadl_torndown,
				       sizeof(
					struct vmbus_channel_gpadl_torndown));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onversion_response - Version response handler
 *
 * This is invoked when we receive a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onversion_response(
		struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_version_response *version_response;
	unsigned long flags;

	version_response = (struct vmbus_channel_version_response *)hdr;
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype ==
		    CHANNELMSG_INITIATE_CONTACT) {
			memcpy(&msginfo->response.version_response,
			      version_response,
			      sizeof(struct vmbus_channel_version_response));
			complete(&msginfo->waitevent);
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/* Channel message dispatch table */
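/*
 * The middle field is the handler type: entries marked 1 are handlers
 * that must not sleep and so may be invoked directly from the message
 * DPC, while entries marked 0 (where a handler exists) are allowed to
 * sleep and are deferred to a work queue.
 */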
struct vmbus_channel_message_table_entry
	channel_message_table[CHANNELMSG_COUNT] = {
	{CHANNELMSG_INVALID,			0, NULL},
	{CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer},
	{CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind},
	{CHANNELMSG_REQUESTOFFERS,		0, NULL},
	{CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered},
	{CHANNELMSG_OPENCHANNEL,		0, NULL},
	{CHANNELMSG_OPENCHANNEL_RESULT,		1, vmbus_onopen_result},
	{CHANNELMSG_CLOSECHANNEL,		0, NULL},
	{CHANNELMSG_GPADL_HEADER,		0, NULL},
	{CHANNELMSG_GPADL_BODY,			0, NULL},
	{CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created},
	{CHANNELMSG_GPADL_TEARDOWN,		0, NULL},
	{CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown},
	{CHANNELMSG_RELID_RELEASED,		0, NULL},
	{CHANNELMSG_INITIATE_CONTACT,		0, NULL},
	{CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response},
	{CHANNELMSG_UNLOAD,			0, NULL},
	{CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response},
	{CHANNELMSG_18,				0, NULL},
	{CHANNELMSG_19,				0, NULL},
	{CHANNELMSG_20,				0, NULL},
	{CHANNELMSG_TL_CONNECT_REQUEST,		0, NULL},
};

/*
 * vmbus_onmessage - Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 */
void vmbus_onmessage(void *context)
{
	struct hv_message *msg = context;
	struct vmbus_channel_message_header *hdr;
	int size;

	hdr = (struct vmbus_channel_message_header *)msg->u.payload;
	size = msg->header.payload_size;

	if (hdr->msgtype >= CHANNELMSG_COUNT) {
		pr_err("Received invalid channel message type %d size %d\n",
			   hdr->msgtype, size);
		print_hex_dump_bytes("", DUMP_PREFIX_NONE,
				     (unsigned char *)msg->u.payload, size);
		return;
	}

	if (channel_message_table[hdr->msgtype].message_handler)
		channel_message_table[hdr->msgtype].message_handler(hdr);
	else
		pr_err("Unhandled channel message type %d\n", hdr->msgtype);
}

/*
 * vmbus_request_offers - Send a request to get all our pending offers.
 */
int vmbus_request_offers(void)
{
	struct vmbus_channel_message_header *msg;
	struct vmbus_channel_msginfo *msginfo;
	int ret;

	msginfo = kmalloc(sizeof(*msginfo) +
			  sizeof(struct vmbus_channel_message_header),
			  GFP_KERNEL);
	if (!msginfo)
		return -ENOMEM;

	msg = (struct vmbus_channel_message_header *)msginfo->msg;

	msg->msgtype = CHANNELMSG_REQUESTOFFERS;

	ret = vmbus_post_msg(msg,
			       sizeof(struct vmbus_channel_message_header));
	if (ret != 0) {
		pr_err("Unable to request offers - %d\n", ret);

		goto cleanup;
	}

cleanup:
	kfree(msginfo);

	return ret;
}

/*
 * Retrieve the (sub) channel on which to send an outgoing request.
 * When a primary channel has multiple sub-channels, we try to
 * distribute the load equally amongst all available channels.
 */
struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
{
	struct list_head *cur, *tmp;
	int cur_cpu;
	struct vmbus_channel *cur_channel;
	struct vmbus_channel *outgoing_channel = primary;
	int next_channel;
	int i = 1;

	if (list_empty(&primary->sc_list))
		return outgoing_channel;

	next_channel = primary->next_oc++;

	if (next_channel > (primary->num_sc)) {
		primary->next_oc = 0;
		return outgoing_channel;
	}

	cur_cpu = hv_context.vp_index[get_cpu()];
	put_cpu();
	list_for_each_safe(cur, tmp, &primary->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
		if (cur_channel->state != CHANNEL_OPENED_STATE)
			continue;

		if (cur_channel->target_vp == cur_cpu)
			return cur_channel;

		if (i == next_channel)
			return cur_channel;

		i++;
	}

	return outgoing_channel;
}
EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
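
/*
 * Note on vmbus_get_outgoing_channel(): selection is two-level. An open
 * sub-channel whose target VP matches the calling CPU is preferred;
 * otherwise the primary's next_oc counter round-robins across the
 * remaining open sub-channels.
 */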

static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
	struct list_head *cur, *tmp;
	struct vmbus_channel *cur_channel;

	if (primary_channel->sc_creation_callback == NULL)
		return;

	list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);

		primary_channel->sc_creation_callback(cur_channel);
	}
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);

bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
	bool ret;

	ret = !list_empty(&primary->sc_list);

	if (ret) {
		/*
		 * Invoke the callback on sub-channel creation.
		 * This will present a uniform interface to the
		 * clients.
		 */
		invoke_sc_cb(primary);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);

void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
		void (*chn_rescind_cb)(struct vmbus_channel *))
{
	channel->chn_rescind_callback = chn_rescind_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);