channel_mgmt.c
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/interrupt.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/delay.h>
#include <linux/hyperv.h>

#include "hyperv_vmbus.h"

static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);

static const struct vmbus_device vmbus_devs[] = {
	/* IDE */
	{ .dev_type = HV_IDE,
	  HV_IDE_GUID,
	  .perf_device = true,
	},

	/* SCSI */
	{ .dev_type = HV_SCSI,
	  HV_SCSI_GUID,
	  .perf_device = true,
	},

	/* Fibre Channel */
	{ .dev_type = HV_FC,
	  HV_SYNTHFC_GUID,
	  .perf_device = true,
	},

	/* Synthetic NIC */
	{ .dev_type = HV_NIC,
	  HV_NIC_GUID,
	  .perf_device = true,
	},

	/* Network Direct */
	{ .dev_type = HV_ND,
	  HV_ND_GUID,
	  .perf_device = true,
	},

	/* PCIE */
	{ .dev_type = HV_PCIE,
	  HV_PCIE_GUID,
	  .perf_device = true,
	},

	/* Synthetic Frame Buffer */
	{ .dev_type = HV_FB,
	  HV_SYNTHVID_GUID,
	  .perf_device = false,
	},

	/* Synthetic Keyboard */
	{ .dev_type = HV_KBD,
	  HV_KBD_GUID,
	  .perf_device = false,
	},

	/* Synthetic MOUSE */
	{ .dev_type = HV_MOUSE,
	  HV_MOUSE_GUID,
	  .perf_device = false,
	},

	/* KVP */
	{ .dev_type = HV_KVP,
	  HV_KVP_GUID,
	  .perf_device = false,
	},

	/* Time Synch */
	{ .dev_type = HV_TS,
	  HV_TS_GUID,
	  .perf_device = false,
	},

	/* Heartbeat */
	{ .dev_type = HV_HB,
	  HV_HEART_BEAT_GUID,
	  .perf_device = false,
	},

	/* Shutdown */
	{ .dev_type = HV_SHUTDOWN,
	  HV_SHUTDOWN_GUID,
	  .perf_device = false,
	},

	/* File copy */
	{ .dev_type = HV_FCOPY,
	  HV_FCOPY_GUID,
	  .perf_device = false,
	},

	/* Backup */
	{ .dev_type = HV_BACKUP,
	  HV_VSS_GUID,
	  .perf_device = false,
	},

	/* Dynamic Memory */
	{ .dev_type = HV_DM,
	  HV_DM_GUID,
	  .perf_device = false,
	},

	/* Unknown GUID */
	{ .dev_type = HV_UNKNOWN,
	  .perf_device = false,
	},
};

static const struct {
	uuid_le guid;
} vmbus_unsupported_devs[] = {
	{ HV_AVMA1_GUID },
	{ HV_AVMA2_GUID },
	{ HV_RDV_GUID	},
};

static bool is_unsupported_vmbus_devs(const uuid_le *guid)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(vmbus_unsupported_devs); i++)
		if (!uuid_le_cmp(*guid, vmbus_unsupported_devs[i].guid))
			return true;
	return false;
}

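/*
 * Map a channel offer's interface-type GUID to an index in vmbus_devs[];
 * hvsock offers and known-unsupported GUIDs are reported as HV_UNKNOWN.
 */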
static u16 hv_get_dev_type(const struct vmbus_channel *channel)
{
	const uuid_le *guid = &channel->offermsg.offer.if_type;
	u16 i;

	if (is_hvsock_channel(channel) || is_unsupported_vmbus_devs(guid))
		return HV_UNKNOWN;

	for (i = HV_IDE; i < HV_UNKNOWN; i++) {
		if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
			return i;
	}
	pr_info("Unknown GUID: %pUl\n", guid);
	return i;
}

/**
 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 * @icmsghdrp: Pointer to msg header structure, of type &struct icmsg_hdr
 * @negop: Pointer to negotiate message structure, of type &struct icmsg_negotiate
 * @buf: Raw buffer channel data
 * @fw_version: The framework version we can support
 * @srv_version: The service version we can support
 *
 * Set up and fill in default negotiate response message.
 *
 * The fw_version specifies the framework version that we can support
 * and srv_version specifies the service version we can support.
 *
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
			       struct icmsg_negotiate *negop, u8 *buf,
			       int fw_version, int srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i;
	bool found_match = false;

	icmsghdrp->icmsgsize = 0x10;
	fw_major = (fw_version >> 16);
	fw_minor = (fw_version & 0xFFFF);

	srv_major = (srv_version >> 16);
	srv_minor = (srv_version & 0xFFFF);

	negop = (struct icmsg_negotiate *)&buf[
		sizeof(struct vmbuspipe_hdr) +
		sizeof(struct icmsg_hdr)];

	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/*
	 * Select the framework version number we will
	 * support.
	 */

	for (i = 0; i < negop->icframe_vercnt; i++) {
		if ((negop->icversion_data[i].major == fw_major) &&
		    (negop->icversion_data[i].minor == fw_minor)) {
			icframe_major = negop->icversion_data[i].major;
			icframe_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	if (!found_match)
		goto fw_error;

	found_match = false;

	for (i = negop->icframe_vercnt;
	     (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
		if ((negop->icversion_data[i].major == srv_major) &&
		    (negop->icversion_data[i].minor == srv_minor)) {
			icmsg_major = negop->icversion_data[i].major;
			icmsg_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.
	 */

fw_error:
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
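
/*
 * Usage sketch (illustrative only; UTIL_FW_VERSION/SRV_VERSION and the
 * buffer layout are assumptions modeled on the hv_util-style IC service
 * callbacks, not definitions from this file):
 *
 *	struct icmsg_negotiate *negop = NULL;
 *	struct icmsg_hdr *icmsghdrp;
 *
 *	icmsghdrp = (struct icmsg_hdr *)&buf[sizeof(struct vmbuspipe_hdr)];
 *	if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE)
 *		vmbus_prep_negotiate_resp(icmsghdrp, negop, buf,
 *					  UTIL_FW_VERSION, SRV_VERSION);
 */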

/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
	struct vmbus_channel *channel;

	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
	if (!channel)
		return NULL;

	channel->acquire_ring_lock = true;
	spin_lock_init(&channel->inbound_lock);
	spin_lock_init(&channel->lock);

	INIT_LIST_HEAD(&channel->sc_list);
	INIT_LIST_HEAD(&channel->percpu_list);

	return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
	kfree(channel);
}

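/*
 * percpu_channel_enq()/percpu_channel_deq() always run on the channel's
 * target CPU (directly, or via smp_call_function_single()), so the
 * per-CPU channel list is modified without additional locking.
 */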
static void percpu_channel_enq(void *arg)
{
	struct vmbus_channel *channel = arg;
	int cpu = smp_processor_id();

	list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
}

static void percpu_channel_deq(void *arg)
{
	struct vmbus_channel *channel = arg;

	list_del(&channel->percpu_list);
}

static void vmbus_release_relid(u32 relid)
{
	struct vmbus_channel_relid_released msg;

	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released),
		       true);
}

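/*
 * Disable/re-enable the event tasklet on the channel's target CPU so the
 * channel callback cannot run while the per-CPU channel list is updated.
 */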
void hv_event_tasklet_disable(struct vmbus_channel *channel)
{
	struct tasklet_struct *tasklet;
	tasklet = hv_context.event_dpc[channel->target_cpu];
	tasklet_disable(tasklet);
}

void hv_event_tasklet_enable(struct vmbus_channel *channel)
{
	struct tasklet_struct *tasklet;
	tasklet = hv_context.event_dpc[channel->target_cpu];
	tasklet_enable(tasklet);

	/* In case there is any pending event */
	tasklet_schedule(tasklet);
}

void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
	unsigned long flags;
	struct vmbus_channel *primary_channel;

	BUG_ON(!channel->rescind);
	BUG_ON(!mutex_is_locked(&vmbus_connection.channel_mutex));

	hv_event_tasklet_disable(channel);
	if (channel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(channel->target_cpu,
					 percpu_channel_deq, channel, true);
	} else {
		percpu_channel_deq(channel);
		put_cpu();
	}
	hv_event_tasklet_enable(channel);

	if (channel->primary_channel == NULL) {
		list_del(&channel->listentry);

		primary_channel = channel;
	} else {
		primary_channel = channel->primary_channel;
		spin_lock_irqsave(&primary_channel->lock, flags);
		list_del(&channel->sc_list);
		primary_channel->num_sc--;
		spin_unlock_irqrestore(&primary_channel->lock, flags);
	}

	/*
	 * Free the target-CPU bit so that init_vp_index() can reuse it for
	 * a new sub-channel, e.g. when a driver like hv_netvsc is reloaded.
	 */
	if (channel->affinity_policy == HV_LOCALIZED)
		cpumask_clear_cpu(channel->target_cpu,
				  &primary_channel->alloced_cpus_in_node);

	vmbus_release_relid(relid);

	free_channel(channel);
}

void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	mutex_lock(&vmbus_connection.channel_mutex);
	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
				 listentry) {
		/* hv_process_channel_removal() needs this */
		channel->rescind = true;

		vmbus_device_unregister(channel->device_obj);
	}
	mutex_unlock(&vmbus_connection.channel_mutex);
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	bool fnew = true;
	unsigned long flags;
	u16 dev_type;
	int ret;

	/* Make sure this is a new offer */
	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
				 newchannel->offermsg.offer.if_type) &&
		    !uuid_le_cmp(channel->offermsg.offer.if_instance,
				 newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			break;
		}
	}

	if (fnew)
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);

	mutex_unlock(&vmbus_connection.channel_mutex);

	if (!fnew) {
		/*
		 * Check to see if this is a sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index != 0) {
			/*
			 * Process the sub-channel.
			 */
			newchannel->primary_channel = channel;
			spin_lock_irqsave(&channel->lock, flags);
			list_add_tail(&newchannel->sc_list, &channel->sc_list);
			channel->num_sc++;
			spin_unlock_irqrestore(&channel->lock, flags);
		} else {
			goto err_free_chan;
		}
	}

	dev_type = hv_get_dev_type(newchannel);

	init_vp_index(newchannel, dev_type);

	hv_event_tasklet_disable(newchannel);
	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_enq,
					 newchannel, true);
	} else {
		percpu_channel_enq(newchannel);
		put_cpu();
	}
	hv_event_tasklet_enable(newchannel);

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can cleanup properly
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	if (!fnew) {
		if (channel->sc_creation_callback != NULL)
			channel->sc_creation_callback(newchannel);
		return;
	}

	/*
	 * Start the process of binding this offer to the driver
	 * We need to set the DeviceObject field before calling
	 * vmbus_child_dev_add()
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	newchannel->device_obj->device_id = dev_type;
	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 */
	mutex_lock(&vmbus_connection.channel_mutex);
	ret = vmbus_device_register(newchannel->device_obj);
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (ret != 0) {
		pr_err("unable to add child device object (relid %d)\n",
		       newchannel->offermsg.child_relid);
		kfree(newchannel->device_obj);
		goto err_deq_chan;
	}
	return;

err_deq_chan:
	mutex_lock(&vmbus_connection.channel_mutex);
	list_del(&newchannel->listentry);
	mutex_unlock(&vmbus_connection.channel_mutex);

	hv_event_tasklet_disable(newchannel);
	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_deq, newchannel, true);
	} else {
		percpu_channel_deq(newchannel);
		put_cpu();
	}
	hv_event_tasklet_enable(newchannel);

	vmbus_release_relid(newchannel->offermsg.child_relid);

err_free_chan:
	free_channel(newchannel);
}

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static int next_numa_node_id;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to VCPU.
 * We do this in a hierarchical fashion:
 * First distribute the primary channels across available NUMA nodes
 * and then distribute the subchannels amongst the CPUs in the NUMA
 * node assigned to the primary channel.
 *
 * For pre-win8 hosts or non-performance critical channels we assign the
 * first CPU in the first NUMA node.
 */
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
{
	u32 cur_cpu;
	bool perf_chn = vmbus_devs[dev_type].perf_device;
	struct vmbus_channel *primary = channel->primary_channel;
	int next_node;
	struct cpumask available_mask;
	struct cpumask *alloced_mask;

	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
		/*
		 * Prior to win8, all channel interrupts are
		 * delivered on cpu 0.
		 * Also if the channel is not a performance critical
		 * channel, bind it to cpu 0.
		 */
		channel->numa_node = 0;
		channel->target_cpu = 0;
		channel->target_vp = hv_context.vp_index[0];
		return;
	}

	/*
	 * Based on the channel affinity policy, we will assign the NUMA
	 * nodes.
	 */

	if ((channel->affinity_policy == HV_BALANCED) || (!primary)) {
		while (true) {
			next_node = next_numa_node_id++;
			if (next_node == nr_node_ids) {
				next_node = next_numa_node_id = 0;
				continue;
			}
			if (cpumask_empty(cpumask_of_node(next_node)))
				continue;
			break;
		}
		channel->numa_node = next_node;
		primary = channel;
	}
	alloced_mask = &hv_context.hv_numa_map[primary->numa_node];

	if (cpumask_weight(alloced_mask) ==
	    cpumask_weight(cpumask_of_node(primary->numa_node))) {
		/*
		 * We have cycled through all the CPUs in the node;
		 * reset the alloced map.
		 */
		cpumask_clear(alloced_mask);
	}

	cpumask_xor(&available_mask, alloced_mask,
		    cpumask_of_node(primary->numa_node));

	cur_cpu = -1;

	if (primary->affinity_policy == HV_LOCALIZED) {
		/*
		 * Normally Hyper-V host doesn't create more subchannels
		 * than there are VCPUs on the node but it is possible when not
		 * all present VCPUs on the node are initialized by guest.
		 * Clear the alloced_cpus_in_node to start over.
		 */
		if (cpumask_equal(&primary->alloced_cpus_in_node,
				  cpumask_of_node(primary->numa_node)))
			cpumask_clear(&primary->alloced_cpus_in_node);
	}

	while (true) {
		cur_cpu = cpumask_next(cur_cpu, &available_mask);
		if (cur_cpu >= nr_cpu_ids) {
			cur_cpu = -1;
			cpumask_copy(&available_mask,
				     cpumask_of_node(primary->numa_node));
			continue;
		}

		if (primary->affinity_policy == HV_LOCALIZED) {
			/*
			 * NOTE: in the case of sub-channel, we clear the
			 * sub-channel related bit(s) in
			 * primary->alloced_cpus_in_node in
			 * hv_process_channel_removal(), so when we
			 * reload drivers like hv_netvsc in SMP guest, here
			 * we're able to re-allocate
			 * bit from primary->alloced_cpus_in_node.
			 */
			if (!cpumask_test_cpu(cur_cpu,
					      &primary->alloced_cpus_in_node)) {
				cpumask_set_cpu(cur_cpu,
						&primary->alloced_cpus_in_node);
				cpumask_set_cpu(cur_cpu, alloced_mask);
				break;
			}
		} else {
			cpumask_set_cpu(cur_cpu, alloced_mask);
			break;
		}
	}

	channel->target_cpu = cur_cpu;
	channel->target_vp = hv_context.vp_index[cur_cpu];
}
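
/*
 * Worked example (hypothetical topology): with two NUMA nodes of four
 * VCPUs each, successive primary perf channels are spread across nodes
 * 0, 1, 0, 1, ... by next_numa_node_id, and each sub-channel then takes
 * a CPU of its primary's node that is still clear in alloced_mask (the
 * cpumask_xor() above yields exactly the not-yet-allocated CPUs of that
 * node).
 */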

static void vmbus_wait_for_unload(void)
{
	int cpu;
	void *page_addr;
	struct hv_message *msg;
	struct vmbus_channel_message_header *hdr;
	u32 message_type;

	/*
	 * CHANNELMSG_UNLOAD_RESPONSE is always delivered to the CPU which was
	 * used for initial contact or to CPU0 depending on host version. When
	 * we're crashing on a different CPU let's hope that IRQ handler on
	 * the cpu which receives CHANNELMSG_UNLOAD_RESPONSE is still
	 * functional and vmbus_unload_response() will complete
	 * vmbus_connection.unload_event. If not, the last thing we can do is
	 * read message pages for all CPUs directly.
	 */
	while (1) {
		if (completion_done(&vmbus_connection.unload_event))
			break;

		for_each_online_cpu(cpu) {
			page_addr = hv_context.synic_message_page[cpu];
			msg = (struct hv_message *)page_addr +
				VMBUS_MESSAGE_SINT;

			message_type = READ_ONCE(msg->header.message_type);
			if (message_type == HVMSG_NONE)
				continue;

			hdr = (struct vmbus_channel_message_header *)
				msg->u.payload;

			if (hdr->msgtype == CHANNELMSG_UNLOAD_RESPONSE)
				complete(&vmbus_connection.unload_event);

			vmbus_signal_eom(msg, message_type);
		}

		mdelay(10);
	}

	/*
	 * We're crashing and already got the UNLOAD_RESPONSE, cleanup all
	 * maybe-pending messages on all CPUs to be able to receive new
	 * messages after we reconnect.
	 */
	for_each_online_cpu(cpu) {
		page_addr = hv_context.synic_message_page[cpu];
		msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;
		msg->header.message_type = HVMSG_NONE;
	}
}

/*
 * vmbus_unload_response - Handler for the unload response.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
	/*
	 * This is a global event; just wakeup the waiting thread.
	 * Once we successfully unload, we can cleanup the monitor state.
	 */
	complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(bool crash)
{
	struct vmbus_channel_message_header hdr;

	/* Pre-Win2012R2 hosts don't support reconnect */
	if (vmbus_proto_version < VERSION_WIN8_1)
		return;

	init_completion(&vmbus_connection.unload_event);
	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
	hdr.msgtype = CHANNELMSG_UNLOAD;
	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header),
		       !crash);

	/*
	 * vmbus_initiate_unload() is also called on crash, and the crash
	 * can happen in an interrupt context, where scheduling is impossible.
	 */
	if (!crash)
		wait_for_completion(&vmbus_connection.unload_event);
	else
		vmbus_wait_for_unload();
}

/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_offer_channel *offer;
	struct vmbus_channel *newchannel;

	offer = (struct vmbus_channel_offer_channel *)hdr;

	/* Allocate the channel object and save this offer. */
	newchannel = alloc_channel();
	if (!newchannel) {
		pr_err("Unable to allocate channel object\n");
		return;
	}

	/*
	 * By default we setup state to enable batched
	 * reading. A specific service can choose to
	 * disable this prior to opening the channel.
	 */
	newchannel->batched_reading = true;

	/*
	 * Setup state for signalling the host.
	 */
	newchannel->sig_event = (struct hv_input_signal_event *)
				(ALIGN((unsigned long)
				&newchannel->sig_buf,
				HV_HYPERCALL_PARAM_ALIGN));

	newchannel->sig_event->connectionid.asu32 = 0;
	newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
	newchannel->sig_event->flag_number = 0;
	newchannel->sig_event->rsvdz = 0;

	if (vmbus_proto_version != VERSION_WS2008) {
		newchannel->is_dedicated_interrupt =
				(offer->is_dedicated_interrupt != 0);
		newchannel->sig_event->connectionid.u.id =
				offer->connection_id;
	}

	memcpy(&newchannel->offermsg, offer,
	       sizeof(struct vmbus_channel_offer_channel));
	newchannel->monitor_grp = (u8)offer->monitorid / 32;
	newchannel->monitor_bit = (u8)offer->monitorid % 32;

	vmbus_process_offer(newchannel);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_rescind_offer *rescind;
	struct vmbus_channel *channel;
	unsigned long flags;
	struct device *dev;

	rescind = (struct vmbus_channel_rescind_offer *)hdr;

	mutex_lock(&vmbus_connection.channel_mutex);
	channel = relid2channel(rescind->child_relid);

	if (channel == NULL) {
		/*
		 * This should be impossible, because in
		 * vmbus_process_offer() we have already invoked
		 * vmbus_release_relid() on error.
		 */
		goto out;
	}

	spin_lock_irqsave(&channel->lock, flags);
	channel->rescind = true;
	spin_unlock_irqrestore(&channel->lock, flags);

	if (channel->device_obj) {
		if (channel->chn_rescind_callback) {
			channel->chn_rescind_callback(channel);
			goto out;
		}
		/*
		 * We will have to unregister this device from the
		 * driver core.
		 */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	} else {
		hv_process_channel_removal(channel,
					   channel->offermsg.child_relid);
	}

out:
	mutex_unlock(&vmbus_connection.channel_mutex);
}

void vmbus_hvsock_device_unregister(struct vmbus_channel *channel)
{
	mutex_lock(&vmbus_connection.channel_mutex);

	BUG_ON(!is_hvsock_channel(channel));

	channel->rescind = true;
	vmbus_device_unregister(channel->device_obj);

	mutex_unlock(&vmbus_connection.channel_mutex);
}
EXPORT_SYMBOL_GPL(vmbus_hvsock_device_unregister);


/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we received a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_open_result *result;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
	unsigned long flags;

	result = (struct vmbus_channel_open_result *)hdr;

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
			openmsg =
			(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
				       result,
				       sizeof(
					struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we received a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
	unsigned long flags;

	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
			gpadlheader =
			(struct vmbus_channel_gpadl_header *)requestheader;

			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
				       gpadlcreated,
				       sizeof(
					struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we received a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_torndown(
			struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
	unsigned long flags;

	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
			gpadl_teardown =
			(struct vmbus_channel_gpadl_teardown *)requestheader;

			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
				memcpy(&msginfo->response.gpadl_torndown,
				       gpadl_torndown,
				       sizeof(
					struct vmbus_channel_gpadl_torndown));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onversion_response - Version response handler
 *
 * This is invoked when we received a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onversion_response(
		struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_version_response *version_response;
	unsigned long flags;

	version_response = (struct vmbus_channel_version_response *)hdr;
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
			    msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype ==
		    CHANNELMSG_INITIATE_CONTACT) {
			memcpy(&msginfo->response.version_response,
			       version_response,
			       sizeof(struct vmbus_channel_version_response));
			complete(&msginfo->waitevent);
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/* Channel message dispatch table */
struct vmbus_channel_message_table_entry
	channel_message_table[CHANNELMSG_COUNT] = {
	{CHANNELMSG_INVALID,			0, NULL},
	{CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer},
	{CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind},
	{CHANNELMSG_REQUESTOFFERS,		0, NULL},
	{CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered},
	{CHANNELMSG_OPENCHANNEL,		0, NULL},
	{CHANNELMSG_OPENCHANNEL_RESULT,		1, vmbus_onopen_result},
	{CHANNELMSG_CLOSECHANNEL,		0, NULL},
	{CHANNELMSG_GPADL_HEADER,		0, NULL},
	{CHANNELMSG_GPADL_BODY,			0, NULL},
	{CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created},
	{CHANNELMSG_GPADL_TEARDOWN,		0, NULL},
	{CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown},
	{CHANNELMSG_RELID_RELEASED,		0, NULL},
	{CHANNELMSG_INITIATE_CONTACT,		0, NULL},
	{CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response},
	{CHANNELMSG_UNLOAD,			0, NULL},
	{CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response},
	{CHANNELMSG_18,				0, NULL},
	{CHANNELMSG_19,				0, NULL},
	{CHANNELMSG_20,				0, NULL},
	{CHANNELMSG_TL_CONNECT_REQUEST,		0, NULL},
};
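
/*
 * Note on the table's middle column (the handler_type flag): in this
 * version of the driver, handlers marked 0 may sleep and are deferred to
 * the connection work queue by the message DPC, while handlers marked 1
 * must not sleep and are invoked directly (the VMHT_BLOCKING /
 * VMHT_NON_BLOCKING split declared in hyperv_vmbus.h).
 */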

/*
 * vmbus_onmessage - Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 */
void vmbus_onmessage(void *context)
{
	struct hv_message *msg = context;
	struct vmbus_channel_message_header *hdr;
	int size;

	hdr = (struct vmbus_channel_message_header *)msg->u.payload;
	size = msg->header.payload_size;

	if (hdr->msgtype >= CHANNELMSG_COUNT) {
		pr_err("Received invalid channel message type %d size %d\n",
		       hdr->msgtype, size);
		print_hex_dump_bytes("", DUMP_PREFIX_NONE,
				     (unsigned char *)msg->u.payload, size);
		return;
	}

	if (channel_message_table[hdr->msgtype].message_handler)
		channel_message_table[hdr->msgtype].message_handler(hdr);
	else
		pr_err("Unhandled channel message type %d\n", hdr->msgtype);
}

/*
 * vmbus_request_offers - Send a request to get all our pending offers.
 */
int vmbus_request_offers(void)
{
	struct vmbus_channel_message_header *msg;
	struct vmbus_channel_msginfo *msginfo;
	int ret;

	msginfo = kmalloc(sizeof(*msginfo) +
			  sizeof(struct vmbus_channel_message_header),
			  GFP_KERNEL);
	if (!msginfo)
		return -ENOMEM;

	msg = (struct vmbus_channel_message_header *)msginfo->msg;

	msg->msgtype = CHANNELMSG_REQUESTOFFERS;

	ret = vmbus_post_msg(msg, sizeof(struct vmbus_channel_message_header),
			     true);
	if (ret != 0) {
		pr_err("Unable to request offers - %d\n", ret);

		goto cleanup;
	}

cleanup:
	kfree(msginfo);

	return ret;
}

/*
 * Retrieve the (sub) channel on which to send an outgoing request.
 * When a primary channel has multiple sub-channels, we try to
 * distribute the load equally amongst all available channels.
 */
struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
{
	struct list_head *cur, *tmp;
	int cur_cpu;
	struct vmbus_channel *cur_channel;
	struct vmbus_channel *outgoing_channel = primary;
	int next_channel;
	int i = 1;

	if (list_empty(&primary->sc_list))
		return outgoing_channel;

	next_channel = primary->next_oc++;

	if (next_channel > (primary->num_sc)) {
		primary->next_oc = 0;
		return outgoing_channel;
	}

	cur_cpu = hv_context.vp_index[get_cpu()];
	put_cpu();
	list_for_each_safe(cur, tmp, &primary->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
		if (cur_channel->state != CHANNEL_OPENED_STATE)
			continue;

		if (cur_channel->target_vp == cur_cpu)
			return cur_channel;

		if (i == next_channel)
			return cur_channel;

		i++;
	}

	return outgoing_channel;
}
EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
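
/*
 * Usage sketch (illustrative; the device->channel field and the request
 * arguments are assumptions modeled on a storvsc/netvsc-style caller,
 * not definitions from this file):
 *
 *	struct vmbus_channel *out;
 *
 *	out = vmbus_get_outgoing_channel(device->channel);
 *	ret = vmbus_sendpacket(out, buf, buflen, request_id,
 *			       VM_PKT_DATA_INBAND,
 *			       VMBUS_DATA_PACKET_FLAG_COMPLETION_REQUESTED);
 */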

static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
	struct list_head *cur, *tmp;
	struct vmbus_channel *cur_channel;

	if (primary_channel->sc_creation_callback == NULL)
		return;

	list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);

		primary_channel->sc_creation_callback(cur_channel);
	}
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);

bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
	bool ret;

	ret = !list_empty(&primary->sc_list);

	if (ret) {
		/*
		 * Invoke the callback on sub-channel creation.
		 * This will present a uniform interface to the
		 * clients.
		 */
		invoke_sc_cb(primary);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);

void vmbus_set_chn_rescind_callback(struct vmbus_channel *channel,
		void (*chn_rescind_cb)(struct vmbus_channel *))
{
	channel->chn_rescind_callback = chn_rescind_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_chn_rescind_callback);