/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/hyperv.h>

#include "hyperv_vmbus.h"

static void init_vp_index(struct vmbus_channel *channel, u16 dev_type);

static const struct vmbus_device vmbus_devs[] = {
	/* IDE */
	{ .dev_type = HV_IDE,
	  HV_IDE_GUID,
	  .perf_device = true,
	},

	/* SCSI */
	{ .dev_type = HV_SCSI,
	  HV_SCSI_GUID,
	  .perf_device = true,
	},

	/* Fibre Channel */
	{ .dev_type = HV_FC,
	  HV_SYNTHFC_GUID,
	  .perf_device = true,
	},

	/* Synthetic NIC */
	{ .dev_type = HV_NIC,
	  HV_NIC_GUID,
	  .perf_device = true,
	},

	/* Network Direct */
	{ .dev_type = HV_ND,
	  HV_ND_GUID,
	  .perf_device = true,
	},

	/* PCIE */
	{ .dev_type = HV_PCIE,
	  HV_PCIE_GUID,
	  .perf_device = true,
	},

	/* Synthetic Frame Buffer */
	{ .dev_type = HV_FB,
	  HV_SYNTHVID_GUID,
	  .perf_device = false,
	},

	/* Synthetic Keyboard */
	{ .dev_type = HV_KBD,
	  HV_KBD_GUID,
	  .perf_device = false,
	},

	/* Synthetic MOUSE */
	{ .dev_type = HV_MOUSE,
	  HV_MOUSE_GUID,
	  .perf_device = false,
	},

	/* KVP */
	{ .dev_type = HV_KVP,
	  HV_KVP_GUID,
	  .perf_device = false,
	},

	/* Time Synch */
	{ .dev_type = HV_TS,
	  HV_TS_GUID,
	  .perf_device = false,
	},

	/* Heartbeat */
	{ .dev_type = HV_HB,
	  HV_HEART_BEAT_GUID,
	  .perf_device = false,
	},

	/* Shutdown */
	{ .dev_type = HV_SHUTDOWN,
	  HV_SHUTDOWN_GUID,
	  .perf_device = false,
	},

	/* File copy */
	{ .dev_type = HV_FCOPY,
	  HV_FCOPY_GUID,
	  .perf_device = false,
	},

	/* Backup */
	{ .dev_type = HV_BACKUP,
	  HV_VSS_GUID,
	  .perf_device = false,
	},

	/* Dynamic Memory */
	{ .dev_type = HV_DM,
	  HV_DM_GUID,
	  .perf_device = false,
	},

	/* Unknown GUID */
	{ .dev_type = HV_UNKOWN,
	  .perf_device = false,
	},
};

static u16 hv_get_dev_type(const uuid_le *guid)
{
	u16 i;

	for (i = HV_IDE; i < HV_UNKOWN; i++) {
		if (!uuid_le_cmp(*guid, vmbus_devs[i].guid))
			return i;
	}
	pr_info("Unknown GUID: %pUl\n", guid);
	return i;
}
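
/*
 * For instance (an illustrative sketch, not part of this file), a lookup
 * against the table above resolves a known offer GUID to its device class,
 * and any unlisted GUID falls through to HV_UNKOWN:
 *
 *	u16 type = hv_get_dev_type(&some_offer->offer.if_type);
 *	if (type == HV_UNKOWN)
 *		pr_debug("offer carries an unrecognized GUID\n");
 *
 * Here "some_offer" is a hypothetical pointer to a received
 * struct vmbus_channel_offer_channel.
 */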

/**
 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @negop: Pointer to negotiate message structure
 * @buf: Raw buffer channel data
 * @fw_version: The framework version we can support
 * @srv_version: The service version we can support
 *
 * @icmsghdrp is of type &struct icmsg_hdr.
 * @negop is of type &struct icmsg_negotiate.
 *
 * Set up and fill in the default negotiate response message.
 *
 * fw_version specifies the framework version that we can support and
 * srv_version specifies the service version we can support.
 *
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
				struct icmsg_negotiate *negop, u8 *buf,
				int fw_version, int srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i;
	bool found_match = false;

	icmsghdrp->icmsgsize = 0x10;
	fw_major = (fw_version >> 16);
	fw_minor = (fw_version & 0xFFFF);

	srv_major = (srv_version >> 16);
	srv_minor = (srv_version & 0xFFFF);

	negop = (struct icmsg_negotiate *)&buf[
		sizeof(struct vmbuspipe_hdr) +
		sizeof(struct icmsg_hdr)];

	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/*
	 * Select the framework version number we will
	 * support.
	 */

	for (i = 0; i < negop->icframe_vercnt; i++) {
		if ((negop->icversion_data[i].major == fw_major) &&
		   (negop->icversion_data[i].minor == fw_minor)) {
			icframe_major = negop->icversion_data[i].major;
			icframe_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	if (!found_match)
		goto fw_error;

	found_match = false;

	for (i = negop->icframe_vercnt;
		 (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
		if ((negop->icversion_data[i].major == srv_major) &&
		   (negop->icversion_data[i].minor == srv_minor)) {
			icmsg_major = negop->icversion_data[i].major;
			icmsg_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.
	 */

fw_error:
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
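
/*
 * Example (a sketch only, not taken from this file): fw_version and
 * srv_version are encoded as (major << 16) | minor, so an IC driver's
 * channel callback that supports framework 3.0 and service 3.0 could
 * answer a negotiate request with:
 *
 *	if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE)
 *		vmbus_prep_negotiate_resp(icmsghdrp, NULL, recv_buf,
 *					  3 << 16,	(fw 3.0)
 *					  3 << 16);	(srv 3.0)
 *
 * "recv_buf" is a hypothetical buffer holding the received message.
 */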

/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
	static atomic_t chan_num = ATOMIC_INIT(0);
	struct vmbus_channel *channel;

	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
	if (!channel)
		return NULL;

	channel->id = atomic_inc_return(&chan_num);
	spin_lock_init(&channel->inbound_lock);
	spin_lock_init(&channel->lock);

	INIT_LIST_HEAD(&channel->sc_list);
	INIT_LIST_HEAD(&channel->percpu_list);

	return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
	kfree(channel);
}

static void percpu_channel_enq(void *arg)
{
	struct vmbus_channel *channel = arg;
	int cpu = smp_processor_id();

	list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
}

static void percpu_channel_deq(void *arg)
{
	struct vmbus_channel *channel = arg;

	list_del(&channel->percpu_list);
}


static void vmbus_release_relid(u32 relid)
{
	struct vmbus_channel_relid_released msg;

	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));
}

void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
	unsigned long flags;
	struct vmbus_channel *primary_channel;

	vmbus_release_relid(relid);

	BUG_ON(!channel->rescind);

	if (channel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(channel->target_cpu,
					 percpu_channel_deq, channel, true);
	} else {
		percpu_channel_deq(channel);
		put_cpu();
	}

	if (channel->primary_channel == NULL) {
		mutex_lock(&vmbus_connection.channel_mutex);
		list_del(&channel->listentry);
		mutex_unlock(&vmbus_connection.channel_mutex);

		primary_channel = channel;
	} else {
		primary_channel = channel->primary_channel;
		spin_lock_irqsave(&primary_channel->lock, flags);
		list_del(&channel->sc_list);
		primary_channel->num_sc--;
		spin_unlock_irqrestore(&primary_channel->lock, flags);
	}

	/*
	 * We need to free the bit for init_vp_index() to work in the case
	 * of sub-channel, when we reload drivers like hv_netvsc.
	 */
	cpumask_clear_cpu(channel->target_cpu,
			  &primary_channel->alloced_cpus_in_node);

	free_channel(channel);
}

void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
		listentry) {
		/* hv_process_channel_removal() needs this */
		channel->rescind = true;

		vmbus_device_unregister(channel->device_obj);
	}
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	bool fnew = true;
	unsigned long flags;
	u16 dev_type;

	/* Make sure this is a new offer */
	mutex_lock(&vmbus_connection.channel_mutex);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
			newchannel->offermsg.offer.if_type) &&
			!uuid_le_cmp(channel->offermsg.offer.if_instance,
				newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			break;
		}
	}

	if (fnew)
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);

	mutex_unlock(&vmbus_connection.channel_mutex);

	if (!fnew) {
		/*
		 * Check to see if this is a sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index != 0) {
			/*
			 * Process the sub-channel.
			 */
			newchannel->primary_channel = channel;
			spin_lock_irqsave(&channel->lock, flags);
			list_add_tail(&newchannel->sc_list, &channel->sc_list);
			channel->num_sc++;
			spin_unlock_irqrestore(&channel->lock, flags);
		} else
			goto err_free_chan;
	}

	dev_type = hv_get_dev_type(&newchannel->offermsg.offer.if_type);

	init_vp_index(newchannel, dev_type);

	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_enq,
					 newchannel, true);
	} else {
		percpu_channel_enq(newchannel);
		put_cpu();
	}

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can clean up properly.
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	if (!fnew) {
		if (channel->sc_creation_callback != NULL)
			channel->sc_creation_callback(newchannel);
		return;
	}

	/*
	 * Start the process of binding this offer to the driver.
	 * We need to set the DeviceObject field before calling
	 * vmbus_device_register().
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	newchannel->device_obj->device_id = dev_type;
	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 */
	if (vmbus_device_register(newchannel->device_obj) != 0) {
		pr_err("unable to add child device object (relid %d)\n",
			newchannel->offermsg.child_relid);
		kfree(newchannel->device_obj);
		goto err_deq_chan;
	}
	return;

err_deq_chan:
	vmbus_release_relid(newchannel->offermsg.child_relid);

	mutex_lock(&vmbus_connection.channel_mutex);
	list_del(&newchannel->listentry);
	mutex_unlock(&vmbus_connection.channel_mutex);

	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_deq, newchannel, true);
	} else {
		percpu_channel_deq(newchannel);
		put_cpu();
	}

err_free_chan:
	free_channel(newchannel);
}

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static int next_numa_node_id;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to a VCPU.
 * We do this in a hierarchical fashion:
 * First distribute the primary channels across available NUMA nodes
 * and then distribute the subchannels amongst the CPUs in the NUMA
 * node assigned to the primary channel.
 *
 * For pre-win8 hosts or non-performance critical channels we assign the
 * first CPU in the first NUMA node.
 */
static void init_vp_index(struct vmbus_channel *channel, u16 dev_type)
{
	u32 cur_cpu;
	bool perf_chn = vmbus_devs[dev_type].perf_device;
	struct vmbus_channel *primary = channel->primary_channel;
	int next_node;
	struct cpumask available_mask;
	struct cpumask *alloced_mask;

	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
		/*
		 * Prior to win8, all channel interrupts are
		 * delivered on cpu 0.
		 * Also if the channel is not a performance critical
		 * channel, bind it to cpu 0.
		 */
		channel->numa_node = 0;
		channel->target_cpu = 0;
		channel->target_vp = hv_context.vp_index[0];
		return;
	}

	/*
	 * We distribute primary channels evenly across all the available
	 * NUMA nodes and within the assigned NUMA node we will assign the
	 * first available CPU to the primary channel.
	 * The sub-channels will be assigned to the CPUs available in the
	 * NUMA node evenly.
	 */
	if (!primary) {
		while (true) {
			next_node = next_numa_node_id++;
			if (next_node == nr_node_ids)
				next_node = next_numa_node_id = 0;
			if (cpumask_empty(cpumask_of_node(next_node)))
				continue;
			break;
		}
		channel->numa_node = next_node;
		primary = channel;
	}
	alloced_mask = &hv_context.hv_numa_map[primary->numa_node];

	if (cpumask_weight(alloced_mask) ==
	    cpumask_weight(cpumask_of_node(primary->numa_node))) {
		/*
		 * We have cycled through all the CPUs in the node;
		 * reset the alloced map.
		 */
		cpumask_clear(alloced_mask);
	}

	cpumask_xor(&available_mask, alloced_mask,
		    cpumask_of_node(primary->numa_node));

	cur_cpu = -1;

	/*
	 * Normally the Hyper-V host doesn't create more subchannels
	 * than there are VCPUs on the node, but it is possible when not
	 * all present VCPUs on the node are initialized by the guest.
	 * Clear alloced_cpus_in_node to start over.
	 */
	if (cpumask_equal(&primary->alloced_cpus_in_node,
			  cpumask_of_node(primary->numa_node)))
		cpumask_clear(&primary->alloced_cpus_in_node);

	while (true) {
		cur_cpu = cpumask_next(cur_cpu, &available_mask);
		if (cur_cpu >= nr_cpu_ids) {
			cur_cpu = -1;
			cpumask_copy(&available_mask,
				     cpumask_of_node(primary->numa_node));
			continue;
		}

		/*
		 * NOTE: in the case of a sub-channel, we clear the
		 * sub-channel related bit(s) in
		 * primary->alloced_cpus_in_node in
		 * hv_process_channel_removal(), so when we reload drivers
		 * like hv_netvsc in an SMP guest, we are able to
		 * re-allocate the bit from primary->alloced_cpus_in_node
		 * here.
		 */
		if (!cpumask_test_cpu(cur_cpu,
				&primary->alloced_cpus_in_node)) {
			cpumask_set_cpu(cur_cpu,
					&primary->alloced_cpus_in_node);
			cpumask_set_cpu(cur_cpu, alloced_mask);
			break;
		}
	}

	channel->target_cpu = cur_cpu;
	channel->target_vp = hv_context.vp_index[cur_cpu];
}
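
/*
 * Illustration (a hypothetical topology, not derived from this code): in a
 * guest with two NUMA nodes, the logic above spreads performance critical
 * primary channels node 0, node 1, node 0, ... via next_numa_node_id, and
 * each new channel then roughly takes the next CPU in its node that is not
 * yet set in the node's allocation mask; once every CPU in the node has
 * been claimed, the mask is cleared and assignment starts over.
 */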

/*
 * vmbus_unload_response - Handler for the unload response.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
	/*
	 * This is a global event; just wakeup the waiting thread.
	 * Once we successfully unload, we can cleanup the monitor state.
	 */
	complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(void)
{
	struct vmbus_channel_message_header hdr;

	/* Pre-Win2012R2 hosts don't support reconnect */
	if (vmbus_proto_version < VERSION_WIN8_1)
		return;

	init_completion(&vmbus_connection.unload_event);
	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
	hdr.msgtype = CHANNELMSG_UNLOAD;
	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header));

	wait_for_completion(&vmbus_connection.unload_event);
}

/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_offer_channel *offer;
	struct vmbus_channel *newchannel;

	offer = (struct vmbus_channel_offer_channel *)hdr;

	/* Allocate the channel object and save this offer. */
	newchannel = alloc_channel();
	if (!newchannel) {
		pr_err("Unable to allocate channel object\n");
		return;
	}

	/*
	 * By default we set up state to enable batched
	 * reading. A specific service can choose to
	 * disable this prior to opening the channel.
	 */
	newchannel->batched_reading = true;

	/*
	 * Set up state for signalling the host.
	 */
	newchannel->sig_event = (struct hv_input_signal_event *)
				(ALIGN((unsigned long)
				&newchannel->sig_buf,
				HV_HYPERCALL_PARAM_ALIGN));

	newchannel->sig_event->connectionid.asu32 = 0;
	newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
	newchannel->sig_event->flag_number = 0;
	newchannel->sig_event->rsvdz = 0;

	if (vmbus_proto_version != VERSION_WS2008) {
		newchannel->is_dedicated_interrupt =
				(offer->is_dedicated_interrupt != 0);
		newchannel->sig_event->connectionid.u.id =
				offer->connection_id;
	}

	memcpy(&newchannel->offermsg, offer,
	       sizeof(struct vmbus_channel_offer_channel));
	newchannel->monitor_grp = (u8)offer->monitorid / 32;
	newchannel->monitor_bit = (u8)offer->monitorid % 32;

	vmbus_process_offer(newchannel);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_rescind_offer *rescind;
	struct vmbus_channel *channel;
	unsigned long flags;
	struct device *dev;

	rescind = (struct vmbus_channel_rescind_offer *)hdr;
	channel = relid2channel(rescind->child_relid);

	if (channel == NULL) {
		/*
		 * This should be impossible, because in
		 * vmbus_process_offer() we have already invoked
		 * vmbus_release_relid() on error.
		 */
		return;
	}

	spin_lock_irqsave(&channel->lock, flags);
	channel->rescind = true;
	spin_unlock_irqrestore(&channel->lock, flags);

	if (channel->device_obj) {
		/*
		 * We will have to unregister this device from the
		 * driver core.
		 */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	} else {
		hv_process_channel_removal(channel,
			channel->offermsg.child_relid);
	}
}

/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we receive a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_open_result *result;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
	unsigned long flags;

	result = (struct vmbus_channel_open_result *)hdr;

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
			openmsg =
			(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
				       result,
				       sizeof(
					struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we receive a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
	unsigned long flags;

	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
			gpadlheader =
			(struct vmbus_channel_gpadl_header *)requestheader;

			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
				       gpadlcreated,
				       sizeof(
					struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we receive a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_torndown(
			struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
	unsigned long flags;

	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

	/*
	 * Find the teardown msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
			gpadl_teardown =
			(struct vmbus_channel_gpadl_teardown *)requestheader;

			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
				memcpy(&msginfo->response.gpadl_torndown,
				       gpadl_torndown,
				       sizeof(
					struct vmbus_channel_gpadl_torndown));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onversion_response - Version response handler
 *
 * This is invoked when we receive a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onversion_response(
		struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_version_response *version_response;
	unsigned long flags;

	version_response = (struct vmbus_channel_version_response *)hdr;
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype ==
		    CHANNELMSG_INITIATE_CONTACT) {
			memcpy(&msginfo->response.version_response,
			      version_response,
			      sizeof(struct vmbus_channel_version_response));
			complete(&msginfo->waitevent);
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/* Channel message dispatch table */
struct vmbus_channel_message_table_entry
	channel_message_table[CHANNELMSG_COUNT] = {
	{CHANNELMSG_INVALID,			0, NULL},
	{CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer},
	{CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind},
	{CHANNELMSG_REQUESTOFFERS,		0, NULL},
	{CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered},
	{CHANNELMSG_OPENCHANNEL,		0, NULL},
	{CHANNELMSG_OPENCHANNEL_RESULT,		1, vmbus_onopen_result},
	{CHANNELMSG_CLOSECHANNEL,		0, NULL},
	{CHANNELMSG_GPADL_HEADER,		0, NULL},
	{CHANNELMSG_GPADL_BODY,			0, NULL},
	{CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created},
	{CHANNELMSG_GPADL_TEARDOWN,		0, NULL},
	{CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown},
	{CHANNELMSG_RELID_RELEASED,		0, NULL},
	{CHANNELMSG_INITIATE_CONTACT,		0, NULL},
	{CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response},
	{CHANNELMSG_UNLOAD,			0, NULL},
	{CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response},
};

/*
 * vmbus_onmessage - Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 */
void vmbus_onmessage(void *context)
{
	struct hv_message *msg = context;
	struct vmbus_channel_message_header *hdr;
	int size;

	hdr = (struct vmbus_channel_message_header *)msg->u.payload;
	size = msg->header.payload_size;

	if (hdr->msgtype >= CHANNELMSG_COUNT) {
		pr_err("Received invalid channel message type %d size %d\n",
			   hdr->msgtype, size);
		print_hex_dump_bytes("", DUMP_PREFIX_NONE,
				     (unsigned char *)msg->u.payload, size);
		return;
	}

	if (channel_message_table[hdr->msgtype].message_handler)
		channel_message_table[hdr->msgtype].message_handler(hdr);
	else
		pr_err("Unhandled channel message type %d\n", hdr->msgtype);
}

/*
 * vmbus_request_offers - Send a request to get all our pending offers.
 */
int vmbus_request_offers(void)
{
	struct vmbus_channel_message_header *msg;
	struct vmbus_channel_msginfo *msginfo;
	int ret;

	msginfo = kmalloc(sizeof(*msginfo) +
			  sizeof(struct vmbus_channel_message_header),
			  GFP_KERNEL);
	if (!msginfo)
		return -ENOMEM;

	msg = (struct vmbus_channel_message_header *)msginfo->msg;

	msg->msgtype = CHANNELMSG_REQUESTOFFERS;

	ret = vmbus_post_msg(msg,
			       sizeof(struct vmbus_channel_message_header));
	if (ret != 0) {
		pr_err("Unable to request offers - %d\n", ret);

		goto cleanup;
	}

cleanup:
	kfree(msginfo);

	return ret;
}

/*
 * Retrieve the (sub) channel on which to send an outgoing request.
 * When a primary channel has multiple sub-channels, we try to
 * distribute the load equally amongst all available channels.
 */
struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
{
	struct list_head *cur, *tmp;
	int cur_cpu;
	struct vmbus_channel *cur_channel;
	struct vmbus_channel *outgoing_channel = primary;
	int next_channel;
	int i = 1;

	if (list_empty(&primary->sc_list))
		return outgoing_channel;

	next_channel = primary->next_oc++;

	if (next_channel > (primary->num_sc)) {
		primary->next_oc = 0;
		return outgoing_channel;
	}

	cur_cpu = hv_context.vp_index[get_cpu()];
	put_cpu();
	list_for_each_safe(cur, tmp, &primary->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
		if (cur_channel->state != CHANNEL_OPENED_STATE)
			continue;

		if (cur_channel->target_vp == cur_cpu)
			return cur_channel;

		if (i == next_channel)
			return cur_channel;

		i++;
	}

	return outgoing_channel;
}
EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
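
/*
 * A typical caller (a sketch only; names are hypothetical) picks a channel
 * per I/O request so that traffic spreads across the sub-channels:
 *
 *	struct vmbus_channel *outgoing;
 *
 *	outgoing = vmbus_get_outgoing_channel(device->channel);
 *	ret = vmbus_sendpacket(outgoing, ...);
 *
 * If no sub-channel is a better fit, the primary channel itself is
 * returned, so the call is always safe.
 */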

static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
	struct list_head *cur, *tmp;
	struct vmbus_channel *cur_channel;

	if (primary_channel->sc_creation_callback == NULL)
		return;

	list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);

		primary_channel->sc_creation_callback(cur_channel);
	}
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
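
/*
 * Example registration (a sketch; "my_sc_open" stands in for whatever
 * handler a driver provides): a driver that requests sub-channels sets the
 * callback on its primary channel first, so vmbus_process_offer() can
 * invoke it as each sub-channel offer arrives:
 *
 *	vmbus_set_sc_create_callback(primary, my_sc_open);
 */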

bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
	bool ret;

	ret = !list_empty(&primary->sc_list);

	if (ret) {
		/*
		 * Invoke the callback on sub-channel creation.
		 * This will present a uniform interface to the
		 * clients.
		 */
		invoke_sc_cb(primary);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);
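
/*
 * Clients typically call vmbus_are_subchannels_present() after asking the
 * host for sub-channels; a sketch of the pattern (driver-side names are
 * hypothetical):
 *
 *	if (vmbus_are_subchannels_present(primary))
 *		distribute_io_across_subchannels();
 *
 * The call also fires sc_creation_callback for each existing sub-channel,
 * giving late-registering clients the same uniform view.
 */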