/* channel_mgmt.c - Hyper-V VMBus channel management */
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/hyperv.h>

#include "hyperv_vmbus.h"

35 36 37
static void init_vp_index(struct vmbus_channel *channel,
			  const uuid_le *type_guid);

38
/**
39
 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
40 41 42 43 44 45
 * @icmsghdrp: Pointer to msg header structure
 * @icmsg_negotiate: Pointer to negotiate message structure
 * @buf: Raw buffer channel data
 *
 * @icmsghdrp is of type &struct icmsg_hdr.
 * @negop is of type &struct icmsg_negotiate.
46 47
 * Set up and fill in default negotiate response message.
 *
48 49 50
 * The fw_version specifies the  framework version that
 * we can support and srv_version specifies the service
 * version we can support.
51 52 53
 *
 * Mainly used by Hyper-V drivers.
 */
54
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
55
				struct icmsg_negotiate *negop, u8 *buf,
56
				int fw_version, int srv_version)
57
{
58 59 60 61
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
62
	int i;
63
	bool found_match = false;
64

65
	icmsghdrp->icmsgsize = 0x10;
66 67 68 69 70
	fw_major = (fw_version >> 16);
	fw_minor = (fw_version & 0xFFFF);

	srv_major = (srv_version >> 16);
	srv_minor = (srv_version & 0xFFFF);
71

72 73 74
	negop = (struct icmsg_negotiate *)&buf[
		sizeof(struct vmbuspipe_hdr) +
		sizeof(struct icmsg_hdr)];
75

76 77 78 79 80
	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;
81 82 83 84 85 86 87

	/*
	 * Select the framework version number we will
	 * support.
	 */

	for (i = 0; i < negop->icframe_vercnt; i++) {
88 89 90 91 92 93
		if ((negop->icversion_data[i].major == fw_major) &&
		   (negop->icversion_data[i].minor == fw_minor)) {
			icframe_major = negop->icversion_data[i].major;
			icframe_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
94 95
	}

96 97 98 99 100
	if (!found_match)
		goto fw_error;

	found_match = false;

101 102
	for (i = negop->icframe_vercnt;
		 (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
103 104 105 106 107 108
		if ((negop->icversion_data[i].major == srv_major) &&
		   (negop->icversion_data[i].minor == srv_minor)) {
			icmsg_major = negop->icversion_data[i].major;
			icmsg_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
109
	}
110

111
	/*
112
	 * Respond with the framework and service
113 114
	 * version numbers we can support.
	 */
115 116 117 118 119 120 121 122 123 124 125 126 127 128 129

fw_error:
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
130
}
131

132
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
133

134
/*
135
 * alloc_channel - Allocate and initialize a vmbus channel object
136
 */
137
static struct vmbus_channel *alloc_channel(void)
138
{
139
	static atomic_t chan_num = ATOMIC_INIT(0);
140
	struct vmbus_channel *channel;
141

142
	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
143 144 145
	if (!channel)
		return NULL;

146
	channel->id = atomic_inc_return(&chan_num);
147
	spin_lock_init(&channel->inbound_lock);
148
	spin_lock_init(&channel->lock);
149 150

	INIT_LIST_HEAD(&channel->sc_list);
151
	INIT_LIST_HEAD(&channel->percpu_list);
152 153 154 155

	return channel;
}

156
/*
157
 * free_channel - Release the resources used by the vmbus channel object
158
 */
159
static void free_channel(struct vmbus_channel *channel)
160
{
161
	kfree(channel);
162 163
}

164 165 166 167 168 169 170
static void percpu_channel_enq(void *arg)
{
	struct vmbus_channel *channel = arg;
	int cpu = smp_processor_id();

	list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
}
171

172 173 174 175 176 177
static void percpu_channel_deq(void *arg)
{
	struct vmbus_channel *channel = arg;

	list_del(&channel->percpu_list);
}
178

179 180

void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
181
{
182
	struct vmbus_channel_relid_released msg;
183
	unsigned long flags;
184
	struct vmbus_channel *primary_channel;
185

186
	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
187
	msg.child_relid = relid;
188 189 190
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));

191 192 193
	if (channel == NULL)
		return;

194 195
	if (channel->target_cpu != get_cpu()) {
		put_cpu();
196 197
		smp_call_function_single(channel->target_cpu,
					 percpu_channel_deq, channel, true);
198
	} else {
199
		percpu_channel_deq(channel);
200 201
		put_cpu();
	}
202

203 204 205 206 207 208
	if (channel->primary_channel == NULL) {
		spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
		list_del(&channel->listentry);
		spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
	} else {
		primary_channel = channel->primary_channel;
209
		spin_lock_irqsave(&primary_channel->lock, flags);
210
		list_del(&channel->sc_list);
211
		primary_channel->num_sc--;
212
		spin_unlock_irqrestore(&primary_channel->lock, flags);
213
	}
214
	free_channel(channel);
215
}
216

217 218
void vmbus_free_channels(void)
{
219 220 221 222 223 224 225 226
	struct vmbus_channel *channel, *tmp;

	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
		listentry) {
		/* if we don't set rescind to true, vmbus_close_internal()
		 * won't invoke hv_process_channel_removal().
		 */
		channel->rescind = true;
227 228 229 230 231

		vmbus_device_unregister(channel->device_obj);
	}
}

232
/*
233
 * vmbus_process_offer - Process the offer by creating a channel/device
234
 * associated with this offer
235
 */
236
static void vmbus_process_offer(struct vmbus_channel *newchannel)
237
{
238
	struct vmbus_channel *channel;
239
	bool fnew = true;
240
	unsigned long flags;
241

242
	/* Make sure this is a new offer */
243
	spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
244

245
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
246 247 248 249
		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
			newchannel->offermsg.offer.if_type) &&
			!uuid_le_cmp(channel->offermsg.offer.if_instance,
				newchannel->offermsg.offer.if_instance)) {
250
			fnew = false;
251 252 253 254
			break;
		}
	}

255
	if (fnew)
256
		list_add_tail(&newchannel->listentry,
257
			      &vmbus_connection.chn_list);
258

259
	spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
260

261
	if (!fnew) {
262 263 264 265 266 267 268 269
		/*
		 * Check to see if this is a sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index != 0) {
			/*
			 * Process the sub-channel.
			 */
			newchannel->primary_channel = channel;
270
			spin_lock_irqsave(&channel->lock, flags);
271
			list_add_tail(&newchannel->sc_list, &channel->sc_list);
272
			channel->num_sc++;
273
			spin_unlock_irqrestore(&channel->lock, flags);
274 275 276
		} else
			goto err_free_chan;
	}
277

278 279
	init_vp_index(newchannel, &newchannel->offermsg.offer.if_type);

280 281 282 283 284 285 286 287
	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_enq,
					 newchannel, true);
	} else {
		percpu_channel_enq(newchannel);
		put_cpu();
288 289
	}

290 291 292 293 294 295 296
	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can cleanup properly
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

297 298 299 300 301 302
	if (!fnew) {
		if (channel->sc_creation_callback != NULL)
			channel->sc_creation_callback(newchannel);
		return;
	}

303 304 305
	/*
	 * Start the process of binding this offer to the driver
	 * We need to set the DeviceObject field before calling
306
	 * vmbus_child_dev_add()
307
	 */
308
	newchannel->device_obj = vmbus_device_create(
309 310
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
311
		newchannel);
312
	if (!newchannel->device_obj)
313
		goto err_deq_chan;
314

315 316 317 318 319
	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 */
320 321 322 323 324 325
	if (vmbus_device_register(newchannel->device_obj) != 0) {
		pr_err("unable to add child device object (relid %d)\n",
			newchannel->offermsg.child_relid);
		kfree(newchannel->device_obj);
		goto err_deq_chan;
	}
326
	return;
327

328 329 330 331 332 333 334 335 336 337 338 339 340 341
err_deq_chan:
	spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
	list_del(&newchannel->listentry);
	spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);

	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_deq, newchannel, true);
	} else {
		percpu_channel_deq(newchannel);
		put_cpu();
	}

342 343
err_free_chan:
	free_channel(newchannel);
344 345
}

346 347 348 349
enum {
	IDE = 0,
	SCSI,
	NIC,
350
	ND_NIC,
351 352 353 354
	MAX_PERF_CHN,
};

/*
355
 * This is an array of device_ids (device types) that are performance critical.
356 357 358
 * We attempt to distribute the interrupt load for these devices across
 * all available CPUs.
 */
359
static const struct hv_vmbus_device_id hp_devs[] = {
360
	/* IDE */
361
	{ HV_IDE_GUID, },
362
	/* Storage - SCSI */
363
	{ HV_SCSI_GUID, },
364
	/* Network */
365
	{ HV_NIC_GUID, },
366 367
	/* NetworkDirect Guest RDMA */
	{ HV_ND_GUID, },
368 369 370 371 372 373
};


/*
 * We use this state to statically distribute the channel interrupt load.
 */
374
static int next_numa_node_id;
375 376 377

/*
 * Starting with Win8, we can statically distribute the incoming
378 379 380 381 382 383 384 385
 * channel interrupt load by binding a channel to VCPU.
 * We do this in a hierarchical fashion:
 * First distribute the primary channels across available NUMA nodes
 * and then distribute the subchannels amongst the CPUs in the NUMA
 * node assigned to the primary channel.
 *
 * For pre-win8 hosts or non-performance critical channels we assign the
 * first CPU in the first NUMA node.
386
 */
387
static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid)
388 389 390 391
{
	u32 cur_cpu;
	int i;
	bool perf_chn = false;
392 393 394
	struct vmbus_channel *primary = channel->primary_channel;
	int next_node;
	struct cpumask available_mask;
395 396

	for (i = IDE; i < MAX_PERF_CHN; i++) {
397
		if (!memcmp(type_guid->b, hp_devs[i].guid,
398 399 400 401 402 403 404 405 406 407 408 409 410
				 sizeof(uuid_le))) {
			perf_chn = true;
			break;
		}
	}
	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
		/*
		 * Prior to win8, all channel interrupts are
		 * delivered on cpu 0.
		 * Also if the channel is not a performance critical
		 * channel, bind it to cpu 0.
		 */
411 412
		channel->numa_node = 0;
		cpumask_set_cpu(0, &channel->alloced_cpus_in_node);
413
		channel->target_cpu = 0;
414
		channel->target_vp = hv_context.vp_index[0];
415
		return;
416
	}
417 418

	/*
419 420 421 422 423
	 * We distribute primary channels evenly across all the available
	 * NUMA nodes and within the assigned NUMA node we will assign the
	 * first available CPU to the primary channel.
	 * The sub-channels will be assigned to the CPUs available in the
	 * NUMA node evenly.
424
	 */
425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
	if (!primary) {
		while (true) {
			next_node = next_numa_node_id++;
			if (next_node == nr_node_ids)
				next_node = next_numa_node_id = 0;
			if (cpumask_empty(cpumask_of_node(next_node)))
				continue;
			break;
		}
		channel->numa_node = next_node;
		primary = channel;
	}

	if (cpumask_weight(&primary->alloced_cpus_in_node) ==
	    cpumask_weight(cpumask_of_node(primary->numa_node))) {
440
		/*
441 442
		 * We have cycled through all the CPUs in the node;
		 * reset the alloced map.
443
		 */
444
		cpumask_clear(&primary->alloced_cpus_in_node);
445 446
	}

447 448 449 450 451 452
	cpumask_xor(&available_mask, &primary->alloced_cpus_in_node,
		    cpumask_of_node(primary->numa_node));

	cur_cpu = cpumask_next(-1, &available_mask);
	cpumask_set_cpu(cur_cpu, &primary->alloced_cpus_in_node);

453 454
	channel->target_cpu = cur_cpu;
	channel->target_vp = hv_context.vp_index[cur_cpu];
455 456
}

457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472
/*
 * vmbus_unload_response - Handler for the unload response.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
	/*
	 * This is a global event; just wakeup the waiting thread.
	 * Once we successfully unload, we can cleanup the monitor state.
	 */
	complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(void)
{
	struct vmbus_channel_message_header hdr;

473 474 475 476
	/* Pre-Win2012R2 hosts don't support reconnect */
	if (vmbus_proto_version < VERSION_WIN8_1)
		return;

477 478 479 480 481 482 483 484
	init_completion(&vmbus_connection.unload_event);
	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
	hdr.msgtype = CHANNELMSG_UNLOAD;
	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header));

	wait_for_completion(&vmbus_connection.unload_event);
}

485
/*
486
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
487 488
 *
 */
489
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
490
{
491
	struct vmbus_channel_offer_channel *offer;
492
	struct vmbus_channel *newchannel;
493

494
	offer = (struct vmbus_channel_offer_channel *)hdr;
495

496
	/* Allocate the channel object and save this offer. */
497
	newchannel = alloc_channel();
498
	if (!newchannel) {
499
		pr_err("Unable to allocate channel object\n");
500 501 502
		return;
	}

503 504 505 506 507 508 509
	/*
	 * By default we setup state to enable batched
	 * reading. A specific service can choose to
	 * disable this prior to opening the channel.
	 */
	newchannel->batched_reading = true;

510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529
	/*
	 * Setup state for signalling the host.
	 */
	newchannel->sig_event = (struct hv_input_signal_event *)
				(ALIGN((unsigned long)
				&newchannel->sig_buf,
				HV_HYPERCALL_PARAM_ALIGN));

	newchannel->sig_event->connectionid.asu32 = 0;
	newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
	newchannel->sig_event->flag_number = 0;
	newchannel->sig_event->rsvdz = 0;

	if (vmbus_proto_version != VERSION_WS2008) {
		newchannel->is_dedicated_interrupt =
				(offer->is_dedicated_interrupt != 0);
		newchannel->sig_event->connectionid.u.id =
				offer->connection_id;
	}

530
	memcpy(&newchannel->offermsg, offer,
531
	       sizeof(struct vmbus_channel_offer_channel));
532 533
	newchannel->monitor_grp = (u8)offer->monitorid / 32;
	newchannel->monitor_bit = (u8)offer->monitorid % 32;
534

535
	vmbus_process_offer(newchannel);
536 537
}

538
/*
539
 * vmbus_onoffer_rescind - Rescind offer handler.
540 541 542
 *
 * We queue a work item to process this offer synchronously
 */
543
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
544
{
545
	struct vmbus_channel_rescind_offer *rescind;
546
	struct vmbus_channel *channel;
547 548
	unsigned long flags;
	struct device *dev;
549

550
	rescind = (struct vmbus_channel_rescind_offer *)hdr;
551
	channel = relid2channel(rescind->child_relid);
552

553 554
	if (channel == NULL) {
		hv_process_channel_removal(NULL, rescind->child_relid);
555
		return;
556
	}
557

558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574
	spin_lock_irqsave(&channel->lock, flags);
	channel->rescind = true;
	spin_unlock_irqrestore(&channel->lock, flags);

	if (channel->device_obj) {
		/*
		 * We will have to unregister this device from the
		 * driver core.
		 */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	} else {
		hv_process_channel_removal(channel,
			channel->offermsg.child_relid);
575
	}
576 577
}

578
/*
579 580
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
581 582 583
 *
 * Nothing to do here.
 */
584
static void vmbus_onoffers_delivered(
585
			struct vmbus_channel_message_header *hdr)
586 587 588
{
}

589
/*
590
 * vmbus_onopen_result - Open result handler.
591 592 593 594 595
 *
 * This is invoked when we received a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
596
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
597
{
598
	struct vmbus_channel_open_result *result;
599 600 601
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
602
	unsigned long flags;
603

604
	result = (struct vmbus_channel_open_result *)hdr;
605

606 607 608
	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
609
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
610

611 612
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
613
		requestheader =
614
			(struct vmbus_channel_message_header *)msginfo->msg;
615

616
		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
617
			openmsg =
618 619 620 621
			(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
622
				       result,
623 624 625
				       sizeof(
					struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
626 627 628 629
				break;
			}
		}
	}
630
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
631 632
}

633
/*
634
 * vmbus_ongpadl_created - GPADL created handler.
635 636 637 638 639
 *
 * This is invoked when we received a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
640
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
641
{
642 643 644 645
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
646
	unsigned long flags;
647

648
	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;
649

650 651 652 653
	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
654
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
655

656 657
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
658
		requestheader =
659
			(struct vmbus_channel_message_header *)msginfo->msg;
660

661
		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
662 663 664
			gpadlheader =
			(struct vmbus_channel_gpadl_header *)requestheader;

665 666 667 668
			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
669
				       gpadlcreated,
670 671 672
				       sizeof(
					struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
673 674 675 676
				break;
			}
		}
	}
677
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
678 679
}

680
/*
681
 * vmbus_ongpadl_torndown - GPADL torndown handler.
682 683 684 685 686
 *
 * This is invoked when we received a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
687
static void vmbus_ongpadl_torndown(
688
			struct vmbus_channel_message_header *hdr)
689
{
690 691 692 693
	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
694
	unsigned long flags;
695

696
	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;
697 698 699 700

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
701
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
702

703 704
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
705
		requestheader =
706
			(struct vmbus_channel_message_header *)msginfo->msg;
707

708
		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
709 710
			gpadl_teardown =
			(struct vmbus_channel_gpadl_teardown *)requestheader;
711

712 713
			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
				memcpy(&msginfo->response.gpadl_torndown,
714
				       gpadl_torndown,
715 716 717
				       sizeof(
					struct vmbus_channel_gpadl_torndown));
				complete(&msginfo->waitevent);
718 719 720 721
				break;
			}
		}
	}
722
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
723 724
}

725
/*
726
 * vmbus_onversion_response - Version response handler
727 728 729 730 731
 *
 * This is invoked when we received a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
732
static void vmbus_onversion_response(
733
		struct vmbus_channel_message_header *hdr)
734
{
735 736 737
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_version_response *version_response;
738
	unsigned long flags;
739

740
	version_response = (struct vmbus_channel_version_response *)hdr;
741
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
742

743 744
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
745
		requestheader =
746
			(struct vmbus_channel_message_header *)msginfo->msg;
747

748 749 750
		if (requestheader->msgtype ==
		    CHANNELMSG_INITIATE_CONTACT) {
			memcpy(&msginfo->response.version_response,
751
			      version_response,
752
			      sizeof(struct vmbus_channel_version_response));
753
			complete(&msginfo->waitevent);
754 755
		}
	}
756
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
757 758
}

759
/* Channel message dispatch table */
760
struct vmbus_channel_message_table_entry
761
	channel_message_table[CHANNELMSG_COUNT] = {
762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778
	{CHANNELMSG_INVALID,			0, NULL},
	{CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer},
	{CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind},
	{CHANNELMSG_REQUESTOFFERS,		0, NULL},
	{CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered},
	{CHANNELMSG_OPENCHANNEL,		0, NULL},
	{CHANNELMSG_OPENCHANNEL_RESULT,		1, vmbus_onopen_result},
	{CHANNELMSG_CLOSECHANNEL,		0, NULL},
	{CHANNELMSG_GPADL_HEADER,		0, NULL},
	{CHANNELMSG_GPADL_BODY,			0, NULL},
	{CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created},
	{CHANNELMSG_GPADL_TEARDOWN,		0, NULL},
	{CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown},
	{CHANNELMSG_RELID_RELEASED,		0, NULL},
	{CHANNELMSG_INITIATE_CONTACT,		0, NULL},
	{CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response},
	{CHANNELMSG_UNLOAD,			0, NULL},
779
	{CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response},
780 781
};

782
/*
783
 * vmbus_onmessage - Handler for channel protocol messages.
784 785 786
 *
 * This is invoked in the vmbus worker thread context.
 */
787
void vmbus_onmessage(void *context)
788
{
789
	struct hv_message *msg = context;
790
	struct vmbus_channel_message_header *hdr;
791 792
	int size;

793 794
	hdr = (struct vmbus_channel_message_header *)msg->u.payload;
	size = msg->header.payload_size;
795

796
	if (hdr->msgtype >= CHANNELMSG_COUNT) {
797
		pr_err("Received invalid channel message type %d size %d\n",
798
			   hdr->msgtype, size);
799
		print_hex_dump_bytes("", DUMP_PREFIX_NONE,
800
				     (unsigned char *)msg->u.payload, size);
801 802 803
		return;
	}

804 805
	if (channel_message_table[hdr->msgtype].message_handler)
		channel_message_table[hdr->msgtype].message_handler(hdr);
806
	else
807
		pr_err("Unhandled channel message type %d\n", hdr->msgtype);
808 809
}

810
/*
811
 * vmbus_request_offers - Send a request to get all our pending offers.
812
 */
813
int vmbus_request_offers(void)
814
{
815
	struct vmbus_channel_message_header *msg;
816
	struct vmbus_channel_msginfo *msginfo;
817
	int ret;
818

819
	msginfo = kmalloc(sizeof(*msginfo) +
820 821
			  sizeof(struct vmbus_channel_message_header),
			  GFP_KERNEL);
822
	if (!msginfo)
823
		return -ENOMEM;
824

825
	msg = (struct vmbus_channel_message_header *)msginfo->msg;
826

827
	msg->msgtype = CHANNELMSG_REQUESTOFFERS;
828 829


830
	ret = vmbus_post_msg(msg,
831 832
			       sizeof(struct vmbus_channel_message_header));
	if (ret != 0) {
833
		pr_err("Unable to request offers - %d\n", ret);
834

835 836
		goto cleanup;
	}
837

838
cleanup:
839
	kfree(msginfo);
840 841 842 843

	return ret;
}

844 845
/*
 * Retrieve the (sub) channel on which to send an outgoing request.
846 847
 * When a primary channel has multiple sub-channels, we try to
 * distribute the load equally amongst all available channels.
848 849 850 851
 */
struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
{
	struct list_head *cur, *tmp;
852
	int cur_cpu;
853 854
	struct vmbus_channel *cur_channel;
	struct vmbus_channel *outgoing_channel = primary;
855 856
	int next_channel;
	int i = 1;
857 858 859 860

	if (list_empty(&primary->sc_list))
		return outgoing_channel;

861 862 863 864 865 866 867
	next_channel = primary->next_oc++;

	if (next_channel > (primary->num_sc)) {
		primary->next_oc = 0;
		return outgoing_channel;
	}

868 869
	cur_cpu = hv_context.vp_index[get_cpu()];
	put_cpu();
870 871 872 873 874 875 876 877
	list_for_each_safe(cur, tmp, &primary->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
		if (cur_channel->state != CHANNEL_OPENED_STATE)
			continue;

		if (cur_channel->target_vp == cur_cpu)
			return cur_channel;

878 879
		if (i == next_channel)
			return cur_channel;
880

881
		i++;
882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927
	}

	return outgoing_channel;
}
EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);

static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
	struct list_head *cur, *tmp;
	struct vmbus_channel *cur_channel;

	if (primary_channel->sc_creation_callback == NULL)
		return;

	list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);

		primary_channel->sc_creation_callback(cur_channel);
	}
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);

bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
	bool ret;

	ret = !list_empty(&primary->sc_list);

	if (ret) {
		/*
		 * Invoke the callback on sub-channel creation.
		 * This will present a uniform interface to the
		 * clients.
		 */
		invoke_sc_cb(primary);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);