/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/wait.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/module.h>
#include <linux/completion.h>
#include <linux/hyperv.h>

#include "hyperv_vmbus.h"

static void init_vp_index(struct vmbus_channel *channel,
			  const uuid_le *type_guid);

/**
 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @negop: Pointer to negotiate message structure
 * @buf: Raw buffer channel data
 * @fw_version: The framework version we can support
 * @srv_version: The service version we can support
 *
 * @icmsghdrp is of type &struct icmsg_hdr.
 * @negop is of type &struct icmsg_negotiate.
 *
 * Set up and fill in default negotiate response message.
 *
 * The fw_version specifies the framework version that
 * we can support and srv_version specifies the service
 * version we can support.
 *
 * Mainly used by Hyper-V drivers.
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
			       struct icmsg_negotiate *negop, u8 *buf,
			       int fw_version, int srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i;
	bool found_match = false;

	icmsghdrp->icmsgsize = 0x10;
	fw_major = (fw_version >> 16);
	fw_minor = (fw_version & 0xFFFF);

	srv_major = (srv_version >> 16);
	srv_minor = (srv_version & 0xFFFF);

	negop = (struct icmsg_negotiate *)&buf[
		sizeof(struct vmbuspipe_hdr) +
		sizeof(struct icmsg_hdr)];

	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/*
	 * Select the framework version number we will
	 * support.
	 */

	for (i = 0; i < negop->icframe_vercnt; i++) {
		if ((negop->icversion_data[i].major == fw_major) &&
		    (negop->icversion_data[i].minor == fw_minor)) {
			icframe_major = negop->icversion_data[i].major;
			icframe_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	if (!found_match)
		goto fw_error;

	found_match = false;

	for (i = negop->icframe_vercnt;
	     (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
		if ((negop->icversion_data[i].major == srv_major) &&
		    (negop->icversion_data[i].minor == srv_minor)) {
			icmsg_major = negop->icversion_data[i].major;
			icmsg_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.
	 */

fw_error:
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
}
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
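
/*
 * Usage sketch (illustrative, not part of this file): an IC service
 * driver such as hv_utils calls this from its channel callback when a
 * negotiate packet arrives. Versions are encoded as (major << 16) | minor,
 * so framework version 3.0 is passed as 0x00030000:
 *
 *	if (icmsghdrp->icmsgtype == ICMSGTYPE_NEGOTIATE)
 *		vmbus_prep_negotiate_resp(icmsghdrp, negop, buf,
 *					  0x00030000, 0x00030002);
 *
 * The actual fw/srv version numbers are service-specific; the values
 * above are only an example of the encoding.
 */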

/*
 * alloc_channel - Allocate and initialize a vmbus channel object
 */
static struct vmbus_channel *alloc_channel(void)
{
	static atomic_t chan_num = ATOMIC_INIT(0);
	struct vmbus_channel *channel;

	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
	if (!channel)
		return NULL;

	channel->id = atomic_inc_return(&chan_num);
	spin_lock_init(&channel->inbound_lock);
	spin_lock_init(&channel->lock);

	INIT_LIST_HEAD(&channel->sc_list);
	INIT_LIST_HEAD(&channel->percpu_list);

	return channel;
}

/*
 * free_channel - Release the resources used by the vmbus channel object
 */
static void free_channel(struct vmbus_channel *channel)
{
	kfree(channel);
}

/*
 * percpu_channel_enq - Add the channel to the per-cpu channel list of the
 * CPU this runs on (invoked directly or via smp_call_function_single()
 * on the channel's target CPU).
 */
static void percpu_channel_enq(void *arg)
{
	struct vmbus_channel *channel = arg;
	int cpu = smp_processor_id();

	list_add_tail(&channel->percpu_list, &hv_context.percpu_list[cpu]);
}

/*
 * percpu_channel_deq - Remove the channel from the per-cpu channel list;
 * runs on the channel's target CPU.
 */
static void percpu_channel_deq(void *arg)
{
	struct vmbus_channel *channel = arg;

	list_del(&channel->percpu_list);
}

void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
	struct vmbus_channel_relid_released msg;
	unsigned long flags;
	struct vmbus_channel *primary_channel;

	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));

	if (channel == NULL)
		return;

	/*
	 * percpu_channel_deq() must run on the channel's target CPU:
	 * call it directly if we are already there, otherwise via IPI.
	 */
	if (channel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(channel->target_cpu,
					 percpu_channel_deq, channel, true);
	} else {
		percpu_channel_deq(channel);
		put_cpu();
	}

	if (channel->primary_channel == NULL) {
		spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
		list_del(&channel->listentry);
		spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
	} else {
		primary_channel = channel->primary_channel;
		spin_lock_irqsave(&primary_channel->lock, flags);
		list_del(&channel->sc_list);
		primary_channel->num_sc--;
		spin_unlock_irqrestore(&primary_channel->lock, flags);
	}
	free_channel(channel);
}

void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
		listentry) {
		/*
		 * If we don't set rescind to true, vmbus_close_internal()
		 * won't invoke hv_process_channel_removal().
		 */
		channel->rescind = true;

		vmbus_device_unregister(channel->device_obj);
	}
}

/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	bool fnew = true;
	unsigned long flags;

	/* Make sure this is a new offer */
	spin_lock_irqsave(&vmbus_connection.channel_lock, flags);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
			newchannel->offermsg.offer.if_type) &&
			!uuid_le_cmp(channel->offermsg.offer.if_instance,
				newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			break;
		}
	}

	if (fnew)
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);

	spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);

	if (!fnew) {
		/*
		 * Check to see if this is a sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index != 0) {
			/*
			 * Process the sub-channel.
			 */
			newchannel->primary_channel = channel;
			spin_lock_irqsave(&channel->lock, flags);
			list_add_tail(&newchannel->sc_list, &channel->sc_list);
			channel->num_sc++;
			spin_unlock_irqrestore(&channel->lock, flags);
		} else {
			goto err_free_chan;
		}
	}

	init_vp_index(newchannel, &newchannel->offermsg.offer.if_type);

	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_enq,
					 newchannel, true);
	} else {
		percpu_channel_enq(newchannel);
		put_cpu();
	}

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can clean up properly.
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	if (!fnew) {
		if (channel->sc_creation_callback != NULL)
			channel->sc_creation_callback(newchannel);
		return;
	}

	/*
	 * Start the process of binding this offer to the driver.
	 * We need to set the device_obj field before calling
	 * vmbus_device_register().
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 */
	if (vmbus_device_register(newchannel->device_obj) != 0) {
		pr_err("unable to add child device object (relid %d)\n",
			newchannel->offermsg.child_relid);
		kfree(newchannel->device_obj);
		goto err_deq_chan;
	}
	return;

err_deq_chan:
	spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
	list_del(&newchannel->listentry);
	spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);

	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_deq, newchannel, true);
	} else {
		percpu_channel_deq(newchannel);
		put_cpu();
	}

err_free_chan:
	free_channel(newchannel);
}
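
/*
 * Offer handling at a glance: vmbus_onoffer() (below) allocates the
 * channel object, and vmbus_process_offer() then either registers a new
 * vmbus device for it or, when the offer repeats an already-known
 * type/instance GUID pair with a non-zero sub_channel_index, links it to
 * the primary channel as a sub-channel and notifies the owner via
 * sc_creation_callback.
 */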

enum {
	IDE = 0,
	SCSI,
	NIC,
	ND,
	MAX_PERF_CHN,
};

/*
 * This is an array of device_ids (device types) that are performance critical.
 * We attempt to distribute the interrupt load for these devices across
 * all available CPUs.
 */
static const struct hv_vmbus_device_id hp_devs[] = {
	/* IDE */
	{ HV_IDE_GUID, },
	/* Storage - SCSI */
	{ HV_SCSI_GUID, },
	/* Network */
	{ HV_NIC_GUID, },
	/* NetworkDirect Guest RDMA */
	{ HV_ND_GUID, },
};

/*
 * We use this state to statically distribute the channel interrupt load.
 */
static int next_numa_node_id;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to a VCPU.
 * We do this in a hierarchical fashion:
 * first distribute the primary channels across available NUMA nodes
 * and then distribute the subchannels among the CPUs in the NUMA
 * node assigned to the primary channel.
 *
 * For pre-win8 hosts or non-performance-critical channels we assign the
 * first CPU in the first NUMA node.
 */
static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid)
{
	u32 cur_cpu;
	int i;
	bool perf_chn = false;
	struct vmbus_channel *primary = channel->primary_channel;
	int next_node;
	struct cpumask available_mask;

	for (i = IDE; i < MAX_PERF_CHN; i++) {
		if (!memcmp(type_guid->b, hp_devs[i].guid,
			    sizeof(uuid_le))) {
			perf_chn = true;
			break;
		}
	}
	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
		/*
		 * Prior to win8, all channel interrupts are
		 * delivered on cpu 0.
		 * Also if the channel is not a performance critical
		 * channel, bind it to cpu 0.
		 */
		channel->numa_node = 0;
		cpumask_set_cpu(0, &channel->alloced_cpus_in_node);
		channel->target_cpu = 0;
		channel->target_vp = hv_context.vp_index[0];
		return;
	}

	/*
	 * We distribute primary channels evenly across all the available
	 * NUMA nodes and within the assigned NUMA node we will assign the
	 * first available CPU to the primary channel.
	 * The sub-channels will be assigned to the CPUs available in the
	 * NUMA node evenly.
	 */
	if (!primary) {
		while (true) {
			next_node = next_numa_node_id++;
			if (next_node == nr_node_ids)
				next_node = next_numa_node_id = 0;
			if (cpumask_empty(cpumask_of_node(next_node)))
				continue;
			break;
		}
		channel->numa_node = next_node;
		primary = channel;
	}

	if (cpumask_weight(&primary->alloced_cpus_in_node) ==
	    cpumask_weight(cpumask_of_node(primary->numa_node))) {
		/*
		 * We have cycled through all the CPUs in the node;
		 * reset the alloced map.
		 */
		cpumask_clear(&primary->alloced_cpus_in_node);
	}

	cpumask_xor(&available_mask, &primary->alloced_cpus_in_node,
		    cpumask_of_node(primary->numa_node));

	cur_cpu = cpumask_next(-1, &available_mask);
	cpumask_set_cpu(cur_cpu, &primary->alloced_cpus_in_node);

	channel->target_cpu = cur_cpu;
	channel->target_vp = hv_context.vp_index[cur_cpu];
}
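
/*
 * Illustrative walk-through (hypothetical topology): with two NUMA nodes,
 * node 0 holding CPUs 0-3 and node 1 holding CPUs 4-7, successive primary
 * channels of performance-critical devices are placed on node 0, node 1,
 * node 0, ... Sub-channels of a primary assigned to node 1 then take CPUs
 * 4, 5, 6 and 7 in turn; once every CPU in the node has been used,
 * alloced_cpus_in_node is cleared and the cycle starts over.
 */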

/*
 * vmbus_unload_response - Handler for the unload response.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
	/*
	 * This is a global event; just wakeup the waiting thread.
	 * Once we successfully unload, we can cleanup the monitor state.
	 */
	complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(void)
{
	struct vmbus_channel_message_header hdr;

	init_completion(&vmbus_connection.unload_event);
	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
	hdr.msgtype = CHANNELMSG_UNLOAD;
	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header));

	wait_for_completion(&vmbus_connection.unload_event);
}
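
/*
 * Unload handshake: vmbus_initiate_unload() posts CHANNELMSG_UNLOAD and
 * blocks on unload_event until the host's CHANNELMSG_UNLOAD_RESPONSE is
 * dispatched to vmbus_unload_response() via the message table below.
 */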

/*
 * vmbus_onoffer - Handler for channel offers from the vmbus in the parent
 * partition.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_offer_channel *offer;
	struct vmbus_channel *newchannel;

	offer = (struct vmbus_channel_offer_channel *)hdr;

	/* Allocate the channel object and save this offer. */
	newchannel = alloc_channel();
	if (!newchannel) {
		pr_err("Unable to allocate channel object\n");
		return;
	}

	/*
	 * By default we setup state to enable batched
	 * reading. A specific service can choose to
	 * disable this prior to opening the channel.
	 */
	newchannel->batched_reading = true;

	/*
	 * Setup state for signalling the host.
	 */
	newchannel->sig_event = (struct hv_input_signal_event *)
				(ALIGN((unsigned long)
				&newchannel->sig_buf,
				HV_HYPERCALL_PARAM_ALIGN));

	newchannel->sig_event->connectionid.asu32 = 0;
	newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
	newchannel->sig_event->flag_number = 0;
	newchannel->sig_event->rsvdz = 0;

	if (vmbus_proto_version != VERSION_WS2008) {
		newchannel->is_dedicated_interrupt =
				(offer->is_dedicated_interrupt != 0);
		newchannel->sig_event->connectionid.u.id =
				offer->connection_id;
	}

	memcpy(&newchannel->offermsg, offer,
	       sizeof(struct vmbus_channel_offer_channel));
	newchannel->monitor_grp = (u8)offer->monitorid / 32;
	newchannel->monitor_bit = (u8)offer->monitorid % 32;

	vmbus_process_offer(newchannel);
}

/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We process the rescind synchronously in the context that delivered it:
 * mark the channel as rescinded, then unregister the device (or release
 * the relid if no channel exists for it).
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_rescind_offer *rescind;
	struct vmbus_channel *channel;
	unsigned long flags;
	struct device *dev;

	rescind = (struct vmbus_channel_rescind_offer *)hdr;
	channel = relid2channel(rescind->child_relid);

	if (channel == NULL) {
		hv_process_channel_removal(NULL, rescind->child_relid);
		return;
	}

	spin_lock_irqsave(&channel->lock, flags);
	channel->rescind = true;
	spin_unlock_irqrestore(&channel->lock, flags);

	if (channel->device_obj) {
		/*
		 * We will have to unregister this device from the
		 * driver core.
		 */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	} else {
		hv_process_channel_removal(channel,
			channel->offermsg.child_relid);
	}
}

/*
 * vmbus_onoffers_delivered - This is invoked when all offers have been
 * delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
}

/*
 * vmbus_onopen_result - Open result handler.
 *
 * This is invoked when we receive a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_open_result *result;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
	unsigned long flags;

	result = (struct vmbus_channel_open_result *)hdr;

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
			openmsg =
			(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
				       result,
				       sizeof(
					struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we receive a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
	unsigned long flags;

	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
			gpadlheader =
			(struct vmbus_channel_gpadl_header *)requestheader;

			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
				       gpadlcreated,
				       sizeof(
					struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we receive a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_ongpadl_torndown(
			struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
	unsigned long flags;

	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
			gpadl_teardown =
			(struct vmbus_channel_gpadl_teardown *)requestheader;

			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
				memcpy(&msginfo->response.gpadl_torndown,
				       gpadl_torndown,
				       sizeof(
					struct vmbus_channel_gpadl_torndown));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/*
 * vmbus_onversion_response - Version response handler
 *
 * This is invoked when we receive a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onversion_response(
		struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_version_response *version_response;
	unsigned long flags;

	version_response = (struct vmbus_channel_version_response *)hdr;
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype ==
		    CHANNELMSG_INITIATE_CONTACT) {
			memcpy(&msginfo->response.version_response,
			      version_response,
			      sizeof(struct vmbus_channel_version_response));
			complete(&msginfo->waitevent);
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

/* Channel message dispatch table */
struct vmbus_channel_message_table_entry
	channel_message_table[CHANNELMSG_COUNT] = {
	{CHANNELMSG_INVALID,			0, NULL},
	{CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer},
	{CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind},
	{CHANNELMSG_REQUESTOFFERS,		0, NULL},
	{CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered},
	{CHANNELMSG_OPENCHANNEL,		0, NULL},
	{CHANNELMSG_OPENCHANNEL_RESULT,		1, vmbus_onopen_result},
	{CHANNELMSG_CLOSECHANNEL,		0, NULL},
	{CHANNELMSG_GPADL_HEADER,		0, NULL},
	{CHANNELMSG_GPADL_BODY,			0, NULL},
	{CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created},
	{CHANNELMSG_GPADL_TEARDOWN,		0, NULL},
	{CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown},
	{CHANNELMSG_RELID_RELEASED,		0, NULL},
	{CHANNELMSG_INITIATE_CONTACT,		0, NULL},
	{CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response},
	{CHANNELMSG_UNLOAD,			0, NULL},
	{CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response},
};

/*
 * vmbus_onmessage - Handler for channel protocol messages.
 *
 * This is invoked in the vmbus worker thread context.
 */
void vmbus_onmessage(void *context)
{
	struct hv_message *msg = context;
	struct vmbus_channel_message_header *hdr;
	int size;

	hdr = (struct vmbus_channel_message_header *)msg->u.payload;
	size = msg->header.payload_size;

	if (hdr->msgtype >= CHANNELMSG_COUNT) {
		pr_err("Received invalid channel message type %d size %d\n",
			   hdr->msgtype, size);
		print_hex_dump_bytes("", DUMP_PREFIX_NONE,
				     (unsigned char *)msg->u.payload, size);
		return;
	}

	if (channel_message_table[hdr->msgtype].message_handler)
		channel_message_table[hdr->msgtype].message_handler(hdr);
	else
		pr_err("Unhandled channel message type %d\n", hdr->msgtype);
}

/*
 * vmbus_request_offers - Send a request to get all our pending offers.
 */
int vmbus_request_offers(void)
{
	struct vmbus_channel_message_header *msg;
	struct vmbus_channel_msginfo *msginfo;
	int ret;

	msginfo = kmalloc(sizeof(*msginfo) +
			  sizeof(struct vmbus_channel_message_header),
			  GFP_KERNEL);
	if (!msginfo)
		return -ENOMEM;

	msg = (struct vmbus_channel_message_header *)msginfo->msg;

	msg->msgtype = CHANNELMSG_REQUESTOFFERS;

	ret = vmbus_post_msg(msg,
			     sizeof(struct vmbus_channel_message_header));
	if (ret != 0) {
		pr_err("Unable to request offers - %d\n", ret);
		goto cleanup;
	}

cleanup:
	kfree(msginfo);

	return ret;
}
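
/*
 * Note: this is typically invoked once during VMBus initialization
 * (from vmbus_bus_init() in vmbus_drv.c) so that the host replays all
 * pending channel offers; each resulting CHANNELMSG_OFFERCHANNEL message
 * is then handled by vmbus_onoffer() above.
 */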

/*
 * Retrieve the (sub) channel on which to send an outgoing request.
 * When a primary channel has multiple sub-channels, we try to
 * distribute the load equally amongst all available channels.
 */
struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
{
	struct list_head *cur, *tmp;
	int cur_cpu;
	struct vmbus_channel *cur_channel;
	struct vmbus_channel *outgoing_channel = primary;
	int next_channel;
	int i = 1;

	if (list_empty(&primary->sc_list))
		return outgoing_channel;

	next_channel = primary->next_oc++;

	if (next_channel > (primary->num_sc)) {
		primary->next_oc = 0;
		return outgoing_channel;
	}

	cur_cpu = hv_context.vp_index[get_cpu()];
	put_cpu();
	list_for_each_safe(cur, tmp, &primary->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
		if (cur_channel->state != CHANNEL_OPENED_STATE)
			continue;

		if (cur_channel->target_vp == cur_cpu)
			return cur_channel;

		if (i == next_channel)
			return cur_channel;

		i++;
	}

	return outgoing_channel;
}
EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);
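
/*
 * Usage sketch (illustrative, not part of this file): a multi-queue
 * driver such as storvsc picks the channel for each request with
 *
 *	channel = vmbus_get_outgoing_channel(device->channel);
 *	vmbus_sendpacket(channel, ...);
 *
 * preferring a sub-channel whose target VP matches the current CPU.
 * The variable names above are hypothetical.
 */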

static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
	struct list_head *cur, *tmp;
	struct vmbus_channel *cur_channel;

	if (primary_channel->sc_creation_callback == NULL)
		return;

	list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);

		primary_channel->sc_creation_callback(cur_channel);
	}
}

void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);
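
/*
 * Usage sketch (illustrative, not part of this file): a driver that
 * wants sub-channels registers its callback on the primary channel
 * before requesting them from the host, e.g.
 *
 *	vmbus_set_sc_create_callback(primary, my_sc_created);
 *
 * where my_sc_created() is a hypothetical driver function that opens
 * each newly offered sub-channel.
 */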

bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
	bool ret;

	ret = !list_empty(&primary->sc_list);

	if (ret) {
		/*
		 * Invoke the callback on sub-channel creation.
		 * This will present a uniform interface to the
		 * clients.
		 */
		invoke_sc_cb(primary);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);