channel_mgmt.c 24.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
21 22
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

23
#include <linux/kernel.h>
24 25
#include <linux/sched.h>
#include <linux/wait.h>
26
#include <linux/mm.h>
27
#include <linux/slab.h>
28
#include <linux/list.h>
29
#include <linux/module.h>
30
#include <linux/completion.h>
31
#include <linux/hyperv.h>
32

33
#include "hyperv_vmbus.h"
34

35
/*
 * One entry of the channel-message dispatch table: maps a VMBus channel
 * message type to its handler. A NULL handler means the message is not
 * processed by this guest (see vmbus_onmessage()).
 */
struct vmbus_channel_message_table_entry {
	enum vmbus_channel_message_type message_type;
	void (*message_handler)(struct vmbus_channel_message_header *msg);
};
39

40 41

/**
 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @negop: Pointer to negotiate message structure; re-pointed at the
 *         negotiate payload inside @buf (the caller's value is ignored)
 * @buf: Raw buffer channel data
 * @fw_version: The framework version we can support
 * @srv_version: The service version we can support
 *
 * @icmsghdrp is of type &struct icmsg_hdr.
 * @negop is of type &struct icmsg_negotiate.
 * Set up and fill in default negotiate response message.
 *
 * The fw_version specifies the  framework version that
 * we can support and srv_version specifies the service
 * version we can support.
 *
 * Mainly used by Hyper-V drivers.
 *
 * Return: true if both a framework and a service version matched,
 * false otherwise (in which case the response advertises 0 versions).
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
				struct icmsg_negotiate *negop, u8 *buf,
				int fw_version, int srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i;
	bool found_match = false;

	icmsghdrp->icmsgsize = 0x10;
	/* Versions are packed as major in the high 16 bits, minor in the low. */
	fw_major = (fw_version >> 16);
	fw_minor = (fw_version & 0xFFFF);

	srv_major = (srv_version >> 16);
	srv_minor = (srv_version & 0xFFFF);

	/* The negotiate payload follows the pipe and icmsg headers in @buf. */
	negop = (struct icmsg_negotiate *)&buf[
		sizeof(struct vmbuspipe_hdr) +
		sizeof(struct icmsg_hdr)];

	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/*
	 * Select the framework version number we will
	 * support.
	 */

	/* Framework versions occupy icversion_data[0 .. icframe_vercnt-1]. */
	for (i = 0; i < negop->icframe_vercnt; i++) {
		if ((negop->icversion_data[i].major == fw_major) &&
		   (negop->icversion_data[i].minor == fw_minor)) {
			icframe_major = negop->icversion_data[i].major;
			icframe_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	if (!found_match)
		goto fw_error;

	found_match = false;

	/* Service versions follow the framework versions in the same array. */
	for (i = negop->icframe_vercnt;
		 (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
		if ((negop->icversion_data[i].major == srv_major) &&
		   (negop->icversion_data[i].minor == srv_minor)) {
			icmsg_major = negop->icversion_data[i].major;
			icmsg_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.
	 */

fw_error:
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	/*
	 * NOTE(review): the response reuses the request buffer and assumes
	 * icversion_data[] can hold at least two entries — confirm against
	 * the host's negotiate message contract.
	 */
	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
}

EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
136

137
/*
138
 * alloc_channel - Allocate and initialize a vmbus channel object
139
 */
140
static struct vmbus_channel *alloc_channel(void)
141
{
142
	static atomic_t chan_num = ATOMIC_INIT(0);
143
	struct vmbus_channel *channel;
144

145
	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
146 147 148
	if (!channel)
		return NULL;

149
	channel->id = atomic_inc_return(&chan_num);
150
	spin_lock_init(&channel->inbound_lock);
151
	spin_lock_init(&channel->lock);
152 153

	INIT_LIST_HEAD(&channel->sc_list);
154
	INIT_LIST_HEAD(&channel->percpu_list);
155

156 157
	channel->controlwq = alloc_workqueue("hv_vmbus_ctl/%d", WQ_MEM_RECLAIM,
					     1, channel->id);
158
	if (!channel->controlwq) {
159
		kfree(channel);
160 161 162 163 164 165
		return NULL;
	}

	return channel;
}

166
/*
 * release_channel - Release the vmbus channel object itself.
 *
 * Runs from the vmbus connection work queue (scheduled by free_channel()):
 * the channel's own control work queue cannot destroy itself, so the final
 * teardown must happen in a different work-queue context.
 */
static void release_channel(struct work_struct *work)
{
	struct vmbus_channel *channel = container_of(work,
						     struct vmbus_channel,
						     work);

	destroy_workqueue(channel->controlwq);

	kfree(channel);
}

180
/*
 * free_channel - Release the resources used by the vmbus channel object.
 *
 * The actual destruction (release_channel) is deferred to the vmbus
 * connection work queue; after this call the channel must not be touched
 * by the caller.
 */
static void free_channel(struct vmbus_channel *channel)
{

	/*
	 * We have to release the channel's workqueue/thread in the vmbus's
	 * workqueue/thread context
	 * ie we can't destroy ourselves.
	 */
	INIT_WORK(&channel->work, release_channel);
	queue_work(vmbus_connection.work_queue, &channel->work);
}

195 196 197 198 199 200 201
/*
 * percpu_channel_enq - Link a channel into the per-cpu list of the CPU
 * this function executes on (invoked directly or via
 * smp_call_function_single() on the channel's target CPU).
 */
static void percpu_channel_enq(void *arg)
{
	struct vmbus_channel *chan = arg;

	list_add_tail(&chan->percpu_list,
		      &hv_context.percpu_list[smp_processor_id()]);
}
202

203 204 205 206 207 208
/*
 * percpu_channel_deq - Unlink a channel from whatever per-cpu list it is
 * currently on (counterpart of percpu_channel_enq()).
 */
static void percpu_channel_deq(void *arg)
{
	struct vmbus_channel *chan = arg;

	list_del(&chan->percpu_list);
}
209

210 211 212 213 214 215 216 217 218
/*
 * vmbus_process_rescind_offer -
 * Rescind the offer by initiating a device removal
 *
 * Runs on the channel's control work queue. Unregisters the bound device
 * (if any), tells the host the relid is released, unlinks the channel from
 * the per-cpu and global/sub-channel lists, and finally frees the channel.
 */
static void vmbus_process_rescind_offer(struct work_struct *work)
{
	struct vmbus_channel *channel = container_of(work,
						     struct vmbus_channel,
						     work);
	unsigned long flags;
	struct vmbus_channel *primary_channel;
	struct vmbus_channel_relid_released msg;
	struct device *dev;

	/* Hold a device reference across the unregister. */
	if (channel->device_obj) {
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	}

	/* Notify the host that this relid is no longer in use. */
	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = channel->offermsg.child_relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));

	/*
	 * percpu_channel_deq() must run on the channel's target CPU;
	 * use a cross-CPU call when we are not already there.
	 */
	if (channel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(channel->target_cpu,
					 percpu_channel_deq, channel, true);
	} else {
		percpu_channel_deq(channel);
		put_cpu();
	}

	/*
	 * Primary channels live on the global connection list; sub-channels
	 * live on their primary's sc_list (see vmbus_process_offer()).
	 */
	if (channel->primary_channel == NULL) {
		spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
		list_del(&channel->listentry);
		spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
	} else {
		primary_channel = channel->primary_channel;
		spin_lock_irqsave(&primary_channel->lock, flags);
		list_del(&channel->sc_list);
		spin_unlock_irqrestore(&primary_channel->lock, flags);
	}
	free_channel(channel);
}
258

259 260 261 262 263 264 265 266 267 268
/*
 * vmbus_free_channels - Unregister and release every channel on the
 * connection's channel list.
 *
 * free_channel() schedules the eventual kfree() of each channel, so the
 * node being visited may be released while we advance the iterator; use
 * the _safe list walker to avoid a use-after-free of the link pointers.
 */
void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
				 listentry) {
		vmbus_device_unregister(channel->device_obj);
		free_channel(channel);
	}
}

269
/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 *
 * Runs on the new channel's control work queue (queued by vmbus_onoffer()).
 * A first-time offer becomes a primary channel and a child device; a repeat
 * offer with a non-zero sub_channel_index becomes a sub-channel of the
 * existing primary; any other repeat offer is discarded.
 */
static void vmbus_process_offer(struct work_struct *work)
{
	struct vmbus_channel *newchannel = container_of(work,
							struct vmbus_channel,
							work);
	struct vmbus_channel *channel;
	bool fnew = true;
	bool enq = false;
	int ret;
	unsigned long flags;

	/* Make sure this is a new offer */
	spin_lock_irqsave(&vmbus_connection.channel_lock, flags);

	/* An offer is "new" if no channel with the same type+instance exists. */
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
			newchannel->offermsg.offer.if_type) &&
			!uuid_le_cmp(channel->offermsg.offer.if_instance,
				newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			break;
		}
	}

	if (fnew) {
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);
		enq = true;
	}

	spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);

	/* Link the new primary channel into its target CPU's per-cpu list. */
	if (enq) {
		if (newchannel->target_cpu != get_cpu()) {
			put_cpu();
			smp_call_function_single(newchannel->target_cpu,
						 percpu_channel_enq,
						 newchannel, true);
		} else {
			percpu_channel_enq(newchannel);
			put_cpu();
		}
	}
	if (!fnew) {
		/*
		 * Check to see if this is a sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index != 0) {
			/*
			 * Process the sub-channel.
			 */
			newchannel->primary_channel = channel;
			spin_lock_irqsave(&channel->lock, flags);
			list_add_tail(&newchannel->sc_list, &channel->sc_list);
			spin_unlock_irqrestore(&channel->lock, flags);

			if (newchannel->target_cpu != get_cpu()) {
				put_cpu();
				smp_call_function_single(newchannel->target_cpu,
							 percpu_channel_enq,
							 newchannel, true);
			} else {
				percpu_channel_enq(newchannel);
				put_cpu();
			}

			newchannel->state = CHANNEL_OPEN_STATE;
			if (channel->sc_creation_callback != NULL)
				channel->sc_creation_callback(newchannel);

			goto done_init_rescind;
		}

		/* Duplicate primary offer: drop it. */
		goto err_free_chan;
	}

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can cleanup properly
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	/*
	 * Start the process of binding this offer to the driver
	 * We need to set the DeviceObject field before calling
	 * vmbus_child_dev_add()
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_free_chan;

	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 */
	ret = vmbus_device_register(newchannel->device_obj);
	if (ret != 0) {
		pr_err("unable to add child device object (relid %d)\n",
			   newchannel->offermsg.child_relid);

		/* Undo the list insertion done above. */
		spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
		list_del(&newchannel->listentry);
		spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
		kfree(newchannel->device_obj);
		goto err_free_chan;
	}
done_init_rescind:
	spin_lock_irqsave(&newchannel->lock, flags);
	/* The next possible work is rescind handling */
	INIT_WORK(&newchannel->work, vmbus_process_rescind_offer);
	/* Check if rescind offer was already received */
	if (newchannel->rescind)
		queue_work(newchannel->controlwq, &newchannel->work);
	spin_unlock_irqrestore(&newchannel->lock, flags);
	return;
err_free_chan:
	free_channel(newchannel);
}

397 398 399 400 401 402 403 404
/*
 * Indices into hp_devs[] for the performance-critical device classes.
 * ND_NIC was missing even though hp_devs[] contains HV_ND_GUID, so
 * init_vp_index()'s scan over [IDE, MAX_PERF_CHN) never reached the
 * NetworkDirect entry; add it so all four entries are considered.
 */
enum {
	IDE = 0,
	SCSI,
	NIC,
	ND_NIC,
	MAX_PERF_CHN,
};

/*
 * This is an array of device_ids (device types) that are performance critical.
 * We attempt to distribute the interrupt load for these devices across
 * all available CPUs.
 *
 * NOTE(review): init_vp_index() scans indices [IDE, MAX_PERF_CHN) of this
 * array — keep it in sync with the enum above.
 */
static const struct hv_vmbus_device_id hp_devs[] = {
	/* IDE */
	{ HV_IDE_GUID, },
	/* Storage - SCSI */
	{ HV_SCSI_GUID, },
	/* Network */
	{ HV_NIC_GUID, },
	/* NetworkDirect Guest RDMA */
	{ HV_ND_GUID, },
};


/*
 * We use this state to statically distribute the channel interrupt load.
 * Round-robin counter advanced by init_vp_index() for each
 * performance-critical channel that gets bound to a CPU.
 */
static u32  next_vp;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to VCPU. We
 * implement here a simple round robin scheme for distributing
 * the interrupt load.
 * We will bind channels that are not performance critical to cpu 0 and
 * performance critical channels (IDE, SCSI and Network) will be uniformly
 * distributed across all available CPUs.
 */
435
static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid)
436 437 438 439 440 441 442
{
	u32 cur_cpu;
	int i;
	bool perf_chn = false;
	u32 max_cpus = num_online_cpus();

	for (i = IDE; i < MAX_PERF_CHN; i++) {
443
		if (!memcmp(type_guid->b, hp_devs[i].guid,
444 445 446 447 448 449 450 451 452 453 454 455 456
				 sizeof(uuid_le))) {
			perf_chn = true;
			break;
		}
	}
	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
		/*
		 * Prior to win8, all channel interrupts are
		 * delivered on cpu 0.
		 * Also if the channel is not a performance critical
		 * channel, bind it to cpu 0.
		 */
457 458 459
		channel->target_cpu = 0;
		channel->target_vp = 0;
		return;
460 461
	}
	cur_cpu = (++next_vp % max_cpus);
462 463
	channel->target_cpu = cur_cpu;
	channel->target_vp = hv_context.vp_index[cur_cpu];
464 465
}

466
/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 *
 * Allocates a channel for the offer, initializes its signalling state and
 * VCPU binding, then defers the rest of the processing to
 * vmbus_process_offer() on the channel's control work queue.
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_offer_channel *offer;
	struct vmbus_channel *newchannel;

	offer = (struct vmbus_channel_offer_channel *)hdr;

	/* Allocate the channel object and save this offer. */
	newchannel = alloc_channel();
	if (!newchannel) {
		pr_err("Unable to allocate channel object\n");
		return;
	}

	/*
	 * By default we setup state to enable batched
	 * reading. A specific service can choose to
	 * disable this prior to opening the channel.
	 */
	newchannel->batched_reading = true;

	/*
	 * Setup state for signalling the host.
	 * sig_event is the aligned region inside sig_buf used as the
	 * hypercall input.
	 */
	newchannel->sig_event = (struct hv_input_signal_event *)
				(ALIGN((unsigned long)
				&newchannel->sig_buf,
				HV_HYPERCALL_PARAM_ALIGN));

	newchannel->sig_event->connectionid.asu32 = 0;
	newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
	newchannel->sig_event->flag_number = 0;
	newchannel->sig_event->rsvdz = 0;

	/* Win8+ hosts may supply a dedicated interrupt/connection id. */
	if (vmbus_proto_version != VERSION_WS2008) {
		newchannel->is_dedicated_interrupt =
				(offer->is_dedicated_interrupt != 0);
		newchannel->sig_event->connectionid.u.id =
				offer->connection_id;
	}

	init_vp_index(newchannel, &offer->offer.if_type);

	memcpy(&newchannel->offermsg, offer,
	       sizeof(struct vmbus_channel_offer_channel));
	/* Split the monitor id into a 32-bit group and a bit within it. */
	newchannel->monitor_grp = (u8)offer->monitorid / 32;
	newchannel->monitor_bit = (u8)offer->monitorid % 32;

	INIT_WORK(&newchannel->work, vmbus_process_offer);
	queue_work(newchannel->controlwq, &newchannel->work);
}

522
/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously.
 * Marks the channel rescinded and, if the offer has already been fully
 * processed, queues the rescind work on the channel's control work queue.
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_rescind_offer *rescind;
	struct vmbus_channel *channel;
	unsigned long flags;

	rescind = (struct vmbus_channel_rescind_offer *)hdr;
	channel = relid2channel(rescind->child_relid);

	if (channel == NULL)
		/* Just return here, no channel found */
		return;

	spin_lock_irqsave(&channel->lock, flags);
	channel->rescind = true;
	/*
	 * channel->work.func != vmbus_process_rescind_offer means we are still
	 * processing offer request and the rescind offer processing should be
	 * postponed. It will be done at the very end of vmbus_process_offer()
	 * as rescind flag is being checked there.
	 */
	if (channel->work.func == vmbus_process_rescind_offer)
		/* work is initialized for vmbus_process_rescind_offer() from
		 * vmbus_process_offer() where the channel got created */
		queue_work(channel->controlwq, &channel->work);

	spin_unlock_irqrestore(&channel->lock, flags);
}

556
/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
}

567
/*
568
 * vmbus_onopen_result - Open result handler.
569 570 571 572 573
 *
 * This is invoked when we received a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
574
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
575
{
576
	struct vmbus_channel_open_result *result;
577 578 579
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
580
	unsigned long flags;
581

582
	result = (struct vmbus_channel_open_result *)hdr;
583

584 585 586
	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
587
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
588

589 590
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
591
		requestheader =
592
			(struct vmbus_channel_message_header *)msginfo->msg;
593

594
		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
595
			openmsg =
596 597 598 599
			(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
600
				       result,
601 602 603
				       sizeof(
					struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
604 605 606 607
				break;
			}
		}
	}
608
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
609 610
}

611
/*
 * vmbus_ongpadl_created - GPADL created handler.
 *
 * This is invoked when we received a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread. Requests are matched on the (child_relid, gpadl) pair.
 */
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
	unsigned long flags;

	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;

	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
			gpadlheader =
			(struct vmbus_channel_gpadl_header *)requestheader;

			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
				       gpadlcreated,
				       sizeof(
					struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

658
/*
 * vmbus_ongpadl_torndown - GPADL torndown handler.
 *
 * This is invoked when we received a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread. Requests are matched on the gpadl handle alone.
 */
static void vmbus_ongpadl_torndown(
			struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
	unsigned long flags;

	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
			gpadl_teardown =
			(struct vmbus_channel_gpadl_teardown *)requestheader;

			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
				memcpy(&msginfo->response.gpadl_torndown,
				       gpadl_torndown,
				       sizeof(
					struct vmbus_channel_gpadl_torndown));
				complete(&msginfo->waitevent);
				break;
			}
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

703
/*
704
 * vmbus_onversion_response - Version response handler
705 706 707 708 709
 *
 * This is invoked when we received a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
710
static void vmbus_onversion_response(
711
		struct vmbus_channel_message_header *hdr)
712
{
713 714 715
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_version_response *version_response;
716
	unsigned long flags;
717

718
	version_response = (struct vmbus_channel_version_response *)hdr;
719
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
720

721 722
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
723
		requestheader =
724
			(struct vmbus_channel_message_header *)msginfo->msg;
725

726 727 728
		if (requestheader->msgtype ==
		    CHANNELMSG_INITIATE_CONTACT) {
			memcpy(&msginfo->response.version_response,
729
			      version_response,
730
			      sizeof(struct vmbus_channel_version_response));
731
			complete(&msginfo->waitevent);
732 733
		}
	}
734
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
735 736
}

737 738
/*
 * Channel message dispatch table, indexed by message type.
 * A NULL handler causes vmbus_onmessage() to log the message as unhandled.
 */
static struct vmbus_channel_message_table_entry
	channel_message_table[CHANNELMSG_COUNT] = {
	{CHANNELMSG_INVALID,			NULL},
	{CHANNELMSG_OFFERCHANNEL,		vmbus_onoffer},
	{CHANNELMSG_RESCIND_CHANNELOFFER,	vmbus_onoffer_rescind},
	{CHANNELMSG_REQUESTOFFERS,		NULL},
	{CHANNELMSG_ALLOFFERS_DELIVERED,	vmbus_onoffers_delivered},
	{CHANNELMSG_OPENCHANNEL,		NULL},
	{CHANNELMSG_OPENCHANNEL_RESULT,	vmbus_onopen_result},
	{CHANNELMSG_CLOSECHANNEL,		NULL},
	{CHANNELMSG_GPADL_HEADER,		NULL},
	{CHANNELMSG_GPADL_BODY,		NULL},
	{CHANNELMSG_GPADL_CREATED,		vmbus_ongpadl_created},
	{CHANNELMSG_GPADL_TEARDOWN,		NULL},
	{CHANNELMSG_GPADL_TORNDOWN,		vmbus_ongpadl_torndown},
	{CHANNELMSG_RELID_RELEASED,		NULL},
	{CHANNELMSG_INITIATE_CONTACT,		NULL},
	{CHANNELMSG_VERSION_RESPONSE,		vmbus_onversion_response},
	{CHANNELMSG_UNLOAD,			NULL},
};

759
/*
760
 * vmbus_onmessage - Handler for channel protocol messages.
761 762 763
 *
 * This is invoked in the vmbus worker thread context.
 */
764
void vmbus_onmessage(void *context)
765
{
766
	struct hv_message *msg = context;
767
	struct vmbus_channel_message_header *hdr;
768 769
	int size;

770 771
	hdr = (struct vmbus_channel_message_header *)msg->u.payload;
	size = msg->header.payload_size;
772

773
	if (hdr->msgtype >= CHANNELMSG_COUNT) {
774
		pr_err("Received invalid channel message type %d size %d\n",
775
			   hdr->msgtype, size);
776
		print_hex_dump_bytes("", DUMP_PREFIX_NONE,
777
				     (unsigned char *)msg->u.payload, size);
778 779 780
		return;
	}

781 782
	if (channel_message_table[hdr->msgtype].message_handler)
		channel_message_table[hdr->msgtype].message_handler(hdr);
783
	else
784
		pr_err("Unhandled channel message type %d\n", hdr->msgtype);
785 786
}

787
/*
788
 * vmbus_request_offers - Send a request to get all our pending offers.
789
 */
790
int vmbus_request_offers(void)
791
{
792
	struct vmbus_channel_message_header *msg;
793
	struct vmbus_channel_msginfo *msginfo;
794 795
	int ret;
	unsigned long t;
796

797
	msginfo = kmalloc(sizeof(*msginfo) +
798 799
			  sizeof(struct vmbus_channel_message_header),
			  GFP_KERNEL);
800
	if (!msginfo)
801
		return -ENOMEM;
802

803
	init_completion(&msginfo->waitevent);
804

805
	msg = (struct vmbus_channel_message_header *)msginfo->msg;
806

807
	msg->msgtype = CHANNELMSG_REQUESTOFFERS;
808 809


810
	ret = vmbus_post_msg(msg,
811 812
			       sizeof(struct vmbus_channel_message_header));
	if (ret != 0) {
813
		pr_err("Unable to request offers - %d\n", ret);
814

815 816
		goto cleanup;
	}
817

818
	t = wait_for_completion_timeout(&msginfo->waitevent, 5*HZ);
819
	if (t == 0) {
820 821
		ret = -ETIMEDOUT;
		goto cleanup;
822 823 824 825
	}



826
cleanup:
827
	kfree(msginfo);
828 829 830 831

	return ret;
}

832 833 834 835 836 837 838 839 840
/*
 * Retrieve the (sub) channel on which to send an outgoing request.
 * When a primary channel has multiple sub-channels, we choose a
 * channel whose VCPU binding is closest to the VCPU on which
 * this call is being made.
 */
struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
{
	struct list_head *cur, *tmp;
841
	int cur_cpu;
842 843 844 845 846 847 848
	struct vmbus_channel *cur_channel;
	struct vmbus_channel *outgoing_channel = primary;
	int cpu_distance, new_cpu_distance;

	if (list_empty(&primary->sc_list))
		return outgoing_channel;

849 850
	cur_cpu = hv_context.vp_index[get_cpu()];
	put_cpu();
851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916
	list_for_each_safe(cur, tmp, &primary->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
		if (cur_channel->state != CHANNEL_OPENED_STATE)
			continue;

		if (cur_channel->target_vp == cur_cpu)
			return cur_channel;

		cpu_distance = ((outgoing_channel->target_vp > cur_cpu) ?
				(outgoing_channel->target_vp - cur_cpu) :
				(cur_cpu - outgoing_channel->target_vp));

		new_cpu_distance = ((cur_channel->target_vp > cur_cpu) ?
				(cur_channel->target_vp - cur_cpu) :
				(cur_cpu - cur_channel->target_vp));

		if (cpu_distance < new_cpu_distance)
			continue;

		outgoing_channel = cur_channel;
	}

	return outgoing_channel;
}
EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);

static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
	struct list_head *cur, *tmp;
	struct vmbus_channel *cur_channel;

	if (primary_channel->sc_creation_callback == NULL)
		return;

	list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);

		primary_channel->sc_creation_callback(cur_channel);
	}
}

/*
 * vmbus_set_sc_create_callback - Register the callback invoked for each
 * sub-channel of @primary_channel (see invoke_sc_cb() and
 * vmbus_process_offer()).
 */
void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);

bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
	bool ret;

	ret = !list_empty(&primary->sc_list);

	if (ret) {
		/*
		 * Invoke the callback on sub-channel creation.
		 * This will present a uniform interface to the
		 * clients.
		 */
		invoke_sc_cb(primary);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);