channel_mgmt.c 22.6 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
21 22
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

23
#include <linux/kernel.h>
24 25
#include <linux/sched.h>
#include <linux/wait.h>
26
#include <linux/mm.h>
27
#include <linux/slab.h>
28
#include <linux/list.h>
29
#include <linux/module.h>
30
#include <linux/completion.h>
31
#include <linux/hyperv.h>
32

33
#include "hyperv_vmbus.h"
34

35
/**
36
 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
37 38 39 40 41 42
 * @icmsghdrp: Pointer to msg header structure
 * @icmsg_negotiate: Pointer to negotiate message structure
 * @buf: Raw buffer channel data
 *
 * @icmsghdrp is of type &struct icmsg_hdr.
 * @negop is of type &struct icmsg_negotiate.
43 44
 * Set up and fill in default negotiate response message.
 *
45 46 47
 * The fw_version specifies the  framework version that
 * we can support and srv_version specifies the service
 * version we can support.
48 49 50
 *
 * Mainly used by Hyper-V drivers.
 */
51
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
52
				struct icmsg_negotiate *negop, u8 *buf,
53
				int fw_version, int srv_version)
54
{
55 56 57 58
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
59
	int i;
60
	bool found_match = false;
61

62
	icmsghdrp->icmsgsize = 0x10;
63 64 65 66 67
	fw_major = (fw_version >> 16);
	fw_minor = (fw_version & 0xFFFF);

	srv_major = (srv_version >> 16);
	srv_minor = (srv_version & 0xFFFF);
68

69 70 71
	negop = (struct icmsg_negotiate *)&buf[
		sizeof(struct vmbuspipe_hdr) +
		sizeof(struct icmsg_hdr)];
72

73 74 75 76 77
	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;
78 79 80 81 82 83 84

	/*
	 * Select the framework version number we will
	 * support.
	 */

	for (i = 0; i < negop->icframe_vercnt; i++) {
85 86 87 88 89 90
		if ((negop->icversion_data[i].major == fw_major) &&
		   (negop->icversion_data[i].minor == fw_minor)) {
			icframe_major = negop->icversion_data[i].major;
			icframe_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
91 92
	}

93 94 95 96 97
	if (!found_match)
		goto fw_error;

	found_match = false;

98 99
	for (i = negop->icframe_vercnt;
		 (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
100 101 102 103 104 105
		if ((negop->icversion_data[i].major == srv_major) &&
		   (negop->icversion_data[i].minor == srv_minor)) {
			icmsg_major = negop->icversion_data[i].major;
			icmsg_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
106
	}
107

108
	/*
109
	 * Respond with the framework and service
110 111
	 * version numbers we can support.
	 */
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126

fw_error:
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
127
}
128

129
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
130

131
/*
132
 * alloc_channel - Allocate and initialize a vmbus channel object
133
 */
134
static struct vmbus_channel *alloc_channel(void)
135
{
136
	static atomic_t chan_num = ATOMIC_INIT(0);
137
	struct vmbus_channel *channel;
138

139
	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
140 141 142
	if (!channel)
		return NULL;

143
	channel->id = atomic_inc_return(&chan_num);
144
	spin_lock_init(&channel->inbound_lock);
145
	spin_lock_init(&channel->lock);
146 147

	INIT_LIST_HEAD(&channel->sc_list);
148
	INIT_LIST_HEAD(&channel->percpu_list);
149 150 151 152

	return channel;
}

153
/*
 * free_channel - Release the resources used by the vmbus channel
 * object.  The channel must already be unlinked from all lists.
 */
static void free_channel(struct vmbus_channel *channel)
{
	kfree(channel);
}

161 162 163 164 165 166 167
/* Runs on the channel's target CPU: add it to that CPU's channel list. */
static void percpu_channel_enq(void *arg)
{
	struct vmbus_channel *chan = arg;

	list_add_tail(&chan->percpu_list,
		      &hv_context.percpu_list[smp_processor_id()]);
}
168

169 170 171 172 173 174
/* Runs on the channel's target CPU: unlink it from the per-cpu list. */
static void percpu_channel_deq(void *arg)
{
	struct vmbus_channel *chan = arg;

	list_del(&chan->percpu_list);
}
175

176 177

/*
 * hv_process_channel_removal - Release @relid back to the host and,
 * when @channel is non-NULL, unlink and free the channel object.
 */
void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
	struct vmbus_channel_relid_released msg;
	struct vmbus_channel *primary_channel;
	unsigned long flags;

	/*
	 * Always tell the host the relid can be reused, even when we
	 * no longer have a channel object for it.
	 */
	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));

	if (!channel)
		return;

	/* Detach from the per-cpu list on the CPU the channel is bound to. */
	if (channel->target_cpu == get_cpu()) {
		percpu_channel_deq(channel);
		put_cpu();
	} else {
		put_cpu();
		smp_call_function_single(channel->target_cpu,
					 percpu_channel_deq, channel, true);
	}

	/*
	 * Primary channels live on the global channel list;
	 * sub-channels hang off their primary's sc_list.
	 */
	if (channel->primary_channel == NULL) {
		spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
		list_del(&channel->listentry);
		spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
	} else {
		primary_channel = channel->primary_channel;
		spin_lock_irqsave(&primary_channel->lock, flags);
		list_del(&channel->sc_list);
		spin_unlock_irqrestore(&primary_channel->lock, flags);
	}

	free_channel(channel);
}
212

213 214 215 216 217 218 219 220 221 222
/*
 * vmbus_free_channels - Unregister and free every channel on the
 * global channel list (used at driver teardown).
 */
void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	/*
	 * free_channel() frees the node we are standing on, so the
	 * plain list_for_each_entry() would read ->next out of freed
	 * memory (use-after-free).  Use the _safe variant, which
	 * caches the next node before the body runs.
	 */
	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
				 listentry) {
		vmbus_device_unregister(channel->device_obj);
		free_channel(channel);
	}
}

223
/*
224
 * vmbus_process_offer - Process the offer by creating a channel/device
225
 * associated with this offer
226
 */
227
static void vmbus_process_offer(struct vmbus_channel *newchannel)
228
{
229
	struct vmbus_channel *channel;
230
	bool fnew = true;
231
	bool enq = false;
232
	unsigned long flags;
233

234
	/* Make sure this is a new offer */
235
	spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
236

237
	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
238 239 240 241
		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
			newchannel->offermsg.offer.if_type) &&
			!uuid_le_cmp(channel->offermsg.offer.if_instance,
				newchannel->offermsg.offer.if_instance)) {
242
			fnew = false;
243 244 245 246
			break;
		}
	}

247
	if (fnew) {
248
		list_add_tail(&newchannel->listentry,
249
			      &vmbus_connection.chn_list);
250 251
		enq = true;
	}
252

253
	spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
254

255
	if (enq) {
256 257
		if (newchannel->target_cpu != get_cpu()) {
			put_cpu();
258 259 260
			smp_call_function_single(newchannel->target_cpu,
						 percpu_channel_enq,
						 newchannel, true);
261
		} else {
262
			percpu_channel_enq(newchannel);
263 264
			put_cpu();
		}
265
	}
266
	if (!fnew) {
267 268 269 270 271 272 273 274
		/*
		 * Check to see if this is a sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index != 0) {
			/*
			 * Process the sub-channel.
			 */
			newchannel->primary_channel = channel;
275
			spin_lock_irqsave(&channel->lock, flags);
276
			list_add_tail(&newchannel->sc_list, &channel->sc_list);
277
			spin_unlock_irqrestore(&channel->lock, flags);
278

279 280
			if (newchannel->target_cpu != get_cpu()) {
				put_cpu();
281 282 283
				smp_call_function_single(newchannel->target_cpu,
							 percpu_channel_enq,
							 newchannel, true);
284
			} else {
285
				percpu_channel_enq(newchannel);
286 287
				put_cpu();
			}
288

289
			newchannel->state = CHANNEL_OPEN_STATE;
290
			channel->num_sc++;
291 292
			if (channel->sc_creation_callback != NULL)
				channel->sc_creation_callback(newchannel);
293 294

			return;
295 296
		}

297
		goto err_free_chan;
298 299
	}

300 301 302 303 304 305 306
	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can cleanup properly
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

307 308 309
	/*
	 * Start the process of binding this offer to the driver
	 * We need to set the DeviceObject field before calling
310
	 * vmbus_child_dev_add()
311
	 */
312
	newchannel->device_obj = vmbus_device_create(
313 314
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
315
		newchannel);
316
	if (!newchannel->device_obj)
317
		goto err_deq_chan;
318

319 320 321 322 323
	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 */
324 325 326 327 328 329
	if (vmbus_device_register(newchannel->device_obj) != 0) {
		pr_err("unable to add child device object (relid %d)\n",
			newchannel->offermsg.child_relid);
		kfree(newchannel->device_obj);
		goto err_deq_chan;
	}
330
	return;
331

332 333 334 335 336 337 338 339 340 341 342 343 344 345
err_deq_chan:
	spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
	list_del(&newchannel->listentry);
	spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);

	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_deq, newchannel, true);
	} else {
		percpu_channel_deq(newchannel);
		put_cpu();
	}

346 347
err_free_chan:
	free_channel(newchannel);
348 349
}

350 351 352 353 354 355 356 357
/*
 * Indices into hp_devs[] below; MAX_PERF_CHN must equal the number of
 * entries in that table.  ND_NIC was missing, so MAX_PERF_CHN was 3
 * while hp_devs[] has four entries: the scan in init_vp_index()
 * stopped short of HV_ND_GUID and NetworkDirect RDMA channels were
 * never treated as performance critical (stayed pinned to cpu 0).
 */
enum {
	IDE = 0,
	SCSI,
	NIC,
	ND_NIC,
	MAX_PERF_CHN,
};

/*
358
 * This is an array of device_ids (device types) that are performance critical.
359 360 361
 * We attempt to distribute the interrupt load for these devices across
 * all available CPUs.
 */
362
static const struct hv_vmbus_device_id hp_devs[] = {
363
	/* IDE */
364
	{ HV_IDE_GUID, },
365
	/* Storage - SCSI */
366
	{ HV_SCSI_GUID, },
367
	/* Network */
368
	{ HV_NIC_GUID, },
369 370
	/* NetworkDirect Guest RDMA */
	{ HV_ND_GUID, },
371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387
};


/*
 * We use this state to statically distribute the channel interrupt load.
 */
static u32  next_vp;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to VCPU. We
 * implement here a simple round robin scheme for distributing
 * the interrupt load.
 * We will bind channels that are not performance critical to cpu 0 and
 * performance critical channels (IDE, SCSI and Network) will be uniformly
 * distributed across all available CPUs.
 */
388
static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid)
389 390 391 392 393 394 395
{
	u32 cur_cpu;
	int i;
	bool perf_chn = false;
	u32 max_cpus = num_online_cpus();

	for (i = IDE; i < MAX_PERF_CHN; i++) {
396
		if (!memcmp(type_guid->b, hp_devs[i].guid,
397 398 399 400 401 402 403 404 405 406 407 408 409
				 sizeof(uuid_le))) {
			perf_chn = true;
			break;
		}
	}
	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
		/*
		 * Prior to win8, all channel interrupts are
		 * delivered on cpu 0.
		 * Also if the channel is not a performance critical
		 * channel, bind it to cpu 0.
		 */
410 411 412
		channel->target_cpu = 0;
		channel->target_vp = 0;
		return;
413 414
	}
	cur_cpu = (++next_vp % max_cpus);
415 416
	channel->target_cpu = cur_cpu;
	channel->target_vp = hv_context.vp_index[cur_cpu];
417 418
}

419
/*
420
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
421 422
 *
 */
423
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
424
{
425
	struct vmbus_channel_offer_channel *offer;
426
	struct vmbus_channel *newchannel;
427

428
	offer = (struct vmbus_channel_offer_channel *)hdr;
429

430
	/* Allocate the channel object and save this offer. */
431
	newchannel = alloc_channel();
432
	if (!newchannel) {
433
		pr_err("Unable to allocate channel object\n");
434 435 436
		return;
	}

437 438 439 440 441 442 443
	/*
	 * By default we setup state to enable batched
	 * reading. A specific service can choose to
	 * disable this prior to opening the channel.
	 */
	newchannel->batched_reading = true;

444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463
	/*
	 * Setup state for signalling the host.
	 */
	newchannel->sig_event = (struct hv_input_signal_event *)
				(ALIGN((unsigned long)
				&newchannel->sig_buf,
				HV_HYPERCALL_PARAM_ALIGN));

	newchannel->sig_event->connectionid.asu32 = 0;
	newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
	newchannel->sig_event->flag_number = 0;
	newchannel->sig_event->rsvdz = 0;

	if (vmbus_proto_version != VERSION_WS2008) {
		newchannel->is_dedicated_interrupt =
				(offer->is_dedicated_interrupt != 0);
		newchannel->sig_event->connectionid.u.id =
				offer->connection_id;
	}

464
	init_vp_index(newchannel, &offer->offer.if_type);
465

466
	memcpy(&newchannel->offermsg, offer,
467
	       sizeof(struct vmbus_channel_offer_channel));
468 469
	newchannel->monitor_grp = (u8)offer->monitorid / 32;
	newchannel->monitor_bit = (u8)offer->monitorid % 32;
470

471
	vmbus_process_offer(newchannel);
472 473
}

474
/*
475
 * vmbus_onoffer_rescind - Rescind offer handler.
476 477 478
 *
 * We queue a work item to process this offer synchronously
 */
479
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
480
{
481
	struct vmbus_channel_rescind_offer *rescind;
482
	struct vmbus_channel *channel;
483 484
	unsigned long flags;
	struct device *dev;
485

486
	rescind = (struct vmbus_channel_rescind_offer *)hdr;
487
	channel = relid2channel(rescind->child_relid);
488

489 490
	if (channel == NULL) {
		hv_process_channel_removal(NULL, rescind->child_relid);
491
		return;
492
	}
493

494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510
	spin_lock_irqsave(&channel->lock, flags);
	channel->rescind = true;
	spin_unlock_irqrestore(&channel->lock, flags);

	if (channel->device_obj) {
		/*
		 * We will have to unregister this device from the
		 * driver core.
		 */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	} else {
		hv_process_channel_removal(channel,
			channel->offermsg.child_relid);
511
	}
512 513
}

514
/*
 * vmbus_onoffers_delivered -
 * This is invoked when all offers have been delivered.
 *
 * Nothing to do here.
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
}

525
/*
526
 * vmbus_onopen_result - Open result handler.
527 528 529 530 531
 *
 * This is invoked when we received a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
532
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
533
{
534
	struct vmbus_channel_open_result *result;
535 536 537
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
538
	unsigned long flags;
539

540
	result = (struct vmbus_channel_open_result *)hdr;
541

542 543 544
	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
545
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
546

547 548
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
549
		requestheader =
550
			(struct vmbus_channel_message_header *)msginfo->msg;
551

552
		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
553
			openmsg =
554 555 556 557
			(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
558
				       result,
559 560 561
				       sizeof(
					struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
562 563 564 565
				break;
			}
		}
	}
566
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
567 568
}

569
/*
570
 * vmbus_ongpadl_created - GPADL created handler.
571 572 573 574 575
 *
 * This is invoked when we received a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
576
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
577
{
578 579 580 581
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
582
	unsigned long flags;
583

584
	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;
585

586 587 588 589
	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
590
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
591

592 593
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
594
		requestheader =
595
			(struct vmbus_channel_message_header *)msginfo->msg;
596

597
		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
598 599 600
			gpadlheader =
			(struct vmbus_channel_gpadl_header *)requestheader;

601 602 603 604
			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
605
				       gpadlcreated,
606 607 608
				       sizeof(
					struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
609 610 611 612
				break;
			}
		}
	}
613
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
614 615
}

616
/*
617
 * vmbus_ongpadl_torndown - GPADL torndown handler.
618 619 620 621 622
 *
 * This is invoked when we received a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
623
static void vmbus_ongpadl_torndown(
624
			struct vmbus_channel_message_header *hdr)
625
{
626 627 628 629
	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
630
	unsigned long flags;
631

632
	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;
633 634 635 636

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
637
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
638

639 640
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
641
		requestheader =
642
			(struct vmbus_channel_message_header *)msginfo->msg;
643

644
		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
645 646
			gpadl_teardown =
			(struct vmbus_channel_gpadl_teardown *)requestheader;
647

648 649
			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
				memcpy(&msginfo->response.gpadl_torndown,
650
				       gpadl_torndown,
651 652 653
				       sizeof(
					struct vmbus_channel_gpadl_torndown));
				complete(&msginfo->waitevent);
654 655 656 657
				break;
			}
		}
	}
658
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
659 660
}

661
/*
662
 * vmbus_onversion_response - Version response handler
663 664 665 666 667
 *
 * This is invoked when we received a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
668
static void vmbus_onversion_response(
669
		struct vmbus_channel_message_header *hdr)
670
{
671 672 673
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_version_response *version_response;
674
	unsigned long flags;
675

676
	version_response = (struct vmbus_channel_version_response *)hdr;
677
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
678

679 680
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
681
		requestheader =
682
			(struct vmbus_channel_message_header *)msginfo->msg;
683

684 685 686
		if (requestheader->msgtype ==
		    CHANNELMSG_INITIATE_CONTACT) {
			memcpy(&msginfo->response.version_response,
687
			      version_response,
688
			      sizeof(struct vmbus_channel_version_response));
689
			complete(&msginfo->waitevent);
690 691
		}
	}
692
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
693 694
}

695
/*
 * Channel message dispatch table, indexed by message type
 * (vmbus_onmessage() uses hdr->msgtype as the index).  Entries with a
 * NULL handler are message types the guest either sends (never
 * receives) or ignores.
 * NOTE(review): the middle field's meaning is defined by
 * struct vmbus_channel_message_table_entry in hyperv_vmbus.h —
 * presumably whether the handler may block; confirm against that
 * header.
 */
struct vmbus_channel_message_table_entry
	channel_message_table[CHANNELMSG_COUNT] = {
	{CHANNELMSG_INVALID,			0, NULL},
	{CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer},
	{CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind},
	{CHANNELMSG_REQUESTOFFERS,		0, NULL},
	{CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered},
	{CHANNELMSG_OPENCHANNEL,		0, NULL},
	{CHANNELMSG_OPENCHANNEL_RESULT,		1, vmbus_onopen_result},
	{CHANNELMSG_CLOSECHANNEL,		0, NULL},
	{CHANNELMSG_GPADL_HEADER,		0, NULL},
	{CHANNELMSG_GPADL_BODY,			0, NULL},
	{CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created},
	{CHANNELMSG_GPADL_TEARDOWN,		0, NULL},
	{CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown},
	{CHANNELMSG_RELID_RELEASED,		0, NULL},
	{CHANNELMSG_INITIATE_CONTACT,		0, NULL},
	{CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response},
	{CHANNELMSG_UNLOAD,			0, NULL},
};

717
/*
718
 * vmbus_onmessage - Handler for channel protocol messages.
719 720 721
 *
 * This is invoked in the vmbus worker thread context.
 */
722
void vmbus_onmessage(void *context)
723
{
724
	struct hv_message *msg = context;
725
	struct vmbus_channel_message_header *hdr;
726 727
	int size;

728 729
	hdr = (struct vmbus_channel_message_header *)msg->u.payload;
	size = msg->header.payload_size;
730

731
	if (hdr->msgtype >= CHANNELMSG_COUNT) {
732
		pr_err("Received invalid channel message type %d size %d\n",
733
			   hdr->msgtype, size);
734
		print_hex_dump_bytes("", DUMP_PREFIX_NONE,
735
				     (unsigned char *)msg->u.payload, size);
736 737 738
		return;
	}

739 740
	if (channel_message_table[hdr->msgtype].message_handler)
		channel_message_table[hdr->msgtype].message_handler(hdr);
741
	else
742
		pr_err("Unhandled channel message type %d\n", hdr->msgtype);
743 744
}

745
/*
746
 * vmbus_request_offers - Send a request to get all our pending offers.
747
 */
748
int vmbus_request_offers(void)
749
{
750
	struct vmbus_channel_message_header *msg;
751
	struct vmbus_channel_msginfo *msginfo;
752
	int ret;
753

754
	msginfo = kmalloc(sizeof(*msginfo) +
755 756
			  sizeof(struct vmbus_channel_message_header),
			  GFP_KERNEL);
757
	if (!msginfo)
758
		return -ENOMEM;
759

760
	msg = (struct vmbus_channel_message_header *)msginfo->msg;
761

762
	msg->msgtype = CHANNELMSG_REQUESTOFFERS;
763 764


765
	ret = vmbus_post_msg(msg,
766 767
			       sizeof(struct vmbus_channel_message_header));
	if (ret != 0) {
768
		pr_err("Unable to request offers - %d\n", ret);
769

770 771
		goto cleanup;
	}
772

773
cleanup:
774
	kfree(msginfo);
775 776 777 778

	return ret;
}

779 780
/*
 * Retrieve the (sub) channel on which to send an outgoing request.
781 782
 * When a primary channel has multiple sub-channels, we try to
 * distribute the load equally amongst all available channels.
783 784 785 786
 */
struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
{
	struct list_head *cur, *tmp;
787
	int cur_cpu;
788 789
	struct vmbus_channel *cur_channel;
	struct vmbus_channel *outgoing_channel = primary;
790 791
	int next_channel;
	int i = 1;
792 793 794 795

	if (list_empty(&primary->sc_list))
		return outgoing_channel;

796 797 798 799 800 801 802
	next_channel = primary->next_oc++;

	if (next_channel > (primary->num_sc)) {
		primary->next_oc = 0;
		return outgoing_channel;
	}

803 804
	cur_cpu = hv_context.vp_index[get_cpu()];
	put_cpu();
805 806 807 808 809 810 811 812
	list_for_each_safe(cur, tmp, &primary->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
		if (cur_channel->state != CHANNEL_OPENED_STATE)
			continue;

		if (cur_channel->target_vp == cur_cpu)
			return cur_channel;

813 814
		if (i == next_channel)
			return cur_channel;
815

816
		i++;
817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862
	}

	return outgoing_channel;
}
EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);

/* Invoke the sub-channel creation callback for every existing sub-channel. */
static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
	struct list_head *cur, *tmp;
	struct vmbus_channel *sc;

	if (primary_channel->sc_creation_callback == NULL)
		return;

	list_for_each_safe(cur, tmp, &primary_channel->sc_list) {
		sc = list_entry(cur, struct vmbus_channel, sc_list);
		primary_channel->sc_creation_callback(sc);
	}
}

/* Register the callback invoked whenever a new sub-channel is created. */
void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);

/*
 * vmbus_are_subchannels_present - Return true when @primary currently
 * has sub-channels.  If so, the sub-channel creation callback is
 * invoked on each of them, presenting a uniform interface to clients.
 */
bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
	bool present = !list_empty(&primary->sc_list);

	if (present)
		invoke_sc_cb(primary);

	return present;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);