/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
21 22
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

23
#include <linux/kernel.h>
24 25
#include <linux/sched.h>
#include <linux/wait.h>
26
#include <linux/mm.h>
27
#include <linux/slab.h>
28
#include <linux/list.h>
29
#include <linux/module.h>
30
#include <linux/completion.h>
31
#include <linux/hyperv.h>
32

33
#include "hyperv_vmbus.h"
34

35 36 37
static void init_vp_index(struct vmbus_channel *channel,
			  const uuid_le *type_guid);

/**
 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @icmsg_negotiate: Pointer to negotiate message structure
 * @buf: Raw buffer channel data
 *
 * @icmsghdrp is of type &struct icmsg_hdr.
 * @negop is of type &struct icmsg_negotiate.
 * Set up and fill in default negotiate response message.
 *
 * The fw_version specifies the  framework version that
 * we can support and srv_version specifies the service
 * version we can support.
 *
 * Mainly used by Hyper-V drivers.
 *
 * Returns true when both a framework and a service version matched,
 * false otherwise (in which case the response advertises 0 versions).
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
				struct icmsg_negotiate *negop, u8 *buf,
				int fw_version, int srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i;
	bool found_match = false;

	icmsghdrp->icmsgsize = 0x10;
	/* Split the packed 32-bit versions into major.minor halves. */
	fw_major = (fw_version >> 16);
	fw_minor = (fw_version & 0xFFFF);

	srv_major = (srv_version >> 16);
	srv_minor = (srv_version & 0xFFFF);

	/*
	 * The negotiate payload sits in @buf after the pipe header and the
	 * ICMSG header; the caller-supplied @negop argument is ignored and
	 * re-pointed here.
	 */
	negop = (struct icmsg_negotiate *)&buf[
		sizeof(struct vmbuspipe_hdr) +
		sizeof(struct icmsg_hdr)];

	/* Defaults used when no matching entry is found below. */
	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/*
	 * Select the framework version number we will
	 * support.  The host lists its framework versions in the first
	 * icframe_vercnt entries of icversion_data[].
	 */

	for (i = 0; i < negop->icframe_vercnt; i++) {
		if ((negop->icversion_data[i].major == fw_major) &&
		   (negop->icversion_data[i].minor == fw_minor)) {
			icframe_major = negop->icversion_data[i].major;
			icframe_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	if (!found_match)
		goto fw_error;

	found_match = false;

	/*
	 * The service (message) versions follow the framework versions in
	 * icversion_data[]; scan that second region for @srv_version.
	 */
	for (i = negop->icframe_vercnt;
		 (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
		if ((negop->icversion_data[i].major == srv_major) &&
		   (negop->icversion_data[i].minor == srv_minor)) {
			icmsg_major = negop->icversion_data[i].major;
			icmsg_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.  On any mismatch we advertise
	 * zero supported versions but still fill in the version slots.
	 */

fw_error:
	if (!found_match) {
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
}

EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
133

134
/*
135
 * alloc_channel - Allocate and initialize a vmbus channel object
136
 */
137
static struct vmbus_channel *alloc_channel(void)
138
{
139
	static atomic_t chan_num = ATOMIC_INIT(0);
140
	struct vmbus_channel *channel;
141

142
	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
143 144 145
	if (!channel)
		return NULL;

146
	channel->id = atomic_inc_return(&chan_num);
147
	spin_lock_init(&channel->inbound_lock);
148
	spin_lock_init(&channel->lock);
149 150

	INIT_LIST_HEAD(&channel->sc_list);
151
	INIT_LIST_HEAD(&channel->percpu_list);
152 153 154 155

	return channel;
}

156
/*
 * free_channel - Release the memory backing a vmbus channel object.
 * Counterpart of alloc_channel().
 */
static void free_channel(struct vmbus_channel *channel)
{
	kfree(channel);
}

164 165 166 167 168 169 170
/*
 * percpu_channel_enq - Link a channel onto the per-cpu channel list of
 * the CPU this runs on.  Called either directly with preemption disabled
 * or via smp_call_function_single() on the channel's target CPU, hence
 * the void* signature.
 */
static void percpu_channel_enq(void *arg)
{
	struct vmbus_channel *chan = arg;

	list_add_tail(&chan->percpu_list,
		      &hv_context.percpu_list[smp_processor_id()]);
}
171

172 173 174 175 176 177
/*
 * percpu_channel_deq - Unlink a channel from whatever per-cpu list it is
 * on.  Counterpart of percpu_channel_enq(); same smp-call signature.
 */
static void percpu_channel_deq(void *arg)
{
	struct vmbus_channel *chan = arg;

	list_del(&chan->percpu_list);
}
178

179 180

/*
 * hv_process_channel_removal - Release @relid back to the host and tear
 * down the corresponding channel object.
 *
 * @channel may be NULL (rescind of a relid we have no channel for); in
 * that case only the CHANNELMSG_RELID_RELEASED message is posted.
 */
void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
	struct vmbus_channel_relid_released msg;
	unsigned long flags;
	struct vmbus_channel *primary_channel;

	/* Tell the host this relid is no longer in use. */
	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));

	if (channel == NULL)
		return;

	/*
	 * The per-cpu list must be manipulated on the CPU the channel is
	 * bound to; cross-call if we are currently on a different CPU.
	 */
	if (channel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(channel->target_cpu,
					 percpu_channel_deq, channel, true);
	} else {
		percpu_channel_deq(channel);
		put_cpu();
	}

	/*
	 * Primary channels live on the global chn_list (guarded by
	 * vmbus_connection.channel_lock); sub-channels live on their
	 * primary's sc_list (guarded by the primary's lock).
	 */
	if (channel->primary_channel == NULL) {
		spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
		list_del(&channel->listentry);
		spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
	} else {
		primary_channel = channel->primary_channel;
		spin_lock_irqsave(&primary_channel->lock, flags);
		list_del(&channel->sc_list);
		primary_channel->num_sc--;
		spin_unlock_irqrestore(&primary_channel->lock, flags);
	}
	free_channel(channel);
}
216

217 218
/*
 * vmbus_free_channels - Unregister every device on the global channel
 * list, e.g. at module unload time.
 */
void vmbus_free_channels(void)
{
	struct vmbus_channel *chan, *next;

	list_for_each_entry_safe(chan, next, &vmbus_connection.chn_list,
		listentry) {
		/* if we don't set rescind to true, vmbus_close_internal()
		 * won't invoke hv_process_channel_removal().
		 */
		chan->rescind = true;

		vmbus_device_unregister(chan->device_obj);
	}
}

232
/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 *
 * A repeat offer for an already-known (type, instance) pair with a
 * nonzero sub_channel_index is treated as a sub-channel of the existing
 * primary; any other repeat offer is dropped.
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	bool fnew = true;
	unsigned long flags;

	/* Make sure this is a new offer */
	spin_lock_irqsave(&vmbus_connection.channel_lock, flags);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
			newchannel->offermsg.offer.if_type) &&
			!uuid_le_cmp(channel->offermsg.offer.if_instance,
				newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			break;
		}
	}

	if (fnew)
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);

	spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);

	if (!fnew) {
		/*
		 * Check to see if this is a sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index != 0) {
			/*
			 * Process the sub-channel: link it to its primary.
			 * Note this happens before init_vp_index(), which
			 * relies on the channel being on sc_list.
			 */
			newchannel->primary_channel = channel;
			spin_lock_irqsave(&channel->lock, flags);
			list_add_tail(&newchannel->sc_list, &channel->sc_list);
			channel->num_sc++;
			spin_unlock_irqrestore(&channel->lock, flags);
		} else
			goto err_free_chan;
	}

	/* Pick the CPU/VP that will service this channel's interrupts. */
	init_vp_index(newchannel, &newchannel->offermsg.offer.if_type);

	/* Enqueue on the per-cpu list, on the chosen CPU itself. */
	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_enq,
					 newchannel, true);
	} else {
		percpu_channel_enq(newchannel);
		put_cpu();
	}

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can cleanup properly
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	/* Sub-channels don't get a device; notify the primary's owner. */
	if (!fnew) {
		if (channel->sc_creation_callback != NULL)
			channel->sc_creation_callback(newchannel);
		return;
	}

	/*
	 * Start the process of binding this offer to the driver
	 * We need to set the DeviceObject field before calling
	 * vmbus_child_dev_add()
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 */
	if (vmbus_device_register(newchannel->device_obj) != 0) {
		pr_err("unable to add child device object (relid %d)\n",
			newchannel->offermsg.child_relid);
		kfree(newchannel->device_obj);
		goto err_deq_chan;
	}
	return;

	/* Error unwind: undo the global-list add and per-cpu enqueue. */
err_deq_chan:
	spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
	list_del(&newchannel->listentry);
	spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);

	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_deq, newchannel, true);
	} else {
		percpu_channel_deq(newchannel);
		put_cpu();
	}

err_free_chan:
	free_channel(newchannel);
}

346 347 348 349 350 351 352 353
enum {
	IDE = 0,
	SCSI,
	NIC,
	MAX_PERF_CHN,
};

/*
354
 * This is an array of device_ids (device types) that are performance critical.
355 356 357
 * We attempt to distribute the interrupt load for these devices across
 * all available CPUs.
 */
358
static const struct hv_vmbus_device_id hp_devs[] = {
359
	/* IDE */
360
	{ HV_IDE_GUID, },
361
	/* Storage - SCSI */
362
	{ HV_SCSI_GUID, },
363
	/* Network */
364
	{ HV_NIC_GUID, },
365 366
	/* NetworkDirect Guest RDMA */
	{ HV_ND_GUID, },
367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383
};


/*
 * We use this state to statically distribute the channel interrupt load.
 */
static u32  next_vp;

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to VCPU. We
 * implement here a simple round robin scheme for distributing
 * the interrupt load.
 * We will bind channels that are not performance critical to cpu 0 and
 * performance critical channels (IDE, SCSI and Network) will be uniformly
 * distributed across all available CPUs.
 */
384
/*
 * init_vp_index - Choose the CPU (and its Hyper-V VP index) that will
 * service interrupts for @channel, using the round-robin policy
 * described above.
 */
static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid)
{
	u32 cur_cpu;
	int i;
	bool perf_chn = false;
	u32 max_cpus = num_online_cpus();
	struct vmbus_channel *primary = channel->primary_channel, *prev;
	unsigned long flags;

	/*
	 * Is this one of the performance-critical device types?
	 * NOTE(review): this assumes MAX_PERF_CHN == ARRAY_SIZE(hp_devs);
	 * verify the enum and the array stay in sync.
	 */
	for (i = IDE; i < MAX_PERF_CHN; i++) {
		if (!memcmp(type_guid->b, hp_devs[i].guid,
				 sizeof(uuid_le))) {
			perf_chn = true;
			break;
		}
	}
	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
		/*
		 * Prior to win8, all channel interrupts are
		 * delivered on cpu 0.
		 * Also if the channel is not a performance critical
		 * channel, bind it to cpu 0.
		 */
		channel->target_cpu = 0;
		channel->target_vp = hv_context.vp_index[0];
		return;
	}

	/*
	 * Primary channels are distributed evenly across all vcpus we have.
	 * When the host asks us to create subchannels it usually makes us
	 * num_cpus-1 offers and we are supposed to distribute the work evenly
	 * among the channel itself and all its subchannels. Make sure they are
	 * all assigned to different vcpus.
	 */
	if (!primary)
		/* NOTE(review): next_vp is updated without locking —
		 * presumably offers are processed serially; confirm. */
		cur_cpu = (++next_vp % max_cpus);
	else {
		/*
		 * Let's assign the first subchannel of a channel to the
		 * primary->target_cpu+1 and all the subsequent channels to
		 * the prev->target_cpu+1.
		 * (The caller has already linked @channel onto
		 * primary->sc_list, so list_prev_entry() is valid.)
		 */
		spin_lock_irqsave(&primary->lock, flags);
		if (primary->num_sc == 1)
			cur_cpu = (primary->target_cpu + 1) % max_cpus;
		else {
			prev = list_prev_entry(channel, sc_list);
			cur_cpu = (prev->target_cpu + 1) % max_cpus;
		}
		spin_unlock_irqrestore(&primary->lock, flags);
	}

	channel->target_cpu = cur_cpu;
	channel->target_vp = hv_context.vp_index[cur_cpu];
}

442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465
/*
 * vmbus_unload_response - Handler for CHANNELMSG_UNLOAD_RESPONSE.
 *
 * This is a global event; simply wake whoever is blocked in
 * vmbus_initiate_unload() so it can finish cleaning up.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
	complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(void)
{
	struct vmbus_channel_message_header hdr;

	init_completion(&vmbus_connection.unload_event);
	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
	hdr.msgtype = CHANNELMSG_UNLOAD;
	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header));

	wait_for_completion(&vmbus_connection.unload_event);
}

466
/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 *
 * Allocates a channel object, captures the offer message and host
 * signalling parameters, then hands off to vmbus_process_offer().
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_offer_channel *offer;
	struct vmbus_channel *newchannel;

	offer = (struct vmbus_channel_offer_channel *)hdr;

	/* Allocate the channel object and save this offer. */
	newchannel = alloc_channel();
	if (!newchannel) {
		pr_err("Unable to allocate channel object\n");
		return;
	}

	/*
	 * By default we setup state to enable batched
	 * reading. A specific service can choose to
	 * disable this prior to opening the channel.
	 */
	newchannel->batched_reading = true;

	/*
	 * Setup state for signalling the host.
	 * sig_event points into sig_buf, aligned as required for a
	 * hypercall input parameter.
	 */
	newchannel->sig_event = (struct hv_input_signal_event *)
				(ALIGN((unsigned long)
				&newchannel->sig_buf,
				HV_HYPERCALL_PARAM_ALIGN));

	newchannel->sig_event->connectionid.asu32 = 0;
	newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
	newchannel->sig_event->flag_number = 0;
	newchannel->sig_event->rsvdz = 0;

	/* Win8+ hosts may supply a dedicated per-channel connection id. */
	if (vmbus_proto_version != VERSION_WS2008) {
		newchannel->is_dedicated_interrupt =
				(offer->is_dedicated_interrupt != 0);
		newchannel->sig_event->connectionid.u.id =
				offer->connection_id;
	}

	memcpy(&newchannel->offermsg, offer,
	       sizeof(struct vmbus_channel_offer_channel));
	/* monitorid selects a bit within a 32-bit monitor group. */
	newchannel->monitor_grp = (u8)offer->monitorid / 32;
	newchannel->monitor_bit = (u8)offer->monitorid % 32;

	vmbus_process_offer(newchannel);
}

519
/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_rescind_offer *rescind;
	struct vmbus_channel *channel;
	unsigned long flags;
	struct device *dev;

	rescind = (struct vmbus_channel_rescind_offer *)hdr;
	channel = relid2channel(rescind->child_relid);

	/* Unknown relid: just release it back to the host. */
	if (channel == NULL) {
		hv_process_channel_removal(NULL, rescind->child_relid);
		return;
	}

	/* Mark the channel rescinded under its lock. */
	spin_lock_irqsave(&channel->lock, flags);
	channel->rescind = true;
	spin_unlock_irqrestore(&channel->lock, flags);

	if (channel->device_obj) {
		/*
		 * We will have to unregister this device from the
		 * driver core.  Hold a reference so the device object
		 * stays valid across the unregister.
		 */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	} else {
		/* No device (e.g. a sub-channel): remove it directly. */
		hv_process_channel_removal(channel,
			channel->offermsg.child_relid);
	}
}

559
/*
 * vmbus_onoffers_delivered - Invoked when the host has delivered all
 * pending channel offers (CHANNELMSG_ALLOFFERS_DELIVERED).
 *
 * Intentionally empty: each offer was already handled by vmbus_onoffer().
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
}

570
/*
571
 * vmbus_onopen_result - Open result handler.
572 573 574 575 576
 *
 * This is invoked when we received a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
577
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
578
{
579
	struct vmbus_channel_open_result *result;
580 581 582
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
583
	unsigned long flags;
584

585
	result = (struct vmbus_channel_open_result *)hdr;
586

587 588 589
	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
590
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
591

592 593
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
594
		requestheader =
595
			(struct vmbus_channel_message_header *)msginfo->msg;
596

597
		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
598
			openmsg =
599 600 601 602
			(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
603
				       result,
604 605 606
				       sizeof(
					struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
607 608 609 610
				break;
			}
		}
	}
611
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
612 613
}

614
/*
615
 * vmbus_ongpadl_created - GPADL created handler.
616 617 618 619 620
 *
 * This is invoked when we received a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
621
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
622
{
623 624 625 626
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
627
	unsigned long flags;
628

629
	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;
630

631 632 633 634
	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
635
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
636

637 638
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
639
		requestheader =
640
			(struct vmbus_channel_message_header *)msginfo->msg;
641

642
		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
643 644 645
			gpadlheader =
			(struct vmbus_channel_gpadl_header *)requestheader;

646 647 648 649
			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
650
				       gpadlcreated,
651 652 653
				       sizeof(
					struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
654 655 656 657
				break;
			}
		}
	}
658
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
659 660
}

661
/*
662
 * vmbus_ongpadl_torndown - GPADL torndown handler.
663 664 665 666 667
 *
 * This is invoked when we received a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
668
static void vmbus_ongpadl_torndown(
669
			struct vmbus_channel_message_header *hdr)
670
{
671 672 673 674
	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
675
	unsigned long flags;
676

677
	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;
678 679 680 681

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
682
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
683

684 685
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
686
		requestheader =
687
			(struct vmbus_channel_message_header *)msginfo->msg;
688

689
		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
690 691
			gpadl_teardown =
			(struct vmbus_channel_gpadl_teardown *)requestheader;
692

693 694
			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
				memcpy(&msginfo->response.gpadl_torndown,
695
				       gpadl_torndown,
696 697 698
				       sizeof(
					struct vmbus_channel_gpadl_torndown));
				complete(&msginfo->waitevent);
699 700 701 702
				break;
			}
		}
	}
703
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
704 705
}

706
/*
707
 * vmbus_onversion_response - Version response handler
708 709 710 711 712
 *
 * This is invoked when we received a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
713
static void vmbus_onversion_response(
714
		struct vmbus_channel_message_header *hdr)
715
{
716 717 718
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_version_response *version_response;
719
	unsigned long flags;
720

721
	version_response = (struct vmbus_channel_version_response *)hdr;
722
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
723

724 725
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
726
		requestheader =
727
			(struct vmbus_channel_message_header *)msginfo->msg;
728

729 730 731
		if (requestheader->msgtype ==
		    CHANNELMSG_INITIATE_CONTACT) {
			memcpy(&msginfo->response.version_response,
732
			      version_response,
733
			      sizeof(struct vmbus_channel_version_response));
734
			complete(&msginfo->waitevent);
735 736
		}
	}
737
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
738 739
}

740
/* Channel message dispatch table */
/*
 * Indexed by CHANNELMSG_* value; vmbus_onmessage() uses the msgtype as a
 * direct index, so entries must stay in enum order.  A NULL handler means
 * the message type is never expected from the host (or is handled
 * elsewhere).
 * NOTE(review): the semantics of the middle column are defined by
 * struct vmbus_channel_message_table_entry in hyperv_vmbus.h — confirm
 * there before relying on it.
 */
struct vmbus_channel_message_table_entry
	channel_message_table[CHANNELMSG_COUNT] = {
	{CHANNELMSG_INVALID,			0, NULL},
	{CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer},
	{CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind},
	{CHANNELMSG_REQUESTOFFERS,		0, NULL},
	{CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered},
	{CHANNELMSG_OPENCHANNEL,		0, NULL},
	{CHANNELMSG_OPENCHANNEL_RESULT,		1, vmbus_onopen_result},
	{CHANNELMSG_CLOSECHANNEL,		0, NULL},
	{CHANNELMSG_GPADL_HEADER,		0, NULL},
	{CHANNELMSG_GPADL_BODY,			0, NULL},
	{CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created},
	{CHANNELMSG_GPADL_TEARDOWN,		0, NULL},
	{CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown},
	{CHANNELMSG_RELID_RELEASED,		0, NULL},
	{CHANNELMSG_INITIATE_CONTACT,		0, NULL},
	{CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response},
	{CHANNELMSG_UNLOAD,			0, NULL},
	{CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response},
};

763
/*
764
 * vmbus_onmessage - Handler for channel protocol messages.
765 766 767
 *
 * This is invoked in the vmbus worker thread context.
 */
768
void vmbus_onmessage(void *context)
769
{
770
	struct hv_message *msg = context;
771
	struct vmbus_channel_message_header *hdr;
772 773
	int size;

774 775
	hdr = (struct vmbus_channel_message_header *)msg->u.payload;
	size = msg->header.payload_size;
776

777
	if (hdr->msgtype >= CHANNELMSG_COUNT) {
778
		pr_err("Received invalid channel message type %d size %d\n",
779
			   hdr->msgtype, size);
780
		print_hex_dump_bytes("", DUMP_PREFIX_NONE,
781
				     (unsigned char *)msg->u.payload, size);
782 783 784
		return;
	}

785 786
	if (channel_message_table[hdr->msgtype].message_handler)
		channel_message_table[hdr->msgtype].message_handler(hdr);
787
	else
788
		pr_err("Unhandled channel message type %d\n", hdr->msgtype);
789 790
}

791
/*
792
 * vmbus_request_offers - Send a request to get all our pending offers.
793
 */
794
int vmbus_request_offers(void)
795
{
796
	struct vmbus_channel_message_header *msg;
797
	struct vmbus_channel_msginfo *msginfo;
798
	int ret;
799

800
	msginfo = kmalloc(sizeof(*msginfo) +
801 802
			  sizeof(struct vmbus_channel_message_header),
			  GFP_KERNEL);
803
	if (!msginfo)
804
		return -ENOMEM;
805

806
	msg = (struct vmbus_channel_message_header *)msginfo->msg;
807

808
	msg->msgtype = CHANNELMSG_REQUESTOFFERS;
809 810


811
	ret = vmbus_post_msg(msg,
812 813
			       sizeof(struct vmbus_channel_message_header));
	if (ret != 0) {
814
		pr_err("Unable to request offers - %d\n", ret);
815

816 817
		goto cleanup;
	}
818

819
cleanup:
820
	kfree(msginfo);
821 822 823 824

	return ret;
}

825 826
/*
 * Retrieve the (sub) channel on which to send an outgoing request.
 * When a primary channel has multiple sub-channels, we try to
 * distribute the load equally amongst all available channels.
 *
 * Preference order: (1) an opened sub-channel bound to the current
 * CPU's VP, (2) the next sub-channel in a round-robin over next_oc,
 * (3) the primary channel itself.
 */
struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
{
	struct list_head *cur, *tmp;
	int cur_cpu;
	struct vmbus_channel *cur_channel;
	struct vmbus_channel *outgoing_channel = primary;
	int next_channel;
	int i = 1;

	/* No sub-channels: the primary is the only option. */
	if (list_empty(&primary->sc_list))
		return outgoing_channel;

	/*
	 * NOTE(review): next_oc is read-modify-written without locking;
	 * presumably approximate round-robin is acceptable here — confirm.
	 */
	next_channel = primary->next_oc++;

	/* Wrap the round-robin counter once it runs past the count. */
	if (next_channel > (primary->num_sc)) {
		primary->next_oc = 0;
		return outgoing_channel;
	}

	/* Prefer a sub-channel whose target VP matches the current CPU. */
	cur_cpu = hv_context.vp_index[get_cpu()];
	put_cpu();
	list_for_each_safe(cur, tmp, &primary->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
		if (cur_channel->state != CHANNEL_OPENED_STATE)
			continue;

		if (cur_channel->target_vp == cur_cpu)
			return cur_channel;

		if (i == next_channel)
			return cur_channel;

		i++;
	}

	return outgoing_channel;
}
EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);

/*
 * invoke_sc_cb - Run the registered sub-channel creation callback for
 * every sub-channel of @primary_channel.  No-op when no callback is set.
 */
static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
	struct list_head *pos, *n;
	struct vmbus_channel *sc;

	if (primary_channel->sc_creation_callback == NULL)
		return;

	list_for_each_safe(pos, n, &primary_channel->sc_list) {
		sc = list_entry(pos, struct vmbus_channel, sc_list);
		primary_channel->sc_creation_callback(sc);
	}
}

/*
 * vmbus_set_sc_create_callback - Register the function invoked whenever
 * a new sub-channel of @primary_channel is created.
 */
void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);

bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
	bool ret;

	ret = !list_empty(&primary->sc_list);

	if (ret) {
		/*
		 * Invoke the callback on sub-channel creation.
		 * This will present a uniform interface to the
		 * clients.
		 */
		invoke_sc_cb(primary);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);