channel_mgmt.c 24.5 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 */
21 22
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

23
#include <linux/kernel.h>
24 25
#include <linux/sched.h>
#include <linux/wait.h>
26
#include <linux/mm.h>
27
#include <linux/slab.h>
28
#include <linux/list.h>
29
#include <linux/module.h>
30
#include <linux/completion.h>
31
#include <linux/hyperv.h>
32

33
#include "hyperv_vmbus.h"
34

35 36 37
static void init_vp_index(struct vmbus_channel *channel,
			  const uuid_le *type_guid);

38
/**
 * vmbus_prep_negotiate_resp() - Create default response for Hyper-V Negotiate message
 * @icmsghdrp: Pointer to msg header structure
 * @negop: Pointer to negotiate message structure (rewritten below to point
 *         into @buf; the caller-supplied value is not used)
 * @buf: Raw buffer channel data
 * @fw_version: Framework version we can support (major << 16 | minor)
 * @srv_version: Service version we can support (major << 16 | minor)
 *
 * Set up and fill in default negotiate response message.
 *
 * The fw_version specifies the  framework version that
 * we can support and srv_version specifies the service
 * version we can support.
 *
 * Mainly used by Hyper-V drivers.
 *
 * Return: true if both a framework and a service version matched, false
 * otherwise (in which case the response advertises zero versions).
 */
bool vmbus_prep_negotiate_resp(struct icmsg_hdr *icmsghdrp,
				struct icmsg_negotiate *negop, u8 *buf,
				int fw_version, int srv_version)
{
	int icframe_major, icframe_minor;
	int icmsg_major, icmsg_minor;
	int fw_major, fw_minor;
	int srv_major, srv_minor;
	int i;
	bool found_match = false;

	icmsghdrp->icmsgsize = 0x10;
	/* Split the packed (major << 16 | minor) versions we support. */
	fw_major = (fw_version >> 16);
	fw_minor = (fw_version & 0xFFFF);

	srv_major = (srv_version >> 16);
	srv_minor = (srv_version & 0xFFFF);

	/*
	 * The negotiate payload sits in @buf after the pipe and icmsg
	 * headers.
	 * NOTE(review): icframe_vercnt/icmsg_vercnt come from the host and
	 * index icversion_data[] without a bounds check against the buffer
	 * size — verify upstream hardening applies here.
	 */
	negop = (struct icmsg_negotiate *)&buf[
		sizeof(struct vmbuspipe_hdr) +
		sizeof(struct icmsg_hdr)];

	icframe_major = negop->icframe_vercnt;
	icframe_minor = 0;

	icmsg_major = negop->icmsg_vercnt;
	icmsg_minor = 0;

	/*
	 * Select the framework version number we will
	 * support.
	 */

	for (i = 0; i < negop->icframe_vercnt; i++) {
		if ((negop->icversion_data[i].major == fw_major) &&
		   (negop->icversion_data[i].minor == fw_minor)) {
			icframe_major = negop->icversion_data[i].major;
			icframe_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	if (!found_match)
		goto fw_error;

	found_match = false;

	/*
	 * The service (message) versions follow the framework versions in
	 * the same icversion_data[] array.
	 */
	for (i = negop->icframe_vercnt;
		 (i < negop->icframe_vercnt + negop->icmsg_vercnt); i++) {
		if ((negop->icversion_data[i].major == srv_major) &&
		   (negop->icversion_data[i].minor == srv_minor)) {
			icmsg_major = negop->icversion_data[i].major;
			icmsg_minor = negop->icversion_data[i].minor;
			found_match = true;
		}
	}

	/*
	 * Respond with the framework and service
	 * version numbers we can support.
	 */

fw_error:
	if (!found_match) {
		/* Advertise zero versions: negotiation failed. */
		negop->icframe_vercnt = 0;
		negop->icmsg_vercnt = 0;
	} else {
		negop->icframe_vercnt = 1;
		negop->icmsg_vercnt = 1;
	}

	negop->icversion_data[0].major = icframe_major;
	negop->icversion_data[0].minor = icframe_minor;
	negop->icversion_data[1].major = icmsg_major;
	negop->icversion_data[1].minor = icmsg_minor;
	return found_match;
}
131

132
EXPORT_SYMBOL_GPL(vmbus_prep_negotiate_resp);
133

134
/*
135
 * alloc_channel - Allocate and initialize a vmbus channel object
136
 */
137
static struct vmbus_channel *alloc_channel(void)
138
{
139
	static atomic_t chan_num = ATOMIC_INIT(0);
140
	struct vmbus_channel *channel;
141

142
	channel = kzalloc(sizeof(*channel), GFP_ATOMIC);
143 144 145
	if (!channel)
		return NULL;

146
	channel->id = atomic_inc_return(&chan_num);
147
	spin_lock_init(&channel->inbound_lock);
148
	spin_lock_init(&channel->lock);
149 150

	INIT_LIST_HEAD(&channel->sc_list);
151
	INIT_LIST_HEAD(&channel->percpu_list);
152 153 154 155

	return channel;
}

156
/*
 * free_channel - Release the memory backing a vmbus channel object.
 */
static void free_channel(struct vmbus_channel *chan)
{
	kfree(chan);
}

164 165 166 167 168 169 170
/*
 * Append @arg (a struct vmbus_channel *) to the per-cpu channel list of
 * the CPU we are currently running on. Must run on the target CPU (it is
 * invoked directly or via smp_call_function_single()).
 */
static void percpu_channel_enq(void *arg)
{
	struct vmbus_channel *chan = arg;

	list_add_tail(&chan->percpu_list,
		      &hv_context.percpu_list[smp_processor_id()]);
}
171

172 173 174 175 176 177
/*
 * Remove @arg (a struct vmbus_channel *) from whatever per-cpu channel
 * list it is on. Counterpart of percpu_channel_enq().
 */
static void percpu_channel_deq(void *arg)
{
	struct vmbus_channel *chan = arg;

	list_del(&chan->percpu_list);
}
178

179 180

/*
 * hv_process_channel_removal - Release @relid back to the host and tear
 * down @channel (if non-NULL): remove it from its per-cpu list, unlink it
 * from either the global channel list (primary) or its parent's
 * sub-channel list, and free it.
 */
void hv_process_channel_removal(struct vmbus_channel *channel, u32 relid)
{
	struct vmbus_channel_relid_released msg;
	unsigned long flags;
	struct vmbus_channel *primary_channel;

	/* Always tell the host the relid is free, even with no channel. */
	memset(&msg, 0, sizeof(struct vmbus_channel_relid_released));
	msg.child_relid = relid;
	msg.header.msgtype = CHANNELMSG_RELID_RELEASED;
	vmbus_post_msg(&msg, sizeof(struct vmbus_channel_relid_released));

	if (channel == NULL)
		return;

	/*
	 * The per-cpu list must be manipulated on the CPU the channel is
	 * bound to; hop there via IPI if we are on a different CPU.
	 */
	if (channel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(channel->target_cpu,
					 percpu_channel_deq, channel, true);
	} else {
		percpu_channel_deq(channel);
		put_cpu();
	}

	if (channel->primary_channel == NULL) {
		/* Primary channel: unlink from the global channel list. */
		spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
		list_del(&channel->listentry);
		spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);
	} else {
		/* Sub-channel: unlink from the parent under its lock. */
		primary_channel = channel->primary_channel;
		spin_lock_irqsave(&primary_channel->lock, flags);
		list_del(&channel->sc_list);
		primary_channel->num_sc--;
		spin_unlock_irqrestore(&primary_channel->lock, flags);
	}
	free_channel(channel);
}
216

217 218
/*
 * vmbus_free_channels - Unregister every device on the global channel
 * list; used at module unload time.
 */
void vmbus_free_channels(void)
{
	struct vmbus_channel *channel, *tmp;

	/* _safe variant: unregister ultimately removes the list entry. */
	list_for_each_entry_safe(channel, tmp, &vmbus_connection.chn_list,
		listentry) {
		/* if we don't set rescind to true, vmbus_close_internal()
		 * won't invoke hv_process_channel_removal().
		 */
		channel->rescind = true;

		vmbus_device_unregister(channel->device_obj);
	}
}

232
/*
 * vmbus_process_offer - Process the offer by creating a channel/device
 * associated with this offer
 *
 * A fresh if_type/if_instance pair becomes a primary channel and gets a
 * new child device; a repeated pair with a non-zero sub_channel_index is
 * attached to its primary as a sub-channel; any other duplicate is freed.
 */
static void vmbus_process_offer(struct vmbus_channel *newchannel)
{
	struct vmbus_channel *channel;
	bool fnew = true;
	unsigned long flags;

	/* Make sure this is a new offer */
	spin_lock_irqsave(&vmbus_connection.channel_lock, flags);

	list_for_each_entry(channel, &vmbus_connection.chn_list, listentry) {
		/* Duplicate iff both type and instance GUIDs match. */
		if (!uuid_le_cmp(channel->offermsg.offer.if_type,
			newchannel->offermsg.offer.if_type) &&
			!uuid_le_cmp(channel->offermsg.offer.if_instance,
				newchannel->offermsg.offer.if_instance)) {
			fnew = false;
			break;
		}
	}

	if (fnew)
		list_add_tail(&newchannel->listentry,
			      &vmbus_connection.chn_list);

	spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);

	if (!fnew) {
		/*
		 * Check to see if this is a sub-channel.
		 */
		if (newchannel->offermsg.offer.sub_channel_index != 0) {
			/*
			 * Process the sub-channel: link it to the primary
			 * (still pointed to by 'channel' from the loop).
			 */
			newchannel->primary_channel = channel;
			spin_lock_irqsave(&channel->lock, flags);
			list_add_tail(&newchannel->sc_list, &channel->sc_list);
			channel->num_sc++;
			spin_unlock_irqrestore(&channel->lock, flags);
		} else
			/* Duplicate primary offer: drop it. */
			goto err_free_chan;
	}

	/* Pick the CPU/VP this channel's interrupts will target. */
	init_vp_index(newchannel, &newchannel->offermsg.offer.if_type);

	/*
	 * Per-cpu list insertion must happen on the target CPU itself;
	 * use an IPI if we are elsewhere.
	 */
	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_enq,
					 newchannel, true);
	} else {
		percpu_channel_enq(newchannel);
		put_cpu();
	}

	/*
	 * This state is used to indicate a successful open
	 * so that when we do close the channel normally, we
	 * can cleanup properly
	 */
	newchannel->state = CHANNEL_OPEN_STATE;

	if (!fnew) {
		/* Sub-channel: notify the driver, no device is created. */
		if (channel->sc_creation_callback != NULL)
			channel->sc_creation_callback(newchannel);
		return;
	}

	/*
	 * Start the process of binding this offer to the driver
	 * We need to set the DeviceObject field before calling
	 * vmbus_child_dev_add()
	 */
	newchannel->device_obj = vmbus_device_create(
		&newchannel->offermsg.offer.if_type,
		&newchannel->offermsg.offer.if_instance,
		newchannel);
	if (!newchannel->device_obj)
		goto err_deq_chan;

	/*
	 * Add the new device to the bus. This will kick off device-driver
	 * binding which eventually invokes the device driver's AddDevice()
	 * method.
	 */
	if (vmbus_device_register(newchannel->device_obj) != 0) {
		pr_err("unable to add child device object (relid %d)\n",
			newchannel->offermsg.child_relid);
		kfree(newchannel->device_obj);
		goto err_deq_chan;
	}
	return;

err_deq_chan:
	/* Undo the global-list insertion and per-cpu enqueue done above. */
	spin_lock_irqsave(&vmbus_connection.channel_lock, flags);
	list_del(&newchannel->listentry);
	spin_unlock_irqrestore(&vmbus_connection.channel_lock, flags);

	if (newchannel->target_cpu != get_cpu()) {
		put_cpu();
		smp_call_function_single(newchannel->target_cpu,
					 percpu_channel_deq, newchannel, true);
	} else {
		percpu_channel_deq(newchannel);
		put_cpu();
	}

err_free_chan:
	free_channel(newchannel);
}

346 347 348 349
/*
 * Indices into hp_devs[] below; MAX_PERF_CHN doubles as the number of
 * performance-critical device classes. Keep in sync with hp_devs[].
 */
enum {
	IDE = 0,
	SCSI,
	NIC,
	ND_NIC,
	MAX_PERF_CHN,
};

/*
 * This is an array of device_ids (device types) that are performance critical.
 * We attempt to distribute the interrupt load for these devices across
 * all available CPUs.
 * NOTE: entry order must match the IDE/SCSI/NIC/ND_NIC enum above.
 */
static const struct hv_vmbus_device_id hp_devs[] = {
	/* IDE */
	{ HV_IDE_GUID, },
	/* Storage - SCSI */
	{ HV_SCSI_GUID, },
	/* Network */
	{ HV_NIC_GUID, },
	/* NetworkDirect Guest RDMA */
	{ HV_ND_GUID, },
};


/*
 * We use this state to statically distribute the channel interrupt load.
 * Next NUMA node to assign a primary channel to; advanced round-robin
 * by init_vp_index().
 */
static int next_numa_node_id;
375 376 377

/*
 * Starting with Win8, we can statically distribute the incoming
 * channel interrupt load by binding a channel to VCPU.
 * We do this in a hierarchical fashion:
 * First distribute the primary channels across available NUMA nodes
 * and then distribute the subchannels amongst the CPUs in the NUMA
 * node assigned to the primary channel.
 *
 * For pre-win8 hosts or non-performance critical channels we assign the
 * first CPU in the first NUMA node.
 */
static void init_vp_index(struct vmbus_channel *channel, const uuid_le *type_guid)
{
	u32 cur_cpu;
	int i;
	bool perf_chn = false;
	struct vmbus_channel *primary = channel->primary_channel;
	int next_node;
	struct cpumask available_mask;
	struct cpumask *alloced_mask;

	/* Is this one of the performance-critical device classes? */
	for (i = IDE; i < MAX_PERF_CHN; i++) {
		if (!memcmp(type_guid->b, hp_devs[i].guid,
				 sizeof(uuid_le))) {
			perf_chn = true;
			break;
		}
	}
	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7) || (!perf_chn)) {
		/*
		 * Prior to win8, all channel interrupts are
		 * delivered on cpu 0.
		 * Also if the channel is not a performance critical
		 * channel, bind it to cpu 0.
		 */
		channel->numa_node = 0;
		channel->target_cpu = 0;
		channel->target_vp = hv_context.vp_index[0];
		return;
	}

	/*
	 * We distribute primary channels evenly across all the available
	 * NUMA nodes and within the assigned NUMA node we will assign the
	 * first available CPU to the primary channel.
	 * The sub-channels will be assigned to the CPUs available in the
	 * NUMA node evenly.
	 */
	if (!primary) {
		/*
		 * Primary channel: round-robin to the next NUMA node that
		 * has at least one online CPU.
		 * NOTE(review): next_numa_node_id++ is not synchronized;
		 * presumably all callers run from the single vmbus work
		 * context — confirm.
		 */
		while (true) {
			next_node = next_numa_node_id++;
			if (next_node == nr_node_ids)
				next_node = next_numa_node_id = 0;
			if (cpumask_empty(cpumask_of_node(next_node)))
				continue;
			break;
		}
		channel->numa_node = next_node;
		primary = channel;
	}
	alloced_mask = &hv_context.hv_numa_map[primary->numa_node];

	if (cpumask_weight(alloced_mask) ==
	    cpumask_weight(cpumask_of_node(primary->numa_node))) {
		/*
		 * We have cycled through all the CPUs in the node;
		 * reset the alloced map.
		 */
		cpumask_clear(alloced_mask);
	}

	/* available = CPUs in the node that are not yet alloced. */
	cpumask_xor(&available_mask, alloced_mask,
		    cpumask_of_node(primary->numa_node));

	/* Take the first available CPU and mark it alloced. */
	cur_cpu = cpumask_next(-1, &available_mask);
	cpumask_set_cpu(cur_cpu, alloced_mask);

	channel->target_cpu = cur_cpu;
	channel->target_vp = hv_context.vp_index[cur_cpu];
}

458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473
/*
 * vmbus_unload_response - Handler for CHANNELMSG_UNLOAD_RESPONSE.
 */
static void vmbus_unload_response(struct vmbus_channel_message_header *hdr)
{
	/*
	 * Unload is a global, one-shot event: simply wake whoever is
	 * blocked in vmbus_initiate_unload(); monitor-state cleanup can
	 * proceed after a successful unload.
	 */
	complete(&vmbus_connection.unload_event);
}

void vmbus_initiate_unload(void)
{
	struct vmbus_channel_message_header hdr;

474 475 476 477
	/* Pre-Win2012R2 hosts don't support reconnect */
	if (vmbus_proto_version < VERSION_WIN8_1)
		return;

478 479 480 481 482 483 484 485
	init_completion(&vmbus_connection.unload_event);
	memset(&hdr, 0, sizeof(struct vmbus_channel_message_header));
	hdr.msgtype = CHANNELMSG_UNLOAD;
	vmbus_post_msg(&hdr, sizeof(struct vmbus_channel_message_header));

	wait_for_completion(&vmbus_connection.unload_event);
}

486
/*
 * vmbus_onoffer - Handler for channel offers from vmbus in parent partition.
 *
 * Allocates a channel object, captures the offer and the host-signalling
 * state, then hands the channel to vmbus_process_offer().
 */
static void vmbus_onoffer(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_offer_channel *offer;
	struct vmbus_channel *newchannel;

	offer = (struct vmbus_channel_offer_channel *)hdr;

	/* Allocate the channel object and save this offer. */
	newchannel = alloc_channel();
	if (!newchannel) {
		pr_err("Unable to allocate channel object\n");
		return;
	}

	/*
	 * By default we setup state to enable batched
	 * reading. A specific service can choose to
	 * disable this prior to opening the channel.
	 */
	newchannel->batched_reading = true;

	/*
	 * Setup state for signalling the host.
	 * sig_event aliases the hypercall-aligned region inside sig_buf.
	 */
	newchannel->sig_event = (struct hv_input_signal_event *)
				(ALIGN((unsigned long)
				&newchannel->sig_buf,
				HV_HYPERCALL_PARAM_ALIGN));

	newchannel->sig_event->connectionid.asu32 = 0;
	newchannel->sig_event->connectionid.u.id = VMBUS_EVENT_CONNECTION_ID;
	newchannel->sig_event->flag_number = 0;
	newchannel->sig_event->rsvdz = 0;

	/* Post-WS2008 hosts may supply a dedicated interrupt/connection. */
	if (vmbus_proto_version != VERSION_WS2008) {
		newchannel->is_dedicated_interrupt =
				(offer->is_dedicated_interrupt != 0);
		newchannel->sig_event->connectionid.u.id =
				offer->connection_id;
	}

	/* Keep a copy of the full offer for later reference. */
	memcpy(&newchannel->offermsg, offer,
	       sizeof(struct vmbus_channel_offer_channel));
	/* monitorid maps to a (group, bit) pair in the monitor pages. */
	newchannel->monitor_grp = (u8)offer->monitorid / 32;
	newchannel->monitor_bit = (u8)offer->monitorid % 32;

	vmbus_process_offer(newchannel);
}

539
/*
 * vmbus_onoffer_rescind - Rescind offer handler.
 *
 * We queue a work item to process this offer synchronously.
 * Marks the channel rescinded, then either unregisters its device from
 * the driver core or (if no device exists) removes the channel directly.
 */
static void vmbus_onoffer_rescind(struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_rescind_offer *rescind;
	struct vmbus_channel *channel;
	unsigned long flags;
	struct device *dev;

	rescind = (struct vmbus_channel_rescind_offer *)hdr;
	channel = relid2channel(rescind->child_relid);

	if (channel == NULL) {
		/* Unknown relid: just release it back to the host. */
		hv_process_channel_removal(NULL, rescind->child_relid);
		return;
	}

	spin_lock_irqsave(&channel->lock, flags);
	channel->rescind = true;
	spin_unlock_irqrestore(&channel->lock, flags);

	if (channel->device_obj) {
		/*
		 * We will have to unregister this device from the
		 * driver core.
		 */
		dev = get_device(&channel->device_obj->device);
		if (dev) {
			vmbus_device_unregister(channel->device_obj);
			put_device(dev);
		}
	} else {
		hv_process_channel_removal(channel,
			channel->offermsg.child_relid);
	}
}

579
/*
 * vmbus_onoffers_delivered - Invoked once the host has delivered all
 * pending channel offers.
 *
 * Intentionally empty: no action is required at this point.
 */
static void vmbus_onoffers_delivered(
			struct vmbus_channel_message_header *hdr)
{
}

590
/*
591
 * vmbus_onopen_result - Open result handler.
592 593 594 595 596
 *
 * This is invoked when we received a response to our channel open request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
597
static void vmbus_onopen_result(struct vmbus_channel_message_header *hdr)
598
{
599
	struct vmbus_channel_open_result *result;
600 601 602
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_open_channel *openmsg;
603
	unsigned long flags;
604

605
	result = (struct vmbus_channel_open_result *)hdr;
606

607 608 609
	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
610
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
611

612 613
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
614
		requestheader =
615
			(struct vmbus_channel_message_header *)msginfo->msg;
616

617
		if (requestheader->msgtype == CHANNELMSG_OPENCHANNEL) {
618
			openmsg =
619 620 621 622
			(struct vmbus_channel_open_channel *)msginfo->msg;
			if (openmsg->child_relid == result->child_relid &&
			    openmsg->openid == result->openid) {
				memcpy(&msginfo->response.open_result,
623
				       result,
624 625 626
				       sizeof(
					struct vmbus_channel_open_result));
				complete(&msginfo->waitevent);
627 628 629 630
				break;
			}
		}
	}
631
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
632 633
}

634
/*
635
 * vmbus_ongpadl_created - GPADL created handler.
636 637 638 639 640
 *
 * This is invoked when we received a response to our gpadl create request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
641
static void vmbus_ongpadl_created(struct vmbus_channel_message_header *hdr)
642
{
643 644 645 646
	struct vmbus_channel_gpadl_created *gpadlcreated;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_header *gpadlheader;
647
	unsigned long flags;
648

649
	gpadlcreated = (struct vmbus_channel_gpadl_created *)hdr;
650

651 652 653 654
	/*
	 * Find the establish msg, copy the result and signal/unblock the wait
	 * event
	 */
655
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
656

657 658
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
659
		requestheader =
660
			(struct vmbus_channel_message_header *)msginfo->msg;
661

662
		if (requestheader->msgtype == CHANNELMSG_GPADL_HEADER) {
663 664 665
			gpadlheader =
			(struct vmbus_channel_gpadl_header *)requestheader;

666 667 668 669
			if ((gpadlcreated->child_relid ==
			     gpadlheader->child_relid) &&
			    (gpadlcreated->gpadl == gpadlheader->gpadl)) {
				memcpy(&msginfo->response.gpadl_created,
670
				       gpadlcreated,
671 672 673
				       sizeof(
					struct vmbus_channel_gpadl_created));
				complete(&msginfo->waitevent);
674 675 676 677
				break;
			}
		}
	}
678
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
679 680
}

681
/*
682
 * vmbus_ongpadl_torndown - GPADL torndown handler.
683 684 685 686 687
 *
 * This is invoked when we received a response to our gpadl teardown request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
688
static void vmbus_ongpadl_torndown(
689
			struct vmbus_channel_message_header *hdr)
690
{
691 692 693 694
	struct vmbus_channel_gpadl_torndown *gpadl_torndown;
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_gpadl_teardown *gpadl_teardown;
695
	unsigned long flags;
696

697
	gpadl_torndown = (struct vmbus_channel_gpadl_torndown *)hdr;
698 699 700 701

	/*
	 * Find the open msg, copy the result and signal/unblock the wait event
	 */
702
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);
703

704 705
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
706
		requestheader =
707
			(struct vmbus_channel_message_header *)msginfo->msg;
708

709
		if (requestheader->msgtype == CHANNELMSG_GPADL_TEARDOWN) {
710 711
			gpadl_teardown =
			(struct vmbus_channel_gpadl_teardown *)requestheader;
712

713 714
			if (gpadl_torndown->gpadl == gpadl_teardown->gpadl) {
				memcpy(&msginfo->response.gpadl_torndown,
715
				       gpadl_torndown,
716 717 718
				       sizeof(
					struct vmbus_channel_gpadl_torndown));
				complete(&msginfo->waitevent);
719 720 721 722
				break;
			}
		}
	}
723
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
724 725
}

726
/*
 * vmbus_onversion_response - Version response handler
 *
 * This is invoked when we received a response to our initiate contact request.
 * Find the matching request, copy the response and signal the requesting
 * thread.
 */
static void vmbus_onversion_response(
		struct vmbus_channel_message_header *hdr)
{
	struct vmbus_channel_msginfo *msginfo;
	struct vmbus_channel_message_header *requestheader;
	struct vmbus_channel_version_response *version_response;
	unsigned long flags;

	version_response = (struct vmbus_channel_version_response *)hdr;
	spin_lock_irqsave(&vmbus_connection.channelmsg_lock, flags);

	/*
	 * Note: unlike the other response handlers there is no break;
	 * every pending INITIATE_CONTACT request on the list is completed
	 * with this response.
	 */
	list_for_each_entry(msginfo, &vmbus_connection.chn_msg_list,
				msglistentry) {
		requestheader =
			(struct vmbus_channel_message_header *)msginfo->msg;

		if (requestheader->msgtype ==
		    CHANNELMSG_INITIATE_CONTACT) {
			memcpy(&msginfo->response.version_response,
			      version_response,
			      sizeof(struct vmbus_channel_version_response));
			complete(&msginfo->waitevent);
		}
	}
	spin_unlock_irqrestore(&vmbus_connection.channelmsg_lock, flags);
}

760
/*
 * Channel message dispatch table, indexed by msgtype. A NULL handler
 * means the message is guest-originated (or ignored) and never dispatched
 * by vmbus_onmessage(). The middle field flags messages handled in the
 * interrupt/message-delivery path.
 */
struct vmbus_channel_message_table_entry
	channel_message_table[CHANNELMSG_COUNT] = {
	{CHANNELMSG_INVALID,			0, NULL},
	{CHANNELMSG_OFFERCHANNEL,		0, vmbus_onoffer},
	{CHANNELMSG_RESCIND_CHANNELOFFER,	0, vmbus_onoffer_rescind},
	{CHANNELMSG_REQUESTOFFERS,		0, NULL},
	{CHANNELMSG_ALLOFFERS_DELIVERED,	1, vmbus_onoffers_delivered},
	{CHANNELMSG_OPENCHANNEL,		0, NULL},
	{CHANNELMSG_OPENCHANNEL_RESULT,		1, vmbus_onopen_result},
	{CHANNELMSG_CLOSECHANNEL,		0, NULL},
	{CHANNELMSG_GPADL_HEADER,		0, NULL},
	{CHANNELMSG_GPADL_BODY,			0, NULL},
	{CHANNELMSG_GPADL_CREATED,		1, vmbus_ongpadl_created},
	{CHANNELMSG_GPADL_TEARDOWN,		0, NULL},
	{CHANNELMSG_GPADL_TORNDOWN,		1, vmbus_ongpadl_torndown},
	{CHANNELMSG_RELID_RELEASED,		0, NULL},
	{CHANNELMSG_INITIATE_CONTACT,		0, NULL},
	{CHANNELMSG_VERSION_RESPONSE,		1, vmbus_onversion_response},
	{CHANNELMSG_UNLOAD,			0, NULL},
	{CHANNELMSG_UNLOAD_RESPONSE,		1, vmbus_unload_response},
};

783
/*
784
 * vmbus_onmessage - Handler for channel protocol messages.
785 786 787
 *
 * This is invoked in the vmbus worker thread context.
 */
788
void vmbus_onmessage(void *context)
789
{
790
	struct hv_message *msg = context;
791
	struct vmbus_channel_message_header *hdr;
792 793
	int size;

794 795
	hdr = (struct vmbus_channel_message_header *)msg->u.payload;
	size = msg->header.payload_size;
796

797
	if (hdr->msgtype >= CHANNELMSG_COUNT) {
798
		pr_err("Received invalid channel message type %d size %d\n",
799
			   hdr->msgtype, size);
800
		print_hex_dump_bytes("", DUMP_PREFIX_NONE,
801
				     (unsigned char *)msg->u.payload, size);
802 803 804
		return;
	}

805 806
	if (channel_message_table[hdr->msgtype].message_handler)
		channel_message_table[hdr->msgtype].message_handler(hdr);
807
	else
808
		pr_err("Unhandled channel message type %d\n", hdr->msgtype);
809 810
}

811
/*
812
 * vmbus_request_offers - Send a request to get all our pending offers.
813
 */
814
int vmbus_request_offers(void)
815
{
816
	struct vmbus_channel_message_header *msg;
817
	struct vmbus_channel_msginfo *msginfo;
818
	int ret;
819

820
	msginfo = kmalloc(sizeof(*msginfo) +
821 822
			  sizeof(struct vmbus_channel_message_header),
			  GFP_KERNEL);
823
	if (!msginfo)
824
		return -ENOMEM;
825

826
	msg = (struct vmbus_channel_message_header *)msginfo->msg;
827

828
	msg->msgtype = CHANNELMSG_REQUESTOFFERS;
829 830


831
	ret = vmbus_post_msg(msg,
832 833
			       sizeof(struct vmbus_channel_message_header));
	if (ret != 0) {
834
		pr_err("Unable to request offers - %d\n", ret);
835

836 837
		goto cleanup;
	}
838

839
cleanup:
840
	kfree(msginfo);
841 842 843 844

	return ret;
}

845 846
/*
 * Retrieve the (sub) channel on which to send an outgoing request.
 * When a primary channel has multiple sub-channels, we try to
 * distribute the load equally amongst all available channels.
 *
 * Prefers an opened sub-channel bound to the current CPU's VP; otherwise
 * round-robins across sub-channels via primary->next_oc. Falls back to
 * the primary channel itself.
 */
struct vmbus_channel *vmbus_get_outgoing_channel(struct vmbus_channel *primary)
{
	struct list_head *cur, *tmp;
	int cur_cpu;
	struct vmbus_channel *cur_channel;
	struct vmbus_channel *outgoing_channel = primary;
	int next_channel;
	int i = 1;

	if (list_empty(&primary->sc_list))
		return outgoing_channel;

	/*
	 * Round-robin cursor; wraps back to the primary after cycling.
	 * NOTE(review): the '>' (rather than '>=') against num_sc looks
	 * like it allows one extra step per cycle — confirm intended.
	 */
	next_channel = primary->next_oc++;

	if (next_channel > (primary->num_sc)) {
		primary->next_oc = 0;
		return outgoing_channel;
	}

	/* VP index of the CPU we are currently running on. */
	cur_cpu = hv_context.vp_index[get_cpu()];
	put_cpu();
	list_for_each_safe(cur, tmp, &primary->sc_list) {
		cur_channel = list_entry(cur, struct vmbus_channel, sc_list);
		if (cur_channel->state != CHANNEL_OPENED_STATE)
			continue;

		/* Best case: a sub-channel bound to this CPU. */
		if (cur_channel->target_vp == cur_cpu)
			return cur_channel;

		/* Otherwise take the round-robin pick. */
		if (i == next_channel)
			return cur_channel;

		i++;
	}

	return outgoing_channel;
}
EXPORT_SYMBOL_GPL(vmbus_get_outgoing_channel);

/*
 * invoke_sc_cb - Run the primary channel's sub-channel creation callback,
 * if one is registered, on every sub-channel in its sc_list.
 */
static void invoke_sc_cb(struct vmbus_channel *primary_channel)
{
	struct list_head *pos, *next;
	struct vmbus_channel *sc;

	if (primary_channel->sc_creation_callback == NULL)
		return;

	list_for_each_safe(pos, next, &primary_channel->sc_list) {
		sc = list_entry(pos, struct vmbus_channel, sc_list);
		primary_channel->sc_creation_callback(sc);
	}
}

/*
 * vmbus_set_sc_create_callback - Register the callback invoked for each
 * sub-channel created under @primary_channel.
 */
void vmbus_set_sc_create_callback(struct vmbus_channel *primary_channel,
				void (*sc_cr_cb)(struct vmbus_channel *new_sc))
{
	primary_channel->sc_creation_callback = sc_cr_cb;
}
EXPORT_SYMBOL_GPL(vmbus_set_sc_create_callback);

bool vmbus_are_subchannels_present(struct vmbus_channel *primary)
{
	bool ret;

	ret = !list_empty(&primary->sc_list);

	if (ret) {
		/*
		 * Invoke the callback on sub-channel creation.
		 * This will present a uniform interface to the
		 * clients.
		 */
		invoke_sc_cb(primary);
	}

	return ret;
}
EXPORT_SYMBOL_GPL(vmbus_are_subchannels_present);