/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *   K. Y. Srinivasan <kys@microsoft.com>
 *
 */
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/sysctl.h>
#include <linux/slab.h>
#include <linux/acpi.h>
#include <linux/completion.h>
#include <linux/hyperv.h>
#include <linux/kernel_stat.h>
#include <linux/clockchips.h>
#include <linux/cpu.h>
#include <asm/hyperv.h>
#include <asm/hypervisor.h>
#include <asm/mshyperv.h>
#include <linux/notifier.h>
#include <linux/ptrace.h>
#include "hyperv_vmbus.h"

/* The one VMBUS ACPI device; set by vmbus_acpi_add(). */
static struct acpi_device  *hv_acpi_dev;

/* Tasklet draining the per-cpu SynIC message page (vmbus_on_msg_dpc). */
static struct tasklet_struct msg_dpc;
/* Completed by vmbus_acpi_add() once the ACPI resources have been walked. */
static struct completion probe_event;
/* IRQ number discovered from the VMBUS ACPI _CRS resources. */
static int irq;

K
kbuild test robot 已提交
51
static int hyperv_panic_event(struct notifier_block *nb,
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
			unsigned long event, void *ptr)
{
	struct pt_regs *regs;

	regs = current_pt_regs();

	wrmsrl(HV_X64_MSR_CRASH_P0, regs->ip);
	wrmsrl(HV_X64_MSR_CRASH_P1, regs->ax);
	wrmsrl(HV_X64_MSR_CRASH_P2, regs->bx);
	wrmsrl(HV_X64_MSR_CRASH_P3, regs->cx);
	wrmsrl(HV_X64_MSR_CRASH_P4, regs->dx);

	/*
	 * Let Hyper-V know there is crash data available
	 */
	wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY);
	return NOTIFY_DONE;
}

static struct notifier_block hyperv_panic_block = {
	.notifier_call = hyperv_panic_event,
};

75 76 77 78 79
struct resource hyperv_mmio = {
	.name  = "hyperv mmio",
	.flags = IORESOURCE_MEM,
};
EXPORT_SYMBOL_GPL(hyperv_mmio);
80

81 82 83 84 85 86 87 88
static int vmbus_exists(void)
{
	if (hv_acpi_dev == NULL)
		return -ENODEV;

	return 0;
}

89 90 91 92 93 94 95 96
#define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2)
static void print_alias_name(struct hv_device *hv_dev, char *alias_name)
{
	int i;
	for (i = 0; i < VMBUS_ALIAS_LEN; i += 2)
		sprintf(&alias_name[i], "%02x", hv_dev->dev_type.b[i/2]);
}

/* Monitor IDs are grouped 32 to a trigger group. */
static u8 channel_monitor_group(struct vmbus_channel *channel)
{
	return (u8)channel->offermsg.monitorid / 32;
}

/* Bit position of this channel's monitor ID within its trigger group. */
static u8 channel_monitor_offset(struct vmbus_channel *channel)
{
	return (u8)channel->offermsg.monitorid % 32;
}

/* Pending bits of the trigger group this channel's monitor ID belongs to. */
static u32 channel_pending(struct vmbus_channel *channel,
			   struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);

	return monitor_page->trigger_group[monitor_group].pending;
}

/* Latency entry on the monitor page for this channel's monitor ID. */
static u32 channel_latency(struct vmbus_channel *channel,
			   struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);
	u8 monitor_offset = channel_monitor_offset(channel);

	return monitor_page->latency[monitor_group][monitor_offset];
}

/* Connection id parameter on the monitor page for this channel. */
static u32 channel_conn_id(struct vmbus_channel *channel,
			   struct hv_monitor_page *monitor_page)
{
	u8 monitor_group = channel_monitor_group(channel);
	u8 monitor_offset = channel_monitor_offset(channel);

	return monitor_page->parameter[monitor_group][monitor_offset].connectionid.u.id;
}

130 131 132 133 134 135 136 137 138 139 140
static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
		       char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
}
static DEVICE_ATTR_RO(id);

141 142 143 144 145 146 147 148 149 150 151
static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
			  char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->state);
}
static DEVICE_ATTR_RO(state);

152 153 154 155 156 157 158 159 160 161 162
static ssize_t monitor_id_show(struct device *dev,
			       struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
}
static DEVICE_ATTR_RO(monitor_id);

163 164 165 166 167 168 169 170 171 172 173 174
static ssize_t class_id_show(struct device *dev,
			       struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "{%pUl}\n",
		       hv_dev->channel->offermsg.offer.if_type.b);
}
static DEVICE_ATTR_RO(class_id);

175 176 177 178 179 180 181 182 183 184 185 186
static ssize_t device_id_show(struct device *dev,
			      struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "{%pUl}\n",
		       hv_dev->channel->offermsg.offer.if_instance.b);
}
static DEVICE_ATTR_RO(device_id);

187 188 189 190 191 192 193 194 195 196 197
static ssize_t modalias_show(struct device *dev,
			     struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	char alias_name[VMBUS_ALIAS_LEN + 1];

	print_alias_name(hv_dev, alias_name);
	return sprintf(buf, "vmbus:%s\n", alias_name);
}
static DEVICE_ATTR_RO(modalias);

/*
 * sysfs: monitor-page state for this channel's monitor ID.
 *
 * NOTE(review): server_monitor_pending_show reads monitor_pages[1], the same
 * page as client_monitor_pending_show, whereas the server latency/conn_id
 * attributes below read monitor_pages[0]. Confirm against the monitor-page
 * layout whether the server pending attribute should use page 0; behavior
 * left unchanged here.
 */
static ssize_t server_monitor_pending_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_pending(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(server_monitor_pending);

static ssize_t client_monitor_pending_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_pending(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_pending);

/* sysfs: monitor latency, server side (page 0) / client side (page 1). */
static ssize_t server_monitor_latency_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_latency(hv_dev->channel,
				       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_latency);

static ssize_t client_monitor_latency_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_latency(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_latency);

/* sysfs: monitor connection id, server side (page 0) / client side (page 1). */
static ssize_t server_monitor_conn_id_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_conn_id(hv_dev->channel,
				       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_conn_id);

static ssize_t client_monitor_conn_id_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_conn_id(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_conn_id);

/*
 * sysfs: snapshots of the outbound/inbound ring-buffer state for this
 * channel, obtained via hv_ringbuffer_get_debuginfo().
 */
static ssize_t out_intr_mask_show(struct device *dev,
				  struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(out_intr_mask);

static ssize_t out_read_index_show(struct device *dev,
				   struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.current_read_index);
}
static DEVICE_ATTR_RO(out_read_index);

static ssize_t out_write_index_show(struct device *dev,
				    struct device_attribute *dev_attr,
				    char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.current_write_index);
}
static DEVICE_ATTR_RO(out_write_index);

static ssize_t out_read_bytes_avail_show(struct device *dev,
					 struct device_attribute *dev_attr,
					 char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(out_read_bytes_avail);

static ssize_t out_write_bytes_avail_show(struct device *dev,
					  struct device_attribute *dev_attr,
					  char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(out_write_bytes_avail);

static ssize_t in_intr_mask_show(struct device *dev,
				 struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(in_intr_mask);

static ssize_t in_read_index_show(struct device *dev,
				  struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.current_read_index);
}
static DEVICE_ATTR_RO(in_read_index);

static ssize_t in_write_index_show(struct device *dev,
				   struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.current_write_index);
}
static DEVICE_ATTR_RO(in_write_index);

static ssize_t in_read_bytes_avail_show(struct device *dev,
					struct device_attribute *dev_attr,
					char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(in_read_bytes_avail);

static ssize_t in_write_bytes_avail_show(struct device *dev,
					 struct device_attribute *dev_attr,
					 char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(in_write_bytes_avail);

/* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
418 419
static struct attribute *vmbus_attrs[] = {
	&dev_attr_id.attr,
420
	&dev_attr_state.attr,
421
	&dev_attr_monitor_id.attr,
422
	&dev_attr_class_id.attr,
423
	&dev_attr_device_id.attr,
424
	&dev_attr_modalias.attr,
425 426
	&dev_attr_server_monitor_pending.attr,
	&dev_attr_client_monitor_pending.attr,
427 428
	&dev_attr_server_monitor_latency.attr,
	&dev_attr_client_monitor_latency.attr,
429 430
	&dev_attr_server_monitor_conn_id.attr,
	&dev_attr_client_monitor_conn_id.attr,
431 432 433 434 435 436 437 438 439 440
	&dev_attr_out_intr_mask.attr,
	&dev_attr_out_read_index.attr,
	&dev_attr_out_write_index.attr,
	&dev_attr_out_read_bytes_avail.attr,
	&dev_attr_out_write_bytes_avail.attr,
	&dev_attr_in_intr_mask.attr,
	&dev_attr_in_read_index.attr,
	&dev_attr_in_write_index.attr,
	&dev_attr_in_read_bytes_avail.attr,
	&dev_attr_in_write_bytes_avail.attr,
441 442 443 444
	NULL,
};
ATTRIBUTE_GROUPS(vmbus);

445 446 447 448 449 450
/*
 * vmbus_uevent - add uevent for our device
 *
 * This routine is invoked when a device is added or removed on the vmbus to
 * generate a uevent to udev in the userspace. The udev will then look at its
 * rule and the uevent generated here to load the appropriate driver
451 452 453 454
 *
 * The alias string will be of the form vmbus:guid where guid is the string
 * representation of the device guid (each byte of the guid will be
 * represented with two hex characters.
455 456 457 458
 */
static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
{
	struct hv_device *dev = device_to_hv_device(device);
459 460
	int ret;
	char alias_name[VMBUS_ALIAS_LEN + 1];
461

462
	print_alias_name(dev, alias_name);
463 464
	ret = add_uevent_var(env, "MODALIAS=vmbus:%s", alias_name);
	return ret;
465 466
}

S
stephen hemminger 已提交
467
static const uuid_le null_guid;
468 469 470 471 472 473 474 475

static inline bool is_null_guid(const __u8 *guid)
{
	if (memcmp(guid, &null_guid, sizeof(uuid_le)))
		return false;
	return true;
}

476 477 478 479 480 481
/*
 * Return a matching hv_vmbus_device_id pointer.
 * If there is no match, return NULL.
 */
static const struct hv_vmbus_device_id *hv_vmbus_get_id(
					const struct hv_vmbus_device_id *id,
S
stephen hemminger 已提交
482
					const __u8 *guid)
483 484 485 486 487 488 489 490 491
{
	for (; !is_null_guid(id->guid); id++)
		if (!memcmp(&id->guid, guid, sizeof(uuid_le)))
			return id;

	return NULL;
}


492 493 494 495 496 497 498

/*
 * vmbus_match - Attempt to match the specified device to the specified driver
 */
static int vmbus_match(struct device *device, struct device_driver *driver)
{
	struct hv_driver *drv = drv_to_hv_drv(driver);
499
	struct hv_device *hv_dev = device_to_hv_device(device);
500

501 502
	if (hv_vmbus_get_id(drv->id_table, hv_dev->dev_type.b))
		return 1;
503

504
	return 0;
505 506
}

507 508 509 510 511 512 513 514
/*
 * vmbus_probe - Add the new vmbus's child device
 */
static int vmbus_probe(struct device *child_device)
{
	int ret = 0;
	struct hv_driver *drv =
			drv_to_hv_drv(child_device->driver);
515
	struct hv_device *dev = device_to_hv_device(child_device);
516
	const struct hv_vmbus_device_id *dev_id;
517

518
	dev_id = hv_vmbus_get_id(drv->id_table, dev->dev_type.b);
519
	if (drv->probe) {
520
		ret = drv->probe(dev, dev_id);
521
		if (ret != 0)
522 523
			pr_err("probe failed for device %s (%d)\n",
			       dev_name(child_device), ret);
524 525

	} else {
526 527
		pr_err("probe not set for driver %s\n",
		       dev_name(child_device));
528
		ret = -ENODEV;
529 530 531 532
	}
	return ret;
}

533 534 535 536 537
/*
 * vmbus_remove - Remove a vmbus device
 */
static int vmbus_remove(struct device *child_device)
{
538
	struct hv_driver *drv;
539
	struct hv_device *dev = device_to_hv_device(child_device);
540
	u32 relid = dev->channel->offermsg.child_relid;
541

542 543 544 545
	if (child_device->driver) {
		drv = drv_to_hv_drv(child_device->driver);
		if (drv->remove)
			drv->remove(dev);
546 547
		else {
			hv_process_channel_removal(dev->channel, relid);
548 549
			pr_err("remove not set for driver %s\n",
				dev_name(child_device));
550 551 552 553 554 555 556
		}
	} else {
		/*
		 * We don't have a driver for this device; deal with the
		 * rescind message by removing the channel.
		 */
		hv_process_channel_removal(dev->channel, relid);
557
	}
558 559 560 561

	return 0;
}

562 563 564 565 566 567 568

/*
 * vmbus_shutdown - Shutdown a vmbus device
 */
static void vmbus_shutdown(struct device *child_device)
{
	struct hv_driver *drv;
569
	struct hv_device *dev = device_to_hv_device(child_device);
570 571 572 573 574 575 576 577


	/* The device may not be attached yet */
	if (!child_device->driver)
		return;

	drv = drv_to_hv_drv(child_device->driver);

578 579
	if (drv->shutdown)
		drv->shutdown(dev);
580 581 582 583

	return;
}

/*
 * vmbus_device_release - Final callback release of the vmbus child device
 *
 * Frees the hv_device allocated by vmbus_device_create(); invoked by the
 * driver core when the last reference to the device is dropped.
 */
static void vmbus_device_release(struct device *device)
{
	struct hv_device *hv_dev = device_to_hv_device(device);

	kfree(hv_dev);
}

596
/* The one and only one */
597 598 599 600 601 602 603
static struct bus_type  hv_bus = {
	.name =		"vmbus",
	.match =		vmbus_match,
	.shutdown =		vmbus_shutdown,
	.remove =		vmbus_remove,
	.probe =		vmbus_probe,
	.uevent =		vmbus_uevent,
604
	.dev_groups =		vmbus_groups,
605 606
};

607 608 609 610 611 612 613 614 615
struct onmessage_work_context {
	struct work_struct work;
	struct hv_message msg;
};

static void vmbus_onmessage_work(struct work_struct *work)
{
	struct onmessage_work_context *ctx;

616 617 618 619
	/* Do not process messages if we're in DISCONNECTED state */
	if (vmbus_connection.conn_state == DISCONNECTED)
		return;

620 621 622 623 624 625
	ctx = container_of(work, struct onmessage_work_context,
			   work);
	vmbus_onmessage(&ctx->msg);
	kfree(ctx);
}

626
static void hv_process_timer_expiration(struct hv_message *msg, int cpu)
627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653
{
	struct clock_event_device *dev = hv_context.clk_evt[cpu];

	if (dev->event_handler)
		dev->event_handler(dev);

	msg->header.message_type = HVMSG_NONE;

	/*
	 * Make sure the write to MessageType (ie set to
	 * HVMSG_NONE) happens before we read the
	 * MessagePending and EOMing. Otherwise, the EOMing
	 * will not deliver any more messages since there is
	 * no empty slot
	 */
	mb();

	if (msg->header.message_flags.msg_pending) {
		/*
		 * This will cause message queue rescan to
		 * possibly deliver another msg from the
		 * hypervisor
		 */
		wrmsrl(HV_X64_MSR_EOM, 0);
	}
}

654
static void vmbus_on_msg_dpc(unsigned long data)
G
Greg Kroah-Hartman 已提交
655 656 657 658 659
{
	int cpu = smp_processor_id();
	void *page_addr = hv_context.synic_message_page[cpu];
	struct hv_message *msg = (struct hv_message *)page_addr +
				  VMBUS_MESSAGE_SINT;
660 661
	struct vmbus_channel_message_header *hdr;
	struct vmbus_channel_message_table_entry *entry;
662
	struct onmessage_work_context *ctx;
G
Greg Kroah-Hartman 已提交
663 664

	while (1) {
665
		if (msg->header.message_type == HVMSG_NONE)
G
Greg Kroah-Hartman 已提交
666 667
			/* no msg */
			break;
668 669 670 671 672 673 674 675 676 677

		hdr = (struct vmbus_channel_message_header *)msg->u.payload;

		if (hdr->msgtype >= CHANNELMSG_COUNT) {
			WARN_ONCE(1, "unknown msgtype=%d\n", hdr->msgtype);
			goto msg_handled;
		}

		entry = &channel_message_table[hdr->msgtype];
		if (entry->handler_type	== VMHT_BLOCKING) {
678 679
			ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
			if (ctx == NULL)
G
Greg Kroah-Hartman 已提交
680
				continue;
681

682 683
			INIT_WORK(&ctx->work, vmbus_onmessage_work);
			memcpy(&ctx->msg, msg, sizeof(*msg));
684

685
			queue_work(vmbus_connection.work_queue, &ctx->work);
686 687
		} else
			entry->message_handler(hdr);
G
Greg Kroah-Hartman 已提交
688

689
msg_handled:
G
Greg Kroah-Hartman 已提交
690 691 692 693 694 695 696 697 698
		msg->header.message_type = HVMSG_NONE;

		/*
		 * Make sure the write to MessageType (ie set to
		 * HVMSG_NONE) happens before we read the
		 * MessagePending and EOMing. Otherwise, the EOMing
		 * will not deliver any more messages since there is
		 * no empty slot
		 */
699
		mb();
G
Greg Kroah-Hartman 已提交
700 701 702 703 704 705 706 707 708 709 710 711

		if (msg->header.message_flags.msg_pending) {
			/*
			 * This will cause message queue rescan to
			 * possibly deliver another msg from the
			 * hypervisor
			 */
			wrmsrl(HV_X64_MSR_EOM, 0);
		}
	}
}

712
static void vmbus_isr(void)
G
Greg Kroah-Hartman 已提交
713 714 715 716 717
{
	int cpu = smp_processor_id();
	void *page_addr;
	struct hv_message *msg;
	union hv_synic_event_flags *event;
718
	bool handled = false;
G
Greg Kroah-Hartman 已提交
719

720 721
	page_addr = hv_context.synic_event_page[cpu];
	if (page_addr == NULL)
722
		return;
723 724 725

	event = (union hv_synic_event_flags *)page_addr +
					 VMBUS_MESSAGE_SINT;
726 727 728 729 730
	/*
	 * Check for events before checking for messages. This is the order
	 * in which events and messages are checked in Windows guests on
	 * Hyper-V, and the Windows team suggested we do the same.
	 */
G
Greg Kroah-Hartman 已提交
731

732 733
	if ((vmbus_proto_version == VERSION_WS2008) ||
		(vmbus_proto_version == VERSION_WIN7)) {
G
Greg Kroah-Hartman 已提交
734

735 736 737 738 739 740 741 742 743 744 745 746
		/* Since we are a child, we only need to check bit 0 */
		if (sync_test_and_clear_bit(0,
			(unsigned long *) &event->flags32[0])) {
			handled = true;
		}
	} else {
		/*
		 * Our host is win8 or above. The signaling mechanism
		 * has changed and we can directly look at the event page.
		 * If bit n is set then we have an interrup on the channel
		 * whose id is n.
		 */
747 748
		handled = true;
	}
749

750
	if (handled)
751
		tasklet_schedule(hv_context.event_dpc[cpu]);
752 753


754 755 756 757
	page_addr = hv_context.synic_message_page[cpu];
	msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;

	/* Check if there are actual msgs to be processed */
758 759 760 761 762 763
	if (msg->header.message_type != HVMSG_NONE) {
		if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
			hv_process_timer_expiration(msg, cpu);
		else
			tasklet_schedule(&msg_dpc);
	}
764 765
}

766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796 797 798
#ifdef CONFIG_HOTPLUG_CPU
static int hyperv_cpu_disable(void)
{
	return -ENOSYS;
}

static void hv_cpu_hotplug_quirk(bool vmbus_loaded)
{
	static void *previous_cpu_disable;

	/*
	 * Offlining a CPU when running on newer hypervisors (WS2012R2, Win8,
	 * ...) is not supported at this moment as channel interrupts are
	 * distributed across all of them.
	 */

	if ((vmbus_proto_version == VERSION_WS2008) ||
	    (vmbus_proto_version == VERSION_WIN7))
		return;

	if (vmbus_loaded) {
		previous_cpu_disable = smp_ops.cpu_disable;
		smp_ops.cpu_disable = hyperv_cpu_disable;
		pr_notice("CPU offlining is not supported by hypervisor\n");
	} else if (previous_cpu_disable)
		smp_ops.cpu_disable = previous_cpu_disable;
}
#else
static void hv_cpu_hotplug_quirk(bool vmbus_loaded)
{
}
#endif

799
/*
800 801 802
 * vmbus_bus_init -Main vmbus driver initialization routine.
 *
 * Here, we
803 804 805 806
 *	- initialize the vmbus driver context
 *	- invoke the vmbus hv main init routine
 *	- get the irq resource
 *	- retrieve the channel offers
807
 */
808
static int vmbus_bus_init(int irq)
809
{
810
	int ret;
811

812 813
	/* Hypervisor initialization...setup hypercall page..etc */
	ret = hv_init();
814
	if (ret != 0) {
815
		pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
816
		return ret;
817 818
	}

819
	tasklet_init(&msg_dpc, vmbus_on_msg_dpc, 0);
820

821
	ret = bus_register(&hv_bus);
822
	if (ret)
823
		goto err_cleanup;
824

825
	hv_setup_vmbus_irq(vmbus_isr);
826

827 828 829
	ret = hv_synic_alloc();
	if (ret)
		goto err_alloc;
830
	/*
831
	 * Initialize the per-cpu interrupt state and
832 833
	 * connect to the host.
	 */
834
	on_each_cpu(hv_synic_init, NULL, 1);
835
	ret = vmbus_connect();
836
	if (ret)
837
		goto err_alloc;
838

839
	hv_cpu_hotplug_quirk(true);
840 841 842 843 844 845 846 847 848

	/*
	 * Only register if the crash MSRs are available
	 */
	if (ms_hyperv.features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
		atomic_notifier_chain_register(&panic_notifier_list,
					       &hyperv_panic_block);
	}

849
	vmbus_request_offers();
850

851
	return 0;
852

853 854
err_alloc:
	hv_synic_free();
855
	hv_remove_vmbus_irq();
856 857 858 859 860 861 862

	bus_unregister(&hv_bus);

err_cleanup:
	hv_cleanup();

	return ret;
863 864
}

865
/**
866 867 868 869
 * __vmbus_child_driver_register - Register a vmbus's driver
 * @drv: Pointer to driver structure you want to register
 * @owner: owner module of the drv
 * @mod_name: module name string
870 871
 *
 * Registers the given driver with Linux through the 'driver_register()' call
872
 * and sets up the hyper-v vmbus handling for this driver.
873 874
 * It will return the state of the 'driver_register()' call.
 *
875
 */
876
int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name)
877
{
878
	int ret;
879

880
	pr_info("registering driver %s\n", hv_driver->name);
881

882 883 884 885
	ret = vmbus_exists();
	if (ret < 0)
		return ret;

886 887 888 889
	hv_driver->driver.name = hv_driver->name;
	hv_driver->driver.owner = owner;
	hv_driver->driver.mod_name = mod_name;
	hv_driver->driver.bus = &hv_bus;
890

891
	ret = driver_register(&hv_driver->driver);
892

893
	return ret;
894
}
895
EXPORT_SYMBOL_GPL(__vmbus_driver_register);
896

897
/**
898 899
 * vmbus_driver_unregister() - Unregister a vmbus's driver
 * @drv: Pointer to driver structure you want to un-register
900
 *
901 902
 * Un-register the given driver that was previous registered with a call to
 * vmbus_driver_register()
903
 */
904
void vmbus_driver_unregister(struct hv_driver *hv_driver)
905
{
906
	pr_info("unregistering driver %s\n", hv_driver->name);
907

908
	if (!vmbus_exists())
909
		driver_unregister(&hv_driver->driver);
910
}
911
EXPORT_SYMBOL_GPL(vmbus_driver_unregister);
912

913
/*
914
 * vmbus_device_create - Creates and registers a new child device
915
 * on the vmbus.
916
 */
S
stephen hemminger 已提交
917 918 919
struct hv_device *vmbus_device_create(const uuid_le *type,
				      const uuid_le *instance,
				      struct vmbus_channel *channel)
920
{
921
	struct hv_device *child_device_obj;
922

923 924
	child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL);
	if (!child_device_obj) {
925
		pr_err("Unable to allocate device object for child device\n");
926 927 928
		return NULL;
	}

929
	child_device_obj->channel = channel;
930
	memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le));
931
	memcpy(&child_device_obj->dev_instance, instance,
932
	       sizeof(uuid_le));
933 934 935 936 937


	return child_device_obj;
}

938
/*
939
 * vmbus_device_register - Register the child device
940
 */
941
int vmbus_device_register(struct hv_device *child_device_obj)
942
{
943
	int ret = 0;
944

945 946
	dev_set_name(&child_device_obj->device, "vmbus_%d",
		     child_device_obj->channel->id);
947

948
	child_device_obj->device.bus = &hv_bus;
949
	child_device_obj->device.parent = &hv_acpi_dev->dev;
950
	child_device_obj->device.release = vmbus_device_release;
951

952 953 954 955
	/*
	 * Register with the LDM. This will kick off the driver/device
	 * binding...which will eventually call vmbus_match() and vmbus_probe()
	 */
956
	ret = device_register(&child_device_obj->device);
957 958

	if (ret)
959
		pr_err("Unable to register child device\n");
960
	else
961
		pr_debug("child device %s registered\n",
962
			dev_name(&child_device_obj->device));
963 964 965 966

	return ret;
}

967
/*
968
 * vmbus_device_unregister - Remove the specified child device
969
 * from the vmbus.
970
 */
971
void vmbus_device_unregister(struct hv_device *device_obj)
972
{
973 974 975
	pr_debug("child device %s unregistered\n",
		dev_name(&device_obj->device));

976 977 978 979
	/*
	 * Kick off the process of unregistering the device.
	 * This will call vmbus_remove() and eventually vmbus_device_release()
	 */
980
	device_unregister(&device_obj->device);
981 982 983
}


984
/*
985 986
 * VMBUS is an acpi enumerated device. Get the the information we
 * need from DSDT.
987 988
 */

989
static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
990
{
991 992 993
	switch (res->type) {
	case ACPI_RESOURCE_TYPE_IRQ:
		irq = res->data.irq.interrupts[0];
G
Gerd Hoffmann 已提交
994
		break;
995

996
	case ACPI_RESOURCE_TYPE_ADDRESS64:
997 998
		hyperv_mmio.start = res->data.address64.address.minimum;
		hyperv_mmio.end = res->data.address64.address.maximum;
G
Gerd Hoffmann 已提交
999
		break;
1000 1001 1002 1003 1004 1005 1006 1007
	}

	return AE_OK;
}

static int vmbus_acpi_add(struct acpi_device *device)
{
	acpi_status result;
1008
	int ret_val = -ENODEV;
1009

1010 1011
	hv_acpi_dev = device;

1012
	result = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
1013
					vmbus_walk_resources, NULL);
1014

1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027
	if (ACPI_FAILURE(result))
		goto acpi_walk_err;
	/*
	 * The parent of the vmbus acpi device (Gen2 firmware) is the VMOD that
	 * has the mmio ranges. Get that.
	 */
	if (device->parent) {
		result = acpi_walk_resources(device->parent->handle,
					METHOD_NAME__CRS,
					vmbus_walk_resources, NULL);

		if (ACPI_FAILURE(result))
			goto acpi_walk_err;
1028 1029
		if (hyperv_mmio.start && hyperv_mmio.end)
			request_resource(&iomem_resource, &hyperv_mmio);
1030
	}
1031 1032 1033
	ret_val = 0;

acpi_walk_err:
1034
	complete(&probe_event);
1035
	return ret_val;
1036 1037
}

1038 1039 1040 1041 1042 1043 1044 1045 1046
static int vmbus_acpi_remove(struct acpi_device *device)
{
	int ret = 0;

	if (hyperv_mmio.start && hyperv_mmio.end)
		ret = release_resource(&hyperv_mmio);
	return ret;
}

1047 1048
static const struct acpi_device_id vmbus_acpi_device_ids[] = {
	{"VMBUS", 0},
1049
	{"VMBus", 0},
1050 1051 1052 1053 1054 1055 1056 1057 1058
	{"", 0},
};
MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);

static struct acpi_driver vmbus_acpi_driver = {
	.name = "vmbus",
	.ids = vmbus_acpi_device_ids,
	.ops = {
		.add = vmbus_acpi_add,
1059
		.remove = vmbus_acpi_remove,
1060 1061 1062
	},
};

1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073
static void hv_kexec_handler(void)
{
	int cpu;

	hv_synic_clockevents_cleanup();
	vmbus_initiate_unload();
	for_each_online_cpu(cpu)
		smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
	hv_cleanup();
};

1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085
static void hv_crash_handler(struct pt_regs *regs)
{
	vmbus_initiate_unload();
	/*
	 * In crash handler we can't schedule synic cleanup for all CPUs,
	 * doing the cleanup for current CPU only. This should be sufficient
	 * for kdump.
	 */
	hv_synic_cleanup(NULL);
	hv_cleanup();
};

1086
static int __init hv_acpi_init(void)
1087
{
1088
	int ret, t;
1089

1090
	if (x86_hyper != &x86_hyper_ms_hyperv)
1091 1092
		return -ENODEV;

1093 1094 1095 1096 1097
	init_completion(&probe_event);

	/*
	 * Get irq resources first.
	 */
1098 1099
	ret = acpi_bus_register_driver(&vmbus_acpi_driver);

1100 1101 1102
	if (ret)
		return ret;

1103 1104 1105 1106 1107
	t = wait_for_completion_timeout(&probe_event, 5*HZ);
	if (t == 0) {
		ret = -ETIMEDOUT;
		goto cleanup;
	}
1108 1109

	if (irq <= 0) {
1110 1111
		ret = -ENODEV;
		goto cleanup;
1112 1113
	}

1114 1115
	ret = vmbus_bus_init(irq);
	if (ret)
1116 1117
		goto cleanup;

1118
	hv_setup_kexec_handler(hv_kexec_handler);
1119
	hv_setup_crash_handler(hv_crash_handler);
1120

1121 1122 1123 1124
	return 0;

cleanup:
	acpi_bus_unregister_driver(&vmbus_acpi_driver);
1125
	hv_acpi_dev = NULL;
1126
	return ret;
1127 1128
}

1129 1130
static void __exit vmbus_exit(void)
{
1131 1132
	int cpu;

1133
	hv_remove_kexec_handler();
1134
	hv_remove_crash_handler();
1135
	vmbus_connection.conn_state = DISCONNECTED;
1136
	hv_synic_clockevents_cleanup();
1137
	vmbus_disconnect();
1138
	hv_remove_vmbus_irq();
1139
	tasklet_kill(&msg_dpc);
1140
	vmbus_free_channels();
1141 1142 1143 1144
	if (ms_hyperv.features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
		atomic_notifier_chain_unregister(&panic_notifier_list,
						 &hyperv_panic_block);
	}
1145 1146
	bus_unregister(&hv_bus);
	hv_cleanup();
1147 1148
	for_each_online_cpu(cpu) {
		tasklet_kill(hv_context.event_dpc[cpu]);
1149
		smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
1150
	}
1151
	hv_synic_free();
1152
	acpi_bus_unregister_driver(&vmbus_acpi_driver);
1153
	hv_cpu_hotplug_quirk(false);
1154 1155
}

1156

1157
MODULE_LICENSE("GPL");
1158

1159
subsys_initcall(hv_acpi_init);
1160
module_exit(vmbus_exit);