vmbus_drv.c 27.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
/*
 * Copyright (c) 2009, Microsoft Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Authors:
 *   Haiyang Zhang <haiyangz@microsoft.com>
 *   Hank Janssen  <hjanssen@microsoft.com>
 *   K. Y. Srinivasan <kys@microsoft.com>
 *
 */
23 24
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

25 26 27 28 29
#include <linux/init.h>
#include <linux/module.h>
#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/sysctl.h>
30
#include <linux/slab.h>
31
#include <linux/acpi.h>
32
#include <linux/completion.h>
33
#include <linux/hyperv.h>
34
#include <linux/kernel_stat.h>
35
#include <linux/clockchips.h>
36
#include <linux/cpu.h>
37
#include <asm/hyperv.h>
38
#include <asm/hypervisor.h>
39
#include <asm/mshyperv.h>
40 41
#include <linux/notifier.h>
#include <linux/ptrace.h>
42
#include "hyperv_vmbus.h"
43

44
static struct acpi_device  *hv_acpi_dev;
45

46
static struct tasklet_struct msg_dpc;
47
static struct completion probe_event;
48
static int irq;
49

50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74

/*
 * hyperv_panic_event - notifier callback that reports a crash to Hyper-V
 *
 * Copies ip, ax, bx, cx and dx of the current register set into the crash
 * parameter MSRs P0..P4 and then writes the crash-notify value to the crash
 * control MSR. The notifier arguments are not used.
 *
 * Always returns NOTIFY_DONE.
 */
int hyperv_panic_event(struct notifier_block *nb,
			unsigned long event, void *ptr)
{
	struct pt_regs *crash_regs = current_pt_regs();

	/* Crash parameters, in MSR order P0..P4 */
	wrmsrl(HV_X64_MSR_CRASH_P0, crash_regs->ip);
	wrmsrl(HV_X64_MSR_CRASH_P1, crash_regs->ax);
	wrmsrl(HV_X64_MSR_CRASH_P2, crash_regs->bx);
	wrmsrl(HV_X64_MSR_CRASH_P3, crash_regs->cx);
	wrmsrl(HV_X64_MSR_CRASH_P4, crash_regs->dx);

	/* Signal Hyper-V that the crash data above is now available */
	wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY);

	return NOTIFY_DONE;
}

/*
 * Notifier block wrapping hyperv_panic_event(); vmbus_bus_init() adds it to
 * panic_notifier_list when the guest crash MSR feature bit is set.
 */
static struct notifier_block hyperv_panic_block = {
	.notifier_call = hyperv_panic_event,
};

75 76 77 78 79
/*
 * Memory resource describing the Hyper-V MMIO range. start/end are filled in
 * by vmbus_walk_resources() from an ACPI ADDRESS64 resource, and
 * vmbus_acpi_add() requests the range from iomem_resource when both are set.
 */
struct resource hyperv_mmio = {
	.name  = "hyperv mmio",
	.flags = IORESOURCE_MEM,
};
EXPORT_SYMBOL_GPL(hyperv_mmio);
80

81 82 83 84 85 86 87 88
/*
 * vmbus_exists - report whether the vmbus ACPI device has been recorded
 *
 * Returns 0 when hv_acpi_dev is set, -ENODEV otherwise.
 */
static int vmbus_exists(void)
{
	return hv_acpi_dev ? 0 : -ENODEV;
}

89 90 91 92 93 94 95 96
/* Two hex characters per guid byte; callers add one byte for the NUL */
#define VMBUS_ALIAS_LEN ((sizeof((struct hv_vmbus_device_id *)0)->guid) * 2)

/*
 * print_alias_name - render the device type guid as lowercase hex
 * @hv_dev: device whose dev_type bytes are printed
 * @alias_name: output buffer of at least VMBUS_ALIAS_LEN + 1 bytes
 *
 * Each sprintf() writes two hex digits plus a terminating NUL, so the final
 * string is NUL-terminated at offset VMBUS_ALIAS_LEN.
 */
static void print_alias_name(struct hv_device *hv_dev, char *alias_name)
{
	int byte;

	for (byte = 0; byte * 2 < VMBUS_ALIAS_LEN; byte++)
		sprintf(alias_name + byte * 2, "%02x",
			hv_dev->dev_type.b[byte]);
}

97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113
static u8 channel_monitor_group(struct vmbus_channel *channel)
{
	return (u8)channel->offermsg.monitorid / 32;
}

static u8 channel_monitor_offset(struct vmbus_channel *channel)
{
	return (u8)channel->offermsg.monitorid % 32;
}

/* Pending word of the trigger group the channel belongs to on @monitor_page. */
static u32 channel_pending(struct vmbus_channel *channel,
			   struct hv_monitor_page *monitor_page)
{
	const u8 grp = channel_monitor_group(channel);

	return monitor_page->trigger_group[grp].pending;
}

114 115 116 117 118 119 120 121
/* Latency entry for the channel's (group, offset) slot on @monitor_page. */
static u32 channel_latency(struct vmbus_channel *channel,
			   struct hv_monitor_page *monitor_page)
{
	const u8 grp = channel_monitor_group(channel);
	const u8 slot = channel_monitor_offset(channel);

	return monitor_page->latency[grp][slot];
}

122 123 124 125 126 127 128 129
/* Connection id stored in the channel's (group, offset) slot on @monitor_page. */
static u32 channel_conn_id(struct vmbus_channel *channel,
			   struct hv_monitor_page *monitor_page)
{
	const u8 grp = channel_monitor_group(channel);
	const u8 slot = channel_monitor_offset(channel);

	return monitor_page->parameter[grp][slot].connectionid.u.id;
}

130 131 132 133 134 135 136 137 138 139 140
static ssize_t id_show(struct device *dev, struct device_attribute *dev_attr,
		       char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->offermsg.child_relid);
}
static DEVICE_ATTR_RO(id);

141 142 143 144 145 146 147 148 149 150 151
static ssize_t state_show(struct device *dev, struct device_attribute *dev_attr,
			  char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->state);
}
static DEVICE_ATTR_RO(state);

152 153 154 155 156 157 158 159 160 161 162
static ssize_t monitor_id_show(struct device *dev,
			       struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n", hv_dev->channel->offermsg.monitorid);
}
static DEVICE_ATTR_RO(monitor_id);

163 164 165 166 167 168 169 170 171 172 173 174
static ssize_t class_id_show(struct device *dev,
			       struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "{%pUl}\n",
		       hv_dev->channel->offermsg.offer.if_type.b);
}
static DEVICE_ATTR_RO(class_id);

175 176 177 178 179 180 181 182 183 184 185 186
static ssize_t device_id_show(struct device *dev,
			      struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "{%pUl}\n",
		       hv_dev->channel->offermsg.offer.if_instance.b);
}
static DEVICE_ATTR_RO(device_id);

187 188 189 190 191 192 193 194 195 196 197
/*
 * sysfs "modalias": print "vmbus:" followed by the hex form of the device
 * type guid produced by print_alias_name().
 */
static ssize_t modalias_show(struct device *dev,
			     struct device_attribute *dev_attr, char *buf)
{
	char alias[VMBUS_ALIAS_LEN + 1];

	print_alias_name(device_to_hv_device(dev), alias);

	return sprintf(buf, "vmbus:%s\n", alias);
}
static DEVICE_ATTR_RO(modalias);

198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224
/*
 * sysfs "server_monitor_pending": print the pending word of the channel's
 * trigger group on the server-side monitor page.
 *
 * The server_* latency and conn_id attributes in this file read
 * monitor_pages[0] and the client_* ones read monitor_pages[1]; this
 * attribute follows the same split instead of reporting the client page.
 *
 * Returns -ENODEV when the device has no channel.
 */
static ssize_t server_monitor_pending_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_pending(hv_dev->channel,
				       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_pending);

static ssize_t client_monitor_pending_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_pending(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_pending);
225

226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253
static ssize_t server_monitor_latency_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_latency(hv_dev->channel,
				       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_latency);

static ssize_t client_monitor_latency_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_latency(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_latency);

254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
static ssize_t server_monitor_conn_id_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_conn_id(hv_dev->channel,
				       vmbus_connection.monitor_pages[0]));
}
static DEVICE_ATTR_RO(server_monitor_conn_id);

static ssize_t client_monitor_conn_id_show(struct device *dev,
					   struct device_attribute *dev_attr,
					   char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);

	if (!hv_dev->channel)
		return -ENODEV;
	return sprintf(buf, "%d\n",
		       channel_conn_id(hv_dev->channel,
				       vmbus_connection.monitor_pages[1]));
}
static DEVICE_ATTR_RO(client_monitor_conn_id);

282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417
static ssize_t out_intr_mask_show(struct device *dev,
				  struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(out_intr_mask);

static ssize_t out_read_index_show(struct device *dev,
				   struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.current_read_index);
}
static DEVICE_ATTR_RO(out_read_index);

static ssize_t out_write_index_show(struct device *dev,
				    struct device_attribute *dev_attr,
				    char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.current_write_index);
}
static DEVICE_ATTR_RO(out_write_index);

static ssize_t out_read_bytes_avail_show(struct device *dev,
					 struct device_attribute *dev_attr,
					 char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(out_read_bytes_avail);

static ssize_t out_write_bytes_avail_show(struct device *dev,
					  struct device_attribute *dev_attr,
					  char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info outbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->outbound, &outbound);
	return sprintf(buf, "%d\n", outbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(out_write_bytes_avail);

static ssize_t in_intr_mask_show(struct device *dev,
				 struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.current_interrupt_mask);
}
static DEVICE_ATTR_RO(in_intr_mask);

static ssize_t in_read_index_show(struct device *dev,
				  struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.current_read_index);
}
static DEVICE_ATTR_RO(in_read_index);

static ssize_t in_write_index_show(struct device *dev,
				   struct device_attribute *dev_attr, char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.current_write_index);
}
static DEVICE_ATTR_RO(in_write_index);

static ssize_t in_read_bytes_avail_show(struct device *dev,
					struct device_attribute *dev_attr,
					char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.bytes_avail_toread);
}
static DEVICE_ATTR_RO(in_read_bytes_avail);

static ssize_t in_write_bytes_avail_show(struct device *dev,
					 struct device_attribute *dev_attr,
					 char *buf)
{
	struct hv_device *hv_dev = device_to_hv_device(dev);
	struct hv_ring_buffer_debug_info inbound;

	if (!hv_dev->channel)
		return -ENODEV;
	hv_ringbuffer_get_debuginfo(&hv_dev->channel->inbound, &inbound);
	return sprintf(buf, "%d\n", inbound.bytes_avail_towrite);
}
static DEVICE_ATTR_RO(in_write_bytes_avail);

/* Set up per device attributes in /sys/bus/vmbus/devices/<bus device> */
418 419
static struct attribute *vmbus_attrs[] = {
	&dev_attr_id.attr,
420
	&dev_attr_state.attr,
421
	&dev_attr_monitor_id.attr,
422
	&dev_attr_class_id.attr,
423
	&dev_attr_device_id.attr,
424
	&dev_attr_modalias.attr,
425 426
	&dev_attr_server_monitor_pending.attr,
	&dev_attr_client_monitor_pending.attr,
427 428
	&dev_attr_server_monitor_latency.attr,
	&dev_attr_client_monitor_latency.attr,
429 430
	&dev_attr_server_monitor_conn_id.attr,
	&dev_attr_client_monitor_conn_id.attr,
431 432 433 434 435 436 437 438 439 440
	&dev_attr_out_intr_mask.attr,
	&dev_attr_out_read_index.attr,
	&dev_attr_out_write_index.attr,
	&dev_attr_out_read_bytes_avail.attr,
	&dev_attr_out_write_bytes_avail.attr,
	&dev_attr_in_intr_mask.attr,
	&dev_attr_in_read_index.attr,
	&dev_attr_in_write_index.attr,
	&dev_attr_in_read_bytes_avail.attr,
	&dev_attr_in_write_bytes_avail.attr,
441 442 443 444
	NULL,
};
ATTRIBUTE_GROUPS(vmbus);

445 446 447 448 449 450
/*
 * vmbus_uevent - add uevent for our device
 *
 * This routine is invoked when a device is added or removed on the vmbus to
 * generate a uevent to udev in the userspace. The udev will then look at its
 * rule and the uevent generated here to load the appropriate driver.
 *
 * The alias string will be of the form vmbus:guid where guid is the string
 * representation of the device guid (each byte of the guid will be
 * represented with two hex characters).
 *
 * Returns whatever add_uevent_var() returns.
 */
static int vmbus_uevent(struct device *device, struct kobj_uevent_env *env)
{
	struct hv_device *dev = device_to_hv_device(device);
	int ret;
	char alias_name[VMBUS_ALIAS_LEN + 1];

	print_alias_name(dev, alias_name);
	ret = add_uevent_var(env, "MODALIAS=vmbus:%s", alias_name);
	return ret;
}

S
stephen hemminger 已提交
467
static const uuid_le null_guid;
468 469 470 471 472 473 474 475

static inline bool is_null_guid(const __u8 *guid)
{
	if (memcmp(guid, &null_guid, sizeof(uuid_le)))
		return false;
	return true;
}

476 477 478 479 480 481
/*
 * Return a matching hv_vmbus_device_id pointer.
 * If there is no match, return NULL.
 */
static const struct hv_vmbus_device_id *hv_vmbus_get_id(
					const struct hv_vmbus_device_id *id,
S
stephen hemminger 已提交
482
					const __u8 *guid)
483 484 485 486 487 488 489 490 491
{
	for (; !is_null_guid(id->guid); id++)
		if (!memcmp(&id->guid, guid, sizeof(uuid_le)))
			return id;

	return NULL;
}


492 493 494 495 496 497 498

/*
 * vmbus_match - Attempt to match the specified device to the specified driver
 */
static int vmbus_match(struct device *device, struct device_driver *driver)
{
	struct hv_driver *drv = drv_to_hv_drv(driver);
499
	struct hv_device *hv_dev = device_to_hv_device(device);
500

501 502
	if (hv_vmbus_get_id(drv->id_table, hv_dev->dev_type.b))
		return 1;
503

504
	return 0;
505 506
}

507 508 509 510 511 512 513 514
/*
 * vmbus_probe - Add the new vmbus's child device
 */
static int vmbus_probe(struct device *child_device)
{
	int ret = 0;
	struct hv_driver *drv =
			drv_to_hv_drv(child_device->driver);
515
	struct hv_device *dev = device_to_hv_device(child_device);
516
	const struct hv_vmbus_device_id *dev_id;
517

518
	dev_id = hv_vmbus_get_id(drv->id_table, dev->dev_type.b);
519
	if (drv->probe) {
520
		ret = drv->probe(dev, dev_id);
521
		if (ret != 0)
522 523
			pr_err("probe failed for device %s (%d)\n",
			       dev_name(child_device), ret);
524 525

	} else {
526 527
		pr_err("probe not set for driver %s\n",
		       dev_name(child_device));
528
		ret = -ENODEV;
529 530 531 532
	}
	return ret;
}

533 534 535 536 537
/*
 * vmbus_remove - Remove a vmbus device
 */
static int vmbus_remove(struct device *child_device)
{
538
	struct hv_driver *drv;
539
	struct hv_device *dev = device_to_hv_device(child_device);
540
	u32 relid = dev->channel->offermsg.child_relid;
541

542 543 544 545
	if (child_device->driver) {
		drv = drv_to_hv_drv(child_device->driver);
		if (drv->remove)
			drv->remove(dev);
546 547
		else {
			hv_process_channel_removal(dev->channel, relid);
548 549
			pr_err("remove not set for driver %s\n",
				dev_name(child_device));
550 551 552 553 554 555 556
		}
	} else {
		/*
		 * We don't have a driver for this device; deal with the
		 * rescind message by removing the channel.
		 */
		hv_process_channel_removal(dev->channel, relid);
557
	}
558 559 560 561

	return 0;
}

562 563 564 565 566 567 568

/*
 * vmbus_shutdown - Shutdown a vmbus device
 */
static void vmbus_shutdown(struct device *child_device)
{
	struct hv_driver *drv;
569
	struct hv_device *dev = device_to_hv_device(child_device);
570 571 572 573 574 575 576 577


	/* The device may not be attached yet */
	if (!child_device->driver)
		return;

	drv = drv_to_hv_drv(child_device->driver);

578 579
	if (drv->shutdown)
		drv->shutdown(dev);
580 581 582 583

	return;
}

584 585 586 587 588 589

/*
 * vmbus_device_release - Final callback release of the vmbus child device
 *
 * Installed as the device's release hook by vmbus_device_register(); frees
 * the hv_device that embeds @device.
 */
static void vmbus_device_release(struct device *device)
{
	struct hv_device *hv_dev = device_to_hv_device(device);

	kfree(hv_dev);

}

596
/* The one and only one */
597 598 599 600 601 602 603
static struct bus_type  hv_bus = {
	.name =		"vmbus",
	.match =		vmbus_match,
	.shutdown =		vmbus_shutdown,
	.remove =		vmbus_remove,
	.probe =		vmbus_probe,
	.uevent =		vmbus_uevent,
604
	.dev_groups =		vmbus_groups,
605 606
};

607 608 609 610 611 612 613 614 615
/*
 * Deferred-work wrapper for one hypervisor message. vmbus_on_msg_dpc()
 * allocates it, copies the message in and queues it; vmbus_onmessage_work()
 * recovers it from the embedded work item.
 */
struct onmessage_work_context {
	struct work_struct work;	/* queued on vmbus_connection.work_queue */
	struct hv_message msg;		/* private copy of the message slot */
};

static void vmbus_onmessage_work(struct work_struct *work)
{
	struct onmessage_work_context *ctx;

616 617 618 619
	/* Do not process messages if we're in DISCONNECTED state */
	if (vmbus_connection.conn_state == DISCONNECTED)
		return;

620 621 622 623 624 625
	ctx = container_of(work, struct onmessage_work_context,
			   work);
	vmbus_onmessage(&ctx->msg);
	kfree(ctx);
}

626
static void hv_process_timer_expiration(struct hv_message *msg, int cpu)
627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653
{
	struct clock_event_device *dev = hv_context.clk_evt[cpu];

	if (dev->event_handler)
		dev->event_handler(dev);

	msg->header.message_type = HVMSG_NONE;

	/*
	 * Make sure the write to MessageType (ie set to
	 * HVMSG_NONE) happens before we read the
	 * MessagePending and EOMing. Otherwise, the EOMing
	 * will not deliver any more messages since there is
	 * no empty slot
	 */
	mb();

	if (msg->header.message_flags.msg_pending) {
		/*
		 * This will cause message queue rescan to
		 * possibly deliver another msg from the
		 * hypervisor
		 */
		wrmsrl(HV_X64_MSR_EOM, 0);
	}
}

654
static void vmbus_on_msg_dpc(unsigned long data)
G
Greg Kroah-Hartman 已提交
655 656 657 658 659
{
	int cpu = smp_processor_id();
	void *page_addr = hv_context.synic_message_page[cpu];
	struct hv_message *msg = (struct hv_message *)page_addr +
				  VMBUS_MESSAGE_SINT;
660
	struct onmessage_work_context *ctx;
G
Greg Kroah-Hartman 已提交
661 662 663 664 665 666

	while (1) {
		if (msg->header.message_type == HVMSG_NONE) {
			/* no msg */
			break;
		} else {
667 668
			ctx = kmalloc(sizeof(*ctx), GFP_ATOMIC);
			if (ctx == NULL)
G
Greg Kroah-Hartman 已提交
669
				continue;
670 671
			INIT_WORK(&ctx->work, vmbus_onmessage_work);
			memcpy(&ctx->msg, msg, sizeof(*msg));
672
			queue_work(vmbus_connection.work_queue, &ctx->work);
G
Greg Kroah-Hartman 已提交
673 674 675 676 677 678 679 680 681 682 683
		}

		msg->header.message_type = HVMSG_NONE;

		/*
		 * Make sure the write to MessageType (ie set to
		 * HVMSG_NONE) happens before we read the
		 * MessagePending and EOMing. Otherwise, the EOMing
		 * will not deliver any more messages since there is
		 * no empty slot
		 */
684
		mb();
G
Greg Kroah-Hartman 已提交
685 686 687 688 689 690 691 692 693 694 695 696

		if (msg->header.message_flags.msg_pending) {
			/*
			 * This will cause message queue rescan to
			 * possibly deliver another msg from the
			 * hypervisor
			 */
			wrmsrl(HV_X64_MSR_EOM, 0);
		}
	}
}

697
static void vmbus_isr(void)
G
Greg Kroah-Hartman 已提交
698 699 700 701 702
{
	int cpu = smp_processor_id();
	void *page_addr;
	struct hv_message *msg;
	union hv_synic_event_flags *event;
703
	bool handled = false;
G
Greg Kroah-Hartman 已提交
704

705 706
	page_addr = hv_context.synic_event_page[cpu];
	if (page_addr == NULL)
707
		return;
708 709 710

	event = (union hv_synic_event_flags *)page_addr +
					 VMBUS_MESSAGE_SINT;
711 712 713 714 715
	/*
	 * Check for events before checking for messages. This is the order
	 * in which events and messages are checked in Windows guests on
	 * Hyper-V, and the Windows team suggested we do the same.
	 */
G
Greg Kroah-Hartman 已提交
716

717 718
	if ((vmbus_proto_version == VERSION_WS2008) ||
		(vmbus_proto_version == VERSION_WIN7)) {
G
Greg Kroah-Hartman 已提交
719

720 721 722 723 724 725 726 727 728 729 730 731
		/* Since we are a child, we only need to check bit 0 */
		if (sync_test_and_clear_bit(0,
			(unsigned long *) &event->flags32[0])) {
			handled = true;
		}
	} else {
		/*
		 * Our host is win8 or above. The signaling mechanism
		 * has changed and we can directly look at the event page.
		 * If bit n is set then we have an interrup on the channel
		 * whose id is n.
		 */
732 733
		handled = true;
	}
734

735
	if (handled)
736
		tasklet_schedule(hv_context.event_dpc[cpu]);
737 738


739 740 741 742
	page_addr = hv_context.synic_message_page[cpu];
	msg = (struct hv_message *)page_addr + VMBUS_MESSAGE_SINT;

	/* Check if there are actual msgs to be processed */
743 744 745 746 747 748
	if (msg->header.message_type != HVMSG_NONE) {
		if (msg->header.message_type == HVMSG_TIMER_EXPIRED)
			hv_process_timer_expiration(msg, cpu);
		else
			tasklet_schedule(&msg_dpc);
	}
749 750
}

751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783
#ifdef CONFIG_HOTPLUG_CPU
/* Replacement for smp_ops.cpu_disable that refuses every offline request. */
static int hyperv_cpu_disable(void)
{
	return -ENOSYS;
}

/*
 * hv_cpu_hotplug_quirk - install or remove the cpu offlining block
 * @vmbus_loaded: true to install hyperv_cpu_disable(), false to put the
 *                previously saved smp_ops.cpu_disable back (if one was saved)
 *
 * Does nothing when the negotiated protocol is WS2008 or WIN7.
 */
static void hv_cpu_hotplug_quirk(bool vmbus_loaded)
{
	static void *previous_cpu_disable;

	/*
	 * Offlining a CPU when running on newer hypervisors (WS2012R2, Win8,
	 * ...) is not supported at this moment as channel interrupts are
	 * distributed across all of them.
	 */
	if (vmbus_proto_version == VERSION_WS2008 ||
	    vmbus_proto_version == VERSION_WIN7)
		return;

	if (!vmbus_loaded) {
		if (previous_cpu_disable)
			smp_ops.cpu_disable = previous_cpu_disable;
		return;
	}

	previous_cpu_disable = smp_ops.cpu_disable;
	smp_ops.cpu_disable = hyperv_cpu_disable;
	pr_notice("CPU offlining is not supported by hypervisor\n");
}
#else
/* Without CPU hotplug support there is nothing to block. */
static void hv_cpu_hotplug_quirk(bool vmbus_loaded)
{
}
#endif

784
/*
785 786 787
 * vmbus_bus_init -Main vmbus driver initialization routine.
 *
 * Here, we
788 789 790 791
 *	- initialize the vmbus driver context
 *	- invoke the vmbus hv main init routine
 *	- get the irq resource
 *	- retrieve the channel offers
792
 */
793
static int vmbus_bus_init(int irq)
794
{
795
	int ret;
796

797 798
	/* Hypervisor initialization...setup hypercall page..etc */
	ret = hv_init();
799
	if (ret != 0) {
800
		pr_err("Unable to initialize the hypervisor - 0x%x\n", ret);
801
		return ret;
802 803
	}

804
	tasklet_init(&msg_dpc, vmbus_on_msg_dpc, 0);
805

806
	ret = bus_register(&hv_bus);
807
	if (ret)
808
		goto err_cleanup;
809

810
	hv_setup_vmbus_irq(vmbus_isr);
811

812 813 814
	ret = hv_synic_alloc();
	if (ret)
		goto err_alloc;
815
	/*
816
	 * Initialize the per-cpu interrupt state and
817 818
	 * connect to the host.
	 */
819
	on_each_cpu(hv_synic_init, NULL, 1);
820
	ret = vmbus_connect();
821
	if (ret)
822
		goto err_alloc;
823

824
	hv_cpu_hotplug_quirk(true);
825 826 827 828 829 830 831 832 833

	/*
	 * Only register if the crash MSRs are available
	 */
	if (ms_hyperv.features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
		atomic_notifier_chain_register(&panic_notifier_list,
					       &hyperv_panic_block);
	}

834
	vmbus_request_offers();
835

836
	return 0;
837

838 839
err_alloc:
	hv_synic_free();
840
	hv_remove_vmbus_irq();
841 842 843 844 845 846 847

	bus_unregister(&hv_bus);

err_cleanup:
	hv_cleanup();

	return ret;
848 849
}

850
/**
851 852 853 854
 * __vmbus_child_driver_register - Register a vmbus's driver
 * @drv: Pointer to driver structure you want to register
 * @owner: owner module of the drv
 * @mod_name: module name string
855 856
 *
 * Registers the given driver with Linux through the 'driver_register()' call
857
 * and sets up the hyper-v vmbus handling for this driver.
858 859
 * It will return the state of the 'driver_register()' call.
 *
860
 */
861
int __vmbus_driver_register(struct hv_driver *hv_driver, struct module *owner, const char *mod_name)
862
{
863
	int ret;
864

865
	pr_info("registering driver %s\n", hv_driver->name);
866

867 868 869 870
	ret = vmbus_exists();
	if (ret < 0)
		return ret;

871 872 873 874
	hv_driver->driver.name = hv_driver->name;
	hv_driver->driver.owner = owner;
	hv_driver->driver.mod_name = mod_name;
	hv_driver->driver.bus = &hv_bus;
875

876
	ret = driver_register(&hv_driver->driver);
877

878
	return ret;
879
}
880
EXPORT_SYMBOL_GPL(__vmbus_driver_register);
881

882
/**
883 884
 * vmbus_driver_unregister() - Unregister a vmbus's driver
 * @drv: Pointer to driver structure you want to un-register
885
 *
886 887
 * Un-register the given driver that was previous registered with a call to
 * vmbus_driver_register()
888
 */
889
void vmbus_driver_unregister(struct hv_driver *hv_driver)
890
{
891
	pr_info("unregistering driver %s\n", hv_driver->name);
892

893
	if (!vmbus_exists())
894
		driver_unregister(&hv_driver->driver);
895
}
896
EXPORT_SYMBOL_GPL(vmbus_driver_unregister);
897

898
/*
899
 * vmbus_device_create - Creates and registers a new child device
900
 * on the vmbus.
901
 */
S
stephen hemminger 已提交
902 903 904
struct hv_device *vmbus_device_create(const uuid_le *type,
				      const uuid_le *instance,
				      struct vmbus_channel *channel)
905
{
906
	struct hv_device *child_device_obj;
907

908 909
	child_device_obj = kzalloc(sizeof(struct hv_device), GFP_KERNEL);
	if (!child_device_obj) {
910
		pr_err("Unable to allocate device object for child device\n");
911 912 913
		return NULL;
	}

914
	child_device_obj->channel = channel;
915
	memcpy(&child_device_obj->dev_type, type, sizeof(uuid_le));
916
	memcpy(&child_device_obj->dev_instance, instance,
917
	       sizeof(uuid_le));
918 919 920 921 922


	return child_device_obj;
}

923
/*
924
 * vmbus_device_register - Register the child device
925
 */
926
int vmbus_device_register(struct hv_device *child_device_obj)
927
{
928
	int ret = 0;
929

930 931
	dev_set_name(&child_device_obj->device, "vmbus_%d",
		     child_device_obj->channel->id);
932

933
	child_device_obj->device.bus = &hv_bus;
934
	child_device_obj->device.parent = &hv_acpi_dev->dev;
935
	child_device_obj->device.release = vmbus_device_release;
936

937 938 939 940
	/*
	 * Register with the LDM. This will kick off the driver/device
	 * binding...which will eventually call vmbus_match() and vmbus_probe()
	 */
941
	ret = device_register(&child_device_obj->device);
942 943

	if (ret)
944
		pr_err("Unable to register child device\n");
945
	else
946
		pr_debug("child device %s registered\n",
947
			dev_name(&child_device_obj->device));
948 949 950 951

	return ret;
}

952
/*
953
 * vmbus_device_unregister - Remove the specified child device
954
 * from the vmbus.
955
 */
956
void vmbus_device_unregister(struct hv_device *device_obj)
957
{
958 959 960
	pr_debug("child device %s unregistered\n",
		dev_name(&device_obj->device));

961 962 963 964
	/*
	 * Kick off the process of unregistering the device.
	 * This will call vmbus_remove() and eventually vmbus_device_release()
	 */
965
	device_unregister(&device_obj->device);
966 967 968
}


969
/*
970 971
 * VMBUS is an acpi enumerated device. Get the the information we
 * need from DSDT.
972 973
 */

974
static acpi_status vmbus_walk_resources(struct acpi_resource *res, void *ctx)
975
{
976 977 978
	switch (res->type) {
	case ACPI_RESOURCE_TYPE_IRQ:
		irq = res->data.irq.interrupts[0];
G
Gerd Hoffmann 已提交
979
		break;
980

981
	case ACPI_RESOURCE_TYPE_ADDRESS64:
982 983
		hyperv_mmio.start = res->data.address64.address.minimum;
		hyperv_mmio.end = res->data.address64.address.maximum;
G
Gerd Hoffmann 已提交
984
		break;
985 986 987 988 989 990 991 992
	}

	return AE_OK;
}

static int vmbus_acpi_add(struct acpi_device *device)
{
	acpi_status result;
993
	int ret_val = -ENODEV;
994

995 996
	hv_acpi_dev = device;

997
	result = acpi_walk_resources(device->handle, METHOD_NAME__CRS,
998
					vmbus_walk_resources, NULL);
999

1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012
	if (ACPI_FAILURE(result))
		goto acpi_walk_err;
	/*
	 * The parent of the vmbus acpi device (Gen2 firmware) is the VMOD that
	 * has the mmio ranges. Get that.
	 */
	if (device->parent) {
		result = acpi_walk_resources(device->parent->handle,
					METHOD_NAME__CRS,
					vmbus_walk_resources, NULL);

		if (ACPI_FAILURE(result))
			goto acpi_walk_err;
1013 1014
		if (hyperv_mmio.start && hyperv_mmio.end)
			request_resource(&iomem_resource, &hyperv_mmio);
1015
	}
1016 1017 1018
	ret_val = 0;

acpi_walk_err:
1019
	complete(&probe_event);
1020
	return ret_val;
1021 1022 1023 1024
}

/* ACPI ids the vmbus driver binds to; the empty id terminates the table */
static const struct acpi_device_id vmbus_acpi_device_ids[] = {
	{"VMBUS", 0},
	{"VMBus", 0},
	{"", 0},
};
MODULE_DEVICE_TABLE(acpi, vmbus_acpi_device_ids);

/*
 * ACPI driver for the vmbus device; registered in hv_acpi_init() and
 * unregistered in vmbus_exit() or on hv_acpi_init() failure.
 */
static struct acpi_driver vmbus_acpi_driver = {
	.name = "vmbus",
	.ids = vmbus_acpi_device_ids,
	.ops = {
		.add = vmbus_acpi_add,
	},
};

1038
/*
 * hv_acpi_init - module entry point
 *
 * Bails out with -ENODEV unless running on Hyper-V. Otherwise registers the
 * ACPI driver, waits up to 5 seconds for vmbus_acpi_add() to complete
 * probe_event, checks that an interrupt number was found and then runs
 * vmbus_bus_init().
 *
 * Returns 0 on success. On failure after the ACPI driver was registered,
 * the driver is unregistered and hv_acpi_dev is reset to NULL; the error is
 * -ETIMEDOUT for a missed completion, -ENODEV for a missing irq, or the
 * vmbus_bus_init() result.
 */
static int __init hv_acpi_init(void)
{
	int ret, t;

	if (x86_hyper != &x86_hyper_ms_hyperv)
		return -ENODEV;

	init_completion(&probe_event);

	/*
	 * Get irq resources first.
	 */
	ret = acpi_bus_register_driver(&vmbus_acpi_driver);

	if (ret)
		return ret;

	t = wait_for_completion_timeout(&probe_event, 5*HZ);
	if (t == 0) {
		ret = -ETIMEDOUT;
		goto cleanup;
	}

	if (irq <= 0) {
		ret = -ENODEV;
		goto cleanup;
	}

	ret = vmbus_bus_init(irq);
	if (ret)
		goto cleanup;

	return 0;

cleanup:
	acpi_bus_unregister_driver(&vmbus_acpi_driver);
	hv_acpi_dev = NULL;
	return ret;
}

1078 1079
/*
 * vmbus_exit - module exit point
 *
 * Marks the connection DISCONNECTED and tears down what vmbus_bus_init() and
 * hv_acpi_init() set up.
 *
 * The panic notifier is unregistered under the same feature check that
 * vmbus_bus_init() uses to register it; leaving it on panic_notifier_list
 * would keep a pointer to hyperv_panic_block and hyperv_panic_event() after
 * the module is gone.
 */
static void __exit vmbus_exit(void)
{
	int cpu;

	vmbus_connection.conn_state = DISCONNECTED;
	hv_synic_clockevents_cleanup();
	hv_remove_vmbus_irq();
	vmbus_free_channels();
	if (ms_hyperv.features & HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE) {
		atomic_notifier_chain_unregister(&panic_notifier_list,
						 &hyperv_panic_block);
	}
	bus_unregister(&hv_bus);
	hv_cleanup();
	for_each_online_cpu(cpu)
		smp_call_function_single(cpu, hv_synic_cleanup, NULL, 1);
	acpi_bus_unregister_driver(&vmbus_acpi_driver);
	hv_cpu_hotplug_quirk(false);
	vmbus_disconnect();
}

1095

1096
MODULE_LICENSE("GPL");

/* hv_acpi_init() runs as a subsys initcall; vmbus_exit() is the module exit */
subsys_initcall(hv_acpi_init);
module_exit(vmbus_exit);