/*
 * Copyright (c) 2006, Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * this program; if not, write to the Free Software Foundation, Inc., 59 Temple
 * Place - Suite 330, Boston, MA 02111-1307 USA.
 *
 * Copyright (C) 2006-2008 Intel Corporation
 * Author: Ashok Raj <ashok.raj@intel.com>
 * Author: Shaohua Li <shaohua.li@intel.com>
 * Author: Anil S Keshavamurthy <anil.s.keshavamurthy@intel.com>
 *
 * This file implements early detection/parsing of Remapping Devices
 * reported to OS through BIOS via DMA remapping reporting (DMAR) ACPI
 * tables.
 *
 * These routines are used by both DMA-remapping and Interrupt-remapping
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt /* has to precede printk.h */

#include <linux/pci.h>
#include <linux/dmar.h>
#include <linux/iova.h>
#include <linux/intel-iommu.h>
#include <linux/timer.h>
#include <linux/irq.h>
#include <linux/interrupt.h>
#include <linux/tboot.h>
#include <linux/dmi.h>
#include <linux/slab.h>
#include <asm/irq_remapping.h>
#include <asm/iommu_table.h>

#include "irq_remapping.h"

/*
 * Assumptions:
 * 1) The hotplug framework guarantees that DMAR unit will be hot-added
 *    before IO devices managed by that unit.
 * 2) The hotplug framework guarantees that DMAR unit will be hot-removed
 *    after IO devices managed by that unit.
 * 3) Hotplug events are rare.
 *
 * Locking rules for DMA and interrupt remapping related global data structures:
 * 1) Use dmar_global_lock in process context
 * 2) Use RCU in interrupt context
 */
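
/*
 * Illustrative sketch of the locking rules above (not used by this file):
 * a process-context walker of dmar_drhd_units takes dmar_global_lock,
 * while atomic context relies on the RCU read lock instead, e.g.
 *
 *	down_read(&dmar_global_lock);
 *	for_each_drhd_unit(drhd)
 *		...;
 *	up_read(&dmar_global_lock);
 *
 * versus
 *
 *	rcu_read_lock();
 *	for_each_drhd_unit(drhd)
 *		...;
 *	rcu_read_unlock();
 */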
DECLARE_RWSEM(dmar_global_lock);
LIST_HEAD(dmar_drhd_units);

struct acpi_table_header * __initdata dmar_tbl;
static acpi_size dmar_tbl_size;
static int dmar_dev_scope_status = 1;

static int alloc_iommu(struct dmar_drhd_unit *drhd);
static void free_iommu(struct intel_iommu *iommu);

static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd)
{
	/*
	 * add INCLUDE_ALL at the tail, so that a scan of the list will
	 * find it at the very end.
	 */
	if (drhd->include_all)
		list_add_tail_rcu(&drhd->list, &dmar_drhd_units);
	else
		list_add_rcu(&drhd->list, &dmar_drhd_units);
}

void *dmar_alloc_dev_scope(void *start, void *end, int *cnt)
{
	struct acpi_dmar_device_scope *scope;

	*cnt = 0;
	while (start < end) {
		scope = start;
		if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ACPI ||
		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT ||
		    scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE)
			(*cnt)++;
		else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC &&
			scope->entry_type != ACPI_DMAR_SCOPE_TYPE_HPET) {
			pr_warn("Unsupported device scope\n");
		}
		start += scope->length;
	}
	if (*cnt == 0)
		return NULL;

	return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL);
}

void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt)
{
	int i;
	struct device *tmp_dev;

	if (*devices && *cnt) {
		for_each_active_dev_scope(*devices, *cnt, i, tmp_dev)
			put_device(tmp_dev);
		kfree(*devices);
	}

	*devices = NULL;
	*cnt = 0;
}

/* Optimize out kzalloc()/kfree() for normal cases */
static char dmar_pci_notify_info_buf[64];

static struct dmar_pci_notify_info *
dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event)
{
	int level = 0;
	size_t size;
	struct pci_dev *tmp;
	struct dmar_pci_notify_info *info;

	BUG_ON(dev->is_virtfn);

	/* Only generate path[] for device addition event */
	if (event == BUS_NOTIFY_ADD_DEVICE)
		for (tmp = dev; tmp; tmp = tmp->bus->self)
			level++;

	size = sizeof(*info) + level * sizeof(struct acpi_dmar_pci_path);
	if (size <= sizeof(dmar_pci_notify_info_buf)) {
		info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf;
	} else {
		info = kzalloc(size, GFP_KERNEL);
		if (!info) {
			pr_warn("Out of memory when allocating notify_info "
				"for %s.\n", pci_name(dev));
			if (dmar_dev_scope_status == 0)
				dmar_dev_scope_status = -ENOMEM;
			return NULL;
		}
	}

	info->event = event;
	info->dev = dev;
	info->seg = pci_domain_nr(dev->bus);
	info->level = level;
	if (event == BUS_NOTIFY_ADD_DEVICE) {
		for (tmp = dev; tmp; tmp = tmp->bus->self) {
			level--;
			info->path[level].device = PCI_SLOT(tmp->devfn);
			info->path[level].function = PCI_FUNC(tmp->devfn);
			if (pci_is_root_bus(tmp->bus))
				info->bus = tmp->bus->number;
		}
	}

	return info;
}

static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info)
{
	if ((void *)info != dmar_pci_notify_info_buf)
		kfree(info);
}

static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus,
				struct acpi_dmar_pci_path *path, int count)
{
	int i;

	if (info->bus != bus)
		return false;
	if (info->level != count)
		return false;

	for (i = 0; i < count; i++) {
		if (path[i].device != info->path[i].device ||
		    path[i].function != info->path[i].function)
			return false;
	}

	return true;
}

/* Return: > 0 if match found, 0 if no match found, < 0 if error happens */
int dmar_insert_dev_scope(struct dmar_pci_notify_info *info,
			  void *start, void *end, u16 segment,
			  struct dmar_dev_scope *devices,
			  int devices_cnt)
{
	int i, level;
	struct device *tmp, *dev = &info->dev->dev;
	struct acpi_dmar_device_scope *scope;
	struct acpi_dmar_pci_path *path;

	if (segment != info->seg)
		return 0;

	for (; start < end; start += scope->length) {
		scope = start;
		if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT &&
		    scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE)
			continue;

		path = (struct acpi_dmar_pci_path *)(scope + 1);
		level = (scope->length - sizeof(*scope)) / sizeof(*path);
		if (!dmar_match_pci_path(info, scope->bus, path, level))
			continue;

		if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT) ^
		    (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL)) {
			pr_warn("Device scope type does not match for %s\n",
				pci_name(info->dev));
			return -EINVAL;
		}

		for_each_dev_scope(devices, devices_cnt, i, tmp)
			if (tmp == NULL) {
				devices[i].bus = info->dev->bus->number;
				devices[i].devfn = info->dev->devfn;
				rcu_assign_pointer(devices[i].dev,
						   get_device(dev));
				return 1;
			}
		BUG_ON(i >= devices_cnt);
	}

	return 0;
}

int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment,
			  struct dmar_dev_scope *devices, int count)
{
	int index;
	struct device *tmp;

	if (info->seg != segment)
		return 0;

	for_each_active_dev_scope(devices, count, index, tmp)
		if (tmp == &info->dev->dev) {
			rcu_assign_pointer(devices[index].dev, NULL);
			synchronize_rcu();
			put_device(tmp);
			return 1;
		}

	return 0;
}

static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info)
{
	int ret = 0;
	struct dmar_drhd_unit *dmaru;
	struct acpi_dmar_hardware_unit *drhd;

	for_each_drhd_unit(dmaru) {
		if (dmaru->include_all)
			continue;

		drhd = container_of(dmaru->hdr,
				    struct acpi_dmar_hardware_unit, header);
		ret = dmar_insert_dev_scope(info, (void *)(drhd + 1),
				((void *)drhd) + drhd->header.length,
				dmaru->segment,
				dmaru->devices, dmaru->devices_cnt);
		if (ret != 0)
			break;
	}
	if (ret >= 0)
		ret = dmar_iommu_notify_scope_dev(info);
	if (ret < 0 && dmar_dev_scope_status == 0)
		dmar_dev_scope_status = ret;

	return ret;
}

static void  dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info)
{
	struct dmar_drhd_unit *dmaru;

	for_each_drhd_unit(dmaru)
		if (dmar_remove_dev_scope(info, dmaru->segment,
			dmaru->devices, dmaru->devices_cnt))
			break;
	dmar_iommu_notify_scope_dev(info);
}

static int dmar_pci_bus_notifier(struct notifier_block *nb,
				 unsigned long action, void *data)
{
	struct pci_dev *pdev = to_pci_dev(data);
	struct dmar_pci_notify_info *info;

	/* Only care about add/remove events for physical functions */
	if (pdev->is_virtfn)
		return NOTIFY_DONE;
	if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_DEL_DEVICE)
		return NOTIFY_DONE;

	info = dmar_alloc_pci_notify_info(pdev, action);
	if (!info)
		return NOTIFY_DONE;

	down_write(&dmar_global_lock);
	if (action == BUS_NOTIFY_ADD_DEVICE)
		dmar_pci_bus_add_dev(info);
	else if (action == BUS_NOTIFY_DEL_DEVICE)
		dmar_pci_bus_del_dev(info);
	up_write(&dmar_global_lock);

	dmar_free_pci_notify_info(info);

	return NOTIFY_OK;
}

static struct notifier_block dmar_pci_bus_nb = {
	.notifier_call = dmar_pci_bus_notifier,
	.priority = INT_MIN,
};

/**
 * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition
 * structure which uniquely represents one DMA remapping hardware unit
 * present in the platform
 */
static int __init
dmar_parse_one_drhd(struct acpi_dmar_header *header)
{
	struct acpi_dmar_hardware_unit *drhd;
	struct dmar_drhd_unit *dmaru;
	int ret = 0;

	drhd = (struct acpi_dmar_hardware_unit *)header;
	dmaru = kzalloc(sizeof(*dmaru), GFP_KERNEL);
	if (!dmaru)
		return -ENOMEM;

	dmaru->hdr = header;
	dmaru->reg_base_addr = drhd->address;
	dmaru->segment = drhd->segment;
	dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */
	dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1),
					      ((void *)drhd) + drhd->header.length,
					      &dmaru->devices_cnt);
	if (dmaru->devices_cnt && dmaru->devices == NULL) {
		kfree(dmaru);
		return -ENOMEM;
	}

	ret = alloc_iommu(dmaru);
	if (ret) {
		dmar_free_dev_scope(&dmaru->devices,
				    &dmaru->devices_cnt);
		kfree(dmaru);
		return ret;
	}
	dmar_register_drhd_unit(dmaru);
	return 0;
}

static void dmar_free_drhd(struct dmar_drhd_unit *dmaru)
{
	if (dmaru->devices && dmaru->devices_cnt)
		dmar_free_dev_scope(&dmaru->devices, &dmaru->devices_cnt);
	if (dmaru->iommu)
		free_iommu(dmaru->iommu);
	kfree(dmaru);
}

static int __init dmar_parse_one_andd(struct acpi_dmar_header *header)
{
	struct acpi_dmar_andd *andd = (void *)header;

	/* Check for NUL termination within the designated length */
	if (strnlen(andd->object_name, header->length - 8) == header->length - 8) {
		WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND,
			   "Your BIOS is broken; ANDD object name is not NUL-terminated\n"
			   "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
			   dmi_get_system_info(DMI_BIOS_VENDOR),
			   dmi_get_system_info(DMI_BIOS_VERSION),
			   dmi_get_system_info(DMI_PRODUCT_VERSION));
		return -EINVAL;
	}
	pr_info("ANDD device: %x name: %s\n", andd->device_number,
		andd->object_name);

	return 0;
}

#ifdef CONFIG_ACPI_NUMA
static int __init
dmar_parse_one_rhsa(struct acpi_dmar_header *header)
{
	struct acpi_dmar_rhsa *rhsa;
	struct dmar_drhd_unit *drhd;

	rhsa = (struct acpi_dmar_rhsa *)header;
	for_each_drhd_unit(drhd) {
		if (drhd->reg_base_addr == rhsa->base_address) {
			int node = acpi_map_pxm_to_node(rhsa->proximity_domain);

			if (!node_online(node))
				node = -1;
			drhd->iommu->node = node;
			return 0;
		}
	}
	WARN_TAINT(
		1, TAINT_FIRMWARE_WORKAROUND,
		"Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		rhsa->base_address,
		dmi_get_system_info(DMI_BIOS_VENDOR),
		dmi_get_system_info(DMI_BIOS_VERSION),
		dmi_get_system_info(DMI_PRODUCT_VERSION));

	return 0;
}
#endif

static void __init
dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
{
	struct acpi_dmar_hardware_unit *drhd;
	struct acpi_dmar_reserved_memory *rmrr;
	struct acpi_dmar_atsr *atsr;
	struct acpi_dmar_rhsa *rhsa;

	switch (header->type) {
	case ACPI_DMAR_TYPE_HARDWARE_UNIT:
		drhd = container_of(header, struct acpi_dmar_hardware_unit,
				    header);
		pr_info("DRHD base: %#016Lx flags: %#x\n",
			(unsigned long long)drhd->address, drhd->flags);
		break;
	case ACPI_DMAR_TYPE_RESERVED_MEMORY:
		rmrr = container_of(header, struct acpi_dmar_reserved_memory,
				    header);
		pr_info("RMRR base: %#016Lx end: %#016Lx\n",
			(unsigned long long)rmrr->base_address,
			(unsigned long long)rmrr->end_address);
		break;
	case ACPI_DMAR_TYPE_ATSR:
		atsr = container_of(header, struct acpi_dmar_atsr, header);
		pr_info("ATSR flags: %#x\n", atsr->flags);
		break;
	case ACPI_DMAR_HARDWARE_AFFINITY:
		rhsa = container_of(header, struct acpi_dmar_rhsa, header);
		pr_info("RHSA base: %#016Lx proximity domain: %#x\n",
		       (unsigned long long)rhsa->base_address,
		       rhsa->proximity_domain);
		break;
	case ACPI_DMAR_TYPE_ANDD:
		/* We don't print this here because we need to sanity-check
		   it first. So print it in dmar_parse_one_andd() instead. */
		break;
	}
}

/**
 * dmar_table_detect - checks to see if the platform supports DMAR devices
 */
static int __init dmar_table_detect(void)
{
	acpi_status status = AE_OK;

	/* if we can find the DMAR table, then there are DMAR devices */
	status = acpi_get_table_with_size(ACPI_SIG_DMAR, 0,
				(struct acpi_table_header **)&dmar_tbl,
				&dmar_tbl_size);

	if (ACPI_SUCCESS(status) && !dmar_tbl) {
		pr_warn("Unable to map DMAR\n");
		status = AE_NOT_FOUND;
	}

	return (ACPI_SUCCESS(status) ? 1 : 0);
}

/**
 * parse_dmar_table - parses the DMA reporting table
 */
static int __init
parse_dmar_table(void)
{
	struct acpi_table_dmar *dmar;
	struct acpi_dmar_header *entry_header;
	int ret = 0;
	int drhd_count = 0;

	/*
	 * Do it again; the earlier dmar_tbl mapping could have come from
	 * the fixed map.
	 */
	dmar_table_detect();

	/*
	 * ACPI tables may not be DMA protected by tboot, so use DMAR copy
	 * SINIT saved in SinitMleData in TXT heap (which is DMA protected)
	 */
	dmar_tbl = tboot_get_dmar_table(dmar_tbl);

	dmar = (struct acpi_table_dmar *)dmar_tbl;
	if (!dmar)
		return -ENODEV;

	if (dmar->width < PAGE_SHIFT - 1) {
		pr_warn("Invalid DMAR haw\n");
		return -EINVAL;
	}

	pr_info("Host address width %d\n", dmar->width + 1);

	entry_header = (struct acpi_dmar_header *)(dmar + 1);
	while (((unsigned long)entry_header) <
			(((unsigned long)dmar) + dmar_tbl->length)) {
		/* Avoid looping forever on bad ACPI tables */
		if (entry_header->length == 0) {
			pr_warn("Invalid 0-length structure\n");
			ret = -EINVAL;
			break;
		}

		dmar_table_print_dmar_entry(entry_header);

		switch (entry_header->type) {
		case ACPI_DMAR_TYPE_HARDWARE_UNIT:
			drhd_count++;
			ret = dmar_parse_one_drhd(entry_header);
			break;
		case ACPI_DMAR_TYPE_RESERVED_MEMORY:
			ret = dmar_parse_one_rmrr(entry_header);
			break;
		case ACPI_DMAR_TYPE_ATSR:
			ret = dmar_parse_one_atsr(entry_header);
			break;
		case ACPI_DMAR_HARDWARE_AFFINITY:
#ifdef CONFIG_ACPI_NUMA
			ret = dmar_parse_one_rhsa(entry_header);
#endif
			break;
		case ACPI_DMAR_TYPE_ANDD:
			ret = dmar_parse_one_andd(entry_header);
			break;
		default:
			pr_warn("Unknown DMAR structure type %d\n",
				entry_header->type);
			ret = 0; /* for forward compatibility */
			break;
		}
		if (ret)
			break;

		entry_header = ((void *)entry_header + entry_header->length);
	}
	if (drhd_count == 0)
		pr_warn(FW_BUG "No DRHD structure found in DMAR table\n");
	return ret;
}

static int dmar_pci_device_match(struct dmar_dev_scope devices[],
				 int cnt, struct pci_dev *dev)
{
	int index;
	struct device *tmp;

	while (dev) {
		for_each_active_dev_scope(devices, cnt, index, tmp)
			if (dev_is_pci(tmp) && dev == to_pci_dev(tmp))
				return 1;

		/* Check our parent */
		dev = dev->bus->self;
	}

	return 0;
}

struct dmar_drhd_unit *
dmar_find_matched_drhd_unit(struct pci_dev *dev)
{
	struct dmar_drhd_unit *dmaru;
	struct acpi_dmar_hardware_unit *drhd;

	dev = pci_physfn(dev);

	rcu_read_lock();
	for_each_drhd_unit(dmaru) {
		drhd = container_of(dmaru->hdr,
				    struct acpi_dmar_hardware_unit,
				    header);

		if (dmaru->include_all &&
		    drhd->segment == pci_domain_nr(dev->bus))
			goto out;

		if (dmar_pci_device_match(dmaru->devices,
					  dmaru->devices_cnt, dev))
			goto out;
	}
	dmaru = NULL;
out:
	rcu_read_unlock();

	return dmaru;
}

static void __init dmar_acpi_insert_dev_scope(u8 device_number,
					      struct acpi_device *adev)
{
	struct dmar_drhd_unit *dmaru;
	struct acpi_dmar_hardware_unit *drhd;
	struct acpi_dmar_device_scope *scope;
	struct device *tmp;
	int i;
	struct acpi_dmar_pci_path *path;

	for_each_drhd_unit(dmaru) {
		drhd = container_of(dmaru->hdr,
				    struct acpi_dmar_hardware_unit,
				    header);

		for (scope = (void *)(drhd + 1);
		     (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length;
		     scope = ((void *)scope) + scope->length) {
			if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ACPI)
				continue;
			if (scope->enumeration_id != device_number)
				continue;

			path = (void *)(scope + 1);
			pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n",
				dev_name(&adev->dev), dmaru->reg_base_addr,
				scope->bus, path->device, path->function);
			for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp)
				if (tmp == NULL) {
					dmaru->devices[i].bus = scope->bus;
					dmaru->devices[i].devfn = PCI_DEVFN(path->device,
									    path->function);
					rcu_assign_pointer(dmaru->devices[i].dev,
							   get_device(&adev->dev));
					return;
				}
			BUG_ON(i >= dmaru->devices_cnt);
		}
	}
	pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n",
		device_number, dev_name(&adev->dev));
}

static int __init dmar_acpi_dev_scope_init(void)
{
	struct acpi_dmar_andd *andd;

	if (dmar_tbl == NULL)
		return -ENODEV;

	for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar);
	     ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length;
	     andd = ((void *)andd) + andd->header.length) {
		if (andd->header.type == ACPI_DMAR_TYPE_ANDD) {
			acpi_handle h;
			struct acpi_device *adev;

			if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT,
							  andd->object_name,
							  &h))) {
				pr_err("Failed to find handle for ACPI object %s\n",
				       andd->object_name);
				continue;
			}
			acpi_bus_get_device(h, &adev);
			if (!adev) {
				pr_err("Failed to get device for ACPI object %s\n",
				       andd->object_name);
				continue;
			}
			dmar_acpi_insert_dev_scope(andd->device_number, adev);
		}
	}
	return 0;
}

int __init dmar_dev_scope_init(void)
{
	struct pci_dev *dev = NULL;
	struct dmar_pci_notify_info *info;

	if (dmar_dev_scope_status != 1)
		return dmar_dev_scope_status;

	if (list_empty(&dmar_drhd_units)) {
		dmar_dev_scope_status = -ENODEV;
	} else {
		dmar_dev_scope_status = 0;

		dmar_acpi_dev_scope_init();

		for_each_pci_dev(dev) {
			if (dev->is_virtfn)
				continue;

			info = dmar_alloc_pci_notify_info(dev,
					BUS_NOTIFY_ADD_DEVICE);
			if (!info) {
				return dmar_dev_scope_status;
			} else {
				dmar_pci_bus_add_dev(info);
				dmar_free_pci_notify_info(info);
			}
		}

		bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb);
	}

	return dmar_dev_scope_status;
}


int __init dmar_table_init(void)
{
	static int dmar_table_initialized;
	int ret;

	if (dmar_table_initialized == 0) {
		ret = parse_dmar_table();
		if (ret < 0) {
			if (ret != -ENODEV)
				pr_info("parse DMAR table failure.\n");
		} else  if (list_empty(&dmar_drhd_units)) {
			pr_info("No DMAR devices found\n");
			ret = -ENODEV;
		}

		if (ret < 0)
			dmar_table_initialized = ret;
		else
			dmar_table_initialized = 1;
	}

	return dmar_table_initialized < 0 ? dmar_table_initialized : 0;
}

static void warn_invalid_dmar(u64 addr, const char *message)
{
	WARN_TAINT_ONCE(
		1, TAINT_FIRMWARE_WORKAROUND,
		"Your BIOS is broken; DMAR reported at address %llx%s!\n"
		"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
		addr, message,
		dmi_get_system_info(DMI_BIOS_VENDOR),
		dmi_get_system_info(DMI_BIOS_VERSION),
		dmi_get_system_info(DMI_PRODUCT_VERSION));
}

static int __init check_zero_address(void)
{
	struct acpi_table_dmar *dmar;
	struct acpi_dmar_header *entry_header;
	struct acpi_dmar_hardware_unit *drhd;

	dmar = (struct acpi_table_dmar *)dmar_tbl;
	entry_header = (struct acpi_dmar_header *)(dmar + 1);

	while (((unsigned long)entry_header) <
			(((unsigned long)dmar) + dmar_tbl->length)) {
		/* Avoid looping forever on bad ACPI tables */
		if (entry_header->length == 0) {
			pr_warn("Invalid 0-length structure\n");
			return 0;
		}

		if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
			void __iomem *addr;
			u64 cap, ecap;

			drhd = (void *)entry_header;
			if (!drhd->address) {
				warn_invalid_dmar(0, "");
				goto failed;
			}

			addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
			if (!addr) {
				pr_warn("IOMMU: can't validate: %llx\n", drhd->address);
				goto failed;
			}
			cap = dmar_readq(addr + DMAR_CAP_REG);
			ecap = dmar_readq(addr + DMAR_ECAP_REG);
			early_iounmap(addr, VTD_PAGE_SIZE);
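			/*
			 * Reads from a register range with no real DRHD
			 * behind it typically come back as all 1s, so
			 * all-ones cap/ecap values indicate a bogus address
			 * in the table.
			 */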
			if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
				warn_invalid_dmar(drhd->address,
						  " returns all ones");
				goto failed;
			}
		}

		entry_header = ((void *)entry_header + entry_header->length);
	}
	return 1;

failed:
	return 0;
}

int __init detect_intel_iommu(void)
{
	int ret;

	down_write(&dmar_global_lock);
	ret = dmar_table_detect();
	if (ret)
		ret = check_zero_address();
	{
		if (ret && !no_iommu && !iommu_detected && !dmar_disabled) {
			iommu_detected = 1;
			/* Make sure ACS will be enabled */
			pci_request_acs();
		}

#ifdef CONFIG_X86
		if (ret)
			x86_init.iommu.iommu_init = intel_iommu_init;
#endif
	}
	early_acpi_os_unmap_memory((void __iomem *)dmar_tbl, dmar_tbl_size);
	dmar_tbl = NULL;
	up_write(&dmar_global_lock);

	return ret ? 1 : -ENODEV;
}


static void unmap_iommu(struct intel_iommu *iommu)
{
	iounmap(iommu->reg);
	release_mem_region(iommu->reg_phys, iommu->reg_size);
}

/**
 * map_iommu: map the iommu's registers
 * @iommu: the iommu to map
 * @phys_addr: the physical address of the base register
 *
 * Memory map the iommu's registers.  Start with a single page, and
 * possibly expand if that turns out to be insufficient.
 */
static int map_iommu(struct intel_iommu *iommu, u64 phys_addr)
{
	int map_size, err = 0;

	iommu->reg_phys = phys_addr;
	iommu->reg_size = VTD_PAGE_SIZE;

	if (!request_mem_region(iommu->reg_phys, iommu->reg_size, iommu->name)) {
		pr_err("IOMMU: can't reserve memory\n");
		err = -EBUSY;
		goto out;
	}

	iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
	if (!iommu->reg) {
		pr_err("IOMMU: can't map the region\n");
		err = -ENOMEM;
		goto release;
	}

	iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG);
	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);

	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
		err = -EINVAL;
		warn_invalid_dmar(phys_addr, " returns all ones");
		goto unmap;
	}

	/* the registers might be more than one page */
	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
			 cap_max_fault_reg_offset(iommu->cap));
	map_size = VTD_PAGE_ALIGN(map_size);
	if (map_size > iommu->reg_size) {
		iounmap(iommu->reg);
		release_mem_region(iommu->reg_phys, iommu->reg_size);
		iommu->reg_size = map_size;
		if (!request_mem_region(iommu->reg_phys, iommu->reg_size,
					iommu->name)) {
			pr_err("IOMMU: can't reserve memory\n");
			err = -EBUSY;
			goto out;
		}
		iommu->reg = ioremap(iommu->reg_phys, iommu->reg_size);
		if (!iommu->reg) {
			pr_err("IOMMU: can't map the region\n");
			err = -ENOMEM;
			goto release;
		}
	}
	err = 0;
	goto out;

unmap:
	iounmap(iommu->reg);
release:
	release_mem_region(iommu->reg_phys, iommu->reg_size);
out:
	return err;
}

static int alloc_iommu(struct dmar_drhd_unit *drhd)
{
	struct intel_iommu *iommu;
	u32 ver, sts;
	static int iommu_allocated = 0;
	int agaw = 0;
	int msagaw = 0;
	int err;

	if (!drhd->reg_base_addr) {
		warn_invalid_dmar(0, "");
		return -EINVAL;
	}

	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
	if (!iommu)
		return -ENOMEM;

	iommu->seq_id = iommu_allocated++;
	sprintf(iommu->name, "dmar%d", iommu->seq_id);

	err = map_iommu(iommu, drhd->reg_base_addr);
	if (err) {
		pr_err("IOMMU: failed to map %s\n", iommu->name);
		goto error;
	}

	err = -EINVAL;
	agaw = iommu_calculate_agaw(iommu);
	if (agaw < 0) {
		pr_err("Cannot get a valid agaw for iommu (seq_id = %d)\n",
			iommu->seq_id);
		goto err_unmap;
	}
	msagaw = iommu_calculate_max_sagaw(iommu);
	if (msagaw < 0) {
		pr_err("Cannot get a valid max agaw for iommu (seq_id = %d)\n",
			iommu->seq_id);
		goto err_unmap;
	}
	iommu->agaw = agaw;
	iommu->msagaw = msagaw;
	iommu->segment = drhd->segment;

	iommu->node = -1;

	ver = readl(iommu->reg + DMAR_VER_REG);
	pr_info("IOMMU %d: reg_base_addr %llx ver %d:%d cap %llx ecap %llx\n",
		iommu->seq_id,
		(unsigned long long)drhd->reg_base_addr,
		DMAR_VER_MAJOR(ver), DMAR_VER_MINOR(ver),
		(unsigned long long)iommu->cap,
		(unsigned long long)iommu->ecap);

	/* Reflect status in gcmd */
	sts = readl(iommu->reg + DMAR_GSTS_REG);
	if (sts & DMA_GSTS_IRES)
		iommu->gcmd |= DMA_GCMD_IRE;
	if (sts & DMA_GSTS_TES)
		iommu->gcmd |= DMA_GCMD_TE;
	if (sts & DMA_GSTS_QIES)
		iommu->gcmd |= DMA_GCMD_QIE;

	raw_spin_lock_init(&iommu->register_lock);

	drhd->iommu = iommu;
	return 0;

 err_unmap:
	unmap_iommu(iommu);
 error:
	kfree(iommu);
	return err;
}

static void free_iommu(struct intel_iommu *iommu)
{
	if (iommu->irq) {
		free_irq(iommu->irq, iommu);
		irq_set_handler_data(iommu->irq, NULL);
		dmar_free_hwirq(iommu->irq);
	}

	if (iommu->qi) {
		free_page((unsigned long)iommu->qi->desc);
		kfree(iommu->qi->desc_status);
		kfree(iommu->qi);
	}

	if (iommu->reg)
		unmap_iommu(iommu);

	kfree(iommu);
}

/*
 * Reclaim all the submitted descriptors which have completed their work.
 */
static inline void reclaim_free_desc(struct q_inval *qi)
{
	while (qi->desc_status[qi->free_tail] == QI_DONE ||
	       qi->desc_status[qi->free_tail] == QI_ABORT) {
		qi->desc_status[qi->free_tail] = QI_FREE;
		qi->free_tail = (qi->free_tail + 1) % QI_LENGTH;
		qi->free_cnt++;
	}
}

static int qi_check_fault(struct intel_iommu *iommu, int index)
{
	u32 fault;
	int head, tail;
	struct q_inval *qi = iommu->qi;
	int wait_index = (index + 1) % QI_LENGTH;

	if (qi->desc_status[wait_index] == QI_ABORT)
		return -EAGAIN;

	fault = readl(iommu->reg + DMAR_FSTS_REG);

	/*
	 * If IQE happens, the head points to the descriptor associated
	 * with the error. No new descriptors are fetched until the IQE
	 * is cleared.
	 */
	if (fault & DMA_FSTS_IQE) {
		head = readl(iommu->reg + DMAR_IQH_REG);
		if ((head >> DMAR_IQ_SHIFT) == index) {
			pr_err("VT-d detected invalid descriptor: "
				"low=%llx, high=%llx\n",
				(unsigned long long)qi->desc[index].low,
				(unsigned long long)qi->desc[index].high);
			memcpy(&qi->desc[index], &qi->desc[wait_index],
					sizeof(struct qi_desc));
			__iommu_flush_cache(iommu, &qi->desc[index],
					sizeof(struct qi_desc));
			writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG);
			return -EINVAL;
		}
	}

	/*
	 * If ITE happens, all pending wait_desc commands are aborted.
	 * No new descriptors are fetched until the ITE is cleared.
	 */
	if (fault & DMA_FSTS_ITE) {
		head = readl(iommu->reg + DMAR_IQH_REG);
		head = ((head >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;
		head |= 1;
		tail = readl(iommu->reg + DMAR_IQT_REG);
		tail = ((tail >> DMAR_IQ_SHIFT) - 1 + QI_LENGTH) % QI_LENGTH;

		writel(DMA_FSTS_ITE, iommu->reg + DMAR_FSTS_REG);

		do {
			if (qi->desc_status[head] == QI_IN_USE)
				qi->desc_status[head] = QI_ABORT;
			head = (head - 2 + QI_LENGTH) % QI_LENGTH;
		} while (head != tail);

		if (qi->desc_status[wait_index] == QI_ABORT)
			return -EAGAIN;
	}

	if (fault & DMA_FSTS_ICE)
		writel(DMA_FSTS_ICE, iommu->reg + DMAR_FSTS_REG);

	return 0;
}

/*
 * Submit the queued invalidation descriptor to the remapping
 * hardware unit and wait for its completion.
 */
int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu)
{
	int rc;
	struct q_inval *qi = iommu->qi;
	struct qi_desc *hw, wait_desc;
	int wait_index, index;
	unsigned long flags;

	if (!qi)
		return 0;

	hw = qi->desc;

restart:
	rc = 0;

	raw_spin_lock_irqsave(&qi->q_lock, flags);
	while (qi->free_cnt < 3) {
		raw_spin_unlock_irqrestore(&qi->q_lock, flags);
		cpu_relax();
		raw_spin_lock_irqsave(&qi->q_lock, flags);
	}

	index = qi->free_head;
	wait_index = (index + 1) % QI_LENGTH;

	qi->desc_status[index] = qi->desc_status[wait_index] = QI_IN_USE;

	hw[index] = *desc;
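	/*
	 * The second reserved slot carries a wait descriptor:
	 * QI_IWD_STATUS_WRITE asks the hardware to store
	 * QI_IWD_STATUS_DATA(QI_DONE) into qi->desc_status[wait_index]
	 * once everything queued ahead of it, including hw[index], has
	 * been executed; the polling loop below waits for that write.
	 */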

	wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) |
			QI_IWD_STATUS_WRITE | QI_IWD_TYPE;
	wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]);

	hw[wait_index] = wait_desc;

	__iommu_flush_cache(iommu, &hw[index], sizeof(struct qi_desc));
	__iommu_flush_cache(iommu, &hw[wait_index], sizeof(struct qi_desc));

	qi->free_head = (qi->free_head + 2) % QI_LENGTH;
	qi->free_cnt -= 2;

	/*
	 * update the HW tail register indicating the presence of
	 * new descriptors.
	 */
	writel(qi->free_head << DMAR_IQ_SHIFT, iommu->reg + DMAR_IQT_REG);

	while (qi->desc_status[wait_index] != QI_DONE) {
		/*
		 * We will leave the interrupts disabled, to prevent interrupt
		 * context from queueing another cmd while a cmd is already
		 * submitted and waiting for completion on this cpu. This is to
		 * avoid a deadlock where the interrupt context can wait
		 * indefinitely for free slots in the queue.
		 */
		rc = qi_check_fault(iommu, index);
		if (rc)
			break;

		raw_spin_unlock(&qi->q_lock);
		cpu_relax();
		raw_spin_lock(&qi->q_lock);
	}

	qi->desc_status[index] = QI_DONE;

	reclaim_free_desc(qi);
	raw_spin_unlock_irqrestore(&qi->q_lock, flags);

	if (rc == -EAGAIN)
		goto restart;

	return rc;
}

/*
 * Flush the global interrupt entry cache.
 */
void qi_global_iec(struct intel_iommu *iommu)
{
	struct qi_desc desc;

	desc.low = QI_IEC_TYPE;
	desc.high = 0;

	/* should never fail */
	qi_submit_sync(&desc, iommu);
}

void qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm,
		      u64 type)
{
	struct qi_desc desc;

	desc.low = QI_CC_FM(fm) | QI_CC_SID(sid) | QI_CC_DID(did)
			| QI_CC_GRAN(type) | QI_CC_TYPE;
	desc.high = 0;

	qi_submit_sync(&desc, iommu);
1191 1192
}

void qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr,
		    unsigned int size_order, u64 type)
{
	u8 dw = 0, dr = 0;

	struct qi_desc desc;
	int ih = 0;

	if (cap_write_drain(iommu->cap))
		dw = 1;

	if (cap_read_drain(iommu->cap))
		dr = 1;

	desc.low = QI_IOTLB_DID(did) | QI_IOTLB_DR(dr) | QI_IOTLB_DW(dw)
		| QI_IOTLB_GRAN(type) | QI_IOTLB_TYPE;
	desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih)
		| QI_IOTLB_AM(size_order);

	qi_submit_sync(&desc, iommu);
}

void qi_flush_dev_iotlb(struct intel_iommu *iommu, u16 sid, u16 qdep,
			u64 addr, unsigned mask)
{
	struct qi_desc desc;

	if (mask) {
		BUG_ON(addr & ((1 << (VTD_PAGE_SHIFT + mask)) - 1));
		addr |= (1 << (VTD_PAGE_SHIFT + mask - 1)) - 1;
		desc.high = QI_DEV_IOTLB_ADDR(addr) | QI_DEV_IOTLB_SIZE;
	} else
		desc.high = QI_DEV_IOTLB_ADDR(addr);

	if (qdep >= QI_DEV_IOTLB_MAX_INVS)
		qdep = 0;

	desc.low = QI_DEV_IOTLB_SID(sid) | QI_DEV_IOTLB_QDEP(qdep) |
		   QI_DIOTLB_TYPE;

	qi_submit_sync(&desc, iommu);
}

/*
 * Disable Queued Invalidation interface.
 */
void dmar_disable_qi(struct intel_iommu *iommu)
{
	unsigned long flags;
	u32 sts;
	cycles_t start_time = get_cycles();

	if (!ecap_qis(iommu->ecap))
		return;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);

	sts =  dmar_readq(iommu->reg + DMAR_GSTS_REG);
	if (!(sts & DMA_GSTS_QIES))
		goto end;

	/*
	 * Give a chance to HW to complete the pending invalidation requests.
	 */
	while ((readl(iommu->reg + DMAR_IQT_REG) !=
		readl(iommu->reg + DMAR_IQH_REG)) &&
		(DMAR_OPERATION_TIMEOUT > (get_cycles() - start_time)))
		cpu_relax();

	iommu->gcmd &= ~DMA_GCMD_QIE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl,
		      !(sts & DMA_GSTS_QIES), sts);
end:
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

/*
 * Enable queued invalidation.
 */
static void __dmar_enable_qi(struct intel_iommu *iommu)
{
	u32 sts;
	unsigned long flags;
	struct q_inval *qi = iommu->qi;

	qi->free_head = qi->free_tail = 0;
	qi->free_cnt = QI_LENGTH;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);

	/* write zero to the tail reg */
	writel(0, iommu->reg + DMAR_IQT_REG);

	dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc));

	iommu->gcmd |= DMA_GCMD_QIE;
	writel(iommu->gcmd, iommu->reg + DMAR_GCMD_REG);

	/* Make sure hardware complete it */
	IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts);

	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);
}

/*
 * Enable Queued Invalidation interface. This is a must to support
 * interrupt-remapping. Also used by DMA-remapping, which replaces
 * register based IOTLB invalidation.
 */
int dmar_enable_qi(struct intel_iommu *iommu)
{
	struct q_inval *qi;
	struct page *desc_page;

	if (!ecap_qis(iommu->ecap))
		return -ENOENT;

	/*
	 * queued invalidation is already setup and enabled.
	 */
	if (iommu->qi)
		return 0;

	iommu->qi = kmalloc(sizeof(*qi), GFP_ATOMIC);
	if (!iommu->qi)
		return -ENOMEM;

	qi = iommu->qi;


	desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (!desc_page) {
		kfree(qi);
		iommu->qi = NULL;
		return -ENOMEM;
	}

	qi->desc = page_address(desc_page);

	qi->desc_status = kzalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
	if (!qi->desc_status) {
		free_page((unsigned long) qi->desc);
		kfree(qi);
		iommu->qi = NULL;
		return -ENOMEM;
	}

	qi->free_head = qi->free_tail = 0;
	qi->free_cnt = QI_LENGTH;

	raw_spin_lock_init(&qi->q_lock);

	__dmar_enable_qi(iommu);

	return 0;
}

/* iommu interrupt handling. Most stuff are MSI-like. */

enum faulttype {
	DMA_REMAP,
	INTR_REMAP,
	UNKNOWN,
};

static const char *dma_remap_fault_reasons[] =
{
	"Software",
	"Present bit in root entry is clear",
	"Present bit in context entry is clear",
	"Invalid context entry",
	"Access beyond MGAW",
	"PTE Write access is not set",
	"PTE Read access is not set",
	"Next page table ptr is invalid",
	"Root table address invalid",
	"Context table ptr is invalid",
	"non-zero reserved fields in RTP",
	"non-zero reserved fields in CTP",
	"non-zero reserved fields in PTE",
	"PCE for translation request specifies blocking",
};

static const char *irq_remap_fault_reasons[] =
{
	"Detected reserved fields in the decoded interrupt-remapped request",
	"Interrupt index exceeded the interrupt-remapping table size",
	"Present field in the IRTE entry is clear",
	"Error accessing interrupt-remapping table pointed by IRTA_REG",
	"Detected reserved fields in the IRTE entry",
	"Blocked a compatibility format interrupt request",
	"Blocked an interrupt request due to source-id verification failure",
};

static const char *dmar_get_fault_reason(u8 fault_reason, int *fault_type)
{
	if (fault_reason >= 0x20 && (fault_reason - 0x20 <
					ARRAY_SIZE(irq_remap_fault_reasons))) {
		*fault_type = INTR_REMAP;
		return irq_remap_fault_reasons[fault_reason - 0x20];
	} else if (fault_reason < ARRAY_SIZE(dma_remap_fault_reasons)) {
		*fault_type = DMA_REMAP;
		return dma_remap_fault_reasons[fault_reason];
	} else {
		*fault_type = UNKNOWN;
		return "Unknown";
	}
}

void dmar_msi_unmask(struct irq_data *data)
{
	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);
	unsigned long flag;

	/* unmask it */
	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(0, iommu->reg + DMAR_FECTL_REG);
	/* Read a reg to force flush the post write */
	readl(iommu->reg + DMAR_FECTL_REG);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_mask(struct irq_data *data)
{
	unsigned long flag;
	struct intel_iommu *iommu = irq_data_get_irq_handler_data(data);

	/* mask it */
	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(DMA_FECTL_IM, iommu->reg + DMAR_FECTL_REG);
	/* Read a reg to force flush the post write */
	readl(iommu->reg + DMAR_FECTL_REG);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_write(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = irq_get_handler_data(irq);
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	writel(msg->data, iommu->reg + DMAR_FEDATA_REG);
	writel(msg->address_lo, iommu->reg + DMAR_FEADDR_REG);
	writel(msg->address_hi, iommu->reg + DMAR_FEUADDR_REG);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

void dmar_msi_read(int irq, struct msi_msg *msg)
{
	struct intel_iommu *iommu = irq_get_handler_data(irq);
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	msg->data = readl(iommu->reg + DMAR_FEDATA_REG);
	msg->address_lo = readl(iommu->reg + DMAR_FEADDR_REG);
	msg->address_hi = readl(iommu->reg + DMAR_FEUADDR_REG);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
}

static int dmar_fault_do_one(struct intel_iommu *iommu, int type,
		u8 fault_reason, u16 source_id, unsigned long long addr)
{
	const char *reason;
	int fault_type;

	reason = dmar_get_fault_reason(fault_reason, &fault_type);

	if (fault_type == INTR_REMAP)
		pr_err("INTR-REMAP: Request device [%02x:%02x.%d] "
		       "fault index %llx\n"
			"INTR-REMAP:[fault reason %02d] %s\n",
			(source_id >> 8), PCI_SLOT(source_id & 0xFF),
			PCI_FUNC(source_id & 0xFF), addr >> 48,
			fault_reason, reason);
	else
		pr_err("DMAR:[%s] Request device [%02x:%02x.%d] "
		       "fault addr %llx \n"
		       "DMAR:[fault reason %02d] %s\n",
		       (type ? "DMA Read" : "DMA Write"),
		       (source_id >> 8), PCI_SLOT(source_id & 0xFF),
		       PCI_FUNC(source_id & 0xFF), addr, fault_reason, reason);
	return 0;
}

#define PRIMARY_FAULT_REG_LEN (16)
irqreturn_t dmar_fault(int irq, void *dev_id)
{
	struct intel_iommu *iommu = dev_id;
	int reg, fault_index;
	u32 fault_status;
	unsigned long flag;

	raw_spin_lock_irqsave(&iommu->register_lock, flag);
	fault_status = readl(iommu->reg + DMAR_FSTS_REG);
	if (fault_status)
		pr_err("DRHD: handling fault status reg %x\n", fault_status);

	/* TBD: ignore advanced fault log currently */
	if (!(fault_status & DMA_FSTS_PPF))
		goto unlock_exit;

	fault_index = dma_fsts_fault_record_index(fault_status);
	reg = cap_fault_reg_offset(iommu->cap);
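	/*
	 * Each primary fault record is 16 bytes (PRIMARY_FAULT_REG_LEN):
	 * the first 8 bytes hold the faulting page address, the next 4
	 * the source-id, and the last 4 the fault reason, the request
	 * type and the F bit that marks the record as valid; writing F
	 * back below clears the record.
	 */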
	while (1) {
		u8 fault_reason;
		u16 source_id;
		u64 guest_addr;
		int type;
		u32 data;

		/* highest 32 bits */
		data = readl(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN + 12);
		if (!(data & DMA_FRCD_F))
			break;

		fault_reason = dma_frcd_fault_reason(data);
		type = dma_frcd_type(data);

		data = readl(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN + 8);
		source_id = dma_frcd_source_id(data);

		guest_addr = dmar_readq(iommu->reg + reg +
				fault_index * PRIMARY_FAULT_REG_LEN);
		guest_addr = dma_frcd_page_addr(guest_addr);
		/* clear the fault */
		writel(DMA_FRCD_F, iommu->reg + reg +
			fault_index * PRIMARY_FAULT_REG_LEN + 12);

		raw_spin_unlock_irqrestore(&iommu->register_lock, flag);

		dmar_fault_do_one(iommu, type, fault_reason,
				source_id, guest_addr);

		fault_index++;
		if (fault_index >= cap_num_fault_regs(iommu->cap))
			fault_index = 0;
		raw_spin_lock_irqsave(&iommu->register_lock, flag);
	}

	writel(DMA_FSTS_PFO | DMA_FSTS_PPF, iommu->reg + DMAR_FSTS_REG);

unlock_exit:
	raw_spin_unlock_irqrestore(&iommu->register_lock, flag);
	return IRQ_HANDLED;
}

int dmar_set_interrupt(struct intel_iommu *iommu)
{
	int irq, ret;

	/*
	 * Check if the fault interrupt is already initialized.
	 */
	if (iommu->irq)
		return 0;

	irq = dmar_alloc_hwirq();
	if (irq <= 0) {
		pr_err("IOMMU: no free vectors\n");
		return -EINVAL;
	}

	irq_set_handler_data(irq, iommu);
	iommu->irq = irq;

	ret = arch_setup_dmar_msi(irq);
	if (ret) {
		irq_set_handler_data(irq, NULL);
		iommu->irq = 0;
		dmar_free_hwirq(irq);
		return ret;
	}

	ret = request_irq(irq, dmar_fault, IRQF_NO_THREAD, iommu->name, iommu);
	if (ret)
		pr_err("IOMMU: can't request irq\n");
	return ret;
}

int __init enable_drhd_fault_handling(void)
{
	struct dmar_drhd_unit *drhd;
	struct intel_iommu *iommu;

	/*
	 * Enable fault control interrupt.
	 */
	for_each_iommu(iommu, drhd) {
		u32 fault_status;
		int ret = dmar_set_interrupt(iommu);

		if (ret) {
			pr_err("DRHD %Lx: failed to enable fault interrupt, ret %d\n",
			       (unsigned long long)drhd->reg_base_addr, ret);
			return -1;
		}

		/*
		 * Clear any previous faults.
		 */
		dmar_fault(iommu->irq, iommu);
		fault_status = readl(iommu->reg + DMAR_FSTS_REG);
		writel(fault_status, iommu->reg + DMAR_FSTS_REG);
	}

	return 0;
}

/*
 * Re-enable Queued Invalidation interface.
 */
int dmar_reenable_qi(struct intel_iommu *iommu)
{
	if (!ecap_qis(iommu->ecap))
		return -ENOENT;

	if (!iommu->qi)
		return -ENOENT;

	/*
	 * First disable queued invalidation.
	 */
	dmar_disable_qi(iommu);
	/*
	 * Then enable queued invalidation again. Since there is no pending
	 * invalidation requests now, it's safe to re-enable queued
	 * invalidation.
	 */
	__dmar_enable_qi(iommu);

	return 0;
}

/*
 * Check interrupt remapping support in DMAR table description.
 */
int __init dmar_ir_support(void)
{
	struct acpi_table_dmar *dmar;
	dmar = (struct acpi_table_dmar *)dmar_tbl;
	if (!dmar)
		return 0;
	return dmar->flags & 0x1;
}

static int __init dmar_free_unused_resources(void)
{
	struct dmar_drhd_unit *dmaru, *dmaru_n;

	/* DMAR units are in use */
	if (irq_remapping_enabled || intel_iommu_enabled)
		return 0;

	if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units))
		bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb);

	down_write(&dmar_global_lock);
	list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) {
		list_del(&dmaru->list);
		dmar_free_drhd(dmaru);
	}
	up_write(&dmar_global_lock);

	return 0;
}

late_initcall(dmar_free_unused_resources);
IOMMU_INIT_POST(detect_intel_iommu);