diff --git a/arch/i386/Kconfig b/arch/i386/Kconfig index 1596101cfaf87352b2f45a3b3e79a849029fb247..2206bf6637de765c06323a0ea590860a5b82dd1d 100644 --- a/arch/i386/Kconfig +++ b/arch/i386/Kconfig @@ -1054,6 +1054,10 @@ config SCx200 This support is also available as a module. If compiled as a module, it will be called scx200. +config K8_NB + def_bool y + depends on AGP_AMD64 + source "drivers/pcmcia/Kconfig" source "drivers/pci/hotplug/Kconfig" diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index 96fb8a020af25938eef052da80846f6a4c767c70..28b14d6c7b1a4a66b03af5dcf372565dab25c8e3 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -37,6 +37,7 @@ obj-$(CONFIG_EFI) += efi.o efi_stub.o obj-$(CONFIG_DOUBLEFAULT) += doublefault.o obj-$(CONFIG_VM86) += vm86.o obj-$(CONFIG_EARLY_PRINTK) += early_printk.o +obj-$(CONFIG_K8_NB) += k8.o EXTRA_AFLAGS := -traditional @@ -76,3 +77,6 @@ SYSCFLAGS_vsyscall-syms.o = -r $(obj)/vsyscall-syms.o: $(src)/vsyscall.lds \ $(obj)/vsyscall-sysenter.o $(obj)/vsyscall-note.o FORCE $(call if_changed,syscall) + +k8-y += ../../x86_64/kernel/k8.o + diff --git a/arch/x86_64/Kconfig b/arch/x86_64/Kconfig index af44130f0d65f54f642d6556e1a826bffe3db659..fc75275d8c72af72f055daa0d2f936b52dd5eead 100644 --- a/arch/x86_64/Kconfig +++ b/arch/x86_64/Kconfig @@ -501,6 +501,10 @@ config REORDER optimal TLB usage. If you have pretty much any version of binutils, this can increase your kernel build time by roughly one minute. +config K8_NB + def_bool y + depends on AGP_AMD64 || GART_IOMMU || (PCI && NUMA) + endmenu # diff --git a/arch/x86_64/kernel/Makefile b/arch/x86_64/kernel/Makefile index 381bc6ad743e79682d9895350a5fe370c63305c5..f927d11065fef50bd56f7b9a37fe5da807c212d1 100644 --- a/arch/x86_64/kernel/Makefile +++ b/arch/x86_64/kernel/Makefile @@ -33,6 +33,7 @@ obj-$(CONFIG_SWIOTLB) += pci-swiotlb.o obj-$(CONFIG_KPROBES) += kprobes.o obj-$(CONFIG_X86_PM_TIMER) += pmtimer.o obj-$(CONFIG_X86_VSMP) += vsmp.o +obj-$(CONFIG_K8_NB) += k8.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/x86_64/kernel/aperture.c b/arch/x86_64/kernel/aperture.c index 70b9d21ed675b1aabe1874d785fa5f0b98e3b87d..a7ad03ee98cf5e3fb6c5e8f072ac17c75f20e7b9 100644 --- a/arch/x86_64/kernel/aperture.c +++ b/arch/x86_64/kernel/aperture.c @@ -24,6 +24,7 @@ #include #include #include +#include int iommu_aperture; int iommu_aperture_disabled __initdata = 0; @@ -37,8 +38,6 @@ int fix_aperture __initdata = 1; /* This code runs before the PCI subsystem is initialized, so just access the northbridge directly. */ -#define NB_ID_3 (PCI_VENDOR_ID_AMD | (0x1103<<16)) - static u32 __init allocate_aperture(void) { pg_data_t *nd0 = NODE_DATA(0); @@ -68,20 +67,20 @@ static u32 __init allocate_aperture(void) return (u32)__pa(p); } -static int __init aperture_valid(char *name, u64 aper_base, u32 aper_size) +static int __init aperture_valid(u64 aper_base, u32 aper_size) { if (!aper_base) return 0; if (aper_size < 64*1024*1024) { - printk("Aperture from %s too small (%d MB)\n", name, aper_size>>20); + printk("Aperture too small (%d MB)\n", aper_size>>20); return 0; } if (aper_base + aper_size >= 0xffffffff) { - printk("Aperture from %s beyond 4GB. Ignoring.\n",name); + printk("Aperture beyond 4GB. Ignoring.\n"); return 0; } if (e820_any_mapped(aper_base, aper_base + aper_size, E820_RAM)) { - printk("Aperture from %s pointing to e820 RAM. Ignoring.\n",name); + printk("Aperture pointing to e820 RAM. Ignoring.\n"); return 0; } return 1; @@ -140,7 +139,7 @@ static __u32 __init read_agp(int num, int slot, int func, int cap, u32 *order) printk("Aperture from AGP @ %Lx size %u MB (APSIZE %x)\n", aper, 32 << *order, apsizereg); - if (!aperture_valid("AGP bridge", aper, (32*1024*1024) << *order)) + if (!aperture_valid(aper, (32*1024*1024) << *order)) return 0; return (u32)aper; } @@ -208,9 +207,8 @@ void __init iommu_hole_init(void) fix = 0; for (num = 24; num < 32; num++) { - char name[30]; - if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) - continue; + if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) + continue; iommu_aperture = 1; @@ -222,9 +220,7 @@ void __init iommu_hole_init(void) printk("CPU %d: aperture @ %Lx size %u MB\n", num-24, aper_base, aper_size>>20); - sprintf(name, "northbridge cpu %d", num-24); - - if (!aperture_valid(name, aper_base, aper_size)) { + if (!aperture_valid(aper_base, aper_size)) { fix = 1; break; } @@ -273,7 +269,7 @@ void __init iommu_hole_init(void) /* Fix up the north bridges */ for (num = 24; num < 32; num++) { - if (read_pci_config(0, num, 3, 0x00) != NB_ID_3) + if (!early_is_k8_nb(read_pci_config(0, num, 3, 0x00))) continue; /* Don't enable translation yet. That is done later. diff --git a/arch/x86_64/kernel/k8.c b/arch/x86_64/kernel/k8.c new file mode 100644 index 0000000000000000000000000000000000000000..6416682d33d08b878ed771ea32d0497aedbdb960 --- /dev/null +++ b/arch/x86_64/kernel/k8.c @@ -0,0 +1,118 @@ +/* + * Shared support code for AMD K8 northbridges and derivates. + * Copyright 2006 Andi Kleen, SUSE Labs. Subject to GPLv2. + */ +#include +#include +#include +#include +#include +#include +#include + +int num_k8_northbridges; +EXPORT_SYMBOL(num_k8_northbridges); + +static u32 *flush_words; + +struct pci_device_id k8_nb_ids[] = { + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, + { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1203) }, + {} +}; +EXPORT_SYMBOL(k8_nb_ids); + +struct pci_dev **k8_northbridges; +EXPORT_SYMBOL(k8_northbridges); + +static struct pci_dev *next_k8_northbridge(struct pci_dev *dev) +{ + do { + dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev); + if (!dev) + break; + } while (!pci_match_id(&k8_nb_ids[0], dev)); + return dev; +} + +int cache_k8_northbridges(void) +{ + int i; + struct pci_dev *dev; + if (num_k8_northbridges) + return 0; + + num_k8_northbridges = 0; + dev = NULL; + while ((dev = next_k8_northbridge(dev)) != NULL) + num_k8_northbridges++; + + k8_northbridges = kmalloc((num_k8_northbridges + 1) * sizeof(void *), + GFP_KERNEL); + if (!k8_northbridges) + return -ENOMEM; + + flush_words = kmalloc(num_k8_northbridges * sizeof(u32), GFP_KERNEL); + if (!flush_words) { + kfree(k8_northbridges); + return -ENOMEM; + } + + dev = NULL; + i = 0; + while ((dev = next_k8_northbridge(dev)) != NULL) { + k8_northbridges[i++] = dev; + pci_read_config_dword(dev, 0x9c, &flush_words[i]); + } + k8_northbridges[i] = NULL; + return 0; +} +EXPORT_SYMBOL_GPL(cache_k8_northbridges); + +/* Ignores subdevice/subvendor but as far as I can figure out + they're useless anyways */ +int __init early_is_k8_nb(u32 device) +{ + struct pci_device_id *id; + u32 vendor = device & 0xffff; + device >>= 16; + for (id = k8_nb_ids; id->vendor; id++) + if (vendor == id->vendor && device == id->device) + return 1; + return 0; +} + +void k8_flush_garts(void) +{ + int flushed, i; + unsigned long flags; + static DEFINE_SPINLOCK(gart_lock); + + /* Avoid races between AGP and IOMMU. In theory it's not needed + but I'm not sure if the hardware won't lose flush requests + when another is pending. This whole thing is so expensive anyways + that it doesn't matter to serialize more. -AK */ + spin_lock_irqsave(&gart_lock, flags); + flushed = 0; + for (i = 0; i < num_k8_northbridges; i++) { + pci_write_config_dword(k8_northbridges[i], 0x9c, + flush_words[i]|1); + flushed++; + } + for (i = 0; i < num_k8_northbridges; i++) { + u32 w; + /* Make sure the hardware actually executed the flush*/ + for (;;) { + pci_read_config_dword(k8_northbridges[i], + 0x9c, &w); + if (!(w & 1)) + break; + cpu_relax(); + } + } + spin_unlock_irqrestore(&gart_lock, flags); + if (!flushed) + printk("nothing to flush?\n"); +} +EXPORT_SYMBOL_GPL(k8_flush_garts); + diff --git a/arch/x86_64/kernel/pci-gart.c b/arch/x86_64/kernel/pci-gart.c index ea8f4041794e269945dc70a0f4ddb0e22b4706f9..ded3af3bceece6d52537f3b30d01a9f4267b5e30 100644 --- a/arch/x86_64/kernel/pci-gart.c +++ b/arch/x86_64/kernel/pci-gart.c @@ -32,6 +32,7 @@ #include #include #include +#include unsigned long iommu_bus_base; /* GART remapping area (physical) */ static unsigned long iommu_size; /* size of remapping area bytes */ @@ -46,8 +47,6 @@ u32 *iommu_gatt_base; /* Remapping table */ also seen with Qlogic at least). */ int iommu_fullflush = 1; -#define MAX_NB 8 - /* Allocation bitmap for the remapping area */ static DEFINE_SPINLOCK(iommu_bitmap_lock); static unsigned long *iommu_gart_bitmap; /* guarded by iommu_bitmap_lock */ @@ -63,13 +62,6 @@ static u32 gart_unmapped_entry; #define to_pages(addr,size) \ (round_up(((addr) & ~PAGE_MASK) + (size), PAGE_SIZE) >> PAGE_SHIFT) -#define for_all_nb(dev) \ - dev = NULL; \ - while ((dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, dev))!=NULL) - -static struct pci_dev *northbridges[MAX_NB]; -static u32 northbridge_flush_word[MAX_NB]; - #define EMERGENCY_PAGES 32 /* = 128KB */ #ifdef CONFIG_AGP @@ -120,44 +112,17 @@ static void free_iommu(unsigned long offset, int size) /* * Use global flush state to avoid races with multiple flushers. */ -static void flush_gart(struct device *dev) +static void flush_gart(void) { unsigned long flags; - int flushed = 0; - int i, max; - spin_lock_irqsave(&iommu_bitmap_lock, flags); - if (need_flush) { - max = 0; - for (i = 0; i < MAX_NB; i++) { - if (!northbridges[i]) - continue; - pci_write_config_dword(northbridges[i], 0x9c, - northbridge_flush_word[i] | 1); - flushed++; - max = i; - } - for (i = 0; i <= max; i++) { - u32 w; - if (!northbridges[i]) - continue; - /* Make sure the hardware actually executed the flush. */ - for (;;) { - pci_read_config_dword(northbridges[i], 0x9c, &w); - if (!(w & 1)) - break; - cpu_relax(); - } - } - if (!flushed) - printk("nothing to flush?\n"); + if (need_flush) { + k8_flush_garts(); need_flush = 0; } spin_unlock_irqrestore(&iommu_bitmap_lock, flags); } - - #ifdef CONFIG_IOMMU_LEAK #define SET_LEAK(x) if (iommu_leak_tab) \ @@ -266,7 +231,7 @@ static dma_addr_t gart_map_simple(struct device *dev, char *buf, size_t size, int dir) { dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); - flush_gart(dev); + flush_gart(); return map; } @@ -351,7 +316,7 @@ static int dma_map_sg_nonforce(struct device *dev, struct scatterlist *sg, s->dma_address = addr; s->dma_length = s->length; } - flush_gart(dev); + flush_gart(); return nents; } @@ -458,13 +423,13 @@ int gart_map_sg(struct device *dev, struct scatterlist *sg, int nents, int dir) if (dma_map_cont(sg, start, i, sg+out, pages, need) < 0) goto error; out++; - flush_gart(dev); + flush_gart(); if (out < nents) sg[out].dma_length = 0; return out; error: - flush_gart(NULL); + flush_gart(); gart_unmap_sg(dev, sg, nents, dir); /* When it was forced or merged try again in a dumb way */ if (force_iommu || iommu_merge) { @@ -532,10 +497,13 @@ static __init int init_k8_gatt(struct agp_kern_info *info) void *gatt; unsigned aper_base, new_aper_base; unsigned aper_size, gatt_size, new_aper_size; - + int i; + printk(KERN_INFO "PCI-DMA: Disabling AGP.\n"); aper_size = aper_base = info->aper_size = 0; - for_all_nb(dev) { + dev = NULL; + for (i = 0; i < num_k8_northbridges; i++) { + dev = k8_northbridges[i]; new_aper_base = read_aperture(dev, &new_aper_size); if (!new_aper_base) goto nommu; @@ -558,11 +526,12 @@ static __init int init_k8_gatt(struct agp_kern_info *info) panic("Cannot allocate GATT table"); memset(gatt, 0, gatt_size); agp_gatt_table = gatt; - - for_all_nb(dev) { + + for (i = 0; i < num_k8_northbridges; i++) { u32 ctl; u32 gatt_reg; + dev = k8_northbridges[i]; gatt_reg = __pa(gatt) >> 12; gatt_reg <<= 4; pci_write_config_dword(dev, 0x98, gatt_reg); @@ -573,7 +542,7 @@ static __init int init_k8_gatt(struct agp_kern_info *info) pci_write_config_dword(dev, 0x90, ctl); } - flush_gart(NULL); + flush_gart(); printk("PCI-DMA: aperture base @ %x size %u KB\n",aper_base, aper_size>>10); return 0; @@ -607,10 +576,14 @@ static int __init pci_iommu_init(void) struct agp_kern_info info; unsigned long aper_size; unsigned long iommu_start; - struct pci_dev *dev; unsigned long scratch; long i; + if (cache_k8_northbridges() < 0 || num_k8_northbridges == 0) { + printk(KERN_INFO "PCI-GART: No AMD northbridge found.\n"); + return -1; + } + #ifndef CONFIG_AGP_AMD64 no_agp = 1; #else @@ -637,14 +610,6 @@ static int __init pci_iommu_init(void) return -1; } - i = 0; - for_all_nb(dev) - i++; - if (i > MAX_NB) { - printk(KERN_ERR "PCI-GART: Too many northbridges (%ld). Disabled\n", i); - return -1; - } - printk(KERN_INFO "PCI-DMA: using GART IOMMU.\n"); aper_size = info.aper_size * 1024 * 1024; iommu_size = check_iommu_size(info.aper_base, aper_size); @@ -707,20 +672,8 @@ static int __init pci_iommu_init(void) for (i = EMERGENCY_PAGES; i < iommu_pages; i++) iommu_gatt_base[i] = gart_unmapped_entry; - for_all_nb(dev) { - u32 flag; - int cpu = PCI_SLOT(dev->devfn) - 24; - if (cpu >= MAX_NB) - continue; - northbridges[cpu] = dev; - pci_read_config_dword(dev, 0x9c, &flag); /* cache flush word */ - northbridge_flush_word[cpu] = flag; - } - - flush_gart(NULL); - + flush_gart(); dma_ops = &gart_dma_ops; - return 0; } diff --git a/arch/x86_64/pci/k8-bus.c b/arch/x86_64/pci/k8-bus.c index 3acf60ded2a0b54144a72b5b0faf5f8bc2c00933..b50a7c7c47f8984b45329d07447a5b9cbb1447d3 100644 --- a/arch/x86_64/pci/k8-bus.c +++ b/arch/x86_64/pci/k8-bus.c @@ -2,6 +2,7 @@ #include #include #include +#include /* * This discovers the pcibus <-> node mapping on AMD K8. @@ -18,7 +19,6 @@ #define NR_LDT_BUS_NUMBER_REGISTERS 3 #define SECONDARY_LDT_BUS_NUMBER(dword) ((dword >> 8) & 0xFF) #define SUBORDINATE_LDT_BUS_NUMBER(dword) ((dword >> 16) & 0xFF) -#define PCI_DEVICE_ID_K8HTCONFIG 0x1100 /** * fill_mp_bus_to_cpumask() @@ -28,8 +28,7 @@ __init static int fill_mp_bus_to_cpumask(void) { - struct pci_dev *nb_dev = NULL; - int i, j; + int i, j, k; u32 ldtbus, nid; static int lbnr[3] = { LDT_BUS_NUMBER_REGISTER_0, @@ -37,8 +36,9 @@ fill_mp_bus_to_cpumask(void) LDT_BUS_NUMBER_REGISTER_2 }; - while ((nb_dev = pci_get_device(PCI_VENDOR_ID_AMD, - PCI_DEVICE_ID_K8HTCONFIG, nb_dev))) { + cache_k8_northbridges(); + for (k = 0; k < num_k8_northbridges; k++) { + struct pci_dev *nb_dev = k8_northbridges[k]; pci_read_config_dword(nb_dev, NODE_ID_REGISTER, &nid); for (i = 0; i < NR_LDT_BUS_NUMBER_REGISTERS; i++) { diff --git a/drivers/char/agp/amd64-agp.c b/drivers/char/agp/amd64-agp.c index ac3c33a2e37d7b7ed7d1b3da0c7428f0e448dec8..229d015757f9a13cedc3b3f598d2a87631fdc0bf 100644 --- a/drivers/char/agp/amd64-agp.c +++ b/drivers/char/agp/amd64-agp.c @@ -15,11 +15,9 @@ #include #include #include /* PAGE_SIZE */ +#include #include "agp.h" -/* Will need to be increased if AMD64 ever goes >8-way. */ -#define MAX_HAMMER_GARTS 8 - /* PTE bits. */ #define GPTE_VALID 1 #define GPTE_COHERENT 2 @@ -53,28 +51,12 @@ #define ULI_X86_64_HTT_FEA_REG 0x50 #define ULI_X86_64_ENU_SCR_REG 0x54 -static int nr_garts; -static struct pci_dev * hammers[MAX_HAMMER_GARTS]; - static struct resource *aperture_resource; static int __initdata agp_try_unsupported = 1; -#define for_each_nb() for(gart_iterator=0;gart_iteratorgatt_table_real); - int gart_iterator; + int i; /* Configure AGP regs in each x86-64 host bridge. */ - for_each_nb() { + for (i = 0; i < num_k8_northbridges; i++) { agp_bridge->gart_bus_addr = - amd64_configure(hammers[gart_iterator],gatt_bus); + amd64_configure(k8_northbridges[i], gatt_bus); } + k8_flush_garts(); return 0; } @@ -236,12 +216,13 @@ static int amd_8151_configure(void) static void amd64_cleanup(void) { u32 tmp; - int gart_iterator; - for_each_nb() { + int i; + for (i = 0; i < num_k8_northbridges; i++) { + struct pci_dev *dev = k8_northbridges[i]; /* disable gart translation */ - pci_read_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, &tmp); + pci_read_config_dword (dev, AMD64_GARTAPERTURECTL, &tmp); tmp &= ~AMD64_GARTEN; - pci_write_config_dword (hammers[gart_iterator], AMD64_GARTAPERTURECTL, tmp); + pci_write_config_dword (dev, AMD64_GARTAPERTURECTL, tmp); } } @@ -361,17 +342,15 @@ static __devinit int fix_northbridge(struct pci_dev *nb, struct pci_dev *agp, static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr) { - struct pci_dev *loop_dev = NULL; - int i = 0; - - /* cache pci_devs of northbridges. */ - while ((loop_dev = pci_get_device(PCI_VENDOR_ID_AMD, 0x1103, loop_dev)) - != NULL) { - if (i == MAX_HAMMER_GARTS) { - printk(KERN_ERR PFX "Too many northbridges for AGP\n"); - return -1; - } - if (fix_northbridge(loop_dev, pdev, cap_ptr) < 0) { + int i; + + if (cache_k8_northbridges() < 0) + return -ENODEV; + + i = 0; + for (i = 0; i < num_k8_northbridges; i++) { + struct pci_dev *dev = k8_northbridges[i]; + if (fix_northbridge(dev, pdev, cap_ptr) < 0) { printk(KERN_ERR PFX "No usable aperture found.\n"); #ifdef __x86_64__ /* should port this to i386 */ @@ -379,10 +358,8 @@ static __devinit int cache_nbs (struct pci_dev *pdev, u32 cap_ptr) #endif return -1; } - hammers[i++] = loop_dev; } - nr_garts = i; - return i == 0 ? -1 : 0; + return 0; } /* Handle AMD 8151 quirks */ @@ -450,7 +427,7 @@ static int __devinit uli_agp_init(struct pci_dev *pdev) } /* shadow x86-64 registers into ULi registers */ - pci_read_config_dword (hammers[0], AMD64_GARTAPERTUREBASE, &httfea); + pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &httfea); /* if x86-64 aperture base is beyond 4G, exit here */ if ((httfea & 0x7fff) >> (32 - 25)) @@ -513,7 +490,7 @@ static int __devinit nforce3_agp_init(struct pci_dev *pdev) pci_write_config_dword(dev1, NVIDIA_X86_64_1_APSIZE, tmp); /* shadow x86-64 registers into NVIDIA registers */ - pci_read_config_dword (hammers[0], AMD64_GARTAPERTUREBASE, &apbase); + pci_read_config_dword (k8_northbridges[0], AMD64_GARTAPERTUREBASE, &apbase); /* if x86-64 aperture base is beyond 4G, exit here */ if ( (apbase & 0x7fff) >> (32 - 25) ) { @@ -754,10 +731,6 @@ static struct pci_driver agp_amd64_pci_driver = { int __init agp_amd64_init(void) { int err = 0; - static struct pci_device_id amd64nb[] = { - { PCI_DEVICE(PCI_VENDOR_ID_AMD, 0x1103) }, - { }, - }; if (agp_off) return -EINVAL; @@ -774,7 +747,7 @@ int __init agp_amd64_init(void) } /* First check that we have at least one AMD64 NB */ - if (!pci_dev_present(amd64nb)) + if (!pci_dev_present(k8_nb_ids)) return -ENODEV; /* Look for any AGP bridge */ diff --git a/include/asm-i386/k8.h b/include/asm-i386/k8.h new file mode 100644 index 0000000000000000000000000000000000000000..dfd88a6e604032cfe9dbbfdd1657a0998bcdbea7 --- /dev/null +++ b/include/asm-i386/k8.h @@ -0,0 +1 @@ +#include diff --git a/include/asm-x86_64/k8.h b/include/asm-x86_64/k8.h new file mode 100644 index 0000000000000000000000000000000000000000..699dd6961eda06ae0943d62916b1bedc1396bb1b --- /dev/null +++ b/include/asm-x86_64/k8.h @@ -0,0 +1,14 @@ +#ifndef _ASM_K8_H +#define _ASM_K8_H 1 + +#include + +extern struct pci_device_id k8_nb_ids[]; + +extern int early_is_k8_nb(u32 value); +extern struct pci_dev **k8_northbridges; +extern int num_k8_northbridges; +extern int cache_k8_northbridges(void); +extern void k8_flush_garts(void); + +#endif