diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 23f315c9f21560a10f4202676e75e4181a9c7f4a..325c05294fc40dfc662a52187eba92516e568e94 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -355,7 +355,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap,
 	if (heap > 0x3fffffffffffUL)
 		error("Destination address too large");
 #else
-	if (heap > ((-__PAGE_OFFSET-(512<<20)-1) & 0x7fffffff))
+	if (heap > ((-__PAGE_OFFSET-(128<<20)-1) & 0x7fffffff))
 		error("Destination address too large");
 #endif
 #ifndef CONFIG_RELOCATABLE
diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c
index 3f838d537392b4ddb6d061e82bbf1ce8a2a3b9d2..78218135b48e6169d155fb4a097e5b6c8e30e53a 100644
--- a/arch/x86/kernel/apic/apic.c
+++ b/arch/x86/kernel/apic/apic.c
@@ -1389,6 +1389,14 @@ void __cpuinit end_local_APIC_setup(void)
 
 	setup_apic_nmi_watchdog(NULL);
 	apic_pm_activate();
+
+	/*
+	 * Now that local APIC setup is completed for BP, configure the fault
+	 * handling for interrupt remapping.
+	 */
+	if (!smp_processor_id() && intr_remapping_enabled)
+		enable_drhd_fault_handling();
+
 }
 
 #ifdef CONFIG_X86_X2APIC
diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c
index 7cc0a721f628c302d0d420ba0bf3af1945d5a1c0..fadcd743a74f8bdcd5effbaf7e28b01ea3003532 100644
--- a/arch/x86/kernel/apic/io_apic.c
+++ b/arch/x86/kernel/apic/io_apic.c
@@ -2430,13 +2430,12 @@ static void ack_apic_level(struct irq_data *data)
 {
 	struct irq_cfg *cfg = data->chip_data;
 	int i, do_unmask_irq = 0, irq = data->irq;
-	struct irq_desc *desc = irq_to_desc(irq);
 	unsigned long v;
 
 	irq_complete_move(cfg);
 #ifdef CONFIG_GENERIC_PENDING_IRQ
 	/* If we are moving the irq we need to mask it */
-	if (unlikely(desc->status & IRQ_MOVE_PENDING)) {
+	if (unlikely(irq_to_desc(irq)->status & IRQ_MOVE_PENDING)) {
 		do_unmask_irq = 1;
 		mask_ioapic(cfg);
 	}
@@ -3413,6 +3412,7 @@ dmar_msi_set_affinity(struct irq_data *data, const struct cpumask *mask,
 	msg.data |= MSI_DATA_VECTOR(cfg->vector);
 	msg.address_lo &= ~MSI_ADDR_DEST_ID_MASK;
 	msg.address_lo |= MSI_ADDR_DEST_ID(dest);
+	msg.address_hi = MSI_ADDR_BASE_HI | MSI_ADDR_EXT_DEST_ID(dest);
 
 	dmar_msi_write(irq, &msg);
 
diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c
index f9e4e6a54073e3d901d0475da9c5deceb21d55d8..d8c4a6feb2862f3a967f7eb4dcc4f503f2dac2f8 100644
--- a/arch/x86/kernel/apic/probe_64.c
+++ b/arch/x86/kernel/apic/probe_64.c
@@ -79,13 +79,6 @@ void __init default_setup_apic_routing(void)
 		/* need to update phys_pkg_id */
 		apic->phys_pkg_id = apicid_phys_pkg_id;
 	}
-
-	/*
-	 * Now that apic routing model is selected, configure the
-	 * fault handling for intr remapping.
-	 */
-	if (intr_remapping_enabled)
-		enable_drhd_fault_handling();
 }
 
 /* Same for both flat and physical. */
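
The new 32-bit bound in the misc.c hunk can be sanity-checked by hand. A standalone sketch of the arithmetic, assuming the default 3G/1G split (PAGE_OFFSET = 0xC0000000 is an illustrative assumption, not part of the patch): in 32-bit unsigned arithmetic, -__PAGE_OFFSET wraps around to the size of lowmem, so the limit works out to the top of lowmem minus 128MB minus 1.

#include <stdio.h>
#include <stdint.h>

#define PAGE_OFFSET 0xC0000000u		/* assumed default 3G/1G split */

int main(void)
{
	/* -PAGE_OFFSET wraps to 4GiB - PAGE_OFFSET = 1GiB of lowmem */
	uint32_t limit = (-PAGE_OFFSET - (128u << 20) - 1) & 0x7fffffff;

	printf("heap limit = %#010x\n", (unsigned)limit);	/* 0x37ffffff */
	return 0;
}

In other words, the check now only keeps the decompression heap 128MB clear of the top of the kernel mapping rather than 512MB, presumably matching the gap actually reserved there.
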
diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S
index f0bea76f6ea591f29f1eedad35fa7d3205f8fa45..c0dbd9ac24f0d5cf7e87f8f0439275656b877f73 100644
--- a/arch/x86/kernel/head_32.S
+++ b/arch/x86/kernel/head_32.S
@@ -60,16 +60,18 @@
 #define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD)
 #endif
 
+/* Number of possible pages in the lowmem region */
+LOWMEM_PAGES = (((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT)
+
 /* Enough space to fit pagetables for the low memory linear map */
-MAPPING_BEYOND_END = \
-	PAGE_TABLE_SIZE(((1<<32) - __PAGE_OFFSET) >> PAGE_SHIFT) << PAGE_SHIFT
+MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT
 
 /*
  * Worst-case size of the kernel mapping we need to make:
- * the worst-case size of the kernel itself, plus the extra we need
- * to map for the linear map.
+ * a relocatable kernel can live anywhere in lowmem, so we need to be able
+ * to map all of lowmem.
  */
-KERNEL_PAGES = (KERNEL_IMAGE_SIZE + MAPPING_BEYOND_END)>>PAGE_SHIFT
+KERNEL_PAGES = LOWMEM_PAGES
 
 INIT_MAP_SIZE = PAGE_TABLE_SIZE(KERNEL_PAGES) * PAGE_SIZE_asm
 RESERVE_BRK(pagetables, INIT_MAP_SIZE)
diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c
index ae03cab4352e8535946a835787c83e5c8563079b..4ff5968f12d295ac00a55ecbbae06dd97675a6c9 100644
--- a/arch/x86/kernel/hpet.c
+++ b/arch/x86/kernel/hpet.c
@@ -27,6 +27,9 @@
 #define HPET_DEV_FSB_CAP		0x1000
 #define HPET_DEV_PERI_CAP		0x2000
 
+#define HPET_MIN_CYCLES			128
+#define HPET_MIN_PROG_DELTA		(HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))
+
 #define EVT_TO_HPET_DEV(evt) container_of(evt, struct hpet_dev, evt)
 
 /*
@@ -299,8 +302,9 @@ static void hpet_legacy_clockevent_register(void)
 	/* Calculate the min / max delta */
 	hpet_clockevent.max_delta_ns = clockevent_delta2ns(0x7FFFFFFF,
 							   &hpet_clockevent);
-	/* 5 usec minimum reprogramming delta. */
-	hpet_clockevent.min_delta_ns = 5000;
+	/* Setup minimum reprogramming delta. */
+	hpet_clockevent.min_delta_ns = clockevent_delta2ns(HPET_MIN_PROG_DELTA,
+							   &hpet_clockevent);
 
 	/*
 	 * Start hpet with the boot cpu mask and make it
@@ -393,22 +397,24 @@ static int hpet_next_event(unsigned long delta,
 	 * the wraparound into account) nor a simple count down event
 	 * mode. Further the write to the comparator register is
 	 * delayed internally up to two HPET clock cycles in certain
-	 * chipsets (ATI, ICH9,10). We worked around that by reading
-	 * back the compare register, but that required another
-	 * workaround for ICH9,10 chips where the first readout after
-	 * write can return the old stale value. We already have a
-	 * minimum delta of 5us enforced, but a NMI or SMI hitting
+	 * chipsets (ATI, ICH9,10). Some newer AMD chipsets have even
+	 * longer delays. We worked around that by reading back the
+	 * compare register, but that required another workaround for
+	 * ICH9,10 chips where the first readout after write can
+	 * return the old stale value. We already had a minimum
+	 * programming delta of 5us enforced, but a NMI or SMI hitting
 	 * between the counter readout and the comparator write can
 	 * move us behind that point easily. Now instead of reading
 	 * the compare register back several times, we make the ETIME
 	 * decision based on the following: Return ETIME if the
-	 * counter value after the write is less than 8 HPET cycles
+	 * counter value after the write is less than HPET_MIN_CYCLES
 	 * away from the event or if the counter is already ahead of
-	 * the event.
+	 * the event. The minimum programming delta for the generic
+	 * clockevents code is set to 1.5 * HPET_MIN_CYCLES.
 	 */
 	res = (s32)(cnt - hpet_readl(HPET_COUNTER));
 
-	return res < 8 ? -ETIME : 0;
+	return res < HPET_MIN_CYCLES ? -ETIME : 0;
 }
 
 static void hpet_legacy_set_mode(enum clock_event_mode mode,
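
Worked numbers for the head_32.S sizing change above, as a sketch assuming the usual non-PAE defaults (PAGE_OFFSET = 0xC0000000, 4KiB pages, PTRS_PER_PGD = 1024; these constants are assumptions for illustration, and PAE uses a different divisor):

#include <stdio.h>

#define PAGE_OFFSET	0xC0000000u	/* assumed 3G/1G split */
#define PAGE_SHIFT	12
#define PTRS_PER_PGD	1024		/* non-PAE */
#define PAGE_SIZE	(1u << PAGE_SHIFT)

int main(void)
{
	unsigned lowmem_pages = (unsigned)((1ull << 32) - PAGE_OFFSET) >> PAGE_SHIFT;
	unsigned pt_pages     = lowmem_pages / PTRS_PER_PGD;

	/* 262144 lowmem pages -> 256 pagetable pages -> 1MiB of brk */
	printf("LOWMEM_PAGES  = %u\n", lowmem_pages);
	printf("INIT_MAP_SIZE = %u KiB\n", pt_pages * PAGE_SIZE / 1024);
	return 0;
}

Sizing for all of lowmem rather than KERNEL_IMAGE_SIZE costs about 1MiB of brk in this configuration, which is the price of letting a relocatable kernel be placed anywhere in lowmem.
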
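To put the new HPET constants into time units, a sketch assuming the common 14.31818MHz legacy HPET clock (real hardware reports its actual period in the capabilities register, so the exact figures vary):

#include <stdio.h>

#define HPET_MIN_CYCLES     128
#define HPET_MIN_PROG_DELTA (HPET_MIN_CYCLES + (HPET_MIN_CYCLES >> 1))

int main(void)
{
	double ns_per_cycle = 1e9 / 14318180.0;	/* assumed legacy HPET rate */

	/* ETIME threshold ~8.9us; enforced programming floor ~13.4us */
	printf("HPET_MIN_CYCLES     = %.1f us\n",
	       HPET_MIN_CYCLES * ns_per_cycle / 1000.0);
	printf("HPET_MIN_PROG_DELTA = %.1f us\n",
	       HPET_MIN_PROG_DELTA * ns_per_cycle / 1000.0);
	return 0;
}

So the enforced programming floor rises from the old fixed 5us to roughly 13.4us, and -ETIME is signalled whenever the programmed comparator value ends up within 128 cycles (about 8.9us) of the running counter.
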
diff --git a/arch/x86/kernel/xsave.c b/arch/x86/kernel/xsave.c
index 9c253bd65e24ba3e803b044fca3ad57224a41c52..547128546cc3bd4644a189955c1b804f5c150770 100644
--- a/arch/x86/kernel/xsave.c
+++ b/arch/x86/kernel/xsave.c
@@ -394,7 +394,8 @@ static void __init setup_xstate_init(void)
 	 * Setup init_xstate_buf to represent the init state of
 	 * all the features managed by the xsave
 	 */
-	init_xstate_buf = alloc_bootmem(xstate_size);
+	init_xstate_buf = alloc_bootmem_align(xstate_size,
+					      __alignof__(struct xsave_struct));
 	init_xstate_buf->i387.mxcsr = MXCSR_DEFAULT;
 
 	clts();
diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile
index 4a2afa1bac51f58d35e27ff6ffd18c4efc064f4b..b6552b189bcdbb43b1f3627616f1f69d488f9efa 100644
--- a/arch/x86/vdso/Makefile
+++ b/arch/x86/vdso/Makefile
@@ -25,7 +25,7 @@ targets += vdso.so vdso.so.dbg vdso.lds $(vobjs-y)
 
 export CPPFLAGS_vdso.lds += -P -C
 
-VDSO_LDFLAGS_vdso.lds = -m elf_x86_64 -Wl,-soname=linux-vdso.so.1 \
+VDSO_LDFLAGS_vdso.lds = -m64 -Wl,-soname=linux-vdso.so.1 \
 			-Wl,-z,max-page-size=4096 -Wl,-z,common-page-size=4096
 
 $(obj)/vdso.o: $(src)/vdso.S $(obj)/vdso.so
@@ -69,7 +69,7 @@ vdso32.so-$(VDSO32-y)	+= sysenter
 vdso32-images			= $(vdso32.so-y:%=vdso32-%.so)
 
 CPPFLAGS_vdso32.lds = $(CPPFLAGS_vdso.lds)
-VDSO_LDFLAGS_vdso32.lds = -m elf_i386 -Wl,-soname=linux-gate.so.1
+VDSO_LDFLAGS_vdso32.lds = -m32 -Wl,-soname=linux-gate.so.1
 
 # This makes sure the $(obj) subdirectory exists even though vdso32/
 # is not a kbuild sub-make subdirectory.
diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 0157708d474da57ad0532a3bb8bbbd90f0204aeb..09933eb9126be48f154e809bdcfdc57292a4858a 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -1417,6 +1417,11 @@ int __init enable_drhd_fault_handling(void)
 			       (unsigned long long)drhd->reg_base_addr, ret);
 			return -1;
 		}
+
+		/*
+		 * Clear any previous faults.
+		 */
+		dmar_fault(iommu->irq, iommu);
 	}
 
 	return 0;
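
The xsave.c change is needed because XSAVE/XRSTOR fault if their memory operand is not 64-byte aligned, while alloc_bootmem()'s SMP_CACHE_BYTES alignment does not guarantee that. A minimal sketch of how __alignof__ propagates the requirement from the type (struct xsave_area here is a hypothetical stand-in for the kernel's xsave_struct):

#include <stdio.h>

/* Hypothetical stand-in: the real struct carries aligned(64) because
 * XSAVE/XRSTOR raise #GP on buffers that are not 64-byte aligned. */
struct xsave_area {
	unsigned char data[512];
} __attribute__((aligned(64)));

int main(void)
{
	/* __alignof__ recovers the declared alignment, so an allocator
	 * call parameterized on it inherits the ISA requirement. */
	printf("%lu\n", (unsigned long)__alignof__(struct xsave_area)); /* 64 */
	return 0;
}
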
diff --git a/drivers/pci/quirks.c b/drivers/pci/quirks.c
index 313c0bda0a8dbc94e650a8321d80f8818ad82633..53a786fd0d40c88d978bd4a656971be7b09cdb4a 100644
--- a/drivers/pci/quirks.c
+++ b/drivers/pci/quirks.c
@@ -2767,6 +2767,29 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
 DECLARE_PCI_FIXUP_RESUME_EARLY(PCI_VENDOR_ID_RICOH, PCI_DEVICE_ID_RICOH_R5C832, ricoh_mmc_fixup_r5c832);
 #endif /*CONFIG_MMC_RICOH_MMC*/
 
+#if defined(CONFIG_DMAR) || defined(CONFIG_INTR_REMAP)
+#define VTUNCERRMSK_REG	0x1ac
+#define VTD_MSK_SPEC_ERRORS	(1 << 31)
+/*
+ * This is a quirk for masking vt-d spec defined errors to platform error
+ * handling logic. Without this, platforms using Intel 7500, 5500 chipsets
+ * (and the derivative chipsets like X58 etc) seem to generate NMI/SMI (based
+ * on the RAS config settings of the platform) when a vt-d fault happens.
+ * The resulting SMI caused the system to hang.
+ *
+ * VT-d spec related errors are already handled by the VT-d OS code, so no
+ * need to report the same error through other channels.
+ */
+static void vtd_mask_spec_errors(struct pci_dev *dev)
+{
+	u32 word;
+
+	pci_read_config_dword(dev, VTUNCERRMSK_REG, &word);
+	pci_write_config_dword(dev, VTUNCERRMSK_REG, word | VTD_MSK_SPEC_ERRORS);
+}
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x342e, vtd_mask_spec_errors);
+DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x3c28, vtd_mask_spec_errors);
+#endif
 
 static void pci_do_fixups(struct pci_dev *dev, struct pci_fixup *f,
 			  struct pci_fixup *end)
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
index 266ab92912327bb7bfe9bf1d87ff008260cc424f..499dfe982a0e8a21066e815daa400ac8b57ec5b4 100644
--- a/include/linux/bootmem.h
+++ b/include/linux/bootmem.h
@@ -105,6 +105,8 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat,
 
 #define alloc_bootmem(x) \
 	__alloc_bootmem(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
+#define alloc_bootmem_align(x, align) \
+	__alloc_bootmem(x, align, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_nopanic(x) \
 	__alloc_bootmem_nopanic(x, SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS))
 #define alloc_bootmem_pages(x) \
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index de2c41758e29e20d6a709303f21a6d8242696fa0..4f1279e105ee143e4317219b3cb093bc8bbdd954 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -887,6 +887,7 @@ struct perf_cpu_context {
 	int				exclusive;
 	struct list_head		rotation_list;
 	int				jiffies_interval;
+	struct pmu			*active_pmu;
 };
 
 struct perf_output_handle {
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index eac7e3364335a7a3f94d902e69b9ed4eac4df74a..2870feee81dd7a046703645c9ec50022d4339f39 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3824,6 +3824,8 @@ static void perf_event_task_event(struct perf_task_event *task_event)
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->active_pmu != pmu)
+			goto next;
 		perf_event_task_ctx(&cpuctx->ctx, task_event);
 
 		ctx = task_event->task_ctx;
@@ -3959,6 +3961,8 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->active_pmu != pmu)
+			goto next;
 		perf_event_comm_ctx(&cpuctx->ctx, comm_event);
 
 		ctxn = pmu->task_ctx_nr;
@@ -4144,6 +4148,8 @@ static void perf_event_mmap_event(struct perf_mmap_event *mmap_event)
 	rcu_read_lock();
 	list_for_each_entry_rcu(pmu, &pmus, entry) {
 		cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
+		if (cpuctx->active_pmu != pmu)
+			goto next;
 		perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
 					vma->vm_flags & VM_EXEC);
 
@@ -4713,7 +4719,7 @@ static int perf_swevent_init(struct perf_event *event)
 		break;
 	}
 
-	if (event_id > PERF_COUNT_SW_MAX)
+	if (event_id >= PERF_COUNT_SW_MAX)
 		return -ENOENT;
 
 	if (!event->parent) {
@@ -5145,20 +5151,36 @@ static void *find_pmu_context(int ctxn)
 	return NULL;
 }
 
-static void free_pmu_context(void * __percpu cpu_context)
+static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)
 {
-	struct pmu *pmu;
+	int cpu;
+
+	for_each_possible_cpu(cpu) {
+		struct perf_cpu_context *cpuctx;
+
+		cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
+
+		if (cpuctx->active_pmu == old_pmu)
+			cpuctx->active_pmu = pmu;
+	}
+}
+
+static void free_pmu_context(struct pmu *pmu)
+{
+	struct pmu *i;
 
 	mutex_lock(&pmus_lock);
 	/*
 	 * Like a real lame refcount.
 	 */
-	list_for_each_entry(pmu, &pmus, entry) {
-		if (pmu->pmu_cpu_context == cpu_context)
+	list_for_each_entry(i, &pmus, entry) {
+		if (i->pmu_cpu_context == pmu->pmu_cpu_context) {
+			update_pmu_context(i, pmu);
 			goto out;
+		}
 	}
-	free_percpu(cpu_context);
+	free_percpu(pmu->pmu_cpu_context);
 out:
 	mutex_unlock(&pmus_lock);
 }
 
@@ -5190,6 +5212,7 @@ int perf_pmu_register(struct pmu *pmu)
 		cpuctx->ctx.pmu = pmu;
 		cpuctx->jiffies_interval = 1;
 		INIT_LIST_HEAD(&cpuctx->rotation_list);
+		cpuctx->active_pmu = pmu;
 	}
 
 got_cpu_context:
@@ -5241,7 +5264,7 @@ void perf_pmu_unregister(struct pmu *pmu)
 	synchronize_rcu();
 
 	free_percpu(pmu->pmu_disable_count);
-	free_pmu_context(pmu->pmu_cpu_context);
+	free_pmu_context(pmu);
 }
 
 struct pmu *perf_init_event(struct perf_event *event)
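
The active_pmu plumbing above exists to deduplicate side-band events: software PMUs share a single perf_cpu_context, and iterating every registered pmu used to deliver each task/comm/mmap event once per sharer. A toy model of the nomination scheme (all names hypothetical, not the kernel API):

#include <stdio.h>

/* Toy model: several pmus may share one context; only the context's
 * nominated owner processes it, so a shared context is visited once
 * per event rather than once per pmu. */
struct cpu_context { struct pmu *active_pmu; };

struct pmu {
	const char *name;
	struct cpu_context *ctx;
};

static void deliver(struct pmu *pmus[], int n)
{
	for (int i = 0; i < n; i++) {
		if (pmus[i]->ctx->active_pmu != pmus[i])
			continue;	/* another pmu owns this context */
		printf("event delivered via %s\n", pmus[i]->name);
	}
}

int main(void)
{
	struct cpu_context shared = { 0 };
	struct pmu a = { "software", &shared }, b = { "tracepoint", &shared };
	struct pmu *all[] = { &a, &b };

	shared.active_pmu = &a;	/* first registered pmu owns the context */
	deliver(all, 2);	/* prints once, not twice */
	return 0;
}

The reworked free_pmu_context() completes the picture: when the nominated owner unregisters, ownership is handed to a surviving pmu that shares the context via update_pmu_context(), much like reassigning shared.active_pmu here.
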
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c380612273bf75d683ddebcf5e7863b30bece806..f8cf959bad456dead3a5dec2fdd1a21e384f22bb 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2338,11 +2338,19 @@ tracing_write_stub(struct file *filp, const char __user *ubuf,
 	return count;
 }
 
+static loff_t tracing_seek(struct file *file, loff_t offset, int origin)
+{
+	if (file->f_mode & FMODE_READ)
+		return seq_lseek(file, offset, origin);
+	else
+		return 0;
+}
+
 static const struct file_operations tracing_fops = {
 	.open		= tracing_open,
 	.read		= seq_read,
 	.write		= tracing_write_stub,
-	.llseek		= seq_lseek,
+	.llseek		= tracing_seek,
 	.release	= tracing_release,
 };
 
diff --git a/scripts/recordmcount.h b/scripts/recordmcount.h
index 58e933a20544a2cd379446661d01f3755494bf97..39667174971d1eb86105edd927aa92d39c0954bf 100644
--- a/scripts/recordmcount.h
+++ b/scripts/recordmcount.h
@@ -119,7 +119,7 @@ static uint_t (*Elf_r_sym)(Elf_Rel const *rp) = fn_ELF_R_SYM;
 
 static void fn_ELF_R_INFO(Elf_Rel *const rp, unsigned sym, unsigned type)
 {
-	rp->r_info = ELF_R_INFO(sym, type);
+	rp->r_info = _w(ELF_R_INFO(sym, type));
 }
 static void (*Elf_r_info)(Elf_Rel *const rp, unsigned sym, unsigned type) = fn_ELF_R_INFO;
 
diff --git a/scripts/tags.sh b/scripts/tags.sh
index 8509bb51293530fae488f60594199031cd16bf07..bbbe584d44943077a41b22684b5ecdbce18f532a 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -125,7 +125,9 @@ exuberant()
 	-I DEFINE_TRACE,EXPORT_TRACEPOINT_SYMBOL,EXPORT_TRACEPOINT_SYMBOL_GPL \
 	--extra=+f --c-kinds=-px				\
 	--regex-asm='/^ENTRY\(([^)]*)\).*/\1/'			\
-	--regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/'
+	--regex-c='/^SYSCALL_DEFINE[[:digit:]]?\(([^,)]*).*/sys_\1/' \
+	--regex-c++='/^TRACE_EVENT\(([^,)]*).*/trace_\1/'	\
+	--regex-c++='/^DEFINE_EVENT\(([^,)]*).*/trace_\1/'
 
 	all_kconfigs | xargs $1 -a				\
 		--langdef=kconfig --language-force=kconfig	\
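
The recordmcount.h one-liner matters for cross-endian builds: every field written back into the object file must pass through the byte-order wrapper, and r_info was being stored in host order. A toy version of the _w() idea (names hypothetical; the real code selects the writer based on the ELF header's EI_DATA byte):

#include <stdio.h>
#include <stdint.h>

static uint32_t w_ident(uint32_t x) { return x; }
static uint32_t w_swap(uint32_t x)
{
	return (x >> 24) | ((x >> 8) & 0xff00) |
	       ((x << 8) & 0xff0000) | (x << 24);
}
/* chosen once at startup from the target ELF's endianness */
static uint32_t (*_w)(uint32_t) = w_swap;	/* cross-endian target */

int main(void)
{
	(void)w_ident;
	/* Forgetting _w() here is exactly the bug fixed above: the
	 * relocation info lands in host order and the target sees junk. */
	printf("%08x\n", _w(0x12345678));	/* 78563412 */
	return 0;
}
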