- 20 11月, 2008 1 次提交
-
-
由 Ulrich Drepper 提交于
Introduce a new accept4() system call. The addition of this system call matches analogous changes in 2.6.27 (dup3(), evenfd2(), signalfd4(), inotify_init1(), epoll_create1(), pipe2()) which added new system calls that differed from analogous traditional system calls in adding a flags argument that can be used to access additional functionality. The accept4() system call is exactly the same as accept(), except that it adds a flags bit-mask argument. Two flags are initially implemented. (Most of the new system calls in 2.6.27 also had both of these flags.) SOCK_CLOEXEC causes the close-on-exec (FD_CLOEXEC) flag to be enabled for the new file descriptor returned by accept4(). This is a useful security feature to avoid leaking information in a multithreaded program where one thread is doing an accept() at the same time as another thread is doing a fork() plus exec(). More details here: http://udrepper.livejournal.com/20407.html "Secure File Descriptor Handling", Ulrich Drepper). The other flag is SOCK_NONBLOCK, which causes the O_NONBLOCK flag to be enabled on the new open file description created by accept4(). (This flag is merely a convenience, saving the use of additional calls fcntl(F_GETFL) and fcntl (F_SETFL) to achieve the same result. Here's a test program. Works on x86-32. Should work on x86-64, but I (mtk) don't have a system to hand to test with. It tests accept4() with each of the four possible combinations of SOCK_CLOEXEC and SOCK_NONBLOCK set/clear in 'flags', and verifies that the appropriate flags are set on the file descriptor/open file description returned by accept4(). I tested Ulrich's patch in this thread by applying against 2.6.28-rc2, and it passes according to my test program. /* test_accept4.c Copyright (C) 2008, Linux Foundation, written by Michael Kerrisk <mtk.manpages@gmail.com> Licensed under the GNU GPLv2 or later. */ #define _GNU_SOURCE #include <unistd.h> #include <sys/syscall.h> #include <sys/socket.h> #include <netinet/in.h> #include <stdlib.h> #include <fcntl.h> #include <stdio.h> #include <string.h> #define PORT_NUM 33333 #define die(msg) do { perror(msg); exit(EXIT_FAILURE); } while (0) /**********************************************************************/ /* The following is what we need until glibc gets a wrapper for accept4() */ /* Flags for socket(), socketpair(), accept4() */ #ifndef SOCK_CLOEXEC #define SOCK_CLOEXEC O_CLOEXEC #endif #ifndef SOCK_NONBLOCK #define SOCK_NONBLOCK O_NONBLOCK #endif #ifdef __x86_64__ #define SYS_accept4 288 #elif __i386__ #define USE_SOCKETCALL 1 #define SYS_ACCEPT4 18 #else #error "Sorry -- don't know the syscall # on this architecture" #endif static int accept4(int fd, struct sockaddr *sockaddr, socklen_t *addrlen, int flags) { printf("Calling accept4(): flags = %x", flags); if (flags != 0) { printf(" ("); if (flags & SOCK_CLOEXEC) printf("SOCK_CLOEXEC"); if ((flags & SOCK_CLOEXEC) && (flags & SOCK_NONBLOCK)) printf(" "); if (flags & SOCK_NONBLOCK) printf("SOCK_NONBLOCK"); printf(")"); } printf("\n"); #if USE_SOCKETCALL long args[6]; args[0] = fd; args[1] = (long) sockaddr; args[2] = (long) addrlen; args[3] = flags; return syscall(SYS_socketcall, SYS_ACCEPT4, args); #else return syscall(SYS_accept4, fd, sockaddr, addrlen, flags); #endif } /**********************************************************************/ static int do_test(int lfd, struct sockaddr_in *conn_addr, int closeonexec_flag, int nonblock_flag) { int connfd, acceptfd; int fdf, flf, fdf_pass, flf_pass; struct sockaddr_in claddr; socklen_t addrlen; printf("=======================================\n"); connfd = socket(AF_INET, SOCK_STREAM, 0); if (connfd == -1) die("socket"); if (connect(connfd, (struct sockaddr *) conn_addr, sizeof(struct sockaddr_in)) == -1) die("connect"); addrlen = sizeof(struct sockaddr_in); acceptfd = accept4(lfd, (struct sockaddr *) &claddr, &addrlen, closeonexec_flag | nonblock_flag); if (acceptfd == -1) { perror("accept4()"); close(connfd); return 0; } fdf = fcntl(acceptfd, F_GETFD); if (fdf == -1) die("fcntl:F_GETFD"); fdf_pass = ((fdf & FD_CLOEXEC) != 0) == ((closeonexec_flag & SOCK_CLOEXEC) != 0); printf("Close-on-exec flag is %sset (%s); ", (fdf & FD_CLOEXEC) ? "" : "not ", fdf_pass ? "OK" : "failed"); flf = fcntl(acceptfd, F_GETFL); if (flf == -1) die("fcntl:F_GETFD"); flf_pass = ((flf & O_NONBLOCK) != 0) == ((nonblock_flag & SOCK_NONBLOCK) !=0); printf("nonblock flag is %sset (%s)\n", (flf & O_NONBLOCK) ? "" : "not ", flf_pass ? "OK" : "failed"); close(acceptfd); close(connfd); printf("Test result: %s\n", (fdf_pass && flf_pass) ? "PASS" : "FAIL"); return fdf_pass && flf_pass; } static int create_listening_socket(int port_num) { struct sockaddr_in svaddr; int lfd; int optval; memset(&svaddr, 0, sizeof(struct sockaddr_in)); svaddr.sin_family = AF_INET; svaddr.sin_addr.s_addr = htonl(INADDR_ANY); svaddr.sin_port = htons(port_num); lfd = socket(AF_INET, SOCK_STREAM, 0); if (lfd == -1) die("socket"); optval = 1; if (setsockopt(lfd, SOL_SOCKET, SO_REUSEADDR, &optval, sizeof(optval)) == -1) die("setsockopt"); if (bind(lfd, (struct sockaddr *) &svaddr, sizeof(struct sockaddr_in)) == -1) die("bind"); if (listen(lfd, 5) == -1) die("listen"); return lfd; } int main(int argc, char *argv[]) { struct sockaddr_in conn_addr; int lfd; int port_num; int passed; passed = 1; port_num = (argc > 1) ? atoi(argv[1]) : PORT_NUM; memset(&conn_addr, 0, sizeof(struct sockaddr_in)); conn_addr.sin_family = AF_INET; conn_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); conn_addr.sin_port = htons(port_num); lfd = create_listening_socket(port_num); if (!do_test(lfd, &conn_addr, 0, 0)) passed = 0; if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, 0)) passed = 0; if (!do_test(lfd, &conn_addr, 0, SOCK_NONBLOCK)) passed = 0; if (!do_test(lfd, &conn_addr, SOCK_CLOEXEC, SOCK_NONBLOCK)) passed = 0; close(lfd); exit(passed ? EXIT_SUCCESS : EXIT_FAILURE); } [mtk.manpages@gmail.com: rewrote changelog, updated test program] Signed-off-by: NUlrich Drepper <drepper@redhat.com> Tested-by: NMichael Kerrisk <mtk.manpages@gmail.com> Acked-by: NMichael Kerrisk <mtk.manpages@gmail.com> Cc: <linux-api@vger.kernel.org> Cc: <linux-arch@vger.kernel.org> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
- 18 11月, 2008 7 次提交
-
-
由 Philipp Kohlbecher 提交于
Impact: widen the reach of the low-memory-protect DMI quirk Phoenix BIOSes variously identify their vendor as "Phoenix Technologies, LTD" or "Phoenix Technologies LTD" (without the comma.) This patch makes the identification string in the bad_bios_dmi_table more general (following a suggestion by Ingo Molnar), so that both versions are handled. Again, the patched file compiles cleanly and the patch has been tested successfully on my machine. Signed-off-by: NPhilipp Kohlbecher <xt28@gmx.de> Cc: <stable@kernel.org> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
由 Joerg Roedel 提交于
Impact: fix possible use of stale IO/TLB entries Signed-off-by: NJoerg Roedel <joerg.roedel@amd.com>
-
由 Joerg Roedel 提交于
Impact: fix comparison length for 'fullflush' Signed-off-by: NJoerg Roedel <joerg.roedel@amd.com>
-
由 Joerg Roedel 提交于
Impact: makes device isolation the default for AMD IOMMU Some device drivers showed double-free bugs of DMA memory while testing them with AMD IOMMU. If all devices share the same protection domain this can lead to data corruption and data loss. Prevent this by putting each device into its own protection domain per default. Signed-off-by: NJoerg Roedel <joerg.roedel@amd.com>
-
由 Joerg Roedel 提交于
Impact: add a new AMD IOMMU kernel command line parameter Signed-off-by: NJoerg Roedel <joerg.roedel@amd.com>
-
由 Ingo Molnar 提交于
this compiler warning: arch/x86/kernel/ds.c: In function 'ds_request': arch/x86/kernel/ds.c:368: warning: 'context' may be used uninitialized in this function Shows that the code flow in ds_request() is buggy - it goes into the unlock+release-context path even when the context is not allocated yet. First allocate the context, then do the other checks. Also, take care with GFP allocations under the ds_lock spinlock. Cc: <stable@kernel.org> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
由 Venki Pallipadi 提交于
Impact: fix incorrectly marked unstable TSC clock Patch (commit 0d12cdd5 "sched: improve sched_clock() performance") has a regression on one of the test systems here. With the patch, I see: checking TSC synchronization [CPU#0 -> CPU#1]: Measured 28 cycles TSC warp between CPUs, turning off TSC clock. Marking TSC unstable due to check_tsc_sync_source failed Whereas, without the patch syncs pass fine on all CPUs: checking TSC synchronization [CPU#0 -> CPU#1]: passed. Due to this, TSC is marked unstable, when it is not actually unstable. This is because syncs in check_tsc_wrap() goes away due to this commit. As per the discussion on this thread, correct way to fix this is to add explicit syncs as below? Signed-off-by: NVenkatesh Pallipadi <venkatesh.pallipadi@intel.com> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
- 16 11月, 2008 3 次提交
-
-
由 Yinghai Lu 提交于
Impact: fix es7000 build CC arch/x86/kernel/es7000_32.o arch/x86/kernel/es7000_32.c: In function find_unisys_acpi_oem_table: arch/x86/kernel/es7000_32.c:255: error: implicit declaration of function acpi_get_table_with_size arch/x86/kernel/es7000_32.c:261: error: implicit declaration of function early_acpi_os_unmap_memory arch/x86/kernel/es7000_32.c: In function unmap_unisys_acpi_oem_table: arch/x86/kernel/es7000_32.c:277: error: implicit declaration of function __acpi_unmap_table make[1]: *** [arch/x86/kernel/es7000_32.o] Error 1 we applied one patch out of order... | commit a73aaedd | Author: Yinghai Lu <yhlu.kernel@gmail.com> | Date: Sun Sep 14 02:33:14 2008 -0700 | | x86: check dsdt before find oem table for es7000, v2 | | v2: use __acpi_unmap_table() that patch need: x86: use early_ioremap in __acpi_map_table x86: always explicitly map acpi memory acpi: remove final __acpi_map_table mapping before setting acpi_gbl_permanent_mmap acpi/x86: introduce __apci_map_table, v4 submitted to the ACPI tree but not upstream yet. fix it until those patches applied, need to revert this one Signed-off-by: NYinghai Lu <yinghai@kernel.org> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
由 Markus Metzger 提交于
Fix a problem where ds_request() returned an error without releasing the ds lock. Reported-by: NStephane Eranian <eranian@gmail.com> Signed-off-by: NMarkus Metzger <markus.t.metzger@gmail.com> Cc: <stable@kernel.org> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
由 David Woodhouse 提交于
This reverts commit e51af663, which was wrongly hoovered up and submitted about a month after a better fix had already been merged. The better fix is commit cbda1ba8 ("PCI/iommu: blacklist DMAR on Intel G31/G33 chipsets"), where we do this blacklisting based on the DMI identification for the offending motherboard, since sometimes this chipset (or at least a chipset with the same PCI ID) apparently _does_ actually have an IOMMU. Signed-off-by: NDavid Woodhouse <David.Woodhouse@intel.com> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
- 13 11月, 2008 2 次提交
-
-
由 Rafael J. Wysocki 提交于
My previous patch to make CONFIG_NUMA on x86_32 depend on BROKEN turned out to be unnecessary, after all, since the source of the hibernation vs CONFIG_NUMA problem turned out to be the fact that we didn't take the NUMA KVA remapping into account in the hibernation code. Signed-off-by: NRafael J. Wysocki <rjw@sisk.pl> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
由 Rafael J. Wysocki 提交于
Impact: fix crash during hibernation on 32-bit NUMA The NUMA code on x86_32 creates special memory mapping that allows each node's pgdat to be located in this node's memory. For this purpose it allocates a memory area at the end of each node's memory and maps this area so that it is accessible with virtual addresses belonging to low memory. As a result, if there is high memory, these NUMA-allocated areas are physically located in high memory, although they are mapped to low memory addresses. Our hibernation code does not take that into account and for this reason hibernation fails on all x86_32 systems with CONFIG_NUMA=y and with high memory present. Fix this by adding a special mapping for the NUMA-allocated memory areas to the temporary page tables created during the last phase of resume. Signed-off-by: NRafael J. Wysocki <rjw@sisk.pl> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
- 12 11月, 2008 6 次提交
-
-
由 Bjorn Helgaas 提交于
This removes the acpi_irq_balance_set() interface from the PCI interrupt link driver. x86 used acpi_irq_balance_set() to tell the PCI interrupt link driver to configure links to minimize IRQ sharing. But the link driver can easily figure out whether to turn on IRQ balancing based on the IRQ model (PIC/IOAPIC/etc), so we can get rid of that external interface. It's better for the driver to figure this out at init-time. If we set it externally via the x86 code, the interface reduces modularity, and we depend on the fact that acpi_process_madt() happens before we process the kernel command line. Signed-off-by: NBjorn Helgaas <bjorn.helgaas@hp.com> Signed-off-by: NLen Brown <len.brown@intel.com>
-
由 Avi Kivity 提交于
Reported-By: NDaniel Marjamäki <danielm77@spray.se> Signed-off-by: NAvi Kivity <avi@qumranet.com>
-
由 Sheng Yang 提交于
There is a potential issue that, when guest using pagetable without vmexit when EPT enabled, guest would use PAT/PCD/PWT bits to index PAT msr for it's memory, which would be inconsistent with host side and would cause host MCE due to inconsistent cache attribute. The patch set IGMT bit in EPT entry to ignore guest PAT and use WB as default memory type to protect host (notice that all memory mapped by KVM should be WB). Signed-off-by: NSheng Yang <sheng@linux.intel.com> Signed-off-by: NAvi Kivity <avi@redhat.com>
-
由 Avi Kivity 提交于
PCI device assignment makes calls to pci code, so require it to be built into the kernel. Signed-off-by: NAvi Kivity <avi@qumranet.com>
-
由 Rakib Mullick 提交于
WARNING: arch/x86/kernel/built-in.o(.text+0x1722c): Section mismatch in reference from the function kvm_setup_secondary_clock() to the function .devinit.text:setup_secondary_APIC_clock() The function kvm_setup_secondary_clock() references the function __devinit setup_secondary_APIC_clock(). This is often because kvm_setup_secondary_clock lacks a __devinit annotation or the annotation of setup_secondary_APIC_clock is wrong. Signed-off-by: NMd.Rakib H. Mullick <rakib.mullick@gmail.com> Cc: Ingo Molnar <mingo@elte.hu> Signed-off-by: NAndrew Morton <akpm@linux-foundation.org> Signed-off-by: NAvi Kivity <avi@redhat.com>
-
由 Marcelo Tosatti 提交于
The page fault path can use two rmap_desc structures, if: - walk_addr's dirty pte update allocates one rmap_desc. - mmu_lock is dropped, sptes are zapped resulting in rmap_desc being freed. - fetch->mmu_set_spte allocates another rmap_desc. Increase to 4 for safety. Signed-off-by: NMarcelo Tosatti <mtosatti@redhat.com> Signed-off-by: NAvi Kivity <avi@redhat.com>
-
- 11 11月, 2008 5 次提交
-
-
由 James Bottomley 提交于
Impact: build/boot fix for x86/Voyager This change: | commit 3d442233 | Author: Jens Axboe <jens.axboe@oracle.com> | Date: Thu Jun 26 11:21:34 2008 +0200 | | Add generic helpers for arch IPI function calls didn't wire up the voyager smp call function correctly, so do that here. Also make CONFIG_USE_GENERIC_SMP_HELPERS a def_bool y again, since we now use the generic helpers for every x86 architecture. Signed-off-by: NJames Bottomley <James.Bottomley@HansenPartnership.com> Cc: Jens Axboe <Jens.Axboe@oracle.com> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
由 Rafael J. Wysocki 提交于
While investigating the failure of hibernation on 32-bit x86 with CONFIG_NUMA set, as described in this message http://marc.info/?l=linux-kernel&m=122634118116226&w=4 I asked some people for help and I was told that it wasn't really worth the effort, because CONFIG_NUMA was generally broken on 32-bit x86 systems and it shouldn't be used in such configs. For this reason, make CONFIG_NUMA depend on BROKEN instead of EXPERIMENTAL on x86-32. Signed-off-by: NRafael J. Wysocki <rjw@sisk.pl> Cc: Andi Kleen <andi@firstfloor.org> Cc: Pavel Machek <pavel@suse.cz> Cc: Peter Zijlstra <peterz@infradead.org> Signed-off-by: NLinus Torvalds <torvalds@linux-foundation.org>
-
由 Matt Fleming 提交于
Some functions that may be called from this handler require that interrupts are disabled. Also, combining IRQF_DISABLED and IRQF_SHARED does not reliably disable interrupts in a handler, so remove IRQF_SHARED from the irq flags (this irq is not shared anyway). Signed-off-by: NMatt Fleming <mjf@gentoo.org> Cc: mingo@elte.hu Cc: venkatesh.pallipadi@intel.com Cc: "Will Newton" <will.newton@gmail.com> Signed-off-by: NThomas Gleixner <tglx@linutronix.de>
-
由 Matt Fleming 提交于
In hpet_next_event() we check that the value we just wrote to HPET_Tn_CMP(timer) has reached the chip. Currently, we're checking that the value we wrote to HPET_Tn_CMP(timer) is in HPET_T0_CMP, which, if timer is anything other than timer 0, is likely to fail. Signed-off-by: NMatt Fleming <mjf@gentoo.org> Cc: mingo@elte.hu Cc: venkatesh.pallipadi@intel.com Signed-off-by: NThomas Gleixner <tglx@linutronix.de>
-
由 Matt Fleming 提交于
It is possible to flood the console with call traces if the WARN_ON condition is true because of the frequency with which this function is called. Signed-off-by: NMatt Fleming <mjf@gentoo.org> Cc: mingo@elte.hu Cc: venkatesh.pallipadi@intel.com Signed-off-by: NThomas Gleixner <tglx@linutronix.de>
-
- 10 11月, 2008 1 次提交
-
-
由 Arjan van de Ven 提交于
a new file was accidentally added to include/asm-x86; move it to the new arch/x86/include/asm location Signed-off-by: NArjan van de Ven <arjan@linux.intel.com>
-
- 09 11月, 2008 1 次提交
-
-
由 Ingo Molnar 提交于
sched_clock() uses cycles_2_ns() needlessly - which is an irq-disabling variant of __cycles_2_ns(). Most of the time sched_clock() is called with irqs disabled already. The few places that call it with irqs enabled need to be updated. Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
- 08 11月, 2008 2 次提交
-
-
由 Ingo Molnar 提交于
in scheduler-intense workloads native_read_tsc() overhead accounts for 20% of the system overhead: 659567 system_call 41222.9375 686796 schedule 435.7843 718382 __switch_to 665.1685 823875 switch_mm 4526.7857 1883122 native_read_tsc 55385.9412 9761990 total 2.8468 this is large part due to the rdtsc_barrier() that is done before and after reading the TSC. But sched_clock() is not a precise clock in the GTOD sense, using such barriers is completely pointless. So remove the barriers and only use them in vget_cycles(). This improves lat_ctx performance by about 5%. Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
由 Andi Kleen 提交于
Fix the counter overflow check for CPUs with counter width > 32 I had a similar change in a different patch that I didn't submit and I didn't notice the problem earlier because it was always tested together. Signed-off-by: NAndi Kleen <ak@linux.intel.com> Signed-off-by: NRobert Richter <robert.richter@amd.com>
-
- 07 11月, 2008 2 次提交
-
-
由 Jeremy Fitzhardinge 提交于
Xen requires that all mappings of pagetable pages are read-only, so that they can't be updated illegally. As a result, if a page is being turned into a pagetable page, we need to make sure all its mappings are RO. If the page had been used for ioremap or vmalloc, it may still have left over mappings as a result of not having been lazily unmapped. This change makes sure we explicitly mop them all up before pinning the page. Unlike aliases created by kmap, the there can be vmalloc aliases even for non-high pages, so we must do the flush unconditionally. Signed-off-by: NJeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Cc: Linux Memory Management List <linux-mm@kvack.org> Cc: Nick Piggin <nickpiggin@yahoo.com.au> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
由 Jeremy Fitzhardinge 提交于
Impact: fix 32-bit Xen guest boot crash On 32-bit PAE, pud_page, for no good reason, didn't really return a struct page *. Since Jan Beulich's fix "i386/PAE: fix pud_page()", pud_page does return a struct page *. Because PAE has 3 pagetable levels, the pud level is folded into the pgd level, so pgd_page() is the same as pud_page(), and now returns a struct page *. Update the xen/mmu.c code which uses pgd_page() accordingly. Signed-off-by: NJeremy Fitzhardinge <jeremy.fitzhardinge@citrix.com> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
- 06 11月, 2008 9 次提交
-
-
由 Eduardo Habkost 提交于
This reverts commit c7ffa6c2. the assumptio of this change was that this would not break any existing machine. Andrey Borzenkov reported troubles with the ACPI reboot method: the system would hang on reboot, necessiating a power cycle. Probably more systems are affected as well. Also, there are patches queued up for v2.6.29 to disable virtualization on emergency_restart() - which was the original motivation of this change. Reported-by: NAndrey Borzenkov <arvidjaar@mail.ru> Bisected-by: NAndrey Borzenkov <arvidjaar@mail.ru> Signed-off-by: NEduardo Habkost <ehabkost@redhat.com> Acked-by: NAvi Kivity <avi@redhat.com> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
由 Hugh Dickins 提交于
Impact: right-align /proc/meminfo consistent with other fields When the split-LRU patches added Inactive(anon) and Inactive(file) lines to /proc/meminfo, all counts were moved two columns rightwards to fit in. Now move x86's DirectMap lines two columns rightwards to line up. Signed-off-by: NHugh Dickins <hugh@veritas.com> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
由 Joerg Roedel 提交于
Lazy flushing needs to take care of the unmap path too which is not yet implemented and leads to stale IO/TLB entries. This is fixed by this patch. Signed-off-by: NJoerg Roedel <joerg.roedel@amd.com>
-
由 Suresh Siddha 提交于
Impact: fix rare x2apic hang On x86, x2apic mode accesses for sending IPI's don't have serializing semantics. If the IPI receivner refers(in lock-free fashion) to some memory setup by the sender, the need for smp_mb() before sending the IPI becomes critical in x2apic mode. Add the smp_mb() in native_flush_tlb_others() before sending the IPI. Signed-off-by: NSuresh Siddha <suresh.b.siddha@intel.com> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
由 Yinghai Lu 提交于
Impact: fix warning message when PARAVIRT is set in config Remove stale #ifdef components from our IRQ sizing logic. x86/Voyager is the only holdout. Signed-off-by: NYinghai Lu <yinghai@kernel.org> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
由 Bjorn Helgaas 提交于
Impact: clarify menuconfig text Mention ACPI in the top-level menu to give a clue as to where it lives. This matches what ia64 does. Signed-off-by: NBjorn Helgaas <bjorn.helgaas@hp.com> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
由 Yinghai Lu 提交于
Impact: make NR_IRQS big enough for system with lots of apic/pins If lots of IO_APIC's are there (or can be there), size the same way as 64-bit, depending on MAX_IO_APICS and NR_CPUS. This fixes the boot problem reported by Ben Hutchings on a 32-bit server with 5 IO-APICs and 240 IO-APIC pins. Signed-off-by: NYinghai <yinghai@kernel.org> Tested-by: NBen Hutchings <bhutchings@solarflare.com> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
由 Ben Hutchings 提交于
Impact: fix boot hang on 32-bit systems with more than 224 IO-APIC pins On some 32-bit systems with a lot of IO-APICs probe_nr_irqs() can return a value larger than NR_IRQS. This will lead to probe_irq_on() overrunning the irq_desc array. I hit this when running net-next-2.6 (close to 2.6.28-rc3) on a Supermicro dual Xeon system. NR_IRQS is 224 but probe_nr_irqs() detects 5 IOAPICs and returns 240. Here are the log messages: Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x01] address[0xfec00000] gsi_base[0]) Tue Nov 4 16:53:47 2008 IOAPIC[0]: apic_id 1, version 32, address 0xfec00000, GSI 0-23 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x02] address[0xfec81000] gsi_base[24]) Tue Nov 4 16:53:47 2008 IOAPIC[1]: apic_id 2, version 32, address 0xfec81000, GSI 24-47 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x03] address[0xfec81400] gsi_base[48]) Tue Nov 4 16:53:47 2008 IOAPIC[2]: apic_id 3, version 32, address 0xfec81400, GSI 48-71 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x04] address[0xfec82000] gsi_base[72]) Tue Nov 4 16:53:47 2008 IOAPIC[3]: apic_id 4, version 32, address 0xfec82000, GSI 72-95 Tue Nov 4 16:53:47 2008 ACPI: IOAPIC (id[0x05] address[0xfec82400] gsi_base[96]) Tue Nov 4 16:53:47 2008 IOAPIC[4]: apic_id 5, version 32, address 0xfec82400, GSI 96-119 Tue Nov 4 16:53:47 2008 ACPI: INT_SRC_OVR (bus 0 bus_irq 0 global_irq 2 high edge) Tue Nov 4 16:53:47 2008 ACPI: INT_SRC_OVR (bus 0 bus_irq 9 global_irq 9 high level) Tue Nov 4 16:53:47 2008 Enabling APIC mode: Flat. Using 5 I/O APICs Signed-off-by: NBen Hutchings <bhutchings@solarflare.com> Acked-by: NYinghai Lu <yhlu.kernel@gmail.com> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
由 Ingo Molnar 提交于
Impact: improve wakeup affinity on NUMA systems, tweak SMP systems Given the fixes+tweaks to the wakeup-buddy code, re-tweak the domain balancing defaults on NUMA and SMP systems. Turn on SD_WAKE_AFFINE which was off on x86 NUMA - there's no reason why we would not want to have wakeup affinity across nodes as well. (we already do this in the standard NUMA template.) lat_ctx on a NUMA box is particularly happy about this change: before: | phoenix:~/l> ./lat_ctx -s 0 2 | "size=0k ovr=2.60 | 2 5.70 after: | phoenix:~/l> ./lat_ctx -s 0 2 | "size=0k ovr=2.65 | 2 2.07 a 2.75x speedup. pipe-test is similarly happy about it too: | phoenix:~/sched-tests> ./pipe-test | 18.26 usecs/loop. | 14.70 usecs/loop. | 14.38 usecs/loop. | 10.55 usecs/loop. # +WAKE_AFFINE on domain0+domain1 | 8.63 usecs/loop. | 8.59 usecs/loop. | 9.03 usecs/loop. | 8.94 usecs/loop. | 8.96 usecs/loop. | 8.63 usecs/loop. Also: - disable SD_BALANCE_NEWIDLE on NUMA and SMP domains (keep it for siblings) - enable SD_WAKE_BALANCE on SMP domains Sysbench+postgresql improves all around the board, quite significantly: .28-rc3-11474e2c .28-rc3-11474e2c-tune ------------------------------------------------- 1: 571 688 +17.08% 2: 1236 1206 -2.55% 4: 2381 2642 +9.89% 8: 4958 5164 +3.99% 16: 9580 9574 -0.07% 32: 7128 8118 +12.20% 64: 7342 8266 +11.18% 128: 7342 8064 +8.95% 256: 7519 7884 +4.62% 512: 7350 7731 +4.93% ------------------------------------------------- SUM: 55412 59341 +6.62% So it's a win both for the runup portion, the peak area and the tail. Signed-off-by: NIngo Molnar <mingo@elte.hu>
-
- 04 11月, 2008 1 次提交
-
-
由 Alok Kataria 提交于
Impact: fix udelay when "notsc" boot parameter is passed With notsc passed on commandline, tsc may not be used for udelays, make sure that we do not use tsc_khz to calculate the lpj value in such cases. Reported-by: NBartlomiej Zolnierkiewicz <bzolnier@gmail.com> Signed-off-by: NAlok N Kataria <akataria@vmware.com> Cc: <stable@kernel.org> Signed-off-by: NIngo Molnar <mingo@elte.hu>
-