diff --git a/Documentation/feature-removal-schedule.txt b/Documentation/feature-removal-schedule.txt index af0e9393bf684d62c70afcaa1c620709fceebb86..309c47b91598e4fba5d23eaec16234d560624895 100644 --- a/Documentation/feature-removal-schedule.txt +++ b/Documentation/feature-removal-schedule.txt @@ -282,6 +282,13 @@ Why: Not used in-tree. The current out-of-tree users used it to out-of-tree driver. Who: Thomas Gleixner +---------------------------- + +What: usedac i386 kernel parameter +When: 2.6.27 +Why: replaced by allowdac and no dac combination +Who: Glauber Costa + --------------------------- What: /sys/o2cb symlink diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 4b0f1ae31a4c152f4547050e2b3d8cc6b375bb31..f4839606988beb27b95ac651eb6d12d71bd13279 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1280,8 +1280,16 @@ and is between 256 and 4096 characters. It is defined in the file noexec [IA-64] noexec [X86-32,X86-64] + On X86-32 available only on PAE configured kernels. noexec=on: enable non-executable mappings (default) - noexec=off: disable nn-executable mappings + noexec=off: disable non-executable mappings + + noexec32 [X86-64] + This affects only 32-bit executables. + noexec32=on: enable non-executable mappings (default) + read doesn't imply executable mappings + noexec32=off: disable non-executable mappings + read implies executable mappings nofxsr [BUGS=X86-32] Disables x86 floating point extended register save and restore. The kernel will only save diff --git a/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c b/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c new file mode 100644 index 0000000000000000000000000000000000000000..f8e8e95e81fd063f6f2b7f43878641f5184b5f9c --- /dev/null +++ b/Documentation/prctl/disable-tsc-ctxt-sw-stress-test.c @@ -0,0 +1,96 @@ +/* + * Tests for prctl(PR_GET_TSC, ...) / prctl(PR_SET_TSC, ...) + * + * Tests if the control register is updated correctly + * at context switches + * + * Warning: this test will cause a very high load for a few seconds + * + */ + +#include +#include +#include +#include +#include +#include + + +#include +#include + +/* Get/set the process' ability to use the timestamp counter instruction */ +#ifndef PR_GET_TSC +#define PR_GET_TSC 25 +#define PR_SET_TSC 26 +# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ +# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ +#endif + +uint64_t rdtsc() { +uint32_t lo, hi; +/* We cannot use "=A", since this would use %rax on x86_64 */ +__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); +return (uint64_t)hi << 32 | lo; +} + +void sigsegv_expect(int sig) +{ + /* */ +} + +void segvtask(void) +{ + if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0) + { + perror("prctl"); + exit(0); + } + signal(SIGSEGV, sigsegv_expect); + alarm(10); + rdtsc(); + fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n"); + exit(0); +} + + +void sigsegv_fail(int sig) +{ + fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n"); + exit(0); +} + +void rdtsctask(void) +{ + if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0) + { + perror("prctl"); + exit(0); + } + signal(SIGSEGV, sigsegv_fail); + alarm(10); + for(;;) rdtsc(); +} + + +int main(int argc, char **argv) +{ + int n_tasks = 100, i; + + fprintf(stderr, "[No further output means we're allright]\n"); + + for (i=0; i +#include +#include +#include +#include +#include + + +#include +#include + +/* Get/set the process' ability to use the timestamp counter instruction */ +#ifndef PR_GET_TSC +#define PR_GET_TSC 25 +#define PR_SET_TSC 26 +# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ +# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ +#endif + +/* snippet from wikipedia :-) */ + +uint64_t rdtsc() { +uint32_t lo, hi; +/* We cannot use "=A", since this would use %rax on x86_64 */ +__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); +return (uint64_t)hi << 32 | lo; +} + +int should_segv = 0; + +void sigsegv_cb(int sig) +{ + if (!should_segv) + { + fprintf(stderr, "FATAL ERROR, rdtsc() failed while enabled\n"); + exit(0); + } + if (prctl(PR_SET_TSC, PR_TSC_ENABLE) < 0) + { + perror("prctl"); + exit(0); + } + should_segv = 0; + + rdtsc(); +} + +void task(void) +{ + signal(SIGSEGV, sigsegv_cb); + alarm(10); + for(;;) + { + rdtsc(); + if (should_segv) + { + fprintf(stderr, "FATAL ERROR, rdtsc() succeeded while disabled\n"); + exit(0); + } + if (prctl(PR_SET_TSC, PR_TSC_SIGSEGV) < 0) + { + perror("prctl"); + exit(0); + } + should_segv = 1; + } +} + + +int main(int argc, char **argv) +{ + int n_tasks = 100, i; + + fprintf(stderr, "[No further output means we're allright]\n"); + + for (i=0; i +#include +#include +#include +#include + + +#include +#include + +/* Get/set the process' ability to use the timestamp counter instruction */ +#ifndef PR_GET_TSC +#define PR_GET_TSC 25 +#define PR_SET_TSC 26 +# define PR_TSC_ENABLE 1 /* allow the use of the timestamp counter */ +# define PR_TSC_SIGSEGV 2 /* throw a SIGSEGV instead of reading the TSC */ +#endif + +const char *tsc_names[] = +{ + [0] = "[not set]", + [PR_TSC_ENABLE] = "PR_TSC_ENABLE", + [PR_TSC_SIGSEGV] = "PR_TSC_SIGSEGV", +}; + +uint64_t rdtsc() { +uint32_t lo, hi; +/* We cannot use "=A", since this would use %rax on x86_64 */ +__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); +return (uint64_t)hi << 32 | lo; +} + +void sigsegv_cb(int sig) +{ + int tsc_val = 0; + + printf("[ SIG_SEGV ]\n"); + printf("prctl(PR_GET_TSC, &tsc_val); "); + fflush(stdout); + + if ( prctl(PR_GET_TSC, &tsc_val) == -1) + perror("prctl"); + + printf("tsc_val == %s\n", tsc_names[tsc_val]); + printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n"); + fflush(stdout); + if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1) + perror("prctl"); + + printf("rdtsc() == "); +} + +int main(int argc, char **argv) +{ + int tsc_val = 0; + + signal(SIGSEGV, sigsegv_cb); + + printf("rdtsc() == %llu\n", (unsigned long long)rdtsc()); + printf("prctl(PR_GET_TSC, &tsc_val); "); + fflush(stdout); + + if ( prctl(PR_GET_TSC, &tsc_val) == -1) + perror("prctl"); + + printf("tsc_val == %s\n", tsc_names[tsc_val]); + printf("rdtsc() == %llu\n", (unsigned long long)rdtsc()); + printf("prctl(PR_SET_TSC, PR_TSC_ENABLE)\n"); + fflush(stdout); + + if ( prctl(PR_SET_TSC, PR_TSC_ENABLE) == -1) + perror("prctl"); + + printf("rdtsc() == %llu\n", (unsigned long long)rdtsc()); + printf("prctl(PR_SET_TSC, PR_TSC_SIGSEGV)\n"); + fflush(stdout); + + if ( prctl(PR_SET_TSC, PR_TSC_SIGSEGV) == -1) + perror("prctl"); + + printf("rdtsc() == "); + fflush(stdout); + printf("%llu\n", (unsigned long long)rdtsc()); + fflush(stdout); + + exit(EXIT_SUCCESS); +} + diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 2a59dbb28248b7da44c9695250e97bf68a05799c..07cf771135653744cd109a2f2ab07bc1d7b92b90 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -903,6 +903,15 @@ config X86_64_ACPI_NUMA help Enable ACPI SRAT based node topology detection. +# Some NUMA nodes have memory ranges that span +# other nodes. Even though a pfn is valid and +# between a node's start and end pfns, it may not +# reside on that node. See memmap_init_zone() +# for details. +config NODES_SPAN_OTHER_NODES + def_bool y + depends on X86_64_ACPI_NUMA + config NUMA_EMU bool "NUMA emulation" depends on X86_64 && NUMA diff --git a/arch/x86/boot/a20.c b/arch/x86/boot/a20.c index 31348d054fca6129db01e2fd1e6f3600dc3038dd..90943f83e84d69ba1172ab745febe31a231f58fc 100644 --- a/arch/x86/boot/a20.c +++ b/arch/x86/boot/a20.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/a20.c - * * Enable A20 gate (return -1 on failure) */ diff --git a/arch/x86/boot/apm.c b/arch/x86/boot/apm.c index c117c7fb859c12bb13685f9fd213dd3e8facec2e..7aa6033001f9ace30ef1a4ffdb52bfd629a98f26 100644 --- a/arch/x86/boot/apm.c +++ b/arch/x86/boot/apm.c @@ -12,8 +12,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/apm.c - * * Get APM BIOS information */ diff --git a/arch/x86/boot/bitops.h b/arch/x86/boot/bitops.h index 8dcc8dc7db88c98a143a7361966ee6901b51f596..878e4b9940d9212ce581c5bea0ac518ae6bbf85f 100644 --- a/arch/x86/boot/bitops.h +++ b/arch/x86/boot/bitops.h @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/bitops.h - * * Very simple bitops for the boot code. */ diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index 09578070bfba94b2a66c750e6f0a1aafb25e9a1b..a34b9982c7cbcf928bd96e3b17955a789f226f57 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/boot.h - * * Header file for the real-mode kernel code */ diff --git a/arch/x86/boot/cmdline.c b/arch/x86/boot/cmdline.c index 680408a0f46317c898d5e73a620b499e9a9cbdb2..a1d35634bce0097d6ea32110f4285d0c1a9225b8 100644 --- a/arch/x86/boot/cmdline.c +++ b/arch/x86/boot/cmdline.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/cmdline.c - * * Simple command-line parser for early boot. */ diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index 036e635f18a3f14391fa39033fb70d37f8cad8d2..ba7736cf2ec73e8977e447a8ab852d083f079c42 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -130,7 +130,7 @@ relocated: /* * Setup the stack for the decompressor */ - leal stack_end(%ebx), %esp + leal boot_stack_end(%ebx), %esp /* * Do the decompression, and jump to the new kernel.. @@ -142,8 +142,8 @@ relocated: pushl %eax # input_len leal input_data(%ebx), %eax pushl %eax # input_data - leal _end(%ebx), %eax - pushl %eax # end of the image as third argument + leal boot_heap(%ebx), %eax + pushl %eax # heap area as third argument pushl %esi # real mode pointer as second arg call decompress_kernel addl $20, %esp @@ -181,7 +181,10 @@ relocated: jmp *%ebp .bss +/* Stack and heap for uncompression */ .balign 4 -stack: - .fill 4096, 1, 0 -stack_end: +boot_heap: + .fill BOOT_HEAP_SIZE, 1, 0 +boot_stack: + .fill BOOT_STACK_SIZE, 1, 0 +boot_stack_end: diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index e8657b98c902a0773a1e9861fc4db6349f0bb909..d8819efac81dc4488ff59163172448710311cce0 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -62,7 +63,7 @@ startup_32: subl $1b, %ebp /* setup a stack and make sure cpu supports long mode. */ - movl $user_stack_end, %eax + movl $boot_stack_end, %eax addl %ebp, %eax movl %eax, %esp @@ -243,9 +244,9 @@ ENTRY(startup_64) /* Copy the compressed kernel to the end of our buffer * where decompression in place becomes safe. */ - leaq _end(%rip), %r8 - leaq _end(%rbx), %r9 - movq $_end /* - $startup_32 */, %rcx + leaq _end_before_pgt(%rip), %r8 + leaq _end_before_pgt(%rbx), %r9 + movq $_end_before_pgt /* - $startup_32 */, %rcx 1: subq $8, %r8 subq $8, %r9 movq 0(%r8), %rax @@ -267,14 +268,14 @@ relocated: */ xorq %rax, %rax leaq _edata(%rbx), %rdi - leaq _end(%rbx), %rcx + leaq _end_before_pgt(%rbx), %rcx subq %rdi, %rcx cld rep stosb /* Setup the stack */ - leaq user_stack_end(%rip), %rsp + leaq boot_stack_end(%rip), %rsp /* zero EFLAGS after setting rsp */ pushq $0 @@ -285,7 +286,7 @@ relocated: */ pushq %rsi # Save the real mode argument movq %rsi, %rdi # real mode address - leaq _heap(%rip), %rsi # _heap + leaq boot_heap(%rip), %rsi # malloc area for uncompression leaq input_data(%rip), %rdx # input_data movl input_len(%rip), %eax movq %rax, %rcx # input_len @@ -310,9 +311,12 @@ gdt: .quad 0x0080890000000000 /* TS descriptor */ .quad 0x0000000000000000 /* TS continued */ gdt_end: - .bss -/* Stack for uncompression */ - .balign 4 -user_stack: - .fill 4096,4,0 -user_stack_end: + +.bss +/* Stack and heap for uncompression */ +.balign 4 +boot_heap: + .fill BOOT_HEAP_SIZE, 1, 0 +boot_stack: + .fill BOOT_STACK_SIZE, 1, 0 +boot_stack_end: diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c index dad4e699f5a326d41bd6c72d988517c32f0ec538..90456cee47c337b226a8874582440377c428a91e 100644 --- a/arch/x86/boot/compressed/misc.c +++ b/arch/x86/boot/compressed/misc.c @@ -217,12 +217,6 @@ static void putstr(const char *); static memptr free_mem_ptr; static memptr free_mem_end_ptr; -#ifdef CONFIG_X86_64 -#define HEAP_SIZE 0x7000 -#else -#define HEAP_SIZE 0x4000 -#endif - static char *vidmem; static int vidport; static int lines, cols; @@ -449,7 +443,7 @@ asmlinkage void decompress_kernel(void *rmode, memptr heap, window = output; /* Output buffer (Normally at 1M) */ free_mem_ptr = heap; /* Heap */ - free_mem_end_ptr = heap + HEAP_SIZE; + free_mem_end_ptr = heap + BOOT_HEAP_SIZE; inbuf = input_data; /* Input buffer */ insize = input_len; inptr = 0; diff --git a/arch/x86/boot/compressed/vmlinux_64.lds b/arch/x86/boot/compressed/vmlinux_64.lds index 7e5c7209f6cc2f0b932c6deb3208b8c4d5583374..bef1ac891bce9987d2693cdc1ff601457af0d6c4 100644 --- a/arch/x86/boot/compressed/vmlinux_64.lds +++ b/arch/x86/boot/compressed/vmlinux_64.lds @@ -39,10 +39,10 @@ SECTIONS *(.bss.*) *(COMMON) . = ALIGN(8); - _end = . ; + _end_before_pgt = . ; . = ALIGN(4096); pgtable = . ; . = . + 4096 * 6; - _heap = .; + _ebss = .; } } diff --git a/arch/x86/boot/copy.S b/arch/x86/boot/copy.S index ef127e56a3cf7ca36fed6bd275edc690325857fd..ef50c84e8b4bd6a517417d84c9275240ae19dd0c 100644 --- a/arch/x86/boot/copy.S +++ b/arch/x86/boot/copy.S @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/copy.S - * * Memory copy routines */ diff --git a/arch/x86/boot/cpucheck.c b/arch/x86/boot/cpucheck.c index 2462c88689edd47ea939de1b2d54cb82c25b7a40..7804389ee0059eb8f4be589cccd24a2623f9567f 100644 --- a/arch/x86/boot/cpucheck.c +++ b/arch/x86/boot/cpucheck.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/cpucheck.c - * * Check for obligatory CPU features and abort if the features are not * present. This code should be compilable as 16-, 32- or 64-bit * code, so be very careful with types and inline assembly. diff --git a/arch/x86/boot/edd.c b/arch/x86/boot/edd.c index 8721dc46a0b618336b9908e0d855611f7beb6175..d84a48ece78503b10e8429981e44397b6755a454 100644 --- a/arch/x86/boot/edd.c +++ b/arch/x86/boot/edd.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/edd.c - * * Get EDD BIOS disk information */ diff --git a/arch/x86/boot/install.sh b/arch/x86/boot/install.sh index 88d77761d01bfbccba3dc9d970ede616b0900d52..8d60ee15dfd9b2e5e4ff3808976ea8143e94c05a 100644 --- a/arch/x86/boot/install.sh +++ b/arch/x86/boot/install.sh @@ -1,7 +1,5 @@ #!/bin/sh # -# arch/i386/boot/install.sh -# # This file is subject to the terms and conditions of the GNU General Public # License. See the file "COPYING" in the main directory of this archive # for more details. diff --git a/arch/x86/boot/main.c b/arch/x86/boot/main.c index 7828da5cfd07475376c7d4d3fc95282334b60a3d..77569a4a3be114aaca948d04b060ebd28c80c1a9 100644 --- a/arch/x86/boot/main.c +++ b/arch/x86/boot/main.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/main.c - * * Main module for the real-mode kernel code */ diff --git a/arch/x86/boot/mca.c b/arch/x86/boot/mca.c index 68222f2d4b670479fd7656f4036c848bfed7e5e8..911eaae5d696427c98fdad7f344a14bdc0340efc 100644 --- a/arch/x86/boot/mca.c +++ b/arch/x86/boot/mca.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/mca.c - * * Get the MCA system description table */ diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c index e77d89f9e8aa23c13751bee268718e8ed7ee54a7..acad32eb4290861b1539553c1e8eb49f7ec82e92 100644 --- a/arch/x86/boot/memory.c +++ b/arch/x86/boot/memory.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/memory.c - * * Memory detection code */ diff --git a/arch/x86/boot/pm.c b/arch/x86/boot/pm.c index a93cb8bded4da529e76ee746874cc628808c767a..328956fdb59e79dc354e96f47e98c5cb5dff4a6b 100644 --- a/arch/x86/boot/pm.c +++ b/arch/x86/boot/pm.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/pm.c - * * Prepare the machine for transition to protected mode. */ diff --git a/arch/x86/boot/pmjump.S b/arch/x86/boot/pmjump.S index f5402d51f7c3d1ff534a009d54d438a3cfa54ec8..ab049d40a884d7ef44d32fb63e30c44ed6aedabf 100644 --- a/arch/x86/boot/pmjump.S +++ b/arch/x86/boot/pmjump.S @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/pmjump.S - * * The actual transition into protected mode */ diff --git a/arch/x86/boot/printf.c b/arch/x86/boot/printf.c index 7e7e890699be9acf34756c7179e1b928db3c0bb8..c1d00c0274c4888c203ac186b4f3930791669be4 100644 --- a/arch/x86/boot/printf.c +++ b/arch/x86/boot/printf.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/printf.c - * * Oh, it's a waste of space, but oh-so-yummy for debugging. This * version of printf() does not include 64-bit support. "Live with * it." diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index 481a22097781953c088931b52021d9a5365b80b2..f94b7a0c2abf8e2477ec5589a54ba682872f680b 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/string.c - * * Very basic string functions */ diff --git a/arch/x86/boot/tty.c b/arch/x86/boot/tty.c index f3f14bd2637191ca7a87faf8f69785f87dce2aef..0be77b39328afc957c8aa70b64bbe0641f477a7c 100644 --- a/arch/x86/boot/tty.c +++ b/arch/x86/boot/tty.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/tty.c - * * Very simple screen I/O * XXX: Probably should add very simple serial I/O? */ diff --git a/arch/x86/boot/version.c b/arch/x86/boot/version.c index c61462f7d9a75b636dae291e65924bdf39f3604b..2723d9b5ce432b4d229a9d3cbe47f9b8fdf08a2c 100644 --- a/arch/x86/boot/version.c +++ b/arch/x86/boot/version.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/version.c - * * Kernel version string */ diff --git a/arch/x86/boot/video-bios.c b/arch/x86/boot/video-bios.c index 39e247e96172a53c24aefbfe1c6a6beae89d01bc..49f26aaaebc8f2d58064cc1be981103742ab8524 100644 --- a/arch/x86/boot/video-bios.c +++ b/arch/x86/boot/video-bios.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video-bios.c - * * Standard video BIOS modes * * We have two options for this; silent and scanned. diff --git a/arch/x86/boot/video-vesa.c b/arch/x86/boot/video-vesa.c index 5d5a3f6e8b5ca31e05bac9e862c9067aaf1afd72..401ad998ad08d7cd63943a5b7a7ae893264b5e3f 100644 --- a/arch/x86/boot/video-vesa.c +++ b/arch/x86/boot/video-vesa.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video-vesa.c - * * VESA text modes */ diff --git a/arch/x86/boot/video-vga.c b/arch/x86/boot/video-vga.c index 330d6589a2adf854e6e43836b6d4327fb2a072dc..40ecb8d7688c3d5c8a7587aa8fce8bb7ea06d76b 100644 --- a/arch/x86/boot/video-vga.c +++ b/arch/x86/boot/video-vga.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video-vga.c - * * Common all-VGA modes */ diff --git a/arch/x86/boot/video.c b/arch/x86/boot/video.c index c1c47ba069ef7c98c9732f03d408a927bb2eb905..83598b23093aa31398db2d1c9a5f89eb30733357 100644 --- a/arch/x86/boot/video.c +++ b/arch/x86/boot/video.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video.c - * * Select video mode */ diff --git a/arch/x86/boot/video.h b/arch/x86/boot/video.h index d69347f79e8e5b76e23233f7b64216023336e96f..ee63f5d14461517ef96b89a6c644ac6c3a60a2f6 100644 --- a/arch/x86/boot/video.h +++ b/arch/x86/boot/video.h @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/video.h - * * Header file for the real-mode video probing code */ diff --git a/arch/x86/boot/voyager.c b/arch/x86/boot/voyager.c index 6499e3239b4132213907ab9ff54d00be9219dd1b..433909d61e5cb2ef7a20f7386c0ad87c362b47c1 100644 --- a/arch/x86/boot/voyager.c +++ b/arch/x86/boot/voyager.c @@ -9,8 +9,6 @@ * ----------------------------------------------------------------------- */ /* - * arch/i386/boot/voyager.c - * * Get the Voyager config information */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index c3920ea8ac56f99020c2de9562b422c97ec2a0c3..90e092d0af0c639211932fc44a0f5dd959ec590b 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -22,13 +22,14 @@ obj-y += setup_$(BITS).o i8259_$(BITS).o setup.o obj-$(CONFIG_X86_32) += sys_i386_32.o i386_ksyms_32.o obj-$(CONFIG_X86_64) += sys_x86_64.o x8664_ksyms_64.o obj-$(CONFIG_X86_64) += syscall_64.o vsyscall_64.o setup64.o -obj-y += pci-dma_$(BITS).o bootflag.o e820_$(BITS).o -obj-y += quirks.o i8237.o topology.o kdebugfs.o -obj-y += alternative.o i8253.o -obj-$(CONFIG_X86_64) += pci-nommu_64.o bugs_64.o +obj-y += bootflag.o e820_$(BITS).o +obj-y += pci-dma.o quirks.o i8237.o topology.o kdebugfs.o +obj-y += alternative.o i8253.o pci-nommu.o +obj-$(CONFIG_X86_64) += bugs_64.o obj-y += tsc_$(BITS).o io_delay.o rtc.o obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o +obj-y += process.o obj-y += i387.o obj-y += ptrace.o obj-y += ds.o diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index 8ca3557a6d599c947a8b71314902d2d9470e8d48..9366fb68d8d8a0c47f32a1c36df1eba89a6b455e 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -1,6 +1,4 @@ /* - * arch/i386/kernel/acpi/cstate.c - * * Copyright (C) 2005 Intel Corporation * Venkatesh Pallipadi * - Added _PDC for SMP C-states on Intel CPUs diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index 324eb0cab19ceb7c8353f402d78884f139c6741d..de2d2e4ebad97217be93de05b32886cfa09d0526 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -1,6 +1,4 @@ /* - * arch/i386/kernel/acpi/processor.c - * * Copyright (C) 2005 Intel Corporation * Venkatesh Pallipadi * - Added _PDC for platforms with Intel CPUs diff --git a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c index 14791ec55cfd16798eb0366cb908b31d0dedf1e7..199e4e05e5dc1c7cb82cbe417864377cb399d2d8 100644 --- a/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c +++ b/arch/x86/kernel/cpu/cpufreq/p4-clockmod.c @@ -289,8 +289,8 @@ static int __init cpufreq_p4_init(void) if (c->x86_vendor != X86_VENDOR_INTEL) return -ENODEV; - if (!test_bit(X86_FEATURE_ACPI, c->x86_capability) || - !test_bit(X86_FEATURE_ACC, c->x86_capability)) + if (!test_cpu_cap(c, X86_FEATURE_ACPI) || + !test_cpu_cap(c, X86_FEATURE_ACC)) return -ENODEV; ret = cpufreq_register_driver(&p4clockmod_driver); diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 9b7e01daa1ca22c70830add7e46e68247f903899..1f4cc48c14c633be4e0a1ca62b1502616337b048 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -1,5 +1,4 @@ /* - * linux/arch/i386/kernel/cpu/mcheck/therm_throt.c * * Thermal throttle event support code (such as syslog messaging and rate * limiting) that was factored out from x86_64 (mce_intel.c) and i386 (p4.c). diff --git a/arch/x86/kernel/e820_32.c b/arch/x86/kernel/e820_32.c index 0240cd778365d12a05e3096e4b7492b510573e5a..ed733e7cf4e611c454f9ea622c9322919aa624b4 100644 --- a/arch/x86/kernel/e820_32.c +++ b/arch/x86/kernel/e820_32.c @@ -475,7 +475,7 @@ int __init copy_e820_map(struct e820entry *biosmap, int nr_map) /* * Find the highest page frame number we have available */ -void __init find_max_pfn(void) +void __init propagate_e820_map(void) { int i; @@ -704,7 +704,7 @@ static int __init parse_memmap(char *arg) * size before original memory map is * reset. */ - find_max_pfn(); + propagate_e820_map(); saved_max_pfn = max_pfn; #endif e820.nr_map = 0; diff --git a/arch/x86/kernel/e820_64.c b/arch/x86/kernel/e820_64.c index 7f6c0c85c8f65e0a37524a095b6ac5d7d86009b1..cbd42e51cb082d82b8f287eaa1c244913268d64c 100644 --- a/arch/x86/kernel/e820_64.c +++ b/arch/x86/kernel/e820_64.c @@ -96,7 +96,7 @@ void __init early_res_to_bootmem(void) } /* Check for already reserved areas */ -static inline int +static inline int __init bad_addr(unsigned long *addrp, unsigned long size, unsigned long align) { int i; @@ -116,7 +116,7 @@ bad_addr(unsigned long *addrp, unsigned long size, unsigned long align) } /* Check for already reserved areas */ -static inline int +static inline int __init bad_addr_size(unsigned long *addrp, unsigned long *sizep, unsigned long align) { int i; diff --git a/arch/x86/kernel/efi.c b/arch/x86/kernel/efi.c index 759e02bec0708f955764907ec380e6eb737c6dfa..77d424cf68b38e6b919f55b6fd895436e887fd74 100644 --- a/arch/x86/kernel/efi.c +++ b/arch/x86/kernel/efi.c @@ -383,6 +383,7 @@ static void __init runtime_code_page_mkexec(void) { efi_memory_desc_t *md; void *p; + u64 addr, npages; /* Make EFI runtime service code area executable */ for (p = memmap.map; p < memmap.map_end; p += memmap.desc_size) { @@ -391,7 +392,10 @@ static void __init runtime_code_page_mkexec(void) if (md->type != EFI_RUNTIME_SERVICES_CODE) continue; - set_memory_x(md->virt_addr, md->num_pages); + addr = md->virt_addr; + npages = md->num_pages; + memrange_efi_to_native(&addr, &npages); + set_memory_x(addr, npages); } } @@ -408,7 +412,7 @@ void __init efi_enter_virtual_mode(void) efi_memory_desc_t *md; efi_status_t status; unsigned long size; - u64 end, systab; + u64 end, systab, addr, npages; void *p, *va; efi.systab = NULL; @@ -420,7 +424,7 @@ void __init efi_enter_virtual_mode(void) size = md->num_pages << EFI_PAGE_SHIFT; end = md->phys_addr + size; - if ((end >> PAGE_SHIFT) <= max_pfn_mapped) + if (PFN_UP(end) <= max_pfn_mapped) va = __va(md->phys_addr); else va = efi_ioremap(md->phys_addr, size); @@ -433,8 +437,12 @@ void __init efi_enter_virtual_mode(void) continue; } - if (!(md->attribute & EFI_MEMORY_WB)) - set_memory_uc(md->virt_addr, md->num_pages); + if (!(md->attribute & EFI_MEMORY_WB)) { + addr = md->virt_addr; + npages = md->num_pages; + memrange_efi_to_native(&addr, &npages); + set_memory_uc(addr, npages); + } systab = (u64) (unsigned long) efi_phys.systab; if (md->phys_addr <= systab && systab < end) { diff --git a/arch/x86/kernel/efi_64.c b/arch/x86/kernel/efi_64.c index d143a1e76b301737c6fc34f2a0d6da656b787038..d0060fdcccac1658968db527f4c05d791b056e6b 100644 --- a/arch/x86/kernel/efi_64.c +++ b/arch/x86/kernel/efi_64.c @@ -105,14 +105,14 @@ void __init efi_reserve_bootmem(void) void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) { - static unsigned pages_mapped; + static unsigned pages_mapped __initdata; unsigned i, pages; + unsigned long offset; - /* phys_addr and size must be page aligned */ - if ((phys_addr & ~PAGE_MASK) || (size & ~PAGE_MASK)) - return NULL; + pages = PFN_UP(phys_addr + size) - PFN_DOWN(phys_addr); + offset = phys_addr & ~PAGE_MASK; + phys_addr &= PAGE_MASK; - pages = size >> PAGE_SHIFT; if (pages_mapped + pages > MAX_EFI_IO_PAGES) return NULL; @@ -124,5 +124,5 @@ void __iomem * __init efi_ioremap(unsigned long phys_addr, unsigned long size) } return (void __iomem *)__fix_to_virt(FIX_EFI_IO_MAP_FIRST_PAGE - \ - (pages_mapped - pages)); + (pages_mapped - pages)) + offset; } diff --git a/arch/x86/kernel/entry_32.S b/arch/x86/kernel/entry_32.S index 9ba49a26dff8a25c5ad0e1adfb9c175e7cb3639e..f0f8934fc30324fc29af50dc436db6f14b198a93 100644 --- a/arch/x86/kernel/entry_32.S +++ b/arch/x86/kernel/entry_32.S @@ -1,5 +1,4 @@ /* - * linux/arch/i386/entry.S * * Copyright (C) 1991, 1992 Linus Torvalds */ diff --git a/arch/x86/kernel/genx2apic_uv_x.c b/arch/x86/kernel/genx2apic_uv_x.c index 5d77c9cd8e15c6782e4a0872d5f8e4e820509395..ebf13908a743b6cd8cbdc231d0758f2388b92243 100644 --- a/arch/x86/kernel/genx2apic_uv_x.c +++ b/arch/x86/kernel/genx2apic_uv_x.c @@ -61,26 +61,31 @@ int uv_wakeup_secondary(int phys_apicid, unsigned int start_rip) val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | - (6 << UVH_IPI_INT_DELIVERY_MODE_SHFT); + APIC_DM_INIT; + uv_write_global_mmr64(nasid, UVH_IPI_INT, val); + mdelay(10); + + val = (1UL << UVH_IPI_INT_SEND_SHFT) | + (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | + (((long)start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | + APIC_DM_STARTUP; uv_write_global_mmr64(nasid, UVH_IPI_INT, val); return 0; } static void uv_send_IPI_one(int cpu, int vector) { - unsigned long val, apicid; + unsigned long val, apicid, lapicid; int nasid; apicid = per_cpu(x86_cpu_to_apicid, cpu); /* ZZZ - cache node-local ? */ + lapicid = apicid & 0x3f; /* ZZZ macro needed */ nasid = uv_apicid_to_nasid(apicid); val = - (1UL << UVH_IPI_INT_SEND_SHFT) | (apicid << + (1UL << UVH_IPI_INT_SEND_SHFT) | (lapicid << UVH_IPI_INT_APIC_ID_SHFT) | (vector << UVH_IPI_INT_VECTOR_SHFT); uv_write_global_mmr64(nasid, UVH_IPI_INT, val); - printk(KERN_DEBUG - "UV: IPI to cpu %d, apicid 0x%lx, vec %d, nasid%d, val 0x%lx\n", - cpu, apicid, vector, nasid, val); } static void uv_send_IPI_mask(cpumask_t mask, int vector) diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index d6d54faa84dfb1020ea838896a2aca8ee18bda06..993c767732564afe285e7ddc23228511108b7b11 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -146,6 +146,7 @@ void __init x86_64_start_kernel(char * real_mode_data) reserve_early(__pa_symbol(&_text), __pa_symbol(&_end), "TEXT DATA BSS"); +#ifdef CONFIG_BLK_DEV_INITRD /* Reserve INITRD */ if (boot_params.hdr.type_of_loader && boot_params.hdr.ramdisk_image) { unsigned long ramdisk_image = boot_params.hdr.ramdisk_image; @@ -153,6 +154,7 @@ void __init x86_64_start_kernel(char * real_mode_data) unsigned long ramdisk_end = ramdisk_image + ramdisk_size; reserve_early(ramdisk_image, ramdisk_end, "RAMDISK"); } +#endif reserve_ebda_region(); diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 826988a6e964717e623350efec94e03db932c798..90f038af3adc326725cbdec61825e1b31f340ebb 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -1,5 +1,4 @@ /* - * linux/arch/i386/kernel/head.S -- the 32-bit startup code. * * Copyright (C) 1991, 1992 Linus Torvalds * diff --git a/arch/x86/kernel/i387.c b/arch/x86/kernel/i387.c index 8f8102d967b3f4111c58be6dccbd6e4192fc3864..db6839b53195e1a83d9186a76bf9a05562fc39e6 100644 --- a/arch/x86/kernel/i387.c +++ b/arch/x86/kernel/i387.c @@ -35,17 +35,18 @@ #endif static unsigned int mxcsr_feature_mask __read_mostly = 0xffffffffu; +unsigned int xstate_size; +static struct i387_fxsave_struct fx_scratch __cpuinitdata; -void mxcsr_feature_mask_init(void) +void __cpuinit mxcsr_feature_mask_init(void) { unsigned long mask = 0; clts(); if (cpu_has_fxsr) { - memset(¤t->thread.i387.fxsave, 0, - sizeof(struct i387_fxsave_struct)); - asm volatile("fxsave %0" : : "m" (current->thread.i387.fxsave)); - mask = current->thread.i387.fxsave.mxcsr_mask; + memset(&fx_scratch, 0, sizeof(struct i387_fxsave_struct)); + asm volatile("fxsave %0" : : "m" (fx_scratch)); + mask = fx_scratch.mxcsr_mask; if (mask == 0) mask = 0x0000ffbf; } @@ -53,6 +54,16 @@ void mxcsr_feature_mask_init(void) stts(); } +void __init init_thread_xstate(void) +{ + if (cpu_has_fxsr) + xstate_size = sizeof(struct i387_fxsave_struct); +#ifdef CONFIG_X86_32 + else + xstate_size = sizeof(struct i387_fsave_struct); +#endif +} + #ifdef CONFIG_X86_64 /* * Called at bootup to set up the initial FPU state that is later cloned @@ -61,10 +72,6 @@ void mxcsr_feature_mask_init(void) void __cpuinit fpu_init(void) { unsigned long oldcr0 = read_cr0(); - extern void __bad_fxsave_alignment(void); - - if (offsetof(struct task_struct, thread.i387.fxsave) & 15) - __bad_fxsave_alignment(); set_in_cr4(X86_CR4_OSFXSR); set_in_cr4(X86_CR4_OSXMMEXCPT); @@ -84,32 +91,44 @@ void __cpuinit fpu_init(void) * value at reset if we support XMM instructions and then * remeber the current task has used the FPU. */ -void init_fpu(struct task_struct *tsk) +int init_fpu(struct task_struct *tsk) { if (tsk_used_math(tsk)) { if (tsk == current) unlazy_fpu(tsk); - return; + return 0; + } + + /* + * Memory allocation at the first usage of the FPU and other state. + */ + if (!tsk->thread.xstate) { + tsk->thread.xstate = kmem_cache_alloc(task_xstate_cachep, + GFP_KERNEL); + if (!tsk->thread.xstate) + return -ENOMEM; } if (cpu_has_fxsr) { - memset(&tsk->thread.i387.fxsave, 0, - sizeof(struct i387_fxsave_struct)); - tsk->thread.i387.fxsave.cwd = 0x37f; + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; + + memset(fx, 0, xstate_size); + fx->cwd = 0x37f; if (cpu_has_xmm) - tsk->thread.i387.fxsave.mxcsr = MXCSR_DEFAULT; + fx->mxcsr = MXCSR_DEFAULT; } else { - memset(&tsk->thread.i387.fsave, 0, - sizeof(struct i387_fsave_struct)); - tsk->thread.i387.fsave.cwd = 0xffff037fu; - tsk->thread.i387.fsave.swd = 0xffff0000u; - tsk->thread.i387.fsave.twd = 0xffffffffu; - tsk->thread.i387.fsave.fos = 0xffff0000u; + struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; + memset(fp, 0, xstate_size); + fp->cwd = 0xffff037fu; + fp->swd = 0xffff0000u; + fp->twd = 0xffffffffu; + fp->fos = 0xffff0000u; } /* * Only the device not available exception or ptrace can call init_fpu. */ set_stopped_child_used_math(tsk); + return 0; } int fpregs_active(struct task_struct *target, const struct user_regset *regset) @@ -126,13 +145,17 @@ int xfpregs_get(struct task_struct *target, const struct user_regset *regset, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { + int ret; + if (!cpu_has_fxsr) return -ENODEV; - init_fpu(target); + ret = init_fpu(target); + if (ret) + return ret; return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.i387.fxsave, 0, -1); + &target->thread.xstate->fxsave, 0, -1); } int xfpregs_set(struct task_struct *target, const struct user_regset *regset, @@ -144,16 +167,19 @@ int xfpregs_set(struct task_struct *target, const struct user_regset *regset, if (!cpu_has_fxsr) return -ENODEV; - init_fpu(target); + ret = init_fpu(target); + if (ret) + return ret; + set_stopped_child_used_math(target); ret = user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.i387.fxsave, 0, -1); + &target->thread.xstate->fxsave, 0, -1); /* * mxcsr reserved bits must be masked to zero for security reasons. */ - target->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; + target->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; return ret; } @@ -233,7 +259,7 @@ static inline u32 twd_fxsr_to_i387(struct i387_fxsave_struct *fxsave) static void convert_from_fxsr(struct user_i387_ia32_struct *env, struct task_struct *tsk) { - struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; + struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; struct _fpreg *to = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *from = (struct _fpxreg *) &fxsave->st_space[0]; int i; @@ -273,7 +299,7 @@ static void convert_to_fxsr(struct task_struct *tsk, const struct user_i387_ia32_struct *env) { - struct i387_fxsave_struct *fxsave = &tsk->thread.i387.fxsave; + struct i387_fxsave_struct *fxsave = &tsk->thread.xstate->fxsave; struct _fpreg *from = (struct _fpreg *) &env->st_space[0]; struct _fpxreg *to = (struct _fpxreg *) &fxsave->st_space[0]; int i; @@ -302,15 +328,19 @@ int fpregs_get(struct task_struct *target, const struct user_regset *regset, void *kbuf, void __user *ubuf) { struct user_i387_ia32_struct env; + int ret; if (!HAVE_HWFP) return fpregs_soft_get(target, regset, pos, count, kbuf, ubuf); - init_fpu(target); + ret = init_fpu(target); + if (ret) + return ret; if (!cpu_has_fxsr) { return user_regset_copyout(&pos, &count, &kbuf, &ubuf, - &target->thread.i387.fsave, 0, -1); + &target->thread.xstate->fsave, 0, + -1); } if (kbuf && pos == 0 && count == sizeof(env)) { @@ -333,12 +363,15 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, if (!HAVE_HWFP) return fpregs_soft_set(target, regset, pos, count, kbuf, ubuf); - init_fpu(target); + ret = init_fpu(target); + if (ret) + return ret; + set_stopped_child_used_math(target); if (!cpu_has_fxsr) { return user_regset_copyin(&pos, &count, &kbuf, &ubuf, - &target->thread.i387.fsave, 0, -1); + &target->thread.xstate->fsave, 0, -1); } if (pos > 0 || count < sizeof(env)) @@ -358,11 +391,11 @@ int fpregs_set(struct task_struct *target, const struct user_regset *regset, static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) { struct task_struct *tsk = current; + struct i387_fsave_struct *fp = &tsk->thread.xstate->fsave; unlazy_fpu(tsk); - tsk->thread.i387.fsave.status = tsk->thread.i387.fsave.swd; - if (__copy_to_user(buf, &tsk->thread.i387.fsave, - sizeof(struct i387_fsave_struct))) + fp->status = fp->swd; + if (__copy_to_user(buf, fp, sizeof(struct i387_fsave_struct))) return -1; return 1; } @@ -370,6 +403,7 @@ static inline int save_i387_fsave(struct _fpstate_ia32 __user *buf) static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) { struct task_struct *tsk = current; + struct i387_fxsave_struct *fx = &tsk->thread.xstate->fxsave; struct user_i387_ia32_struct env; int err = 0; @@ -379,12 +413,12 @@ static int save_i387_fxsave(struct _fpstate_ia32 __user *buf) if (__copy_to_user(buf, &env, sizeof(env))) return -1; - err |= __put_user(tsk->thread.i387.fxsave.swd, &buf->status); + err |= __put_user(fx->swd, &buf->status); err |= __put_user(X86_FXSR_MAGIC, &buf->magic); if (err) return -1; - if (__copy_to_user(&buf->_fxsr_env[0], &tsk->thread.i387.fxsave, + if (__copy_to_user(&buf->_fxsr_env[0], fx, sizeof(struct i387_fxsave_struct))) return -1; return 1; @@ -417,7 +451,7 @@ static inline int restore_i387_fsave(struct _fpstate_ia32 __user *buf) struct task_struct *tsk = current; clear_fpu(tsk); - return __copy_from_user(&tsk->thread.i387.fsave, buf, + return __copy_from_user(&tsk->thread.xstate->fsave, buf, sizeof(struct i387_fsave_struct)); } @@ -428,10 +462,10 @@ static int restore_i387_fxsave(struct _fpstate_ia32 __user *buf) int err; clear_fpu(tsk); - err = __copy_from_user(&tsk->thread.i387.fxsave, &buf->_fxsr_env[0], + err = __copy_from_user(&tsk->thread.xstate->fxsave, &buf->_fxsr_env[0], sizeof(struct i387_fxsave_struct)); /* mxcsr reserved bits must be masked to zero for security reasons */ - tsk->thread.i387.fxsave.mxcsr &= mxcsr_feature_mask; + tsk->thread.xstate->fxsave.mxcsr &= mxcsr_feature_mask; if (err || __copy_from_user(&env, buf, sizeof(env))) return 1; convert_to_fxsr(tsk, &env); diff --git a/arch/x86/kernel/kgdb.c b/arch/x86/kernel/kgdb.c index 24362ecf5f9a9006f76698a2e410a48312904ed5..f47f0eb886b8ddeab27ec8a8fd319801c45ef503 100644 --- a/arch/x86/kernel/kgdb.c +++ b/arch/x86/kernel/kgdb.c @@ -46,11 +46,7 @@ #include #include -#ifdef CONFIG_X86_32 -# include -#else -# include -#endif +#include /* * Put the error code here just in case the user cares: diff --git a/arch/x86/kernel/nmi_32.c b/arch/x86/kernel/nmi_32.c index 8421d0ac6f2200fbf91dcae68aceba921ea49cbd..11b14bbaa61e6be6cfb1ffe7fd5b4c8f69fd71a7 100644 --- a/arch/x86/kernel/nmi_32.c +++ b/arch/x86/kernel/nmi_32.c @@ -321,7 +321,8 @@ EXPORT_SYMBOL(touch_nmi_watchdog); extern void die_nmi(struct pt_regs *, const char *msg); -__kprobes int nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) +notrace __kprobes int +nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { /* diff --git a/arch/x86/kernel/nmi_64.c b/arch/x86/kernel/nmi_64.c index 11f9130ac513e732fb6f9832d3af32c07bd4bd05..5a29ded994fa345fb67f92cc19b9fa9dd5606cf8 100644 --- a/arch/x86/kernel/nmi_64.c +++ b/arch/x86/kernel/nmi_64.c @@ -313,7 +313,8 @@ void touch_nmi_watchdog(void) } EXPORT_SYMBOL(touch_nmi_watchdog); -int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) +notrace __kprobes int +nmi_watchdog_tick(struct pt_regs *regs, unsigned reason) { int sum; int touched = 0; @@ -384,7 +385,8 @@ int __kprobes nmi_watchdog_tick(struct pt_regs * regs, unsigned reason) static unsigned ignore_nmis; -asmlinkage __kprobes void do_nmi(struct pt_regs * regs, long error_code) +asmlinkage notrace __kprobes void +do_nmi(struct pt_regs *regs, long error_code) { nmi_enter(); add_pda(__nmi_count,1); diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 1b5464c2434f2fbe115816a6603fc3434dd55b96..adb91e4b62dad3d6ef51188eb8e628b2024755c2 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -470,10 +470,11 @@ static int calgary_map_sg(struct device *dev, struct scatterlist *sg, return 0; } -static dma_addr_t calgary_map_single(struct device *dev, void *vaddr, +static dma_addr_t calgary_map_single(struct device *dev, phys_addr_t paddr, size_t size, int direction) { dma_addr_t dma_handle = bad_dma_address; + void *vaddr = phys_to_virt(paddr); unsigned long uaddr; unsigned int npages; struct iommu_table *tbl = find_iommu_table(dev); diff --git a/arch/x86/kernel/pci-dma_64.c b/arch/x86/kernel/pci-dma.c similarity index 55% rename from arch/x86/kernel/pci-dma_64.c rename to arch/x86/kernel/pci-dma.c index ada5a0604992d7788a3fda58ec82e44a2c662804..388b113a7d88ffbc0efaf52c348412af9e439a8c 100644 --- a/arch/x86/kernel/pci-dma_64.c +++ b/arch/x86/kernel/pci-dma.c @@ -1,61 +1,370 @@ -/* - * Dynamic DMA mapping support. - */ - -#include -#include -#include -#include -#include +#include #include -#include +#include +#include + +#include +#include #include #include -int iommu_merge __read_mostly = 0; - -dma_addr_t bad_dma_address __read_mostly; -EXPORT_SYMBOL(bad_dma_address); +int forbid_dac __read_mostly; +EXPORT_SYMBOL(forbid_dac); -/* This tells the BIO block layer to assume merging. Default to off - because we cannot guarantee merging later. */ -int iommu_bio_merge __read_mostly = 0; -EXPORT_SYMBOL(iommu_bio_merge); +const struct dma_mapping_ops *dma_ops; +EXPORT_SYMBOL(dma_ops); -static int iommu_sac_force __read_mostly = 0; +int iommu_sac_force __read_mostly = 0; -int no_iommu __read_mostly; #ifdef CONFIG_IOMMU_DEBUG int panic_on_overflow __read_mostly = 1; int force_iommu __read_mostly = 1; #else int panic_on_overflow __read_mostly = 0; -int force_iommu __read_mostly= 0; +int force_iommu __read_mostly = 0; #endif +int iommu_merge __read_mostly = 0; + +int no_iommu __read_mostly; /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly = 0; +/* This tells the BIO block layer to assume merging. Default to off + because we cannot guarantee merging later. */ +int iommu_bio_merge __read_mostly = 0; +EXPORT_SYMBOL(iommu_bio_merge); + +dma_addr_t bad_dma_address __read_mostly = 0; +EXPORT_SYMBOL(bad_dma_address); + /* Dummy device used for NULL arguments (normally ISA). Better would be probably a smaller DMA mask, but this is bug-to-bug compatible - to i386. */ + to older i386. */ struct device fallback_dev = { .bus_id = "fallback device", .coherent_dma_mask = DMA_32BIT_MASK, .dma_mask = &fallback_dev.coherent_dma_mask, }; +int dma_set_mask(struct device *dev, u64 mask) +{ + if (!dev->dma_mask || !dma_supported(dev, mask)) + return -EIO; + + *dev->dma_mask = mask; + + return 0; +} +EXPORT_SYMBOL(dma_set_mask); + +#ifdef CONFIG_X86_64 +static __initdata void *dma32_bootmem_ptr; +static unsigned long dma32_bootmem_size __initdata = (128ULL<<20); + +static int __init parse_dma32_size_opt(char *p) +{ + if (!p) + return -EINVAL; + dma32_bootmem_size = memparse(p, &p); + return 0; +} +early_param("dma32_size", parse_dma32_size_opt); + +void __init dma32_reserve_bootmem(void) +{ + unsigned long size, align; + if (end_pfn <= MAX_DMA32_PFN) + return; + + align = 64ULL<<20; + size = round_up(dma32_bootmem_size, align); + dma32_bootmem_ptr = __alloc_bootmem_nopanic(size, align, + __pa(MAX_DMA_ADDRESS)); + if (dma32_bootmem_ptr) + dma32_bootmem_size = size; + else + dma32_bootmem_size = 0; +} +static void __init dma32_free_bootmem(void) +{ + int node; + + if (end_pfn <= MAX_DMA32_PFN) + return; + + if (!dma32_bootmem_ptr) + return; + + for_each_online_node(node) + free_bootmem_node(NODE_DATA(node), __pa(dma32_bootmem_ptr), + dma32_bootmem_size); + + dma32_bootmem_ptr = NULL; + dma32_bootmem_size = 0; +} + +void __init pci_iommu_alloc(void) +{ + /* free the range so iommu could get some range less than 4G */ + dma32_free_bootmem(); + /* + * The order of these functions is important for + * fall-back/fail-over reasons + */ +#ifdef CONFIG_GART_IOMMU + gart_iommu_hole_init(); +#endif + +#ifdef CONFIG_CALGARY_IOMMU + detect_calgary(); +#endif + + detect_intel_iommu(); + +#ifdef CONFIG_SWIOTLB + pci_swiotlb_init(); +#endif +} +#endif + +/* + * See for the iommu kernel parameter + * documentation. + */ +static __init int iommu_setup(char *p) +{ + iommu_merge = 1; + + if (!p) + return -EINVAL; + + while (*p) { + if (!strncmp(p, "off", 3)) + no_iommu = 1; + /* gart_parse_options has more force support */ + if (!strncmp(p, "force", 5)) + force_iommu = 1; + if (!strncmp(p, "noforce", 7)) { + iommu_merge = 0; + force_iommu = 0; + } + + if (!strncmp(p, "biomerge", 8)) { + iommu_bio_merge = 4096; + iommu_merge = 1; + force_iommu = 1; + } + if (!strncmp(p, "panic", 5)) + panic_on_overflow = 1; + if (!strncmp(p, "nopanic", 7)) + panic_on_overflow = 0; + if (!strncmp(p, "merge", 5)) { + iommu_merge = 1; + force_iommu = 1; + } + if (!strncmp(p, "nomerge", 7)) + iommu_merge = 0; + if (!strncmp(p, "forcesac", 8)) + iommu_sac_force = 1; + if (!strncmp(p, "allowdac", 8)) + forbid_dac = 0; + if (!strncmp(p, "nodac", 5)) + forbid_dac = -1; + if (!strncmp(p, "usedac", 6)) { + forbid_dac = -1; + return 1; + } +#ifdef CONFIG_SWIOTLB + if (!strncmp(p, "soft", 4)) + swiotlb = 1; +#endif + +#ifdef CONFIG_GART_IOMMU + gart_parse_options(p); +#endif + +#ifdef CONFIG_CALGARY_IOMMU + if (!strncmp(p, "calgary", 7)) + use_calgary = 1; +#endif /* CONFIG_CALGARY_IOMMU */ + + p += strcspn(p, ","); + if (*p == ',') + ++p; + } + return 0; +} +early_param("iommu", iommu_setup); + +#ifdef CONFIG_X86_32 +int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, + dma_addr_t device_addr, size_t size, int flags) +{ + void __iomem *mem_base = NULL; + int pages = size >> PAGE_SHIFT; + int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); + + if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) + goto out; + if (!size) + goto out; + if (dev->dma_mem) + goto out; + + /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ + + mem_base = ioremap(bus_addr, size); + if (!mem_base) + goto out; + + dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); + if (!dev->dma_mem) + goto out; + dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); + if (!dev->dma_mem->bitmap) + goto free1_out; + + dev->dma_mem->virt_base = mem_base; + dev->dma_mem->device_base = device_addr; + dev->dma_mem->size = pages; + dev->dma_mem->flags = flags; + + if (flags & DMA_MEMORY_MAP) + return DMA_MEMORY_MAP; + + return DMA_MEMORY_IO; + + free1_out: + kfree(dev->dma_mem); + out: + if (mem_base) + iounmap(mem_base); + return 0; +} +EXPORT_SYMBOL(dma_declare_coherent_memory); + +void dma_release_declared_memory(struct device *dev) +{ + struct dma_coherent_mem *mem = dev->dma_mem; + + if (!mem) + return; + dev->dma_mem = NULL; + iounmap(mem->virt_base); + kfree(mem->bitmap); + kfree(mem); +} +EXPORT_SYMBOL(dma_release_declared_memory); + +void *dma_mark_declared_memory_occupied(struct device *dev, + dma_addr_t device_addr, size_t size) +{ + struct dma_coherent_mem *mem = dev->dma_mem; + int pos, err; + int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1); + + pages >>= PAGE_SHIFT; + + if (!mem) + return ERR_PTR(-EINVAL); + + pos = (device_addr - mem->device_base) >> PAGE_SHIFT; + err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); + if (err != 0) + return ERR_PTR(err); + return mem->virt_base + (pos << PAGE_SHIFT); +} +EXPORT_SYMBOL(dma_mark_declared_memory_occupied); + +static int dma_alloc_from_coherent_mem(struct device *dev, ssize_t size, + dma_addr_t *dma_handle, void **ret) +{ + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; + int order = get_order(size); + + if (mem) { + int page = bitmap_find_free_region(mem->bitmap, mem->size, + order); + if (page >= 0) { + *dma_handle = mem->device_base + (page << PAGE_SHIFT); + *ret = mem->virt_base + (page << PAGE_SHIFT); + memset(*ret, 0, size); + } + if (mem->flags & DMA_MEMORY_EXCLUSIVE) + *ret = NULL; + } + return (mem != NULL); +} + +static int dma_release_coherent(struct device *dev, int order, void *vaddr) +{ + struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; + + if (mem && vaddr >= mem->virt_base && vaddr < + (mem->virt_base + (mem->size << PAGE_SHIFT))) { + int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; + + bitmap_release_region(mem->bitmap, page, order); + return 1; + } + return 0; +} +#else +#define dma_alloc_from_coherent_mem(dev, size, handle, ret) (0) +#define dma_release_coherent(dev, order, vaddr) (0) +#endif /* CONFIG_X86_32 */ + +int dma_supported(struct device *dev, u64 mask) +{ +#ifdef CONFIG_PCI + if (mask > 0xffffffff && forbid_dac > 0) { + printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", + dev->bus_id); + return 0; + } +#endif + + if (dma_ops->dma_supported) + return dma_ops->dma_supported(dev, mask); + + /* Copied from i386. Doesn't make much sense, because it will + only work for pci_alloc_coherent. + The caller just has to use GFP_DMA in this case. */ + if (mask < DMA_24BIT_MASK) + return 0; + + /* Tell the device to use SAC when IOMMU force is on. This + allows the driver to use cheaper accesses in some cases. + + Problem with this is that if we overflow the IOMMU area and + return DAC as fallback address the device may not handle it + correctly. + + As a special case some controllers have a 39bit address + mode that is as efficient as 32bit (aic79xx). Don't force + SAC for these. Assume all masks <= 40 bits are of this + type. Normally this doesn't make any difference, but gives + more gentle handling of IOMMU overflow. */ + if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { + printk(KERN_INFO "%s: Force SAC with mask %Lx\n", + dev->bus_id, mask); + return 0; + } + + return 1; +} +EXPORT_SYMBOL(dma_supported); + /* Allocate DMA memory on node near device */ -noinline static void * +noinline struct page * dma_alloc_pages(struct device *dev, gfp_t gfp, unsigned order) { - struct page *page; int node; node = dev_to_node(dev); - page = alloc_pages_node(node, gfp, order); - return page ? page_address(page) : NULL; + return alloc_pages_node(node, gfp, order); } /* @@ -65,9 +374,16 @@ void * dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp) { - void *memory; + void *memory = NULL; + struct page *page; unsigned long dma_mask = 0; - u64 bus; + dma_addr_t bus; + + /* ignore region specifiers */ + gfp &= ~(__GFP_DMA | __GFP_HIGHMEM | __GFP_DMA32); + + if (dma_alloc_from_coherent_mem(dev, size, dma_handle, &memory)) + return memory; if (!dev) dev = &fallback_dev; @@ -82,26 +398,25 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, /* Don't invoke OOM killer */ gfp |= __GFP_NORETRY; - /* Kludge to make it bug-to-bug compatible with i386. i386 - uses the normal dma_mask for alloc_coherent. */ - dma_mask &= *dev->dma_mask; - +#ifdef CONFIG_X86_64 /* Why <=? Even when the mask is smaller than 4GB it is often larger than 16MB and in this case we have a chance of finding fitting memory in the next higher zone first. If not retry with true GFP_DMA. -AK */ if (dma_mask <= DMA_32BIT_MASK) gfp |= GFP_DMA32; +#endif again: - memory = dma_alloc_pages(dev, gfp, get_order(size)); - if (memory == NULL) + page = dma_alloc_pages(dev, gfp, get_order(size)); + if (page == NULL) return NULL; { int high, mmu; - bus = virt_to_bus(memory); - high = (bus + size) >= dma_mask; + bus = page_to_phys(page); + memory = page_address(page); + high = (bus + size) >= dma_mask; mmu = high; if (force_iommu && !(gfp & GFP_DMA)) mmu = 1; @@ -127,7 +442,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, memset(memory, 0, size); if (!mmu) { - *dma_handle = virt_to_bus(memory); + *dma_handle = bus; return memory; } } @@ -139,7 +454,7 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, } if (dma_ops->map_simple) { - *dma_handle = dma_ops->map_simple(dev, memory, + *dma_handle = dma_ops->map_simple(dev, virt_to_phys(memory), size, PCI_DMA_BIDIRECTIONAL); if (*dma_handle != bad_dma_address) @@ -147,7 +462,8 @@ dma_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, } if (panic_on_overflow) - panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n",size); + panic("dma_alloc_coherent: IOMMU overflow by %lu bytes\n", + (unsigned long)size); free_pages((unsigned long)memory, get_order(size)); return NULL; } @@ -160,153 +476,16 @@ EXPORT_SYMBOL(dma_alloc_coherent); void dma_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t bus) { + int order = get_order(size); WARN_ON(irqs_disabled()); /* for portability */ + if (dma_release_coherent(dev, order, vaddr)) + return; if (dma_ops->unmap_single) dma_ops->unmap_single(dev, bus, size, 0); - free_pages((unsigned long)vaddr, get_order(size)); + free_pages((unsigned long)vaddr, order); } EXPORT_SYMBOL(dma_free_coherent); -static int forbid_dac __read_mostly; - -int dma_supported(struct device *dev, u64 mask) -{ -#ifdef CONFIG_PCI - if (mask > 0xffffffff && forbid_dac > 0) { - - - - printk(KERN_INFO "PCI: Disallowing DAC for device %s\n", dev->bus_id); - return 0; - } -#endif - - if (dma_ops->dma_supported) - return dma_ops->dma_supported(dev, mask); - - /* Copied from i386. Doesn't make much sense, because it will - only work for pci_alloc_coherent. - The caller just has to use GFP_DMA in this case. */ - if (mask < DMA_24BIT_MASK) - return 0; - - /* Tell the device to use SAC when IOMMU force is on. This - allows the driver to use cheaper accesses in some cases. - - Problem with this is that if we overflow the IOMMU area and - return DAC as fallback address the device may not handle it - correctly. - - As a special case some controllers have a 39bit address - mode that is as efficient as 32bit (aic79xx). Don't force - SAC for these. Assume all masks <= 40 bits are of this - type. Normally this doesn't make any difference, but gives - more gentle handling of IOMMU overflow. */ - if (iommu_sac_force && (mask >= DMA_40BIT_MASK)) { - printk(KERN_INFO "%s: Force SAC with mask %Lx\n", dev->bus_id,mask); - return 0; - } - - return 1; -} -EXPORT_SYMBOL(dma_supported); - -int dma_set_mask(struct device *dev, u64 mask) -{ - if (!dev->dma_mask || !dma_supported(dev, mask)) - return -EIO; - *dev->dma_mask = mask; - return 0; -} -EXPORT_SYMBOL(dma_set_mask); - -/* - * See for the iommu kernel parameter - * documentation. - */ -static __init int iommu_setup(char *p) -{ - iommu_merge = 1; - - if (!p) - return -EINVAL; - - while (*p) { - if (!strncmp(p, "off", 3)) - no_iommu = 1; - /* gart_parse_options has more force support */ - if (!strncmp(p, "force", 5)) - force_iommu = 1; - if (!strncmp(p, "noforce", 7)) { - iommu_merge = 0; - force_iommu = 0; - } - - if (!strncmp(p, "biomerge", 8)) { - iommu_bio_merge = 4096; - iommu_merge = 1; - force_iommu = 1; - } - if (!strncmp(p, "panic", 5)) - panic_on_overflow = 1; - if (!strncmp(p, "nopanic", 7)) - panic_on_overflow = 0; - if (!strncmp(p, "merge", 5)) { - iommu_merge = 1; - force_iommu = 1; - } - if (!strncmp(p, "nomerge", 7)) - iommu_merge = 0; - if (!strncmp(p, "forcesac", 8)) - iommu_sac_force = 1; - if (!strncmp(p, "allowdac", 8)) - forbid_dac = 0; - if (!strncmp(p, "nodac", 5)) - forbid_dac = -1; - -#ifdef CONFIG_SWIOTLB - if (!strncmp(p, "soft", 4)) - swiotlb = 1; -#endif - -#ifdef CONFIG_GART_IOMMU - gart_parse_options(p); -#endif - -#ifdef CONFIG_CALGARY_IOMMU - if (!strncmp(p, "calgary", 7)) - use_calgary = 1; -#endif /* CONFIG_CALGARY_IOMMU */ - - p += strcspn(p, ","); - if (*p == ',') - ++p; - } - return 0; -} -early_param("iommu", iommu_setup); - -void __init pci_iommu_alloc(void) -{ - /* - * The order of these functions is important for - * fall-back/fail-over reasons - */ -#ifdef CONFIG_GART_IOMMU - gart_iommu_hole_init(); -#endif - -#ifdef CONFIG_CALGARY_IOMMU - detect_calgary(); -#endif - - detect_intel_iommu(); - -#ifdef CONFIG_SWIOTLB - pci_swiotlb_init(); -#endif -} - static int __init pci_iommu_init(void) { #ifdef CONFIG_CALGARY_IOMMU @@ -327,6 +506,8 @@ void pci_iommu_shutdown(void) { gart_iommu_shutdown(); } +/* Must execute after PCI subsystem */ +fs_initcall(pci_iommu_init); #ifdef CONFIG_PCI /* Many VIA bridges seem to corrupt data for DAC. Disable it here */ @@ -334,11 +515,10 @@ void pci_iommu_shutdown(void) static __devinit void via_no_dac(struct pci_dev *dev) { if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { - printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n"); + printk(KERN_INFO "PCI: VIA PCI bridge detected." + "Disabling DAC.\n"); forbid_dac = 1; } } DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); #endif -/* Must execute after PCI subsystem */ -fs_initcall(pci_iommu_init); diff --git a/arch/x86/kernel/pci-dma_32.c b/arch/x86/kernel/pci-dma_32.c deleted file mode 100644 index 51330321a5d32488ed806d72b5c610d078bd3efe..0000000000000000000000000000000000000000 --- a/arch/x86/kernel/pci-dma_32.c +++ /dev/null @@ -1,177 +0,0 @@ -/* - * Dynamic DMA mapping support. - * - * On i386 there is no hardware dynamic DMA address translation, - * so consistent alloc/free are merely page allocation/freeing. - * The rest of the dynamic DMA mapping interface is implemented - * in asm/pci.h. - */ - -#include -#include -#include -#include -#include -#include - -struct dma_coherent_mem { - void *virt_base; - u32 device_base; - int size; - int flags; - unsigned long *bitmap; -}; - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp) -{ - void *ret; - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - int order = get_order(size); - /* ignore region specifiers */ - gfp &= ~(__GFP_DMA | __GFP_HIGHMEM); - - if (mem) { - int page = bitmap_find_free_region(mem->bitmap, mem->size, - order); - if (page >= 0) { - *dma_handle = mem->device_base + (page << PAGE_SHIFT); - ret = mem->virt_base + (page << PAGE_SHIFT); - memset(ret, 0, size); - return ret; - } - if (mem->flags & DMA_MEMORY_EXCLUSIVE) - return NULL; - } - - if (dev == NULL || (dev->coherent_dma_mask < 0xffffffff)) - gfp |= GFP_DMA; - - ret = (void *)__get_free_pages(gfp, order); - - if (ret != NULL) { - memset(ret, 0, size); - *dma_handle = virt_to_phys(ret); - } - return ret; -} -EXPORT_SYMBOL(dma_alloc_coherent); - -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle) -{ - struct dma_coherent_mem *mem = dev ? dev->dma_mem : NULL; - int order = get_order(size); - - WARN_ON(irqs_disabled()); /* for portability */ - if (mem && vaddr >= mem->virt_base && vaddr < (mem->virt_base + (mem->size << PAGE_SHIFT))) { - int page = (vaddr - mem->virt_base) >> PAGE_SHIFT; - - bitmap_release_region(mem->bitmap, page, order); - } else - free_pages((unsigned long)vaddr, order); -} -EXPORT_SYMBOL(dma_free_coherent); - -int dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags) -{ - void __iomem *mem_base = NULL; - int pages = size >> PAGE_SHIFT; - int bitmap_size = BITS_TO_LONGS(pages) * sizeof(long); - - if ((flags & (DMA_MEMORY_MAP | DMA_MEMORY_IO)) == 0) - goto out; - if (!size) - goto out; - if (dev->dma_mem) - goto out; - - /* FIXME: this routine just ignores DMA_MEMORY_INCLUDES_CHILDREN */ - - mem_base = ioremap(bus_addr, size); - if (!mem_base) - goto out; - - dev->dma_mem = kzalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); - if (!dev->dma_mem) - goto out; - dev->dma_mem->bitmap = kzalloc(bitmap_size, GFP_KERNEL); - if (!dev->dma_mem->bitmap) - goto free1_out; - - dev->dma_mem->virt_base = mem_base; - dev->dma_mem->device_base = device_addr; - dev->dma_mem->size = pages; - dev->dma_mem->flags = flags; - - if (flags & DMA_MEMORY_MAP) - return DMA_MEMORY_MAP; - - return DMA_MEMORY_IO; - - free1_out: - kfree(dev->dma_mem); - out: - if (mem_base) - iounmap(mem_base); - return 0; -} -EXPORT_SYMBOL(dma_declare_coherent_memory); - -void dma_release_declared_memory(struct device *dev) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - - if(!mem) - return; - dev->dma_mem = NULL; - iounmap(mem->virt_base); - kfree(mem->bitmap); - kfree(mem); -} -EXPORT_SYMBOL(dma_release_declared_memory); - -void *dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size) -{ - struct dma_coherent_mem *mem = dev->dma_mem; - int pages = (size + (device_addr & ~PAGE_MASK) + PAGE_SIZE - 1) >> PAGE_SHIFT; - int pos, err; - - if (!mem) - return ERR_PTR(-EINVAL); - - pos = (device_addr - mem->device_base) >> PAGE_SHIFT; - err = bitmap_allocate_region(mem->bitmap, pos, get_order(pages)); - if (err != 0) - return ERR_PTR(err); - return mem->virt_base + (pos << PAGE_SHIFT); -} -EXPORT_SYMBOL(dma_mark_declared_memory_occupied); - -#ifdef CONFIG_PCI -/* Many VIA bridges seem to corrupt data for DAC. Disable it here */ - -int forbid_dac; -EXPORT_SYMBOL(forbid_dac); - -static __devinit void via_no_dac(struct pci_dev *dev) -{ - if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI && forbid_dac == 0) { - printk(KERN_INFO "PCI: VIA PCI bridge detected. Disabling DAC.\n"); - forbid_dac = 1; - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_VIA, PCI_ANY_ID, via_no_dac); - -static int check_iommu(char *s) -{ - if (!strcmp(s, "usedac")) { - forbid_dac = -1; - return 1; - } - return 0; -} -__setup("iommu=", check_iommu); -#endif diff --git a/arch/x86/kernel/pci-gart_64.c b/arch/x86/kernel/pci-gart_64.c index 700e4647dd30214ba33b90845f4121421515948d..c07455d1695f531843df8bf165ce5131bb1ca836 100644 --- a/arch/x86/kernel/pci-gart_64.c +++ b/arch/x86/kernel/pci-gart_64.c @@ -264,9 +264,9 @@ static dma_addr_t dma_map_area(struct device *dev, dma_addr_t phys_mem, } static dma_addr_t -gart_map_simple(struct device *dev, char *buf, size_t size, int dir) +gart_map_simple(struct device *dev, phys_addr_t paddr, size_t size, int dir) { - dma_addr_t map = dma_map_area(dev, virt_to_bus(buf), size, dir); + dma_addr_t map = dma_map_area(dev, paddr, size, dir); flush_gart(); @@ -275,18 +275,17 @@ gart_map_simple(struct device *dev, char *buf, size_t size, int dir) /* Map a single area into the IOMMU */ static dma_addr_t -gart_map_single(struct device *dev, void *addr, size_t size, int dir) +gart_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir) { - unsigned long phys_mem, bus; + unsigned long bus; if (!dev) dev = &fallback_dev; - phys_mem = virt_to_phys(addr); - if (!need_iommu(dev, phys_mem, size)) - return phys_mem; + if (!need_iommu(dev, paddr, size)) + return paddr; - bus = gart_map_simple(dev, addr, size, dir); + bus = gart_map_simple(dev, paddr, size, dir); return bus; } diff --git a/arch/x86/kernel/pci-nommu_64.c b/arch/x86/kernel/pci-nommu.c similarity index 77% rename from arch/x86/kernel/pci-nommu_64.c rename to arch/x86/kernel/pci-nommu.c index ab08e1832228a74869bc6e7aeb517b0e29e8b044..aec43d56f49c57cedffaefad7205fbbfe9b984c2 100644 --- a/arch/x86/kernel/pci-nommu_64.c +++ b/arch/x86/kernel/pci-nommu.c @@ -14,7 +14,7 @@ static int check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) { - if (hwdev && bus + size > *hwdev->dma_mask) { + if (hwdev && bus + size > *hwdev->dma_mask) { if (*hwdev->dma_mask >= DMA_32BIT_MASK) printk(KERN_ERR "nommu_%s: overflow %Lx+%zu of device mask %Lx\n", @@ -26,19 +26,17 @@ check_addr(char *name, struct device *hwdev, dma_addr_t bus, size_t size) } static dma_addr_t -nommu_map_single(struct device *hwdev, void *ptr, size_t size, +nommu_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int direction) { - dma_addr_t bus = virt_to_bus(ptr); + dma_addr_t bus = paddr; + WARN_ON(size == 0); if (!check_addr("map_single", hwdev, bus, size)) return bad_dma_address; + flush_write_buffers(); return bus; } -static void nommu_unmap_single(struct device *dev, dma_addr_t addr,size_t size, - int direction) -{ -} /* Map a set of buffers described by scatterlist in streaming * mode for DMA. This is the scatter-gather version of the @@ -61,30 +59,34 @@ static int nommu_map_sg(struct device *hwdev, struct scatterlist *sg, struct scatterlist *s; int i; + WARN_ON(nents == 0 || sg[0].length == 0); + for_each_sg(sg, s, nents, i) { BUG_ON(!sg_page(s)); - s->dma_address = virt_to_bus(sg_virt(s)); + s->dma_address = sg_phys(s); if (!check_addr("map_sg", hwdev, s->dma_address, s->length)) return 0; s->dma_length = s->length; } + flush_write_buffers(); return nents; } -/* Unmap a set of streaming mode DMA translations. - * Again, cpu read rules concerning calls here are the same as for - * pci_unmap_single() above. - */ -static void nommu_unmap_sg(struct device *dev, struct scatterlist *sg, - int nents, int dir) +/* Make sure we keep the same behaviour */ +static int nommu_mapping_error(dma_addr_t dma_addr) { +#ifdef CONFIG_X86_32 + return 0; +#else + return (dma_addr == bad_dma_address); +#endif } + const struct dma_mapping_ops nommu_dma_ops = { .map_single = nommu_map_single, - .unmap_single = nommu_unmap_single, .map_sg = nommu_map_sg, - .unmap_sg = nommu_unmap_sg, + .mapping_error = nommu_mapping_error, .is_phys = 1, }; diff --git a/arch/x86/kernel/pci-swiotlb_64.c b/arch/x86/kernel/pci-swiotlb_64.c index 82a0a674a003f815b5d98cd540e56e26f4c332df..490da7f4b8d0dd8e29abf7ddd2e11b01fe19f196 100644 --- a/arch/x86/kernel/pci-swiotlb_64.c +++ b/arch/x86/kernel/pci-swiotlb_64.c @@ -11,11 +11,18 @@ int swiotlb __read_mostly; +static dma_addr_t +swiotlb_map_single_phys(struct device *hwdev, phys_addr_t paddr, size_t size, + int direction) +{ + return swiotlb_map_single(hwdev, phys_to_virt(paddr), size, direction); +} + const struct dma_mapping_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, .alloc_coherent = swiotlb_alloc_coherent, .free_coherent = swiotlb_free_coherent, - .map_single = swiotlb_map_single, + .map_single = swiotlb_map_single_phys, .unmap_single = swiotlb_unmap_single, .sync_single_for_cpu = swiotlb_sync_single_for_cpu, .sync_single_for_device = swiotlb_sync_single_for_device, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c new file mode 100644 index 0000000000000000000000000000000000000000..3004d716539dde2b882acd4e0576676531382965 --- /dev/null +++ b/arch/x86/kernel/process.c @@ -0,0 +1,44 @@ +#include +#include +#include +#include +#include +#include + +struct kmem_cache *task_xstate_cachep; + +int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src) +{ + *dst = *src; + if (src->thread.xstate) { + dst->thread.xstate = kmem_cache_alloc(task_xstate_cachep, + GFP_KERNEL); + if (!dst->thread.xstate) + return -ENOMEM; + WARN_ON((unsigned long)dst->thread.xstate & 15); + memcpy(dst->thread.xstate, src->thread.xstate, xstate_size); + } + return 0; +} + +void free_thread_xstate(struct task_struct *tsk) +{ + if (tsk->thread.xstate) { + kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); + tsk->thread.xstate = NULL; + } +} + +void free_thread_info(struct thread_info *ti) +{ + free_thread_xstate(ti->task); + free_pages((unsigned long)ti, get_order(THREAD_SIZE)); +} + +void arch_task_cache_init(void) +{ + task_xstate_cachep = + kmem_cache_create("task_xstate", xstate_size, + __alignof__(union thread_xstate), + SLAB_PANIC, NULL); +} diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index 3903a8f2eb978f2d9c6676c8f1bc9813d749fae9..7adad088e373fbf4bf5523dcf70f22665331b105 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -45,7 +46,6 @@ #include #include #include -#include #ifdef CONFIG_MATH_EMULATION #include #endif @@ -521,14 +521,18 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->cs = __USER_CS; regs->ip = new_ip; regs->sp = new_sp; + /* + * Free the old FP and other extended state + */ + free_thread_xstate(current); } EXPORT_SYMBOL_GPL(start_thread); -#ifdef CONFIG_SECCOMP static void hard_disable_TSC(void) { write_cr4(read_cr4() | X86_CR4_TSD); } + void disable_TSC(void) { preempt_disable(); @@ -540,11 +544,47 @@ void disable_TSC(void) hard_disable_TSC(); preempt_enable(); } + static void hard_enable_TSC(void) { write_cr4(read_cr4() & ~X86_CR4_TSD); } -#endif /* CONFIG_SECCOMP */ + +void enable_TSC(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ + hard_enable_TSC(); + preempt_enable(); +} + +int get_tsc_mode(unsigned long adr) +{ + unsigned int val; + + if (test_thread_flag(TIF_NOTSC)) + val = PR_TSC_SIGSEGV; + else + val = PR_TSC_ENABLE; + + return put_user(val, (unsigned int __user *)adr); +} + +int set_tsc_mode(unsigned int val) +{ + if (val == PR_TSC_SIGSEGV) + disable_TSC(); + else if (val == PR_TSC_ENABLE) + enable_TSC(); + else + return -EINVAL; + + return 0; +} static noinline void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, @@ -578,7 +618,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, set_debugreg(next->debugreg7, 7); } -#ifdef CONFIG_SECCOMP if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { /* prev and next are different */ @@ -587,7 +626,6 @@ __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, else hard_enable_TSC(); } -#endif #ifdef X86_BTS if (test_tsk_thread_flag(prev_p, TIF_BTS_TRACE_TS)) @@ -669,7 +707,7 @@ struct task_struct * __switch_to(struct task_struct *prev_p, struct task_struct /* we're going to use this soon, after a few expensive things */ if (next_p->fpu_counter > 5) - prefetch(&next->i387.fxsave); + prefetch(next->xstate); /* * Reload esp0. diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index e75ccc8a2b87b7e659e0ee4c8ad74459239b98bb..891af1a1b48a5cbf0ef303d9f1b5c5601b485869 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include @@ -532,9 +533,71 @@ start_thread(struct pt_regs *regs, unsigned long new_ip, unsigned long new_sp) regs->ss = __USER_DS; regs->flags = 0x200; set_fs(USER_DS); + /* + * Free the old FP and other extended state + */ + free_thread_xstate(current); } EXPORT_SYMBOL_GPL(start_thread); +static void hard_disable_TSC(void) +{ + write_cr4(read_cr4() | X86_CR4_TSD); +} + +void disable_TSC(void) +{ + preempt_disable(); + if (!test_and_set_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ + hard_disable_TSC(); + preempt_enable(); +} + +static void hard_enable_TSC(void) +{ + write_cr4(read_cr4() & ~X86_CR4_TSD); +} + +void enable_TSC(void) +{ + preempt_disable(); + if (test_and_clear_thread_flag(TIF_NOTSC)) + /* + * Must flip the CPU state synchronously with + * TIF_NOTSC in the current running context. + */ + hard_enable_TSC(); + preempt_enable(); +} + +int get_tsc_mode(unsigned long adr) +{ + unsigned int val; + + if (test_thread_flag(TIF_NOTSC)) + val = PR_TSC_SIGSEGV; + else + val = PR_TSC_ENABLE; + + return put_user(val, (unsigned int __user *)adr); +} + +int set_tsc_mode(unsigned int val) +{ + if (val == PR_TSC_SIGSEGV) + disable_TSC(); + else if (val == PR_TSC_ENABLE) + enable_TSC(); + else + return -EINVAL; + + return 0; +} + /* * This special macro can be used to load a debugging register */ @@ -572,6 +635,15 @@ static inline void __switch_to_xtra(struct task_struct *prev_p, loaddebug(next, 7); } + if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ + test_tsk_thread_flag(next_p, TIF_NOTSC)) { + /* prev and next are different */ + if (test_tsk_thread_flag(next_p, TIF_NOTSC)) + hard_disable_TSC(); + else + hard_enable_TSC(); + } + if (test_tsk_thread_flag(next_p, TIF_IO_BITMAP)) { /* * Copy the relevant range of the IO bitmap. @@ -614,7 +686,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p) /* we're going to use this soon, after a few expensive things */ if (next_p->fpu_counter>5) - prefetch(&next->i387.fxsave); + prefetch(next->xstate); /* * Reload esp0, LDT and the page table pointer: diff --git a/arch/x86/kernel/setup64.c b/arch/x86/kernel/setup64.c index 9042fb0e36f54b8846fea6ddb8dfe37ce4db9ce2..aee0e8200777057699f8cd580c1add23684d780d 100644 --- a/arch/x86/kernel/setup64.c +++ b/arch/x86/kernel/setup64.c @@ -74,8 +74,8 @@ int force_personality32 = 0; Control non executable heap for 32bit processes. To control the stack too use noexec=off -on PROT_READ does not imply PROT_EXEC for 32bit processes -off PROT_READ implies PROT_EXEC (default) +on PROT_READ does not imply PROT_EXEC for 32bit processes (default) +off PROT_READ implies PROT_EXEC */ static int __init nonx32_setup(char *str) { diff --git a/arch/x86/kernel/setup_32.c b/arch/x86/kernel/setup_32.c index 5b0bffb7fcc91d9aa237367a396dd1e5321aba6c..1c4799e687183f231f9a1c8c3248148bcc4aceae 100644 --- a/arch/x86/kernel/setup_32.c +++ b/arch/x86/kernel/setup_32.c @@ -812,10 +812,10 @@ void __init setup_arch(char **cmdline_p) efi_init(); /* update e820 for memory not covered by WB MTRRs */ - find_max_pfn(); + propagate_e820_map(); mtrr_bp_init(); if (mtrr_trim_uncached_memory(max_pfn)) - find_max_pfn(); + propagate_e820_map(); max_low_pfn = setup_memory(); diff --git a/arch/x86/kernel/setup_64.c b/arch/x86/kernel/setup_64.c index 674ef3510cdfd66972a08f716e374a0d96884dea..6b8e11f0c15d79c699dec0bea0df7c73e2759c8e 100644 --- a/arch/x86/kernel/setup_64.c +++ b/arch/x86/kernel/setup_64.c @@ -398,6 +398,8 @@ void __init setup_arch(char **cmdline_p) early_res_to_bootmem(); + dma32_reserve_bootmem(); + #ifdef CONFIG_ACPI_SLEEP /* * Reserve low memory region for sleep support. @@ -420,11 +422,14 @@ void __init setup_arch(char **cmdline_p) unsigned long end_of_mem = end_pfn << PAGE_SHIFT; if (ramdisk_end <= end_of_mem) { - reserve_bootmem_generic(ramdisk_image, ramdisk_size); + /* + * don't need to reserve again, already reserved early + * in x86_64_start_kernel, and early_res_to_bootmem + * convert that to reserved in bootmem + */ initrd_start = ramdisk_image + PAGE_OFFSET; initrd_end = initrd_start+ramdisk_size; } else { - /* Assumes everything on node 0 */ free_bootmem(ramdisk_image, ramdisk_size); printk(KERN_ERR "initrd extends beyond end of memory " "(0x%08lx > 0x%08lx)\ndisabling initrd\n", diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index e6abe8a49b1fa0b63cccf4b4e1ebfb03c6de8fde..6a925394bc7e646e70cd17a54fd6da0710a11e80 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -61,6 +61,7 @@ #include #include #include +#include #include #include @@ -677,6 +678,12 @@ wakeup_secondary_cpu(int phys_apicid, unsigned long start_eip) unsigned long send_status, accept_status = 0; int maxlvt, num_starts, j; + if (get_uv_system_type() == UV_NON_UNIQUE_APIC) { + send_status = uv_wakeup_secondary(phys_apicid, start_eip); + atomic_set(&init_deasserted, 1); + return send_status; + } + /* * Be paranoid about clearing APIC errors. */ @@ -918,16 +925,19 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) atomic_set(&init_deasserted, 0); - Dprintk("Setting warm reset code and vector.\n"); + if (get_uv_system_type() != UV_NON_UNIQUE_APIC) { - store_NMI_vector(&nmi_high, &nmi_low); + Dprintk("Setting warm reset code and vector.\n"); - smpboot_setup_warm_reset_vector(start_ip); - /* - * Be paranoid about clearing APIC errors. - */ - apic_write(APIC_ESR, 0); - apic_read(APIC_ESR); + store_NMI_vector(&nmi_high, &nmi_low); + + smpboot_setup_warm_reset_vector(start_ip); + /* + * Be paranoid about clearing APIC errors. + */ + apic_write(APIC_ESR, 0); + apic_read(APIC_ESR); + } /* * Starting actual IPI sequence... @@ -966,7 +976,8 @@ static int __cpuinit do_boot_cpu(int apicid, int cpu) else /* trampoline code not run */ printk(KERN_ERR "Not responding.\n"); - inquire_remote_apic(apicid); + if (get_uv_system_type() != UV_NON_UNIQUE_APIC) + inquire_remote_apic(apicid); } } diff --git a/arch/x86/kernel/traps_32.c b/arch/x86/kernel/traps_32.c index 65791ca2824a289651017fb524485746908a804e..471e694d6713193baa5a25dac995f177ef34abf2 100644 --- a/arch/x86/kernel/traps_32.c +++ b/arch/x86/kernel/traps_32.c @@ -681,7 +681,7 @@ void __kprobes do_general_protection(struct pt_regs *regs, long error_code) } } -static __kprobes void +static notrace __kprobes void mem_parity_error(unsigned char reason, struct pt_regs *regs) { printk(KERN_EMERG @@ -707,7 +707,7 @@ mem_parity_error(unsigned char reason, struct pt_regs *regs) clear_mem_error(reason); } -static __kprobes void +static notrace __kprobes void io_check_error(unsigned char reason, struct pt_regs *regs) { unsigned long i; @@ -727,7 +727,7 @@ io_check_error(unsigned char reason, struct pt_regs *regs) outb(reason, 0x61); } -static __kprobes void +static notrace __kprobes void unknown_nmi_error(unsigned char reason, struct pt_regs *regs) { if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) @@ -755,7 +755,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs *regs) static DEFINE_SPINLOCK(nmi_print_lock); -void __kprobes die_nmi(struct pt_regs *regs, const char *msg) +void notrace __kprobes die_nmi(struct pt_regs *regs, const char *msg) { if (notify_die(DIE_NMIWATCHDOG, msg, regs, 0, 2, SIGINT) == NOTIFY_STOP) return; @@ -786,7 +786,7 @@ void __kprobes die_nmi(struct pt_regs *regs, const char *msg) do_exit(SIGSEGV); } -static __kprobes void default_do_nmi(struct pt_regs *regs) +static notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; @@ -828,7 +828,7 @@ static __kprobes void default_do_nmi(struct pt_regs *regs) static int ignore_nmis; -__kprobes void do_nmi(struct pt_regs *regs, long error_code) +notrace __kprobes void do_nmi(struct pt_regs *regs, long error_code) { int cpu; @@ -1148,9 +1148,22 @@ asmlinkage void math_state_restore(void) struct thread_info *thread = current_thread_info(); struct task_struct *tsk = thread->task; + if (!tsk_used_math(tsk)) { + local_irq_enable(); + /* + * does a slab alloc which can sleep + */ + if (init_fpu(tsk)) { + /* + * ran out of memory! + */ + do_group_exit(SIGKILL); + return; + } + local_irq_disable(); + } + clts(); /* Allow maths ops (or we recurse) */ - if (!tsk_used_math(tsk)) - init_fpu(tsk); restore_fpu(tsk); thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ tsk->fpu_counter++; @@ -1208,11 +1221,6 @@ void __init trap_init(void) #endif set_trap_gate(19, &simd_coprocessor_error); - /* - * Verify that the FXSAVE/FXRSTOR data will be 16-byte aligned. - * Generate a build-time error if the alignment is wrong. - */ - BUILD_BUG_ON(offsetof(struct task_struct, thread.i387.fxsave) & 15); if (cpu_has_fxsr) { printk(KERN_INFO "Enabling fast FPU save and restore... "); set_in_cr4(X86_CR4_OSFXSR); @@ -1233,6 +1241,7 @@ void __init trap_init(void) set_bit(SYSCALL_VECTOR, used_vectors); + init_thread_xstate(); /* * Should be a barrier for any external CPU state: */ diff --git a/arch/x86/kernel/traps_64.c b/arch/x86/kernel/traps_64.c index 79aa6fc0815c9e75de806430c5660335bc1cc13e..adff76ea97c4732de4b7766272c8b6ad26ace478 100644 --- a/arch/x86/kernel/traps_64.c +++ b/arch/x86/kernel/traps_64.c @@ -600,7 +600,8 @@ void die(const char * str, struct pt_regs * regs, long err) oops_end(flags, regs, SIGSEGV); } -void __kprobes die_nmi(char *str, struct pt_regs *regs, int do_panic) +notrace __kprobes void +die_nmi(char *str, struct pt_regs *regs, int do_panic) { unsigned long flags; @@ -772,7 +773,7 @@ asmlinkage void __kprobes do_general_protection(struct pt_regs * regs, die("general protection fault", regs, error_code); } -static __kprobes void +static notrace __kprobes void mem_parity_error(unsigned char reason, struct pt_regs * regs) { printk(KERN_EMERG "Uhhuh. NMI received for unknown reason %02x.\n", @@ -796,7 +797,7 @@ mem_parity_error(unsigned char reason, struct pt_regs * regs) outb(reason, 0x61); } -static __kprobes void +static notrace __kprobes void io_check_error(unsigned char reason, struct pt_regs * regs) { printk("NMI: IOCK error (debug interrupt?)\n"); @@ -810,7 +811,7 @@ io_check_error(unsigned char reason, struct pt_regs * regs) outb(reason, 0x61); } -static __kprobes void +static notrace __kprobes void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) { if (notify_die(DIE_NMIUNKNOWN, "nmi", regs, reason, 2, SIGINT) == NOTIFY_STOP) @@ -827,7 +828,7 @@ unknown_nmi_error(unsigned char reason, struct pt_regs * regs) /* Runs on IST stack. This code must keep interrupts off all the time. Nested NMIs are prevented by the CPU. */ -asmlinkage __kprobes void default_do_nmi(struct pt_regs *regs) +asmlinkage notrace __kprobes void default_do_nmi(struct pt_regs *regs) { unsigned char reason = 0; int cpu; @@ -1123,11 +1124,24 @@ asmlinkage void __attribute__((weak)) mce_threshold_interrupt(void) asmlinkage void math_state_restore(void) { struct task_struct *me = current; - clts(); /* Allow maths ops (or we recurse) */ - if (!used_math()) - init_fpu(me); - restore_fpu_checking(&me->thread.i387.fxsave); + if (!used_math()) { + local_irq_enable(); + /* + * does a slab alloc which can sleep + */ + if (init_fpu(me)) { + /* + * ran out of memory! + */ + do_group_exit(SIGKILL); + return; + } + local_irq_disable(); + } + + clts(); /* Allow maths ops (or we recurse) */ + restore_fpu_checking(&me->thread.xstate->fxsave); task_thread_info(me)->status |= TS_USEDFPU; me->fpu_counter++; } @@ -1162,6 +1176,10 @@ void __init trap_init(void) set_system_gate(IA32_SYSCALL_VECTOR, ia32_syscall); #endif + /* + * initialize the per thread extended state: + */ + init_thread_xstate(); /* * Should be a barrier for any external CPU state. */ diff --git a/arch/x86/kernel/tsc_32.c b/arch/x86/kernel/tsc_32.c index 3d7e6e9fa6c2e4eaf5bee5c78927f63ddfa49e51..e4790728b2244fc90c60ff755d17f32653bcbbab 100644 --- a/arch/x86/kernel/tsc_32.c +++ b/arch/x86/kernel/tsc_32.c @@ -221,9 +221,9 @@ EXPORT_SYMBOL(recalibrate_cpu_khz); * if the CPU frequency is scaled, TSC-based delays will need a different * loops_per_jiffy value to function properly. */ -static unsigned int ref_freq = 0; -static unsigned long loops_per_jiffy_ref = 0; -static unsigned long cpu_khz_ref = 0; +static unsigned int ref_freq; +static unsigned long loops_per_jiffy_ref; +static unsigned long cpu_khz_ref; static int time_cpufreq_notifier(struct notifier_block *nb, unsigned long val, void *data) @@ -283,15 +283,28 @@ core_initcall(cpufreq_tsc); /* clock source code */ -static unsigned long current_tsc_khz = 0; +static unsigned long current_tsc_khz; +static struct clocksource clocksource_tsc; +/* + * We compare the TSC to the cycle_last value in the clocksource + * structure to avoid a nasty time-warp issue. This can be observed in + * a very small window right after one CPU updated cycle_last under + * xtime lock and the other CPU reads a TSC value which is smaller + * than the cycle_last reference value due to a TSC which is slighty + * behind. This delta is nowhere else observable, but in that case it + * results in a forward time jump in the range of hours due to the + * unsigned delta calculation of the time keeping core code, which is + * necessary to support wrapping clocksources like pm timer. + */ static cycle_t read_tsc(void) { cycle_t ret; rdtscll(ret); - return ret; + return ret >= clocksource_tsc.cycle_last ? + ret : clocksource_tsc.cycle_last; } static struct clocksource clocksource_tsc = { diff --git a/arch/x86/kernel/tsc_64.c b/arch/x86/kernel/tsc_64.c index ceeba01e7f479a9dabc73906d5d2e243ff5e7926..fcc16e58609e17c42de7fae8a8309106df366423 100644 --- a/arch/x86/kernel/tsc_64.c +++ b/arch/x86/kernel/tsc_64.c @@ -11,6 +11,7 @@ #include #include #include +#include static int notsc __initdata = 0; @@ -287,18 +288,34 @@ int __init notsc_setup(char *s) __setup("notsc", notsc_setup); +static struct clocksource clocksource_tsc; -/* clock source code: */ +/* + * We compare the TSC to the cycle_last value in the clocksource + * structure to avoid a nasty time-warp. This can be observed in a + * very small window right after one CPU updated cycle_last under + * xtime/vsyscall_gtod lock and the other CPU reads a TSC value which + * is smaller than the cycle_last reference value due to a TSC which + * is slighty behind. This delta is nowhere else observable, but in + * that case it results in a forward time jump in the range of hours + * due to the unsigned delta calculation of the time keeping core + * code, which is necessary to support wrapping clocksources like pm + * timer. + */ static cycle_t read_tsc(void) { cycle_t ret = (cycle_t)get_cycles(); - return ret; + + return ret >= clocksource_tsc.cycle_last ? + ret : clocksource_tsc.cycle_last; } static cycle_t __vsyscall_fn vread_tsc(void) { cycle_t ret = (cycle_t)vget_cycles(); - return ret; + + return ret >= __vsyscall_gtod_data.clock.cycle_last ? + ret : __vsyscall_gtod_data.clock.cycle_last; } static struct clocksource clocksource_tsc = { diff --git a/arch/x86/mach-visws/visws_apic.c b/arch/x86/mach-visws/visws_apic.c index 710faf71a650b11a991cdbc034cb21e31f0a98f1..cef9cb1d15accf6a6677efcfd76260dcd8990dca 100644 --- a/arch/x86/mach-visws/visws_apic.c +++ b/arch/x86/mach-visws/visws_apic.c @@ -1,6 +1,4 @@ /* - * linux/arch/i386/mach-visws/visws_apic.c - * * Copyright (C) 1999 Bent Hagemark, Ingo Molnar * * SGI Visual Workstation interrupt controller diff --git a/arch/x86/mach-voyager/voyager_basic.c b/arch/x86/mach-voyager/voyager_basic.c index 6a949e4edde8f17758fbb5fc9ff004eff0b4d286..46d6f8067690d9f37610718c9af1b924a3233e98 100644 --- a/arch/x86/mach-voyager/voyager_basic.c +++ b/arch/x86/mach-voyager/voyager_basic.c @@ -2,8 +2,6 @@ * * Author: J.E.J.Bottomley@HansenPartnership.com * - * linux/arch/i386/kernel/voyager.c - * * This file contains all the voyager specific routines for getting * initialisation of the architecture to function. For additional * features see: diff --git a/arch/x86/mach-voyager/voyager_cat.c b/arch/x86/mach-voyager/voyager_cat.c index 17a7904f75b19ce55b63a562158bc82c172cf677..ecab9fff0fd17579b43d550f8bae2e5eea98f22b 100644 --- a/arch/x86/mach-voyager/voyager_cat.c +++ b/arch/x86/mach-voyager/voyager_cat.c @@ -4,8 +4,6 @@ * * Author: J.E.J.Bottomley@HansenPartnership.com * - * linux/arch/i386/kernel/voyager_cat.c - * * This file contains all the logic for manipulating the CAT bus * in a level 5 machine. * diff --git a/arch/x86/mach-voyager/voyager_smp.c b/arch/x86/mach-voyager/voyager_smp.c index be7235bf105d8ac7f2e77838e36988710fd51048..96f60c7cd124a141e7f4825bbbe555e8893d9ef3 100644 --- a/arch/x86/mach-voyager/voyager_smp.c +++ b/arch/x86/mach-voyager/voyager_smp.c @@ -4,8 +4,6 @@ * * Author: J.E.J.Bottomley@HansenPartnership.com * - * linux/arch/i386/kernel/voyager_smp.c - * * This file provides all the same external entries as smp.c but uses * the voyager hal to provide the functionality */ diff --git a/arch/x86/mach-voyager/voyager_thread.c b/arch/x86/mach-voyager/voyager_thread.c index c69c931818ed49b8d5d9326a50aefb8aa339ba9e..15464a20fb388ee56fb4c7f05de9e8ed34803be6 100644 --- a/arch/x86/mach-voyager/voyager_thread.c +++ b/arch/x86/mach-voyager/voyager_thread.c @@ -4,8 +4,6 @@ * * Author: J.E.J.Bottomley@HansenPartnership.com * - * linux/arch/i386/kernel/voyager_thread.c - * * This module provides the machine status monitor thread for the * voyager architecture. This allows us to monitor the machine * environment (temp, voltage, fan function) and the front panel and diff --git a/arch/x86/math-emu/fpu_entry.c b/arch/x86/math-emu/fpu_entry.c index 4bab3b14539242ceeddf3a1580617f71c9227710..6e38d877ea7725fb8d94d2c91a0ee7daab2585d1 100644 --- a/arch/x86/math-emu/fpu_entry.c +++ b/arch/x86/math-emu/fpu_entry.c @@ -678,7 +678,7 @@ int fpregs_soft_set(struct task_struct *target, unsigned int pos, unsigned int count, const void *kbuf, const void __user *ubuf) { - struct i387_soft_struct *s387 = &target->thread.i387.soft; + struct i387_soft_struct *s387 = &target->thread.xstate->soft; void *space = s387->st_space; int ret; int offset, other, i, tags, regnr, tag, newtop; @@ -730,7 +730,7 @@ int fpregs_soft_get(struct task_struct *target, unsigned int pos, unsigned int count, void *kbuf, void __user *ubuf) { - struct i387_soft_struct *s387 = &target->thread.i387.soft; + struct i387_soft_struct *s387 = &target->thread.xstate->soft; const void *space = s387->st_space; int ret; int offset = (S387->ftop & 7) * 10, other = 80 - offset; diff --git a/arch/x86/math-emu/fpu_system.h b/arch/x86/math-emu/fpu_system.h index a3ae28c49dddad063de9c177f7d0f13480df3da5..13488fa153e0c81dacc9370edc1f0a2128d0babd 100644 --- a/arch/x86/math-emu/fpu_system.h +++ b/arch/x86/math-emu/fpu_system.h @@ -35,8 +35,8 @@ #define SEG_EXPAND_DOWN(s) (((s).b & ((1 << 11) | (1 << 10))) \ == (1 << 10)) -#define I387 (current->thread.i387) -#define FPU_info (I387.soft.info) +#define I387 (current->thread.xstate) +#define FPU_info (I387->soft.info) #define FPU_CS (*(unsigned short *) &(FPU_info->___cs)) #define FPU_SS (*(unsigned short *) &(FPU_info->___ss)) @@ -46,25 +46,25 @@ #define FPU_EIP (FPU_info->___eip) #define FPU_ORIG_EIP (FPU_info->___orig_eip) -#define FPU_lookahead (I387.soft.lookahead) +#define FPU_lookahead (I387->soft.lookahead) /* nz if ip_offset and cs_selector are not to be set for the current instruction. */ -#define no_ip_update (*(u_char *)&(I387.soft.no_update)) -#define FPU_rm (*(u_char *)&(I387.soft.rm)) +#define no_ip_update (*(u_char *)&(I387->soft.no_update)) +#define FPU_rm (*(u_char *)&(I387->soft.rm)) /* Number of bytes of data which can be legally accessed by the current instruction. This only needs to hold a number <= 108, so a byte will do. */ -#define access_limit (*(u_char *)&(I387.soft.alimit)) +#define access_limit (*(u_char *)&(I387->soft.alimit)) -#define partial_status (I387.soft.swd) -#define control_word (I387.soft.cwd) -#define fpu_tag_word (I387.soft.twd) -#define registers (I387.soft.st_space) -#define top (I387.soft.ftop) +#define partial_status (I387->soft.swd) +#define control_word (I387->soft.cwd) +#define fpu_tag_word (I387->soft.twd) +#define registers (I387->soft.st_space) +#define top (I387->soft.ftop) -#define instruction_address (*(struct address *)&I387.soft.fip) -#define operand_address (*(struct address *)&I387.soft.foo) +#define instruction_address (*(struct address *)&I387->soft.fip) +#define operand_address (*(struct address *)&I387->soft.foo) #define FPU_access_ok(x,y,z) if ( !access_ok(x,y,z) ) \ math_abort(FPU_info,SIGSEGV) diff --git a/arch/x86/math-emu/reg_ld_str.c b/arch/x86/math-emu/reg_ld_str.c index 02af772a24db24f12d2fbd17cfbd53fc5d22515b..d597fe7423c98441f7ed52f8bcb0df3bd45646a9 100644 --- a/arch/x86/math-emu/reg_ld_str.c +++ b/arch/x86/math-emu/reg_ld_str.c @@ -1180,8 +1180,8 @@ u_char __user *fstenv(fpu_addr_modes addr_modes, u_char __user *d) control_word |= 0xffff0040; partial_status = status_word() | 0xffff0000; fpu_tag_word |= 0xffff0000; - I387.soft.fcs &= ~0xf8000000; - I387.soft.fos |= 0xffff0000; + I387->soft.fcs &= ~0xf8000000; + I387->soft.fos |= 0xffff0000; #endif /* PECULIAR_486 */ if (__copy_to_user(d, &control_word, 7 * 4)) FPU_abort; diff --git a/arch/x86/mm/discontig_32.c b/arch/x86/mm/discontig_32.c index eba0bbede7a6b8e05319c5148b8d8b8b715e0db3..18378850e25aab13c3149903b5c2d6c611ca10a9 100644 --- a/arch/x86/mm/discontig_32.c +++ b/arch/x86/mm/discontig_32.c @@ -120,7 +120,7 @@ int __init get_memcfg_numa_flat(void) printk("NUMA - single node, flat memory mode\n"); /* Run the memory configuration and find the top of memory. */ - find_max_pfn(); + propagate_e820_map(); node_start_pfn[0] = 0; node_end_pfn[0] = max_pfn; memory_present(0, 0, max_pfn); @@ -134,7 +134,7 @@ int __init get_memcfg_numa_flat(void) /* * Find the highest page frame number we have available for the node */ -static void __init find_max_pfn_node(int nid) +static void __init propagate_e820_map_node(int nid) { if (node_end_pfn[nid] > max_pfn) node_end_pfn[nid] = max_pfn; @@ -379,7 +379,7 @@ unsigned long __init setup_memory(void) printk("High memory starts at vaddr %08lx\n", (ulong) pfn_to_kaddr(highstart_pfn)); for_each_online_node(nid) - find_max_pfn_node(nid); + propagate_e820_map_node(nid); memset(NODE_DATA(0), 0, sizeof(struct pglist_data)); NODE_DATA(0)->bdata = &node0_bdata; diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 1500dc8d63e4676586470d458722b71cad63ace1..9ec62da85fd79a75fc450b58c4ea54e4c77f857b 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -1,5 +1,4 @@ /* - * linux/arch/i386/mm/init.c * * Copyright (C) 1995 Linus Torvalds * diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 1076097dcab22115f5de1e9b22afc1baf2129ab6..1ff7906a9a4dbc7afa6dfdd24f274e077cd26773 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -47,9 +47,6 @@ #include #include -const struct dma_mapping_ops *dma_ops; -EXPORT_SYMBOL(dma_ops); - static unsigned long dma_reserve __initdata; DEFINE_PER_CPU(struct mmu_gather, mmu_gathers); diff --git a/arch/x86/mm/ioremap.c b/arch/x86/mm/ioremap.c index c590fd200e297e892d1552b8b575e2cf7b8604f7..3a4baf95e24d5a5cee6626a5b8789064e9dce564 100644 --- a/arch/x86/mm/ioremap.c +++ b/arch/x86/mm/ioremap.c @@ -134,7 +134,7 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, if (!phys_addr_valid(phys_addr)) { printk(KERN_WARNING "ioremap: invalid physical address %llx\n", - phys_addr); + (unsigned long long)phys_addr); WARN_ON_ONCE(1); return NULL; } @@ -187,7 +187,8 @@ static void __iomem *__ioremap(resource_size_t phys_addr, unsigned long size, new_prot_val == _PAGE_CACHE_WB)) { pr_debug( "ioremap error for 0x%llx-0x%llx, requested 0x%lx, got 0x%lx\n", - phys_addr, phys_addr + size, + (unsigned long long)phys_addr, + (unsigned long long)(phys_addr + size), prot_val, new_prot_val); free_memtype(phys_addr, phys_addr + size); return NULL; diff --git a/arch/x86/mm/k8topology_64.c b/arch/x86/mm/k8topology_64.c index 7a2ebce87df5dee511701d3f3d521d62e2d26dc8..86808e666f9c2aeea15492a8609400915f819a09 100644 --- a/arch/x86/mm/k8topology_64.c +++ b/arch/x86/mm/k8topology_64.c @@ -164,7 +164,7 @@ int __init k8_scan_nodes(unsigned long start, unsigned long end) if (!found) return -1; - memnode_shift = compute_hash_shift(nodes, 8); + memnode_shift = compute_hash_shift(nodes, 8, NULL); if (memnode_shift < 0) { printk(KERN_ERR "No NUMA node hash function found. Contact maintainer\n"); return -1; diff --git a/arch/x86/mm/numa_64.c b/arch/x86/mm/numa_64.c index 2ea56f48f29b506c3a180bfd79b3bdb62bf5fd2c..cb31701863551e95a3765817dfdedf82caf46fda 100644 --- a/arch/x86/mm/numa_64.c +++ b/arch/x86/mm/numa_64.c @@ -60,7 +60,7 @@ unsigned long __initdata nodemap_size; * -1 if node overlap or lost ram (shift too big) */ static int __init populate_memnodemap(const struct bootnode *nodes, - int numnodes, int shift) + int numnodes, int shift, int *nodeids) { unsigned long addr, end; int i, res = -1; @@ -76,7 +76,12 @@ static int __init populate_memnodemap(const struct bootnode *nodes, do { if (memnodemap[addr >> shift] != NUMA_NO_NODE) return -1; - memnodemap[addr >> shift] = i; + + if (!nodeids) + memnodemap[addr >> shift] = i; + else + memnodemap[addr >> shift] = nodeids[i]; + addr += (1UL << shift); } while (addr < end); res = 1; @@ -139,7 +144,8 @@ static int __init extract_lsb_from_nodes(const struct bootnode *nodes, return i; } -int __init compute_hash_shift(struct bootnode *nodes, int numnodes) +int __init compute_hash_shift(struct bootnode *nodes, int numnodes, + int *nodeids) { int shift; @@ -149,7 +155,7 @@ int __init compute_hash_shift(struct bootnode *nodes, int numnodes) printk(KERN_DEBUG "NUMA: Using %d for the hash shift.\n", shift); - if (populate_memnodemap(nodes, numnodes, shift) != 1) { + if (populate_memnodemap(nodes, numnodes, shift, nodeids) != 1) { printk(KERN_INFO "Your memory is not aligned you need to " "rebuild your kernel with a bigger NODEMAPSIZE " "shift=%d\n", shift); @@ -462,7 +468,7 @@ static int __init numa_emulation(unsigned long start_pfn, unsigned long end_pfn) } } out: - memnode_shift = compute_hash_shift(nodes, num_nodes); + memnode_shift = compute_hash_shift(nodes, num_nodes, NULL); if (memnode_shift < 0) { memnode_shift = 0; printk(KERN_ERR "No NUMA hash function found. NUMA emulation " diff --git a/arch/x86/mm/pgtable_32.c b/arch/x86/mm/pgtable_32.c index 3165ec0672bd1855cb607c6864b83c810c29d729..6fb9e7c6893fd44afad45232355b47c867c7cf75 100644 --- a/arch/x86/mm/pgtable_32.c +++ b/arch/x86/mm/pgtable_32.c @@ -1,7 +1,3 @@ -/* - * linux/arch/i386/mm/pgtable.c - */ - #include #include #include diff --git a/arch/x86/mm/srat_64.c b/arch/x86/mm/srat_64.c index 1bae9c855ceb8b56cce821129a21194a693f7c81..fb43d89f46f3c92b51e5eedc85be49340eabd765 100644 --- a/arch/x86/mm/srat_64.c +++ b/arch/x86/mm/srat_64.c @@ -32,6 +32,10 @@ static struct bootnode nodes_add[MAX_NUMNODES]; static int found_add_area __initdata; int hotadd_percent __initdata = 0; +static int num_node_memblks __initdata; +static struct bootnode node_memblk_range[NR_NODE_MEMBLKS] __initdata; +static int memblk_nodeid[NR_NODE_MEMBLKS] __initdata; + /* Too small nodes confuse the VM badly. Usually they result from BIOS bugs. */ #define NODE_MIN_SIZE (4*1024*1024) @@ -41,17 +45,17 @@ static __init int setup_node(int pxm) return acpi_map_pxm_to_node(pxm); } -static __init int conflicting_nodes(unsigned long start, unsigned long end) +static __init int conflicting_memblks(unsigned long start, unsigned long end) { int i; - for_each_node_mask(i, nodes_parsed) { - struct bootnode *nd = &nodes[i]; + for (i = 0; i < num_node_memblks; i++) { + struct bootnode *nd = &node_memblk_range[i]; if (nd->start == nd->end) continue; if (nd->end > start && nd->start < end) - return i; + return memblk_nodeid[i]; if (nd->end == end && nd->start == start) - return i; + return memblk_nodeid[i]; } return -1; } @@ -258,7 +262,7 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) bad_srat(); return; } - i = conflicting_nodes(start, end); + i = conflicting_memblks(start, end); if (i == node) { printk(KERN_WARNING "SRAT: Warning: PXM %d (%lx-%lx) overlaps with itself (%Lx-%Lx)\n", @@ -283,10 +287,10 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) nd->end = end; } - printk(KERN_INFO "SRAT: Node %u PXM %u %Lx-%Lx\n", node, pxm, - nd->start, nd->end); - e820_register_active_regions(node, nd->start >> PAGE_SHIFT, - nd->end >> PAGE_SHIFT); + printk(KERN_INFO "SRAT: Node %u PXM %u %lx-%lx\n", node, pxm, + start, end); + e820_register_active_regions(node, start >> PAGE_SHIFT, + end >> PAGE_SHIFT); push_node_boundaries(node, nd->start >> PAGE_SHIFT, nd->end >> PAGE_SHIFT); @@ -298,6 +302,11 @@ acpi_numa_memory_affinity_init(struct acpi_srat_mem_affinity *ma) if ((nd->start | nd->end) == 0) node_clear(node, nodes_parsed); } + + node_memblk_range[num_node_memblks].start = start; + node_memblk_range[num_node_memblks].end = end; + memblk_nodeid[num_node_memblks] = node; + num_node_memblks++; } /* Sanity check to catch more bad SRATs (they are amazingly common). @@ -368,7 +377,8 @@ int __init acpi_scan_nodes(unsigned long start, unsigned long end) return -1; } - memnode_shift = compute_hash_shift(nodes, MAX_NUMNODES); + memnode_shift = compute_hash_shift(node_memblk_range, num_node_memblks, + memblk_nodeid); if (memnode_shift < 0) { printk(KERN_ERR "SRAT: No NUMA node hash function found. Contact maintainer\n"); diff --git a/arch/x86/vdso/Makefile b/arch/x86/vdso/Makefile index 17a6b057856b6f6759eb358d7060b5b682861440..b7ad9f89d21f8898dbf792de05b0dddd21f3a803 100644 --- a/arch/x86/vdso/Makefile +++ b/arch/x86/vdso/Makefile @@ -37,7 +37,8 @@ $(obj)/%.so: OBJCOPYFLAGS := -S $(obj)/%.so: $(obj)/%.so.dbg FORCE $(call if_changed,objcopy) -CFL := $(PROFILING) -mcmodel=small -fPIC -g0 -O2 -fasynchronous-unwind-tables -m64 +CFL := $(PROFILING) -mcmodel=small -fPIC -O2 -fasynchronous-unwind-tables -m64 \ + $(filter -g%,$(KBUILD_CFLAGS)) $(vobjs): KBUILD_CFLAGS += $(CFL) diff --git a/arch/x86/video/fbdev.c b/arch/x86/video/fbdev.c index 48fb38d7d2c0ff695a60deedfb0d92139f84c21d..4db42bff8c603ee216345bba6143ad5f5efd3dec 100644 --- a/arch/x86/video/fbdev.c +++ b/arch/x86/video/fbdev.c @@ -1,5 +1,4 @@ /* - * arch/i386/video/fbdev.c - i386 Framebuffer * * Copyright (C) 2007 Antonino Daplas * diff --git a/include/asm-x86/boot.h b/include/asm-x86/boot.h index ed8affbf96cb804f3a97ec46d610fdec19345a2b..2faed7ecb092a7893c89c573b7ac9870e060d8a7 100644 --- a/include/asm-x86/boot.h +++ b/include/asm-x86/boot.h @@ -17,4 +17,12 @@ + (CONFIG_PHYSICAL_ALIGN - 1)) \ & ~(CONFIG_PHYSICAL_ALIGN - 1)) +#ifdef CONFIG_X86_64 +#define BOOT_HEAP_SIZE 0x7000 +#define BOOT_STACK_SIZE 0x4000 +#else +#define BOOT_HEAP_SIZE 0x4000 +#define BOOT_STACK_SIZE 0x1000 +#endif + #endif /* _ASM_BOOT_H */ diff --git a/include/asm-x86/dma-mapping.h b/include/asm-x86/dma-mapping.h index 58f790f4df5253fe80c3a9ed34b2b1d4bb91b4ef..a1a4dc7fe6ece75087cc33e33d3c60e2477cd586 100644 --- a/include/asm-x86/dma-mapping.h +++ b/include/asm-x86/dma-mapping.h @@ -1,5 +1,237 @@ +#ifndef _ASM_DMA_MAPPING_H_ +#define _ASM_DMA_MAPPING_H_ + +/* + * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for + * documentation. + */ + +#include +#include +#include + +extern dma_addr_t bad_dma_address; +extern int iommu_merge; +extern struct device fallback_dev; +extern int panic_on_overflow; +extern int forbid_dac; +extern int force_iommu; + +struct dma_mapping_ops { + int (*mapping_error)(dma_addr_t dma_addr); + void* (*alloc_coherent)(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t gfp); + void (*free_coherent)(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle); + dma_addr_t (*map_single)(struct device *hwdev, phys_addr_t ptr, + size_t size, int direction); + /* like map_single, but doesn't check the device mask */ + dma_addr_t (*map_simple)(struct device *hwdev, phys_addr_t ptr, + size_t size, int direction); + void (*unmap_single)(struct device *dev, dma_addr_t addr, + size_t size, int direction); + void (*sync_single_for_cpu)(struct device *hwdev, + dma_addr_t dma_handle, size_t size, + int direction); + void (*sync_single_for_device)(struct device *hwdev, + dma_addr_t dma_handle, size_t size, + int direction); + void (*sync_single_range_for_cpu)(struct device *hwdev, + dma_addr_t dma_handle, unsigned long offset, + size_t size, int direction); + void (*sync_single_range_for_device)(struct device *hwdev, + dma_addr_t dma_handle, unsigned long offset, + size_t size, int direction); + void (*sync_sg_for_cpu)(struct device *hwdev, + struct scatterlist *sg, int nelems, + int direction); + void (*sync_sg_for_device)(struct device *hwdev, + struct scatterlist *sg, int nelems, + int direction); + int (*map_sg)(struct device *hwdev, struct scatterlist *sg, + int nents, int direction); + void (*unmap_sg)(struct device *hwdev, + struct scatterlist *sg, int nents, + int direction); + int (*dma_supported)(struct device *hwdev, u64 mask); + int is_phys; +}; + +extern const struct dma_mapping_ops *dma_ops; + +static inline int dma_mapping_error(dma_addr_t dma_addr) +{ + if (dma_ops->mapping_error) + return dma_ops->mapping_error(dma_addr); + + return (dma_addr == bad_dma_address); +} + +#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) +#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) + +void *dma_alloc_coherent(struct device *dev, size_t size, + dma_addr_t *dma_handle, gfp_t flag); + +void dma_free_coherent(struct device *dev, size_t size, + void *vaddr, dma_addr_t dma_handle); + + +extern int dma_supported(struct device *hwdev, u64 mask); +extern int dma_set_mask(struct device *dev, u64 mask); + +static inline dma_addr_t +dma_map_single(struct device *hwdev, void *ptr, size_t size, + int direction) +{ + BUG_ON(!valid_dma_direction(direction)); + return dma_ops->map_single(hwdev, virt_to_phys(ptr), size, direction); +} + +static inline void +dma_unmap_single(struct device *dev, dma_addr_t addr, size_t size, + int direction) +{ + BUG_ON(!valid_dma_direction(direction)); + if (dma_ops->unmap_single) + dma_ops->unmap_single(dev, addr, size, direction); +} + +static inline int +dma_map_sg(struct device *hwdev, struct scatterlist *sg, + int nents, int direction) +{ + BUG_ON(!valid_dma_direction(direction)); + return dma_ops->map_sg(hwdev, sg, nents, direction); +} + +static inline void +dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, + int direction) +{ + BUG_ON(!valid_dma_direction(direction)); + if (dma_ops->unmap_sg) + dma_ops->unmap_sg(hwdev, sg, nents, direction); +} + +static inline void +dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, + size_t size, int direction) +{ + BUG_ON(!valid_dma_direction(direction)); + if (dma_ops->sync_single_for_cpu) + dma_ops->sync_single_for_cpu(hwdev, dma_handle, size, + direction); + flush_write_buffers(); +} + +static inline void +dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, + size_t size, int direction) +{ + BUG_ON(!valid_dma_direction(direction)); + if (dma_ops->sync_single_for_device) + dma_ops->sync_single_for_device(hwdev, dma_handle, size, + direction); + flush_write_buffers(); +} + +static inline void +dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, + unsigned long offset, size_t size, int direction) +{ + BUG_ON(!valid_dma_direction(direction)); + if (dma_ops->sync_single_range_for_cpu) + dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, + size, direction); + + flush_write_buffers(); +} + +static inline void +dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, + unsigned long offset, size_t size, + int direction) +{ + BUG_ON(!valid_dma_direction(direction)); + if (dma_ops->sync_single_range_for_device) + dma_ops->sync_single_range_for_device(hwdev, dma_handle, + offset, size, direction); + + flush_write_buffers(); +} + +static inline void +dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, + int nelems, int direction) +{ + BUG_ON(!valid_dma_direction(direction)); + if (dma_ops->sync_sg_for_cpu) + dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); + flush_write_buffers(); +} + +static inline void +dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, + int nelems, int direction) +{ + BUG_ON(!valid_dma_direction(direction)); + if (dma_ops->sync_sg_for_device) + dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction); + + flush_write_buffers(); +} + +static inline dma_addr_t dma_map_page(struct device *dev, struct page *page, + size_t offset, size_t size, + int direction) +{ + BUG_ON(!valid_dma_direction(direction)); + return dma_ops->map_single(dev, page_to_phys(page)+offset, + size, direction); +} + +static inline void dma_unmap_page(struct device *dev, dma_addr_t addr, + size_t size, int direction) +{ + dma_unmap_single(dev, addr, size, direction); +} + +static inline void +dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction dir) +{ + flush_write_buffers(); +} + +static inline int dma_get_cache_alignment(void) +{ + /* no easy way to get cache size on all x86, so return the + * maximum possible, to be safe */ + return boot_cpu_data.x86_clflush_size; +} + +#define dma_is_consistent(d, h) (1) + #ifdef CONFIG_X86_32 -# include "dma-mapping_32.h" -#else -# include "dma-mapping_64.h" +# define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY +struct dma_coherent_mem { + void *virt_base; + u32 device_base; + int size; + int flags; + unsigned long *bitmap; +}; + +extern int +dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, + dma_addr_t device_addr, size_t size, int flags); + +extern void +dma_release_declared_memory(struct device *dev); + +extern void * +dma_mark_declared_memory_occupied(struct device *dev, + dma_addr_t device_addr, size_t size); +#endif /* CONFIG_X86_32 */ #endif diff --git a/include/asm-x86/dma-mapping_32.h b/include/asm-x86/dma-mapping_32.h deleted file mode 100644 index 55f01bd9e556f9c2519d90991475f9507e45f67a..0000000000000000000000000000000000000000 --- a/include/asm-x86/dma-mapping_32.h +++ /dev/null @@ -1,187 +0,0 @@ -#ifndef _ASM_I386_DMA_MAPPING_H -#define _ASM_I386_DMA_MAPPING_H - -#include -#include - -#include -#include -#include - -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) - -void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag); - -void dma_free_coherent(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle); - -static inline dma_addr_t -dma_map_single(struct device *dev, void *ptr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(!valid_dma_direction(direction)); - WARN_ON(size == 0); - flush_write_buffers(); - return virt_to_phys(ptr); -} - -static inline void -dma_unmap_single(struct device *dev, dma_addr_t dma_addr, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(!valid_dma_direction(direction)); -} - -static inline int -dma_map_sg(struct device *dev, struct scatterlist *sglist, int nents, - enum dma_data_direction direction) -{ - struct scatterlist *sg; - int i; - - BUG_ON(!valid_dma_direction(direction)); - WARN_ON(nents == 0 || sglist[0].length == 0); - - for_each_sg(sglist, sg, nents, i) { - BUG_ON(!sg_page(sg)); - - sg->dma_address = sg_phys(sg); - } - - flush_write_buffers(); - return nents; -} - -static inline dma_addr_t -dma_map_page(struct device *dev, struct page *page, unsigned long offset, - size_t size, enum dma_data_direction direction) -{ - BUG_ON(!valid_dma_direction(direction)); - return page_to_phys(page) + offset; -} - -static inline void -dma_unmap_page(struct device *dev, dma_addr_t dma_address, size_t size, - enum dma_data_direction direction) -{ - BUG_ON(!valid_dma_direction(direction)); -} - - -static inline void -dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nhwentries, - enum dma_data_direction direction) -{ - BUG_ON(!valid_dma_direction(direction)); -} - -static inline void -dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ -} - -static inline void -dma_sync_single_for_device(struct device *dev, dma_addr_t dma_handle, size_t size, - enum dma_data_direction direction) -{ - flush_write_buffers(); -} - -static inline void -dma_sync_single_range_for_cpu(struct device *dev, dma_addr_t dma_handle, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ -} - -static inline void -dma_sync_single_range_for_device(struct device *dev, dma_addr_t dma_handle, - unsigned long offset, size_t size, - enum dma_data_direction direction) -{ - flush_write_buffers(); -} - -static inline void -dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ -} - -static inline void -dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, - enum dma_data_direction direction) -{ - flush_write_buffers(); -} - -static inline int -dma_mapping_error(dma_addr_t dma_addr) -{ - return 0; -} - -extern int forbid_dac; - -static inline int -dma_supported(struct device *dev, u64 mask) -{ - /* - * we fall back to GFP_DMA when the mask isn't all 1s, - * so we can't guarantee allocations that must be - * within a tighter range than GFP_DMA.. - */ - if(mask < 0x00ffffff) - return 0; - - /* Work around chipset bugs */ - if (forbid_dac > 0 && mask > 0xffffffffULL) - return 0; - - return 1; -} - -static inline int -dma_set_mask(struct device *dev, u64 mask) -{ - if(!dev->dma_mask || !dma_supported(dev, mask)) - return -EIO; - - *dev->dma_mask = mask; - - return 0; -} - -static inline int -dma_get_cache_alignment(void) -{ - /* no easy way to get cache size on all x86, so return the - * maximum possible, to be safe */ - return (1 << INTERNODE_CACHE_SHIFT); -} - -#define dma_is_consistent(d, h) (1) - -static inline void -dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction direction) -{ - flush_write_buffers(); -} - -#define ARCH_HAS_DMA_DECLARE_COHERENT_MEMORY -extern int -dma_declare_coherent_memory(struct device *dev, dma_addr_t bus_addr, - dma_addr_t device_addr, size_t size, int flags); - -extern void -dma_release_declared_memory(struct device *dev); - -extern void * -dma_mark_declared_memory_occupied(struct device *dev, - dma_addr_t device_addr, size_t size); - -#endif diff --git a/include/asm-x86/dma-mapping_64.h b/include/asm-x86/dma-mapping_64.h deleted file mode 100644 index ecd0f6125ba3b3c005be3192c44ce07e4383021e..0000000000000000000000000000000000000000 --- a/include/asm-x86/dma-mapping_64.h +++ /dev/null @@ -1,202 +0,0 @@ -#ifndef _X8664_DMA_MAPPING_H -#define _X8664_DMA_MAPPING_H 1 - -/* - * IOMMU interface. See Documentation/DMA-mapping.txt and DMA-API.txt for - * documentation. - */ - -#include -#include -#include - -struct dma_mapping_ops { - int (*mapping_error)(dma_addr_t dma_addr); - void* (*alloc_coherent)(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp); - void (*free_coherent)(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle); - dma_addr_t (*map_single)(struct device *hwdev, void *ptr, - size_t size, int direction); - /* like map_single, but doesn't check the device mask */ - dma_addr_t (*map_simple)(struct device *hwdev, char *ptr, - size_t size, int direction); - void (*unmap_single)(struct device *dev, dma_addr_t addr, - size_t size, int direction); - void (*sync_single_for_cpu)(struct device *hwdev, - dma_addr_t dma_handle, size_t size, - int direction); - void (*sync_single_for_device)(struct device *hwdev, - dma_addr_t dma_handle, size_t size, - int direction); - void (*sync_single_range_for_cpu)(struct device *hwdev, - dma_addr_t dma_handle, unsigned long offset, - size_t size, int direction); - void (*sync_single_range_for_device)(struct device *hwdev, - dma_addr_t dma_handle, unsigned long offset, - size_t size, int direction); - void (*sync_sg_for_cpu)(struct device *hwdev, - struct scatterlist *sg, int nelems, - int direction); - void (*sync_sg_for_device)(struct device *hwdev, - struct scatterlist *sg, int nelems, - int direction); - int (*map_sg)(struct device *hwdev, struct scatterlist *sg, - int nents, int direction); - void (*unmap_sg)(struct device *hwdev, - struct scatterlist *sg, int nents, - int direction); - int (*dma_supported)(struct device *hwdev, u64 mask); - int is_phys; -}; - -extern dma_addr_t bad_dma_address; -extern const struct dma_mapping_ops* dma_ops; -extern int iommu_merge; - -static inline int dma_mapping_error(dma_addr_t dma_addr) -{ - if (dma_ops->mapping_error) - return dma_ops->mapping_error(dma_addr); - - return (dma_addr == bad_dma_address); -} - -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) - -#define dma_alloc_noncoherent(d, s, h, f) dma_alloc_coherent(d, s, h, f) -#define dma_free_noncoherent(d, s, v, h) dma_free_coherent(d, s, v, h) - -extern void *dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp); -extern void dma_free_coherent(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_handle); - -static inline dma_addr_t -dma_map_single(struct device *hwdev, void *ptr, size_t size, - int direction) -{ - BUG_ON(!valid_dma_direction(direction)); - return dma_ops->map_single(hwdev, ptr, size, direction); -} - -static inline void -dma_unmap_single(struct device *dev, dma_addr_t addr,size_t size, - int direction) -{ - BUG_ON(!valid_dma_direction(direction)); - dma_ops->unmap_single(dev, addr, size, direction); -} - -#define dma_map_page(dev,page,offset,size,dir) \ - dma_map_single((dev), page_address(page)+(offset), (size), (dir)) - -#define dma_unmap_page dma_unmap_single - -static inline void -dma_sync_single_for_cpu(struct device *hwdev, dma_addr_t dma_handle, - size_t size, int direction) -{ - BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_for_cpu) - dma_ops->sync_single_for_cpu(hwdev, dma_handle, size, - direction); - flush_write_buffers(); -} - -static inline void -dma_sync_single_for_device(struct device *hwdev, dma_addr_t dma_handle, - size_t size, int direction) -{ - BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_for_device) - dma_ops->sync_single_for_device(hwdev, dma_handle, size, - direction); - flush_write_buffers(); -} - -static inline void -dma_sync_single_range_for_cpu(struct device *hwdev, dma_addr_t dma_handle, - unsigned long offset, size_t size, int direction) -{ - BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_range_for_cpu) { - dma_ops->sync_single_range_for_cpu(hwdev, dma_handle, offset, size, direction); - } - - flush_write_buffers(); -} - -static inline void -dma_sync_single_range_for_device(struct device *hwdev, dma_addr_t dma_handle, - unsigned long offset, size_t size, int direction) -{ - BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_single_range_for_device) - dma_ops->sync_single_range_for_device(hwdev, dma_handle, - offset, size, direction); - - flush_write_buffers(); -} - -static inline void -dma_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, int direction) -{ - BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_sg_for_cpu) - dma_ops->sync_sg_for_cpu(hwdev, sg, nelems, direction); - flush_write_buffers(); -} - -static inline void -dma_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, int direction) -{ - BUG_ON(!valid_dma_direction(direction)); - if (dma_ops->sync_sg_for_device) { - dma_ops->sync_sg_for_device(hwdev, sg, nelems, direction); - } - - flush_write_buffers(); -} - -static inline int -dma_map_sg(struct device *hwdev, struct scatterlist *sg, int nents, int direction) -{ - BUG_ON(!valid_dma_direction(direction)); - return dma_ops->map_sg(hwdev, sg, nents, direction); -} - -static inline void -dma_unmap_sg(struct device *hwdev, struct scatterlist *sg, int nents, - int direction) -{ - BUG_ON(!valid_dma_direction(direction)); - dma_ops->unmap_sg(hwdev, sg, nents, direction); -} - -extern int dma_supported(struct device *hwdev, u64 mask); - -/* same for gart, swiotlb, and nommu */ -static inline int dma_get_cache_alignment(void) -{ - return boot_cpu_data.x86_clflush_size; -} - -#define dma_is_consistent(d, h) 1 - -extern int dma_set_mask(struct device *dev, u64 mask); - -static inline void -dma_cache_sync(struct device *dev, void *vaddr, size_t size, - enum dma_data_direction dir) -{ - flush_write_buffers(); -} - -extern struct device fallback_dev; -extern int panic_on_overflow; - -#endif /* _X8664_DMA_MAPPING_H */ diff --git a/include/asm-x86/e820_32.h b/include/asm-x86/e820_32.h index 43b1a8bd4b349c15694cd4d490e793b8b40c340e..a9f7c6ec32bf7ecad8041a7db1af54c4f2a47393 100644 --- a/include/asm-x86/e820_32.h +++ b/include/asm-x86/e820_32.h @@ -24,7 +24,7 @@ extern void update_e820(void); extern int e820_all_mapped(unsigned long start, unsigned long end, unsigned type); extern int e820_any_mapped(u64 start, u64 end, unsigned type); -extern void find_max_pfn(void); +extern void propagate_e820_map(void); extern void register_bootmem_low_pages(unsigned long max_low_pfn); extern void add_memory_region(unsigned long long start, unsigned long long size, int type); diff --git a/include/asm-x86/genapic_32.h b/include/asm-x86/genapic_32.h index f1b96932746be9ec9de3fd6b37d3f6b49adaf9e3..b02ea6e17de8b6a097c722f28d2c726f5e39514d 100644 --- a/include/asm-x86/genapic_32.h +++ b/include/asm-x86/genapic_32.h @@ -117,6 +117,7 @@ extern struct genapic *genapic; enum uv_system_type {UV_NONE, UV_LEGACY_APIC, UV_X2APIC, UV_NON_UNIQUE_APIC}; #define get_uv_system_type() UV_NONE #define is_uv_system() 0 +#define uv_wakeup_secondary(a, b) 1 #endif diff --git a/include/asm-x86/i387.h b/include/asm-x86/i387.h index 54522b814f1c796e36f30c65632b30a35eb8d1a8..da2adb45f6e3949476ac0a4d2c1aa1cbb2024986 100644 --- a/include/asm-x86/i387.h +++ b/include/asm-x86/i387.h @@ -21,8 +21,9 @@ extern void fpu_init(void); extern void mxcsr_feature_mask_init(void); -extern void init_fpu(struct task_struct *child); +extern int init_fpu(struct task_struct *child); extern asmlinkage void math_state_restore(void); +extern void init_thread_xstate(void); extern user_regset_active_fn fpregs_active, xfpregs_active; extern user_regset_get_fn fpregs_get, xfpregs_get, fpregs_soft_get; @@ -117,24 +118,22 @@ static inline void __save_init_fpu(struct task_struct *tsk) /* Using "fxsaveq %0" would be the ideal choice, but is only supported starting with gas 2.16. */ __asm__ __volatile__("fxsaveq %0" - : "=m" (tsk->thread.i387.fxsave)); + : "=m" (tsk->thread.xstate->fxsave)); #elif 0 /* Using, as a workaround, the properly prefixed form below isn't accepted by any binutils version so far released, complaining that the same type of prefix is used twice if an extended register is needed for addressing (fix submitted to mainline 2005-11-21). */ __asm__ __volatile__("rex64/fxsave %0" - : "=m" (tsk->thread.i387.fxsave)); + : "=m" (tsk->thread.xstate->fxsave)); #else /* This, however, we can work around by forcing the compiler to select an addressing mode that doesn't require extended registers. */ - __asm__ __volatile__("rex64/fxsave %P2(%1)" - : "=m" (tsk->thread.i387.fxsave) - : "cdaSDb" (tsk), - "i" (offsetof(__typeof__(*tsk), - thread.i387.fxsave))); + __asm__ __volatile__("rex64/fxsave (%1)" + : "=m" (tsk->thread.xstate->fxsave) + : "cdaSDb" (&tsk->thread.xstate->fxsave)); #endif - clear_fpu_state(&tsk->thread.i387.fxsave); + clear_fpu_state(&tsk->thread.xstate->fxsave); task_thread_info(tsk)->status &= ~TS_USEDFPU; } @@ -148,7 +147,7 @@ static inline int save_i387(struct _fpstate __user *buf) int err = 0; BUILD_BUG_ON(sizeof(struct user_i387_struct) != - sizeof(tsk->thread.i387.fxsave)); + sizeof(tsk->thread.xstate->fxsave)); if ((unsigned long)buf % 16) printk("save_i387: bad fpstate %p\n", buf); @@ -164,7 +163,7 @@ static inline int save_i387(struct _fpstate __user *buf) task_thread_info(tsk)->status &= ~TS_USEDFPU; stts(); } else { - if (__copy_to_user(buf, &tsk->thread.i387.fxsave, + if (__copy_to_user(buf, &tsk->thread.xstate->fxsave, sizeof(struct i387_fxsave_struct))) return -1; } @@ -201,7 +200,7 @@ static inline void restore_fpu(struct task_struct *tsk) "nop ; frstor %1", "fxrstor %1", X86_FEATURE_FXSR, - "m" ((tsk)->thread.i387.fxsave)); + "m" (tsk->thread.xstate->fxsave)); } /* We need a safe address that is cheap to find and that is already @@ -225,8 +224,8 @@ static inline void __save_init_fpu(struct task_struct *tsk) "fxsave %[fx]\n" "bt $7,%[fsw] ; jnc 1f ; fnclex\n1:", X86_FEATURE_FXSR, - [fx] "m" (tsk->thread.i387.fxsave), - [fsw] "m" (tsk->thread.i387.fxsave.swd) : "memory"); + [fx] "m" (tsk->thread.xstate->fxsave), + [fsw] "m" (tsk->thread.xstate->fxsave.swd) : "memory"); /* AMD K7/K8 CPUs don't save/restore FDP/FIP/FOP unless an exception is pending. Clear the x87 state here by setting it to fixed values. safe_address is a random variable that should be in L1 */ @@ -327,25 +326,25 @@ static inline void clear_fpu(struct task_struct *tsk) static inline unsigned short get_fpu_cwd(struct task_struct *tsk) { if (cpu_has_fxsr) { - return tsk->thread.i387.fxsave.cwd; + return tsk->thread.xstate->fxsave.cwd; } else { - return (unsigned short)tsk->thread.i387.fsave.cwd; + return (unsigned short)tsk->thread.xstate->fsave.cwd; } } static inline unsigned short get_fpu_swd(struct task_struct *tsk) { if (cpu_has_fxsr) { - return tsk->thread.i387.fxsave.swd; + return tsk->thread.xstate->fxsave.swd; } else { - return (unsigned short)tsk->thread.i387.fsave.swd; + return (unsigned short)tsk->thread.xstate->fsave.swd; } } static inline unsigned short get_fpu_mxcsr(struct task_struct *tsk) { if (cpu_has_xmm) { - return tsk->thread.i387.fxsave.mxcsr; + return tsk->thread.xstate->fxsave.mxcsr; } else { return MXCSR_DEFAULT; } diff --git a/include/asm-x86/numa_64.h b/include/asm-x86/numa_64.h index 32c22ae0709f1c13236b6fe31fb22734ba70a7cd..22e87c9f6a80551be4ed7108a87a83e4551a15ab 100644 --- a/include/asm-x86/numa_64.h +++ b/include/asm-x86/numa_64.h @@ -9,7 +9,8 @@ struct bootnode { u64 end; }; -extern int compute_hash_shift(struct bootnode *nodes, int numnodes); +extern int compute_hash_shift(struct bootnode *nodes, int numblks, + int *nodeids); #define ZONE_ALIGN (1UL << (MAX_ORDER+PAGE_SHIFT)) diff --git a/include/asm-x86/pci_64.h b/include/asm-x86/pci_64.h index df867e5d80b197a220c8cd84967a8adf9b6c88db..f330234ffa5c568888aeabf92c1e7a49d91335dc 100644 --- a/include/asm-x86/pci_64.h +++ b/include/asm-x86/pci_64.h @@ -22,6 +22,7 @@ extern int (*pci_config_read)(int seg, int bus, int dev, int fn, extern int (*pci_config_write)(int seg, int bus, int dev, int fn, int reg, int len, u32 value); +extern void dma32_reserve_bootmem(void); extern void pci_iommu_alloc(void); /* The PCI address space does equal the physical memory diff --git a/include/asm-x86/processor.h b/include/asm-x86/processor.h index 6e26c7c717a23a15255869f2b864350faf21d597..e6bf92ddeb21ded7de69cff3f37120df56520291 100644 --- a/include/asm-x86/processor.h +++ b/include/asm-x86/processor.h @@ -354,7 +354,7 @@ struct i387_soft_struct { u32 entry_eip; }; -union i387_union { +union thread_xstate { struct i387_fsave_struct fsave; struct i387_fxsave_struct fxsave; struct i387_soft_struct soft; @@ -365,6 +365,9 @@ DECLARE_PER_CPU(struct orig_ist, orig_ist); #endif extern void print_cpu_info(struct cpuinfo_x86 *); +extern unsigned int xstate_size; +extern void free_thread_xstate(struct task_struct *); +extern struct kmem_cache *task_xstate_cachep; extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern unsigned short num_cache_leaves; @@ -397,8 +400,8 @@ struct thread_struct { unsigned long cr2; unsigned long trap_no; unsigned long error_code; - /* Floating point info: */ - union i387_union i387 __attribute__((aligned(16)));; + /* floating point and extended processor state */ + union thread_xstate *xstate; #ifdef CONFIG_X86_32 /* Virtual 86 mode info */ struct vm86_struct __user *vm86_info; @@ -918,4 +921,11 @@ extern void start_thread(struct pt_regs *regs, unsigned long new_ip, #define KSTK_EIP(task) (task_pt_regs(task)->ip) +/* Get/set a process' ability to use the timestamp counter instruction */ +#define GET_TSC_CTL(adr) get_tsc_mode((adr)) +#define SET_TSC_CTL(val) set_tsc_mode((val)) + +extern int get_tsc_mode(unsigned long adr); +extern int set_tsc_mode(unsigned int val); + #endif diff --git a/include/asm-x86/scatterlist.h b/include/asm-x86/scatterlist.h index d13c197866d627daa572c6928034e639dc2b17f2..c0432061f81a0f0609e8b115e3c23e4ff6cdcbb4 100644 --- a/include/asm-x86/scatterlist.h +++ b/include/asm-x86/scatterlist.h @@ -11,9 +11,7 @@ struct scatterlist { unsigned int offset; unsigned int length; dma_addr_t dma_address; -#ifdef CONFIG_X86_64 unsigned int dma_length; -#endif }; #define ARCH_HAS_SG_CHAIN diff --git a/include/asm-x86/thread_info.h b/include/asm-x86/thread_info.h index d5fd12f2abdbb141668c205dd424de8c91de90e4..77244f17993f303d8e80e8af7b0f71e50e69f0a7 100644 --- a/include/asm-x86/thread_info.h +++ b/include/asm-x86/thread_info.h @@ -1,5 +1,14 @@ +#ifndef _ASM_X86_THREAD_INFO_H #ifdef CONFIG_X86_32 # include "thread_info_32.h" #else # include "thread_info_64.h" #endif + +#ifndef __ASSEMBLY__ +extern void arch_task_cache_init(void); +extern void free_thread_info(struct thread_info *ti); +extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); +#define arch_task_cache_init arch_task_cache_init +#endif +#endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/include/asm-x86/thread_info_32.h b/include/asm-x86/thread_info_32.h index 4e053fa561a9c5e643336d10e68d835e8863e1b8..53185996209664c82588904ca3429844696ab9b2 100644 --- a/include/asm-x86/thread_info_32.h +++ b/include/asm-x86/thread_info_32.h @@ -102,8 +102,6 @@ static inline struct thread_info *current_thread_info(void) __get_free_pages(GFP_KERNEL, get_order(THREAD_SIZE))) #endif -#define free_thread_info(info) free_pages((unsigned long)(info), get_order(THREAD_SIZE)) - #else /* !__ASSEMBLY__ */ /* how to get the thread information struct from ASM */ diff --git a/include/asm-x86/thread_info_64.h b/include/asm-x86/thread_info_64.h index 1e5c6f6152cd109ad2d3f19b2848a2846c0bfbe1..ed664e874decb873d83bad65ba2eb5a549dab69d 100644 --- a/include/asm-x86/thread_info_64.h +++ b/include/asm-x86/thread_info_64.h @@ -85,8 +85,6 @@ static inline struct thread_info *stack_thread_info(void) #define alloc_thread_info(tsk) \ ((struct thread_info *)__get_free_pages(THREAD_FLAGS, THREAD_ORDER)) -#define free_thread_info(ti) free_pages((unsigned long) (ti), THREAD_ORDER) - #else /* !__ASSEMBLY__ */ /* how to get the thread information struct from ASM */ @@ -126,6 +124,7 @@ static inline struct thread_info *stack_thread_info(void) #define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ #define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ #define TIF_BTS_TRACE_TS 27 /* record scheduling event timestamps */ +#define TIF_NOTSC 28 /* TSC is not accessible in userland */ #define _TIF_SYSCALL_TRACE (1 << TIF_SYSCALL_TRACE) #define _TIF_SIGPENDING (1 << TIF_SIGPENDING) @@ -147,6 +146,7 @@ static inline struct thread_info *stack_thread_info(void) #define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) #define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) #define _TIF_BTS_TRACE_TS (1 << TIF_BTS_TRACE_TS) +#define _TIF_NOTSC (1 << TIF_NOTSC) /* work to do on interrupt/exception return */ #define _TIF_WORK_MASK \ @@ -160,7 +160,7 @@ static inline struct thread_info *stack_thread_info(void) /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS) + (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_BTS_TRACE_TS|_TIF_NOTSC) #define _TIF_WORK_CTXSW_PREV _TIF_WORK_CTXSW #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) diff --git a/include/asm-x86/tsc.h b/include/asm-x86/tsc.h index d2d8eb5b55f532365f9277b5f776109626f7ed4b..0434bd8349a7456f27f8ca95c18b472e07c1ba8e 100644 --- a/include/asm-x86/tsc.h +++ b/include/asm-x86/tsc.h @@ -18,6 +18,7 @@ extern unsigned int cpu_khz; extern unsigned int tsc_khz; extern void disable_TSC(void); +extern void enable_TSC(void); static inline cycles_t get_cycles(void) { diff --git a/include/linux/efi.h b/include/linux/efi.h index 14813b5958022bbc608430999729b6519a58a246..a5f359a7ad0ef84d2829872f904f50014abe71f9 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -394,4 +395,10 @@ struct efi_generic_dev_path { u16 length; } __attribute ((packed)); +static inline void memrange_efi_to_native(u64 *addr, u64 *npages) +{ + *npages = PFN_UP(*addr + (*npages<rlim[RLIMIT_NPROC]; } +int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst, + struct task_struct *src) +{ + *dst = *src; + return 0; +} + static struct task_struct *dup_task_struct(struct task_struct *orig) { struct task_struct *tsk; @@ -181,15 +199,15 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) return NULL; } - *tsk = *orig; + err = arch_dup_task_struct(tsk, orig); + if (err) + goto out; + tsk->stack = ti; err = prop_local_init_single(&tsk->dirties); - if (err) { - free_thread_info(ti); - free_task_struct(tsk); - return NULL; - } + if (err) + goto out; setup_thread_stack(tsk, orig); @@ -205,6 +223,11 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) #endif tsk->splice_pipe = NULL; return tsk; + +out: + free_thread_info(ti); + free_task_struct(tsk); + return NULL; } #ifdef CONFIG_MMU diff --git a/kernel/sys.c b/kernel/sys.c index a626116af5db96b58f47434eb47c6af23da6535e..6a0cc71ee88d61e1afdd47e960a841515dad8a3c 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -67,6 +67,12 @@ #ifndef SET_ENDIAN # define SET_ENDIAN(a,b) (-EINVAL) #endif +#ifndef GET_TSC_CTL +# define GET_TSC_CTL(a) (-EINVAL) +#endif +#ifndef SET_TSC_CTL +# define SET_TSC_CTL(a) (-EINVAL) +#endif /* * this is where the system-wide overflow UID and GID are defined, for @@ -1737,7 +1743,12 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, #else return -EINVAL; #endif - + case PR_GET_TSC: + error = GET_TSC_CTL(arg2); + break; + case PR_SET_TSC: + error = SET_TSC_CTL(arg2); + break; default: error = -EINVAL; break; diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index a3fa587c350c598063a17c903beeee99fe0f47e0..2d6087c7cf9820fb4a16c43fdd75ed9f33d16bca 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -178,6 +178,7 @@ static void change_clocksource(void) if (clock == new) return; + new->cycle_last = 0; now = clocksource_read(new); nsec = __get_nsec_offset(); timespec_add_ns(&xtime, nsec); @@ -295,6 +296,7 @@ static int timekeeping_resume(struct sys_device *dev) timespec_add_ns(&xtime, timekeeping_suspend_nsecs); update_xtime_cache(0); /* re-base the last cycle value */ + clock->cycle_last = 0; clock->cycle_last = clocksource_read(clock); clock->error = 0; timekeeping_suspended = 0;