Commit 650400b2 authored by Ingo Molnar

Merge branch 'upstream-x86-selftests' into WIP.x86/pti.base

Conflicts:
	arch/x86/kernel/cpu/Makefile
Signed-off-by: Ingo Molnar <mingo@kernel.org>
@@ -34,7 +34,7 @@ ff92000000000000 - ffd1ffffffffffff (=54 bits) vmalloc/ioremap space
 ffd2000000000000 - ffd3ffffffffffff (=49 bits) hole
 ffd4000000000000 - ffd5ffffffffffff (=49 bits) virtual memory map (512TB)
 ... unused hole ...
-ffd8000000000000 - fff7ffffffffffff (=53 bits) kasan shadow memory (8PB)
+ffdf000000000000 - fffffc0000000000 (=53 bits) kasan shadow memory (8PB)
 ... unused hole ...
 ffffff0000000000 - ffffff7fffffffff (=39 bits) %esp fixup stacks
 ... unused hole ...
......
@@ -303,7 +303,6 @@ config ARCH_SUPPORTS_DEBUG_PAGEALLOC
 config KASAN_SHADOW_OFFSET
 	hex
 	depends on KASAN
-	default 0xdff8000000000000 if X86_5LEVEL
 	default 0xdffffc0000000000
 config HAVE_INTEL_TXT
......
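For reference, the shadow offset configured above ties a kernel address to its KASAN shadow byte. A minimal stand-alone sketch of that mapping; the scale shift of 3 (one shadow byte per 8 bytes) and the sample address are assumptions for illustration, not taken from this diff:

/* Minimal sketch: deriving a KASAN shadow address from a kernel address
 * using the offset configured above. Scale shift of 3 assumed. */
#include <stdio.h>

#define KASAN_SHADOW_SCALE_SHIFT	3
#define KASAN_SHADOW_OFFSET		0xdffffc0000000000UL

static unsigned long kasan_mem_to_shadow(unsigned long addr)
{
	return (addr >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET;
}

int main(void)
{
	/* Hypothetical address in the 4-level direct mapping. */
	unsigned long addr = 0xffff880000000000UL;

	printf("shadow(%#lx) = %#lx\n", addr, kasan_mem_to_shadow(addr));
	return 0;
}

Because the same offset now serves both paging modes, only the start and end of the shadow region move between 4- and 5-level configurations; the arithmetic stays fixed.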
 # SPDX-License-Identifier: GPL-2.0
-out := $(obj)/../../include/generated/asm
-uapi := $(obj)/../../include/generated/uapi/asm
+out := arch/$(SRCARCH)/include/generated/asm
+uapi := arch/$(SRCARCH)/include/generated/uapi/asm
 # Create output directory if not already present
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
......
@@ -20,14 +20,12 @@
  * Note: If the comment begins with a quoted string, that string is used
  * in /proc/cpuinfo instead of the macro name. If the string is "",
  * this feature bit is not displayed in /proc/cpuinfo at all.
- */
-
-/*
+ *
  * When adding new features here that depend on other features,
- * please update the table in kernel/cpu/cpuid-deps.c
+ * please update the table in kernel/cpu/cpuid-deps.c as well.
  */

-/* Intel-defined CPU features, CPUID level 0x00000001 (edx), word 0 */
+/* Intel-defined CPU features, CPUID level 0x00000001 (EDX), word 0 */
 #define X86_FEATURE_FPU			( 0*32+ 0) /* Onboard FPU */
 #define X86_FEATURE_VME			( 0*32+ 1) /* Virtual Mode Extensions */
 #define X86_FEATURE_DE			( 0*32+ 2) /* Debugging Extensions */
@@ -42,8 +40,7 @@
 #define X86_FEATURE_MTRR		( 0*32+12) /* Memory Type Range Registers */
 #define X86_FEATURE_PGE			( 0*32+13) /* Page Global Enable */
 #define X86_FEATURE_MCA			( 0*32+14) /* Machine Check Architecture */
-#define X86_FEATURE_CMOV		( 0*32+15) /* CMOV instructions */
-					  /* (plus FCMOVcc, FCOMI with FPU) */
+#define X86_FEATURE_CMOV		( 0*32+15) /* CMOV instructions (plus FCMOVcc, FCOMI with FPU) */
 #define X86_FEATURE_PAT			( 0*32+16) /* Page Attribute Table */
 #define X86_FEATURE_PSE36		( 0*32+17) /* 36-bit PSEs */
 #define X86_FEATURE_PN			( 0*32+18) /* Processor serial number */
@@ -63,15 +60,15 @@
 /* AMD-defined CPU features, CPUID level 0x80000001, word 1 */
 /* Don't duplicate feature flags which are redundant with Intel! */
 #define X86_FEATURE_SYSCALL		( 1*32+11) /* SYSCALL/SYSRET */
-#define X86_FEATURE_MP			( 1*32+19) /* MP Capable. */
+#define X86_FEATURE_MP			( 1*32+19) /* MP Capable */
 #define X86_FEATURE_NX			( 1*32+20) /* Execute Disable */
 #define X86_FEATURE_MMXEXT		( 1*32+22) /* AMD MMX extensions */
 #define X86_FEATURE_FXSR_OPT		( 1*32+25) /* FXSAVE/FXRSTOR optimizations */
 #define X86_FEATURE_GBPAGES		( 1*32+26) /* "pdpe1gb" GB pages */
 #define X86_FEATURE_RDTSCP		( 1*32+27) /* RDTSCP */
-#define X86_FEATURE_LM			( 1*32+29) /* Long Mode (x86-64) */
-#define X86_FEATURE_3DNOWEXT		( 1*32+30) /* AMD 3DNow! extensions */
-#define X86_FEATURE_3DNOW		( 1*32+31) /* 3DNow! */
+#define X86_FEATURE_LM			( 1*32+29) /* Long Mode (x86-64, 64-bit support) */
+#define X86_FEATURE_3DNOWEXT		( 1*32+30) /* AMD 3DNow extensions */
+#define X86_FEATURE_3DNOW		( 1*32+31) /* 3DNow */

 /* Transmeta-defined CPU features, CPUID level 0x80860001, word 2 */
 #define X86_FEATURE_RECOVERY		( 2*32+ 0) /* CPU in recovery mode */
@@ -84,66 +81,67 @@
 #define X86_FEATURE_K6_MTRR		( 3*32+ 1) /* AMD K6 nonstandard MTRRs */
 #define X86_FEATURE_CYRIX_ARR		( 3*32+ 2) /* Cyrix ARRs (= MTRRs) */
 #define X86_FEATURE_CENTAUR_MCR		( 3*32+ 3) /* Centaur MCRs (= MTRRs) */
-/* cpu types for specific tunings: */
+
+/* CPU types for specific tunings: */
 #define X86_FEATURE_K8			( 3*32+ 4) /* "" Opteron, Athlon64 */
 #define X86_FEATURE_K7			( 3*32+ 5) /* "" Athlon */
 #define X86_FEATURE_P3			( 3*32+ 6) /* "" P3 */
 #define X86_FEATURE_P4			( 3*32+ 7) /* "" P4 */
 #define X86_FEATURE_CONSTANT_TSC	( 3*32+ 8) /* TSC ticks at a constant rate */
-#define X86_FEATURE_UP			( 3*32+ 9) /* smp kernel running on up */
-#define X86_FEATURE_ART			( 3*32+10) /* Platform has always running timer (ART) */
+#define X86_FEATURE_UP			( 3*32+ 9) /* SMP kernel running on UP */
+#define X86_FEATURE_ART			( 3*32+10) /* Always running timer (ART) */
 #define X86_FEATURE_ARCH_PERFMON	( 3*32+11) /* Intel Architectural PerfMon */
 #define X86_FEATURE_PEBS		( 3*32+12) /* Precise-Event Based Sampling */
 #define X86_FEATURE_BTS			( 3*32+13) /* Branch Trace Store */
-#define X86_FEATURE_SYSCALL32		( 3*32+14) /* "" syscall in ia32 userspace */
-#define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in ia32 userspace */
-#define X86_FEATURE_REP_GOOD		( 3*32+16) /* rep microcode works well */
-#define X86_FEATURE_MFENCE_RDTSC	( 3*32+17) /* "" Mfence synchronizes RDTSC */
-#define X86_FEATURE_LFENCE_RDTSC	( 3*32+18) /* "" Lfence synchronizes RDTSC */
+#define X86_FEATURE_SYSCALL32		( 3*32+14) /* "" syscall in IA32 userspace */
+#define X86_FEATURE_SYSENTER32		( 3*32+15) /* "" sysenter in IA32 userspace */
+#define X86_FEATURE_REP_GOOD		( 3*32+16) /* REP microcode works well */
+#define X86_FEATURE_MFENCE_RDTSC	( 3*32+17) /* "" MFENCE synchronizes RDTSC */
+#define X86_FEATURE_LFENCE_RDTSC	( 3*32+18) /* "" LFENCE synchronizes RDTSC */
 #define X86_FEATURE_ACC_POWER		( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL		( 3*32+20) /* The NOPL (0F 1F) instructions */
 #define X86_FEATURE_ALWAYS		( 3*32+21) /* "" Always-present feature */
-#define X86_FEATURE_XTOPOLOGY		( 3*32+22) /* cpu topology enum extensions */
+#define X86_FEATURE_XTOPOLOGY		( 3*32+22) /* CPU topology enum extensions */
 #define X86_FEATURE_TSC_RELIABLE	( 3*32+23) /* TSC is known to be reliable */
 #define X86_FEATURE_NONSTOP_TSC		( 3*32+24) /* TSC does not stop in C states */
 #define X86_FEATURE_CPUID		( 3*32+25) /* CPU has CPUID instruction itself */
-#define X86_FEATURE_EXTD_APICID		( 3*32+26) /* has extended APICID (8 bits) */
-#define X86_FEATURE_AMD_DCM		( 3*32+27) /* multi-node processor */
-#define X86_FEATURE_APERFMPERF		( 3*32+28) /* APERFMPERF */
+#define X86_FEATURE_EXTD_APICID		( 3*32+26) /* Extended APICID (8 bits) */
+#define X86_FEATURE_AMD_DCM		( 3*32+27) /* AMD multi-node processor */
+#define X86_FEATURE_APERFMPERF		( 3*32+28) /* P-State hardware coordination feedback capability (APERF/MPERF MSRs) */
 #define X86_FEATURE_NONSTOP_TSC_S3	( 3*32+30) /* TSC doesn't stop in S3 state */
 #define X86_FEATURE_TSC_KNOWN_FREQ	( 3*32+31) /* TSC has known frequency */

-/* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */
+/* Intel-defined CPU features, CPUID level 0x00000001 (ECX), word 4 */
 #define X86_FEATURE_XMM3		( 4*32+ 0) /* "pni" SSE-3 */
 #define X86_FEATURE_PCLMULQDQ		( 4*32+ 1) /* PCLMULQDQ instruction */
 #define X86_FEATURE_DTES64		( 4*32+ 2) /* 64-bit Debug Store */
-#define X86_FEATURE_MWAIT		( 4*32+ 3) /* "monitor" Monitor/Mwait support */
-#define X86_FEATURE_DSCPL		( 4*32+ 4) /* "ds_cpl" CPL Qual. Debug Store */
+#define X86_FEATURE_MWAIT		( 4*32+ 3) /* "monitor" MONITOR/MWAIT support */
+#define X86_FEATURE_DSCPL		( 4*32+ 4) /* "ds_cpl" CPL-qualified (filtered) Debug Store */
 #define X86_FEATURE_VMX			( 4*32+ 5) /* Hardware virtualization */
-#define X86_FEATURE_SMX			( 4*32+ 6) /* Safer mode */
+#define X86_FEATURE_SMX			( 4*32+ 6) /* Safer Mode eXtensions */
 #define X86_FEATURE_EST			( 4*32+ 7) /* Enhanced SpeedStep */
 #define X86_FEATURE_TM2			( 4*32+ 8) /* Thermal Monitor 2 */
 #define X86_FEATURE_SSSE3		( 4*32+ 9) /* Supplemental SSE-3 */
 #define X86_FEATURE_CID			( 4*32+10) /* Context ID */
 #define X86_FEATURE_SDBG		( 4*32+11) /* Silicon Debug */
 #define X86_FEATURE_FMA			( 4*32+12) /* Fused multiply-add */
-#define X86_FEATURE_CX16		( 4*32+13) /* CMPXCHG16B */
+#define X86_FEATURE_CX16		( 4*32+13) /* CMPXCHG16B instruction */
 #define X86_FEATURE_XTPR		( 4*32+14) /* Send Task Priority Messages */
-#define X86_FEATURE_PDCM		( 4*32+15) /* Performance Capabilities */
+#define X86_FEATURE_PDCM		( 4*32+15) /* Perf/Debug Capabilities MSR */
 #define X86_FEATURE_PCID		( 4*32+17) /* Process Context Identifiers */
 #define X86_FEATURE_DCA			( 4*32+18) /* Direct Cache Access */
 #define X86_FEATURE_XMM4_1		( 4*32+19) /* "sse4_1" SSE-4.1 */
 #define X86_FEATURE_XMM4_2		( 4*32+20) /* "sse4_2" SSE-4.2 */
-#define X86_FEATURE_X2APIC		( 4*32+21) /* x2APIC */
+#define X86_FEATURE_X2APIC		( 4*32+21) /* X2APIC */
 #define X86_FEATURE_MOVBE		( 4*32+22) /* MOVBE instruction */
 #define X86_FEATURE_POPCNT		( 4*32+23) /* POPCNT instruction */
-#define X86_FEATURE_TSC_DEADLINE_TIMER	( 4*32+24) /* Tsc deadline timer */
+#define X86_FEATURE_TSC_DEADLINE_TIMER	( 4*32+24) /* TSC deadline timer */
 #define X86_FEATURE_AES			( 4*32+25) /* AES instructions */
-#define X86_FEATURE_XSAVE		( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV */
-#define X86_FEATURE_OSXSAVE		( 4*32+27) /* "" XSAVE enabled in the OS */
+#define X86_FEATURE_XSAVE		( 4*32+26) /* XSAVE/XRSTOR/XSETBV/XGETBV instructions */
+#define X86_FEATURE_OSXSAVE		( 4*32+27) /* "" XSAVE instruction enabled in the OS */
 #define X86_FEATURE_AVX			( 4*32+28) /* Advanced Vector Extensions */
-#define X86_FEATURE_F16C		( 4*32+29) /* 16-bit fp conversions */
-#define X86_FEATURE_RDRAND		( 4*32+30) /* The RDRAND instruction */
+#define X86_FEATURE_F16C		( 4*32+29) /* 16-bit FP conversions */
+#define X86_FEATURE_RDRAND		( 4*32+30) /* RDRAND instruction */
 #define X86_FEATURE_HYPERVISOR		( 4*32+31) /* Running on a hypervisor */

 /* VIA/Cyrix/Centaur-defined CPU features, CPUID level 0xC0000001, word 5 */
@@ -158,10 +156,10 @@
 #define X86_FEATURE_PMM			( 5*32+12) /* PadLock Montgomery Multiplier */
 #define X86_FEATURE_PMM_EN		( 5*32+13) /* PMM enabled */

-/* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */
+/* More extended AMD flags: CPUID level 0x80000001, ECX, word 6 */
 #define X86_FEATURE_LAHF_LM		( 6*32+ 0) /* LAHF/SAHF in long mode */
 #define X86_FEATURE_CMP_LEGACY		( 6*32+ 1) /* If yes HyperThreading not valid */
-#define X86_FEATURE_SVM			( 6*32+ 2) /* Secure virtual machine */
+#define X86_FEATURE_SVM			( 6*32+ 2) /* Secure Virtual Machine */
 #define X86_FEATURE_EXTAPIC		( 6*32+ 3) /* Extended APIC space */
 #define X86_FEATURE_CR8_LEGACY		( 6*32+ 4) /* CR8 in 32-bit mode */
 #define X86_FEATURE_ABM			( 6*32+ 5) /* Advanced bit manipulation */
@@ -175,16 +173,16 @@
 #define X86_FEATURE_WDT			( 6*32+13) /* Watchdog timer */
 #define X86_FEATURE_LWP			( 6*32+15) /* Light Weight Profiling */
 #define X86_FEATURE_FMA4		( 6*32+16) /* 4 operands MAC instructions */
-#define X86_FEATURE_TCE			( 6*32+17) /* translation cache extension */
+#define X86_FEATURE_TCE			( 6*32+17) /* Translation Cache Extension */
 #define X86_FEATURE_NODEID_MSR		( 6*32+19) /* NodeId MSR */
-#define X86_FEATURE_TBM			( 6*32+21) /* trailing bit manipulations */
-#define X86_FEATURE_TOPOEXT		( 6*32+22) /* topology extensions CPUID leafs */
-#define X86_FEATURE_PERFCTR_CORE	( 6*32+23) /* core performance counter extensions */
+#define X86_FEATURE_TBM			( 6*32+21) /* Trailing Bit Manipulations */
+#define X86_FEATURE_TOPOEXT		( 6*32+22) /* Topology extensions CPUID leafs */
+#define X86_FEATURE_PERFCTR_CORE	( 6*32+23) /* Core performance counter extensions */
 #define X86_FEATURE_PERFCTR_NB		( 6*32+24) /* NB performance counter extensions */
-#define X86_FEATURE_BPEXT		(6*32+26) /* data breakpoint extension */
-#define X86_FEATURE_PTSC		( 6*32+27) /* performance time-stamp counter */
+#define X86_FEATURE_BPEXT		( 6*32+26) /* Data breakpoint extension */
+#define X86_FEATURE_PTSC		( 6*32+27) /* Performance time-stamp counter */
 #define X86_FEATURE_PERFCTR_LLC		( 6*32+28) /* Last Level Cache performance counter extensions */
-#define X86_FEATURE_MWAITX		( 6*32+29) /* MWAIT extension (MONITORX/MWAITX) */
+#define X86_FEATURE_MWAITX		( 6*32+29) /* MWAIT extension (MONITORX/MWAITX instructions) */

 /*
  * Auxiliary flags: Linux defined - For features scattered in various
@@ -192,7 +190,7 @@
  *
  * Reuse free bits when adding new feature flags!
  */
-#define X86_FEATURE_RING3MWAIT		( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */
+#define X86_FEATURE_RING3MWAIT		( 7*32+ 0) /* Ring 3 MONITOR/MWAIT instructions */
 #define X86_FEATURE_CPUID_FAULT		( 7*32+ 1) /* Intel CPUID faulting */
 #define X86_FEATURE_CPB			( 7*32+ 2) /* AMD Core Performance Boost */
 #define X86_FEATURE_EPB			( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */
@@ -206,8 +204,8 @@
 #define X86_FEATURE_INTEL_PPIN		( 7*32+14) /* Intel Processor Inventory Number */
 #define X86_FEATURE_INTEL_PT		( 7*32+15) /* Intel Processor Trace */
-#define X86_FEATURE_AVX512_4VNNIW	(7*32+16) /* AVX-512 Neural Network Instructions */
-#define X86_FEATURE_AVX512_4FMAPS	(7*32+17) /* AVX-512 Multiply Accumulation Single precision */
+#define X86_FEATURE_AVX512_4VNNIW	( 7*32+16) /* AVX-512 Neural Network Instructions */
+#define X86_FEATURE_AVX512_4FMAPS	( 7*32+17) /* AVX-512 Multiply Accumulation Single precision */
 #define X86_FEATURE_MBA			( 7*32+18) /* Memory Bandwidth Allocation */
@@ -218,19 +216,19 @@
 #define X86_FEATURE_EPT			( 8*32+ 3) /* Intel Extended Page Table */
 #define X86_FEATURE_VPID		( 8*32+ 4) /* Intel Virtual Processor ID */

-#define X86_FEATURE_VMMCALL		( 8*32+15) /* Prefer vmmcall to vmcall */
+#define X86_FEATURE_VMMCALL		( 8*32+15) /* Prefer VMMCALL to VMCALL */
 #define X86_FEATURE_XENPV		( 8*32+16) /* "" Xen paravirtual guest */

-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ebx), word 9 */
-#define X86_FEATURE_FSGSBASE		( 9*32+ 0) /* {RD/WR}{FS/GS}BASE instructions*/
-#define X86_FEATURE_TSC_ADJUST		( 9*32+ 1) /* TSC adjustment MSR 0x3b */
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
+#define X86_FEATURE_FSGSBASE		( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
+#define X86_FEATURE_TSC_ADJUST		( 9*32+ 1) /* TSC adjustment MSR 0x3B */
 #define X86_FEATURE_BMI1		( 9*32+ 3) /* 1st group bit manipulation extensions */
 #define X86_FEATURE_HLE			( 9*32+ 4) /* Hardware Lock Elision */
 #define X86_FEATURE_AVX2		( 9*32+ 5) /* AVX2 instructions */
 #define X86_FEATURE_SMEP		( 9*32+ 7) /* Supervisor Mode Execution Protection */
 #define X86_FEATURE_BMI2		( 9*32+ 8) /* 2nd group bit manipulation extensions */
-#define X86_FEATURE_ERMS		( 9*32+ 9) /* Enhanced REP MOVSB/STOSB */
+#define X86_FEATURE_ERMS		( 9*32+ 9) /* Enhanced REP MOVSB/STOSB instructions */
 #define X86_FEATURE_INVPCID		( 9*32+10) /* Invalidate Processor Context ID */
 #define X86_FEATURE_RTM			( 9*32+11) /* Restricted Transactional Memory */
 #define X86_FEATURE_CQM			( 9*32+12) /* Cache QoS Monitoring */
@@ -238,8 +236,8 @@
 #define X86_FEATURE_RDT_A		( 9*32+15) /* Resource Director Technology Allocation */
 #define X86_FEATURE_AVX512F		( 9*32+16) /* AVX-512 Foundation */
 #define X86_FEATURE_AVX512DQ		( 9*32+17) /* AVX-512 DQ (Double/Quad granular) Instructions */
-#define X86_FEATURE_RDSEED		( 9*32+18) /* The RDSEED instruction */
-#define X86_FEATURE_ADX			( 9*32+19) /* The ADCX and ADOX instructions */
+#define X86_FEATURE_RDSEED		( 9*32+18) /* RDSEED instruction */
+#define X86_FEATURE_ADX			( 9*32+19) /* ADCX and ADOX instructions */
 #define X86_FEATURE_SMAP		( 9*32+20) /* Supervisor Mode Access Prevention */
 #define X86_FEATURE_AVX512IFMA		( 9*32+21) /* AVX-512 Integer Fused Multiply-Add instructions */
 #define X86_FEATURE_CLFLUSHOPT		( 9*32+23) /* CLFLUSHOPT instruction */
@@ -251,25 +249,25 @@
 #define X86_FEATURE_AVX512BW		( 9*32+30) /* AVX-512 BW (Byte/Word granular) Instructions */
 #define X86_FEATURE_AVX512VL		( 9*32+31) /* AVX-512 VL (128/256 Vector Length) Extensions */

-/* Extended state features, CPUID level 0x0000000d:1 (eax), word 10 */
-#define X86_FEATURE_XSAVEOPT		(10*32+ 0) /* XSAVEOPT */
-#define X86_FEATURE_XSAVEC		(10*32+ 1) /* XSAVEC */
-#define X86_FEATURE_XGETBV1		(10*32+ 2) /* XGETBV with ECX = 1 */
-#define X86_FEATURE_XSAVES		(10*32+ 3) /* XSAVES/XRSTORS */
+/* Extended state features, CPUID level 0x0000000d:1 (EAX), word 10 */
+#define X86_FEATURE_XSAVEOPT		(10*32+ 0) /* XSAVEOPT instruction */
+#define X86_FEATURE_XSAVEC		(10*32+ 1) /* XSAVEC instruction */
+#define X86_FEATURE_XGETBV1		(10*32+ 2) /* XGETBV with ECX = 1 instruction */
+#define X86_FEATURE_XSAVES		(10*32+ 3) /* XSAVES/XRSTORS instructions */

-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (edx), word 11 */
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:0 (EDX), word 11 */
 #define X86_FEATURE_CQM_LLC		(11*32+ 1) /* LLC QoS if 1 */

-/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (edx), word 12 */
-#define X86_FEATURE_CQM_OCCUP_LLC	(12*32+ 0) /* LLC occupancy monitoring if 1 */
+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x0000000F:1 (EDX), word 12 */
+#define X86_FEATURE_CQM_OCCUP_LLC	(12*32+ 0) /* LLC occupancy monitoring */
 #define X86_FEATURE_CQM_MBM_TOTAL	(12*32+ 1) /* LLC Total MBM monitoring */
 #define X86_FEATURE_CQM_MBM_LOCAL	(12*32+ 2) /* LLC Local MBM monitoring */

-/* AMD-defined CPU features, CPUID level 0x80000008 (ebx), word 13 */
-#define X86_FEATURE_CLZERO		(13*32+0) /* CLZERO instruction */
-#define X86_FEATURE_IRPERF		(13*32+1) /* Instructions Retired Count */
+/* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
+#define X86_FEATURE_CLZERO		(13*32+ 0) /* CLZERO instruction */
+#define X86_FEATURE_IRPERF		(13*32+ 1) /* Instructions Retired Count */

-/* Thermal and Power Management Leaf, CPUID level 0x00000006 (eax), word 14 */
+/* Thermal and Power Management Leaf, CPUID level 0x00000006 (EAX), word 14 */
 #define X86_FEATURE_DTHERM		(14*32+ 0) /* Digital Thermal Sensor */
 #define X86_FEATURE_IDA			(14*32+ 1) /* Intel Dynamic Acceleration */
 #define X86_FEATURE_ARAT		(14*32+ 2) /* Always Running APIC Timer */
@@ -281,7 +279,7 @@
 #define X86_FEATURE_HWP_EPP		(14*32+10) /* HWP Energy Perf. Preference */
 #define X86_FEATURE_HWP_PKG_REQ		(14*32+11) /* HWP Package Level Request */

-/* AMD SVM Feature Identification, CPUID level 0x8000000a (edx), word 15 */
+/* AMD SVM Feature Identification, CPUID level 0x8000000a (EDX), word 15 */
 #define X86_FEATURE_NPT			(15*32+ 0) /* Nested Page Table support */
 #define X86_FEATURE_LBRV		(15*32+ 1) /* LBR Virtualization support */
 #define X86_FEATURE_SVML		(15*32+ 2) /* "svm_lock" SVM locking MSR */
@@ -296,24 +294,24 @@
 #define X86_FEATURE_V_VMSAVE_VMLOAD	(15*32+15) /* Virtual VMSAVE VMLOAD */
 #define X86_FEATURE_VGIF		(15*32+16) /* Virtual GIF */

-/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */
+/* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
 #define X86_FEATURE_AVX512VBMI		(16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
 #define X86_FEATURE_PKU			(16*32+ 3) /* Protection Keys for Userspace */
 #define X86_FEATURE_OSPKE		(16*32+ 4) /* OS Protection Keys Enable */
 #define X86_FEATURE_AVX512_VBMI2	(16*32+ 6) /* Additional AVX512 Vector Bit Manipulation Instructions */
 #define X86_FEATURE_GFNI		(16*32+ 8) /* Galois Field New Instructions */
 #define X86_FEATURE_VAES		(16*32+ 9) /* Vector AES */
-#define X86_FEATURE_VPCLMULQDQ		(16*32+ 10) /* Carry-Less Multiplication Double Quadword */
-#define X86_FEATURE_AVX512_VNNI		(16*32+ 11) /* Vector Neural Network Instructions */
-#define X86_FEATURE_AVX512_BITALG	(16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB */
+#define X86_FEATURE_VPCLMULQDQ		(16*32+10) /* Carry-Less Multiplication Double Quadword */
+#define X86_FEATURE_AVX512_VNNI		(16*32+11) /* Vector Neural Network Instructions */
+#define X86_FEATURE_AVX512_BITALG	(16*32+12) /* Support for VPOPCNT[B,W] and VPSHUF-BITQMB instructions */
 #define X86_FEATURE_AVX512_VPOPCNTDQ	(16*32+14) /* POPCNT for vectors of DW/QW */
 #define X86_FEATURE_LA57		(16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID		(16*32+22) /* RDPID instruction */

-/* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */
-#define X86_FEATURE_OVERFLOW_RECOV	(17*32+0) /* MCA overflow recovery support */
-#define X86_FEATURE_SUCCOR		(17*32+1) /* Uncorrectable error containment and recovery */
-#define X86_FEATURE_SMCA		(17*32+3) /* Scalable MCA */
+/* AMD-defined CPU features, CPUID level 0x80000007 (EBX), word 17 */
+#define X86_FEATURE_OVERFLOW_RECOV	(17*32+ 0) /* MCA overflow recovery support */
+#define X86_FEATURE_SUCCOR		(17*32+ 1) /* Uncorrectable error containment and recovery */
+#define X86_FEATURE_SMCA		(17*32+ 3) /* Scalable MCA */

 /*
  * BUG word(s)
@@ -340,4 +338,5 @@
 #define X86_BUG_SWAPGS_FENCE		X86_BUG(11) /* SWAPGS without input dep on GS */
 #define X86_BUG_MONITOR			X86_BUG(12) /* IPI required to wake up remote CPU */
 #define X86_BUG_AMD_E400		X86_BUG(13) /* CPU is among the affected by Erratum 400 */

 #endif /* _ASM_X86_CPUFEATURES_H */
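Each feature above encodes as word*32 + bit, indexing an array of 32-bit capability words. A minimal sketch of that lookup; test_feature() is an illustrative helper for this note, not a kernel API:

/* Sketch: a (word*32 + bit) feature number indexing a u32 capability array. */
#include <stdint.h>
#include <stdio.h>

#define NCAPINTS		18		/* words 0..17, per the table above */
#define X86_FEATURE_LA57	(16*32 + 16)	/* 5-level page tables */

static int test_feature(const uint32_t *caps, unsigned int feature)
{
	return (caps[feature / 32] >> (feature % 32)) & 1;
}

int main(void)
{
	uint32_t caps[NCAPINTS] = { 0 };

	caps[X86_FEATURE_LA57 / 32] |= UINT32_C(1) << (X86_FEATURE_LA57 % 32);
	printf("LA57 set: %d\n", test_feature(caps, X86_FEATURE_LA57));
	return 0;
}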
@@ -200,10 +200,9 @@ enum page_cache_mode {
 #define _PAGE_ENC	(_AT(pteval_t, sme_me_mask))

-#define _PAGE_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_USER |	\
-			 _PAGE_ACCESSED | _PAGE_DIRTY | _PAGE_ENC)
 #define _KERNPG_TABLE	(_PAGE_PRESENT | _PAGE_RW | _PAGE_ACCESSED |	\
 			 _PAGE_DIRTY | _PAGE_ENC)
+#define _PAGE_TABLE	(_KERNPG_TABLE | _PAGE_USER)

 #define __PAGE_KERNEL_ENC	(__PAGE_KERNEL | _PAGE_ENC)
 #define __PAGE_KERNEL_ENC_WP	(__PAGE_KERNEL_WP | _PAGE_ENC)
......
@@ -26,6 +26,7 @@ KASAN_SANITIZE_head$(BITS).o := n
 KASAN_SANITIZE_dumpstack.o := n
 KASAN_SANITIZE_dumpstack_$(BITS).o := n
 KASAN_SANITIZE_stacktrace.o := n
+KASAN_SANITIZE_paravirt.o := n

 OBJECT_FILES_NON_STANDARD_relocate_kernel_$(BITS).o := y
 OBJECT_FILES_NON_STANDARD_ftrace_$(BITS).o := y
......
@@ -62,23 +62,19 @@ const static struct cpuid_dep cpuid_deps[] = {
 	{}
 };

-static inline void __clear_cpu_cap(struct cpuinfo_x86 *c, unsigned int bit)
-{
-	clear_bit32(bit, c->x86_capability);
-}
-
-static inline void __setup_clear_cpu_cap(unsigned int bit)
-{
-	clear_cpu_cap(&boot_cpu_data, bit);
-	set_bit32(bit, cpu_caps_cleared);
-}
-
 static inline void clear_feature(struct cpuinfo_x86 *c, unsigned int feature)
 {
-	if (!c)
-		__setup_clear_cpu_cap(feature);
-	else
-		__clear_cpu_cap(c, feature);
+	/*
+	 * Note: This could use the non atomic __*_bit() variants, but the
+	 * rest of the cpufeature code uses atomics as well, so keep it for
+	 * consistency. Cleanup all of it separately.
+	 */
+	if (!c) {
+		clear_cpu_cap(&boot_cpu_data, feature);
+		set_bit(feature, (unsigned long *)cpu_caps_cleared);
+	} else {
+		clear_bit(feature, (unsigned long *)c->x86_capability);
+	}
 }

 /* Take the capabilities and the BUG bits into account */
......
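The new clear_feature() above drops the clear_bit32()/set_bit32() wrappers and instead casts the u32 capability arrays to unsigned long * for the generic atomic bitops. A stand-alone illustration of why that works when the array is sized in whole longs; this is a plain C stand-in for the kernel's atomic set_bit(), and little-endian word layout is assumed:

/* Plain C stand-in: treating a u32 array as unsigned long words for
 * bit operations, as clear_feature() does above. Little-endian assumed. */
#include <stdint.h>
#include <stdio.h>

static void set_bit_long(unsigned int nr, unsigned long *addr)
{
	unsigned int bits = 8 * sizeof(unsigned long);

	addr[nr / bits] |= 1UL << (nr % bits);
}

int main(void)
{
	/* Four u32 words fill whole unsigned longs on 32- and 64-bit ABIs. */
	uint32_t caps[4] = { 0 };

	set_bit_long(37, (unsigned long *)caps);
	/* On little-endian, bit 37 lands in caps[1], bit 5. */
	printf("caps[1] = %#x\n", caps[1]);
	return 0;
}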
@@ -38,11 +38,12 @@
  *
  */

-#define p4d_index(x)	(((x) >> P4D_SHIFT) & (PTRS_PER_P4D-1))
 #define pud_index(x)	(((x) >> PUD_SHIFT) & (PTRS_PER_PUD-1))

+#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
 PGD_PAGE_OFFSET = pgd_index(__PAGE_OFFSET_BASE)
 PGD_START_KERNEL = pgd_index(__START_KERNEL_map)
+#endif
 L3_START_KERNEL = pud_index(__START_KERNEL_map)

 	.text
@@ -362,10 +363,7 @@ NEXT_PAGE(early_dynamic_pgts)

 	.data

-#ifndef CONFIG_XEN
-NEXT_PAGE(init_top_pgt)
-	.fill	512,8,0
-#else
+#if defined(CONFIG_XEN_PV) || defined(CONFIG_XEN_PVH)
 NEXT_PAGE(init_top_pgt)
 	.quad	level3_ident_pgt - __START_KERNEL_map + _KERNPG_TABLE_NOENC
 	.org	init_top_pgt + PGD_PAGE_OFFSET*8, 0
@@ -382,6 +380,9 @@ NEXT_PAGE(level2_ident_pgt)
 	 * Don't set NX because code runs from these pages.
 	 */
 	PMDS(0, __PAGE_KERNEL_IDENT_LARGE_EXEC, PTRS_PER_PMD)
+#else
+NEXT_PAGE(init_top_pgt)
+	.fill	512,8,0
 #endif

 #ifdef CONFIG_X86_5LEVEL
......
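The PGD_PAGE_OFFSET, PGD_START_KERNEL and L3_START_KERNEL symbols above come from plain shift-and-mask index arithmetic. A small sketch reproducing it in C; the 4-level constants and the 4-level __PAGE_OFFSET_BASE value are assumptions for the example:

/* Sketch: the pgd_index()/pud_index() arithmetic used above to place
 * pre-built page-table entries. 4-level paging constants assumed. */
#include <stdio.h>

#define PGDIR_SHIFT	39
#define PUD_SHIFT	30
#define PTRS_PER_PGD	512
#define PTRS_PER_PUD	512
#define pgd_index(x)	(((x) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1))
#define pud_index(x)	(((x) >> PUD_SHIFT) & (PTRS_PER_PUD - 1))

int main(void)
{
	unsigned long page_offset_base = 0xffff880000000000UL; /* 4-level value */
	unsigned long start_kernel_map = 0xffffffff80000000UL;

	printf("PGD_PAGE_OFFSET  = %lu\n", pgd_index(page_offset_base));  /* 272 */
	printf("PGD_START_KERNEL = %lu\n", pgd_index(start_kernel_map));  /* 511 */
	printf("L3_START_KERNEL  = %lu\n", pud_index(start_kernel_map));  /* 510 */
	return 0;
}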
@@ -1426,16 +1426,16 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node)
 #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_HAVE_BOOTMEM_INFO_NODE)
 void register_page_bootmem_memmap(unsigned long section_nr,
-				  struct page *start_page, unsigned long size)
+				  struct page *start_page, unsigned long nr_pages)
 {
 	unsigned long addr = (unsigned long)start_page;
-	unsigned long end = (unsigned long)(start_page + size);
+	unsigned long end = (unsigned long)(start_page + nr_pages);
 	unsigned long next;
 	pgd_t *pgd;
 	p4d_t *p4d;
 	pud_t *pud;
 	pmd_t *pmd;
-	unsigned int nr_pages;
+	unsigned int nr_pmd_pages;
 	struct page *page;

 	for (; addr < end; addr = next) {
@@ -1482,9 +1482,9 @@ void register_page_bootmem_memmap(unsigned long section_nr,
 		if (pmd_none(*pmd))
 			continue;

-		nr_pages = 1 << (get_order(PMD_SIZE));
+		nr_pmd_pages = 1 << get_order(PMD_SIZE);
 		page = pmd_page(*pmd);
-		while (nr_pages--)
+		while (nr_pmd_pages--)
 			get_page_bootmem(section_nr, page++,
 					 SECTION_INFO);
 	}
......
@@ -16,6 +16,8 @@

 extern struct range pfn_mapped[E820_MAX_ENTRIES];

+static p4d_t tmp_p4d_table[PTRS_PER_P4D] __initdata __aligned(PAGE_SIZE);
+
 static int __init map_range(struct range *range)
 {
 	unsigned long start;
@@ -31,8 +33,10 @@ static void __init clear_pgds(unsigned long start,
 			unsigned long end)
 {
 	pgd_t *pgd;
+	/* See comment in kasan_init() */
+	unsigned long pgd_end = end & PGDIR_MASK;

-	for (; start < end; start += PGDIR_SIZE) {
+	for (; start < pgd_end; start += PGDIR_SIZE) {
 		pgd = pgd_offset_k(start);
 		/*
 		 * With folded p4d, pgd_clear() is nop, use p4d_clear()
@@ -43,29 +47,61 @@ static void __init clear_pgds(unsigned long start,
 		else
 			pgd_clear(pgd);
 	}
+
+	pgd = pgd_offset_k(start);
+	for (; start < end; start += P4D_SIZE)
+		p4d_clear(p4d_offset(pgd, start));
+}
+
+static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
+{
+	unsigned long p4d;
+
+	if (!IS_ENABLED(CONFIG_X86_5LEVEL))
+		return (p4d_t *)pgd;
+
+	p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
+	p4d += __START_KERNEL_map - phys_base;
+	return (p4d_t *)p4d + p4d_index(addr);
+}
+
+static void __init kasan_early_p4d_populate(pgd_t *pgd,
+		unsigned long addr,
+		unsigned long end)
+{
+	pgd_t pgd_entry;
+	p4d_t *p4d, p4d_entry;
+	unsigned long next;
+
+	if (pgd_none(*pgd)) {
+		pgd_entry = __pgd(_KERNPG_TABLE | __pa_nodebug(kasan_zero_p4d));
+		set_pgd(pgd, pgd_entry);
+	}
+
+	p4d = early_p4d_offset(pgd, addr);
+	do {
+		next = p4d_addr_end(addr, end);
+
+		if (!p4d_none(*p4d))
+			continue;
+
+		p4d_entry = __p4d(_KERNPG_TABLE | __pa_nodebug(kasan_zero_pud));
+		set_p4d(p4d, p4d_entry);
+	} while (p4d++, addr = next, addr != end && p4d_none(*p4d));
 }

 static void __init kasan_map_early_shadow(pgd_t *pgd)
 {
-	int i;
-	unsigned long start = KASAN_SHADOW_START;
+	/* See comment in kasan_init() */
+	unsigned long addr = KASAN_SHADOW_START & PGDIR_MASK;
 	unsigned long end = KASAN_SHADOW_END;
+	unsigned long next;

-	for (i = pgd_index(start); start < end; i++) {
-		switch (CONFIG_PGTABLE_LEVELS) {
-		case 4:
-			pgd[i] = __pgd(__pa_nodebug(kasan_zero_pud) |
-					_KERNPG_TABLE);
-			break;
-		case 5:
-			pgd[i] = __pgd(__pa_nodebug(kasan_zero_p4d) |
-					_KERNPG_TABLE);
-			break;
-		default:
-			BUILD_BUG();
-		}
-		start += PGDIR_SIZE;
-	}
+	pgd += pgd_index(addr);
+	do {
+		next = pgd_addr_end(addr, end);
+		kasan_early_p4d_populate(pgd, addr, next);
+	} while (pgd++, addr = next, addr != end);
 }

 #ifdef CONFIG_KASAN_INLINE
@@ -102,7 +138,7 @@ void __init kasan_early_init(void)
 	for (i = 0; i < PTRS_PER_PUD; i++)
 		kasan_zero_pud[i] = __pud(pud_val);

-	for (i = 0; CONFIG_PGTABLE_LEVELS >= 5 && i < PTRS_PER_P4D; i++)
+	for (i = 0; IS_ENABLED(CONFIG_X86_5LEVEL) && i < PTRS_PER_P4D; i++)
 		kasan_zero_p4d[i] = __p4d(p4d_val);

 	kasan_map_early_shadow(early_top_pgt);
@@ -118,12 +154,35 @@ void __init kasan_init(void)
 #endif

 	memcpy(early_top_pgt, init_top_pgt, sizeof(early_top_pgt));

+	/*
+	 * We use the same shadow offset for 4- and 5-level paging to
+	 * facilitate boot-time switching between paging modes.
+	 * As result in 5-level paging mode KASAN_SHADOW_START and
+	 * KASAN_SHADOW_END are not aligned to PGD boundary.
+	 *
+	 * KASAN_SHADOW_START doesn't share PGD with anything else.
+	 * We claim whole PGD entry to make things easier.
+	 *
+	 * KASAN_SHADOW_END lands in the last PGD entry and it collides with
+	 * bunch of things like kernel code, modules, EFI mapping, etc.
+	 * We need to take extra steps to not overwrite them.
+	 */
+	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
+		void *ptr;
+
+		ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
+		memcpy(tmp_p4d_table, (void *)ptr, sizeof(tmp_p4d_table));
+		set_pgd(&early_top_pgt[pgd_index(KASAN_SHADOW_END)],
+				__pgd(__pa(tmp_p4d_table) | _KERNPG_TABLE));
+	}
+
 	load_cr3(early_top_pgt);
 	__flush_tlb_all();

-	clear_pgds(KASAN_SHADOW_START, KASAN_SHADOW_END);
+	clear_pgds(KASAN_SHADOW_START & PGDIR_MASK, KASAN_SHADOW_END);

-	kasan_populate_zero_shadow((void *)KASAN_SHADOW_START,
+	kasan_populate_zero_shadow((void *)(KASAN_SHADOW_START & PGDIR_MASK),
 			kasan_mem_to_shadow((void *)PAGE_OFFSET));

 	for (i = 0; i < E820_MAX_ENTRIES; i++) {
......
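clear_pgds() and kasan_init() above round KASAN_SHADOW_START down with & PGDIR_MASK so the whole containing PGD entry is claimed, then handle the unaligned tail at P4D granularity. A tiny worked example of the rounding; 4-level constants are assumed and the sample address is hypothetical:

/* Worked example of the "& PGDIR_MASK" rounding used above. */
#include <stdio.h>

#define PGDIR_SHIFT	39
#define PGDIR_SIZE	(1UL << PGDIR_SHIFT)
#define PGDIR_MASK	(~(PGDIR_SIZE - 1))

int main(void)
{
	unsigned long start = 0xffdf123456789000UL;	/* hypothetical */

	printf("start       = %#lx\n", start);
	printf("PGD-aligned = %#lx\n", start & PGDIR_MASK);
	printf("pgd index   = %lu\n", (start >> PGDIR_SHIFT) & 511);
	return 0;
}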
@@ -449,7 +449,7 @@ __visible pmd_t xen_make_pmd(pmdval_t pmd)
 }
 PV_CALLEE_SAVE_REGS_THUNK(xen_make_pmd);

-#if CONFIG_PGTABLE_LEVELS == 4
+#ifdef CONFIG_X86_64
 __visible pudval_t xen_pud_val(pud_t pud)
 {
 	return pte_mfn_to_pfn(pud.pud);
@@ -538,7 +538,7 @@ static void xen_set_p4d(p4d_t *ptr, p4d_t val)

 	xen_mc_issue(PARAVIRT_LAZY_MMU);
 }
-#endif	/* CONFIG_PGTABLE_LEVELS == 4 */
+#endif	/* CONFIG_X86_64 */

 static int xen_pmd_walk(struct mm_struct *mm, pmd_t *pmd,
 		int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
@@ -580,21 +580,17 @@ static int xen_p4d_walk(struct mm_struct *mm, p4d_t *p4d,
 		int (*func)(struct mm_struct *mm, struct page *, enum pt_level),
 		bool last, unsigned long limit)
 {
-	int i, nr, flush = 0;
+	int flush = 0;
+	pud_t *pud;

-	nr = last ? p4d_index(limit) + 1 : PTRS_PER_P4D;
-	for (i = 0; i < nr; i++) {
-		pud_t *pud;
-
-		if (p4d_none(p4d[i]))
-			continue;
-
-		pud = pud_offset(&p4d[i], 0);
-		if (PTRS_PER_PUD > 1)
-			flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
-		flush |= xen_pud_walk(mm, pud, func,
-				last && i == nr - 1, limit);
-	}
+	if (p4d_none(*p4d))
+		return flush;
+
+	pud = pud_offset(p4d, 0);
+	if (PTRS_PER_PUD > 1)
+		flush |= (*func)(mm, virt_to_page(pud), PT_PUD);
+	flush |= xen_pud_walk(mm, pud, func, last, limit);
 	return flush;
 }
@@ -644,8 +640,6 @@ static int __xen_pgd_walk(struct mm_struct *mm, pgd_t *pgd,
 			continue;

 		p4d = p4d_offset(&pgd[i], 0);
-		if (PTRS_PER_P4D > 1)
-			flush |= (*func)(mm, virt_to_page(p4d), PT_P4D);
 		flush |= xen_p4d_walk(mm, p4d, func, i == nr - 1, limit);
 	}

@@ -1176,22 +1170,14 @@ static void __init xen_cleanmfnmap(unsigned long vaddr)
 {
 	pgd_t *pgd;
 	p4d_t *p4d;
-	unsigned int i;
 	bool unpin;

 	unpin = (vaddr == 2 * PGDIR_SIZE);
 	vaddr &= PMD_MASK;
 	pgd = pgd_offset_k(vaddr);
 	p4d = p4d_offset(pgd, 0);
-	for (i = 0; i < PTRS_PER_P4D; i++) {
-		if (p4d_none(p4d[i]))
-			continue;
-		xen_cleanmfnmap_p4d(p4d + i, unpin);
-	}
-	if (IS_ENABLED(CONFIG_X86_5LEVEL)) {
-		set_pgd(pgd, __pgd(0));
-		xen_cleanmfnmap_free_pgtbl(p4d, unpin);
-	}
+	if (!p4d_none(*p4d))
+		xen_cleanmfnmap_p4d(p4d, unpin);
 }

 static void __init xen_pagetable_p2m_free(void)
@@ -1692,7 +1678,7 @@ static void xen_release_pmd(unsigned long pfn)
 	xen_release_ptpage(pfn, PT_PMD);
 }

-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
 static void xen_alloc_pud(struct mm_struct *mm, unsigned long pfn)
 {
 	xen_alloc_ptpage(mm, pfn, PT_PUD);
@@ -2029,13 +2015,12 @@ static phys_addr_t __init xen_early_virt_to_phys(unsigned long vaddr)
  */
 void __init xen_relocate_p2m(void)
 {
-	phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys, p4d_phys;
+	phys_addr_t size, new_area, pt_phys, pmd_phys, pud_phys;
 	unsigned long p2m_pfn, p2m_pfn_end, n_frames, pfn, pfn_end;
-	int n_pte, n_pt, n_pmd, n_pud, n_p4d, idx_pte, idx_pt, idx_pmd, idx_pud, idx_p4d;
+	int n_pte, n_pt, n_pmd, n_pud, idx_pte, idx_pt, idx_pmd, idx_pud;
 	pte_t *pt;
 	pmd_t *pmd;
 	pud_t *pud;
-	p4d_t *p4d = NULL;
 	pgd_t *pgd;
 	unsigned long *new_p2m;
 	int save_pud;
@@ -2045,11 +2030,7 @@ void __init xen_relocate_p2m(void)
 	n_pt = roundup(size, PMD_SIZE) >> PMD_SHIFT;
 	n_pmd = roundup(size, PUD_SIZE) >> PUD_SHIFT;
 	n_pud = roundup(size, P4D_SIZE) >> P4D_SHIFT;
-	if (PTRS_PER_P4D > 1)
-		n_p4d = roundup(size, PGDIR_SIZE) >> PGDIR_SHIFT;
-	else
-		n_p4d = 0;
-	n_frames = n_pte + n_pt + n_pmd + n_pud + n_p4d;
+	n_frames = n_pte + n_pt + n_pmd + n_pud;

 	new_area = xen_find_free_area(PFN_PHYS(n_frames));
 	if (!new_area) {
@@ -2065,22 +2046,14 @@ void __init xen_relocate_p2m(void)
 	 * To avoid any possible virtual address collision, just use
 	 * 2 * PUD_SIZE for the new area.
 	 */
-	p4d_phys = new_area;
-	pud_phys = p4d_phys + PFN_PHYS(n_p4d);
+	pud_phys = new_area;
 	pmd_phys = pud_phys + PFN_PHYS(n_pud);
 	pt_phys = pmd_phys + PFN_PHYS(n_pmd);
 	p2m_pfn = PFN_DOWN(pt_phys) + n_pt;

 	pgd = __va(read_cr3_pa());
 	new_p2m = (unsigned long *)(2 * PGDIR_SIZE);
-	idx_p4d = 0;
 	save_pud = n_pud;
-	do {
-		if (n_p4d > 0) {
-			p4d = early_memremap(p4d_phys, PAGE_SIZE);
-			clear_page(p4d);
-			n_pud = min(save_pud, PTRS_PER_P4D);
-		}
 	for (idx_pud = 0; idx_pud < n_pud; idx_pud++) {
 		pud = early_memremap(pud_phys, PAGE_SIZE);
 		clear_page(pud);
@@ -2120,21 +2093,9 @@ void __init xen_relocate_p2m(void)
 		early_memunmap(pud, PAGE_SIZE);
 		make_lowmem_page_readonly(__va(pud_phys));
 		pin_pagetable_pfn(MMUEXT_PIN_L3_TABLE, PFN_DOWN(pud_phys));
-		if (n_p4d > 0)
-			set_p4d(p4d + idx_pud, __p4d(_PAGE_TABLE | pud_phys));
-		else
-			set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
+		set_pgd(pgd + 2 + idx_pud, __pgd(_PAGE_TABLE | pud_phys));
 		pud_phys += PAGE_SIZE;
 	}
-	if (n_p4d > 0) {
-		save_pud -= PTRS_PER_P4D;
-		early_memunmap(p4d, PAGE_SIZE);
-		make_lowmem_page_readonly(__va(p4d_phys));
-		pin_pagetable_pfn(MMUEXT_PIN_L4_TABLE, PFN_DOWN(p4d_phys));
-		set_pgd(pgd + 2 + idx_p4d, __pgd(_PAGE_TABLE | p4d_phys));
-		p4d_phys += PAGE_SIZE;
-	}
-	} while (++idx_p4d < n_p4d);

 	/* Now copy the old p2m info to the new area. */
 	memcpy(new_p2m, xen_p2m_addr, size);
@@ -2361,7 +2322,7 @@ static void __init xen_post_allocator_init(void)
 	pv_mmu_ops.set_pte = xen_set_pte;
 	pv_mmu_ops.set_pmd = xen_set_pmd;
 	pv_mmu_ops.set_pud = xen_set_pud;
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
 	pv_mmu_ops.set_p4d = xen_set_p4d;
 #endif

@@ -2371,7 +2332,7 @@ static void __init xen_post_allocator_init(void)
 	pv_mmu_ops.alloc_pmd = xen_alloc_pmd;
 	pv_mmu_ops.release_pte = xen_release_pte;
 	pv_mmu_ops.release_pmd = xen_release_pmd;
-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
 	pv_mmu_ops.alloc_pud = xen_alloc_pud;
 	pv_mmu_ops.release_pud = xen_release_pud;
 #endif
@@ -2435,14 +2396,14 @@ static const struct pv_mmu_ops xen_mmu_ops __initconst = {
 	.make_pmd = PV_CALLEE_SAVE(xen_make_pmd),
 	.pmd_val = PV_CALLEE_SAVE(xen_pmd_val),

-#if CONFIG_PGTABLE_LEVELS >= 4
+#ifdef CONFIG_X86_64
 	.pud_val = PV_CALLEE_SAVE(xen_pud_val),
 	.make_pud = PV_CALLEE_SAVE(xen_make_pud),
 	.set_p4d = xen_set_p4d_hyper,

 	.alloc_pud = xen_alloc_pmd_init,
 	.release_pud = xen_release_pmd_init,
-#endif	/* CONFIG_PGTABLE_LEVELS == 4 */
+#endif	/* CONFIG_X86_64 */

 	.activate_mm = xen_activate_mm,
 	.dup_mmap = xen_dup_mmap,
......
@@ -228,32 +228,6 @@ static inline unsigned long __ffs64(u64 word)
 	return __ffs((unsigned long)word);
 }

-/*
- * clear_bit32 - Clear a bit in memory for u32 array
- * @nr: Bit to clear
- * @addr: u32 * address of bitmap
- *
- * Same as clear_bit, but avoids needing casts for u32 arrays.
- */
-static __always_inline void clear_bit32(long nr, volatile u32 *addr)
-{
-	clear_bit(nr, (volatile unsigned long *)addr);
-}
-
-/*
- * set_bit32 - Set a bit in memory for u32 array
- * @nr: Bit to clear
- * @addr: u32 * address of bitmap
- *
- * Same as set_bit, but avoids needing casts for u32 arrays.
- */
-static __always_inline void set_bit32(long nr, volatile u32 *addr)
-{
-	set_bit(nr, (volatile unsigned long *)addr);
-}
-
 #ifdef __KERNEL__

 #ifndef set_mask_bits
......
@@ -2496,7 +2496,7 @@ void vmemmap_populate_print_last(void);
 void vmemmap_free(unsigned long start, unsigned long end);
 #endif
 void register_page_bootmem_memmap(unsigned long section_nr, struct page *map,
-				  unsigned long size);
+				  unsigned long nr_pages);

 enum mf_flags {
 	MF_COUNT_INCREASED = 1 << 0,
......
@@ -1151,13 +1151,17 @@ struct mem_section {
 #define SECTION_ROOT_MASK	(SECTIONS_PER_ROOT - 1)

 #ifdef CONFIG_SPARSEMEM_EXTREME
-extern struct mem_section *mem_section[NR_SECTION_ROOTS];
+extern struct mem_section **mem_section;
 #else
 extern struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT];
 #endif

 static inline struct mem_section *__nr_to_section(unsigned long nr)
 {
+#ifdef CONFIG_SPARSEMEM_EXTREME
+	if (!mem_section)
+		return NULL;
+#endif
 	if (!mem_section[SECTION_NR_TO_ROOT(nr)])
 		return NULL;
 	return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
......
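The two-level lookup in __nr_to_section() above splits a section number into a root index and an offset within that root. A minimal sketch of the arithmetic; SECTIONS_PER_ROOT is fixed at 128 here purely for illustration, in the kernel it is derived from the page and struct sizes:

/* Sketch: splitting a section number for the two-level mem_section lookup. */
#include <stdio.h>

#define SECTIONS_PER_ROOT	128
#define SECTION_NR_TO_ROOT(nr)	((nr) / SECTIONS_PER_ROOT)
#define SECTION_ROOT_MASK	(SECTIONS_PER_ROOT - 1)

int main(void)
{
	unsigned long nr = 1000;

	/* section 1000 -> root 7, index 104 with these example sizes */
	printf("section %lu -> root %lu, index %lu\n",
	       nr, SECTION_NR_TO_ROOT(nr), nr & SECTION_ROOT_MASK);
	return 0;
}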
@@ -1643,6 +1643,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
 	return 1;
 }

+static void gup_pgd_range(unsigned long addr, unsigned long end,
+		int write, struct page **pages, int *nr)
+{
+	unsigned long next;
+	pgd_t *pgdp;
+
+	pgdp = pgd_offset(current->mm, addr);
+	do {
+		pgd_t pgd = READ_ONCE(*pgdp);
+
+		next = pgd_addr_end(addr, end);
+		if (pgd_none(pgd))
+			return;
+		if (unlikely(pgd_huge(pgd))) {
+			if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+					  pages, nr))
+				return;
+		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
+			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
+					 PGDIR_SHIFT, next, write, pages, nr))
+				return;
+		} else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
+			return;
+	} while (pgdp++, addr = next, addr != end);
+}
+
+#ifndef gup_fast_permitted
+/*
+ * Check if it's allowed to use __get_user_pages_fast() for the range, or
+ * we need to fall back to the slow version:
+ */
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+{
+	unsigned long len, end;
+
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+	return end >= start;
+}
+#endif
+
 /*
  * Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
  * the regular GUP. It will only return non-negative values.
@@ -1650,10 +1691,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
 int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			  struct page **pages)
 {
-	struct mm_struct *mm = current->mm;
 	unsigned long addr, len, end;
-	unsigned long next, flags;
-	pgd_t *pgdp;
+	unsigned long flags;
 	int nr = 0;

 	start &= PAGE_MASK;
@@ -1677,45 +1716,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
 	 * block IPIs that come from THPs splitting.
 	 */

-	local_irq_save(flags);
-	pgdp = pgd_offset(mm, addr);
-	do {
-		pgd_t pgd = READ_ONCE(*pgdp);
-
-		next = pgd_addr_end(addr, end);
-		if (pgd_none(pgd))
-			break;
-		if (unlikely(pgd_huge(pgd))) {
-			if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
-					  pages, &nr))
-				break;
-		} else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
-			if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
-					PGDIR_SHIFT, next, write, pages, &nr))
-				break;
-		} else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
-			break;
-	} while (pgdp++, addr = next, addr != end);
-	local_irq_restore(flags);
+	if (gup_fast_permitted(start, nr_pages, write)) {
+		local_irq_save(flags);
+		gup_pgd_range(addr, end, write, pages, &nr);
+		local_irq_restore(flags);
+	}

 	return nr;
 }

-#ifndef gup_fast_permitted
-/*
- * Check if it's allowed to use __get_user_pages_fast() for the range, or
- * we need to fall back to the slow version:
- */
-bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
-{
-	unsigned long len, end;
-
-	len = (unsigned long) nr_pages << PAGE_SHIFT;
-	end = start + len;
-	return end >= start;
-}
-#endif
-
 /**
  * get_user_pages_fast() - pin user pages in memory
  * @start:	starting user address
@@ -1735,12 +1744,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
 int get_user_pages_fast(unsigned long start, int nr_pages, int write,
 			struct page **pages)
 {
+	unsigned long addr, len, end;
 	int nr = 0, ret = 0;

 	start &= PAGE_MASK;
+	addr = start;
+	len = (unsigned long) nr_pages << PAGE_SHIFT;
+	end = start + len;
+
+	if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+					(void __user *)start, len)))
+		return 0;

 	if (gup_fast_permitted(start, nr_pages, write)) {
-		nr = __get_user_pages_fast(start, nr_pages, write, pages);
+		local_irq_disable();
+		gup_pgd_range(addr, end, write, pages, &nr);
+		local_irq_enable();
 		ret = nr;
 	}
......
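As a usage sketch for the reworked get_user_pages_fast() above (a hypothetical caller, not part of this diff), note that every successfully pinned page must eventually be released with put_page():

	/* Hypothetical caller, for illustration only. */
	static int pin_user_buffer(unsigned long uaddr, int nr_pages,
				   struct page **pages)
	{
		int i, got;

		got = get_user_pages_fast(uaddr, nr_pages, 1 /* write */, pages);
		if (got <= 0)
			return got ? got : -EFAULT;

		/* ... operate on pages[0 .. got-1] ... */

		for (i = 0; i < got; i++)
			put_page(pages[i]);

		return got;
	}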
@@ -5646,6 +5646,16 @@ void __init sparse_memory_present_with_active_regions(int nid)
	unsigned long start_pfn, end_pfn;
	int i, this_nid;

+#ifdef CONFIG_SPARSEMEM_EXTREME
+	if (!mem_section) {
+		unsigned long size, align;
+
+		size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
+		align = 1 << (INTERNODE_CACHE_SHIFT);
+		mem_section = memblock_virt_alloc(size, align);
+	}
+#endif
+
	for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, &this_nid)
		memory_present(this_nid, start_pfn, end_pfn);
}
...
@@ -23,8 +23,7 @@
 * 1) mem_section - memory sections, mem_map's for valid memory
 */
#ifdef CONFIG_SPARSEMEM_EXTREME
-struct mem_section *mem_section[NR_SECTION_ROOTS]
-	____cacheline_internodealigned_in_smp;
+struct mem_section **mem_section;
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
	____cacheline_internodealigned_in_smp;
@@ -101,7 +100,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
int __section_nr(struct mem_section* ms)
{
	unsigned long root_nr;
-	struct mem_section* root;
+	struct mem_section *root = NULL;

	for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
		root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
@@ -112,7 +111,7 @@ int __section_nr(struct mem_section* ms)
		break;
	}

-	VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
+	VM_BUG_ON(!root);

	return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
}
@@ -330,11 +329,17 @@ sparse_early_usemaps_alloc_pgdat_section(struct pglist_data *pgdat,
static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
{
	unsigned long usemap_snr, pgdat_snr;
-	static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
-	static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
+	static unsigned long old_usemap_snr;
+	static unsigned long old_pgdat_snr;
	struct pglist_data *pgdat = NODE_DATA(nid);
	int usemap_nid;

+	/* First call */
+	if (!old_usemap_snr) {
+		old_usemap_snr = NR_MEM_SECTIONS;
+		old_pgdat_snr = NR_MEM_SECTIONS;
+	}
+
	usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
	pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
	if (usemap_snr == pgdat_snr)
...
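The switch from a static array to struct mem_section **mem_section above works because SPARSEMEM_EXTREME resolves a section number in two steps, root table first and per-root array second. Roughly (a sketch of the lookup helper, assumed for illustration and not part of this diff):

	static inline struct mem_section *__nr_to_section(unsigned long nr)
	{
		/* The root table is now allocated at runtime and may not exist yet. */
		if (!mem_section)
			return NULL;
		if (!mem_section[SECTION_NR_TO_ROOT(nr)])
			return NULL;
		return &mem_section[SECTION_NR_TO_ROOT(nr)][nr & SECTION_ROOT_MASK];
	}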
@@ -115,7 +115,15 @@ static void check_valid_segment(uint16_t index, int ldt,
		return;
	}

-	if (ar != expected_ar) {
+	/* The SDM says "bits 19:16 are undefined". Thanks. */
+	ar &= ~0xF0000;
+
+	/*
+	 * NB: Different Linux versions do different things with the
+	 * accessed bit in set_thread_area().
+	 */
+	if (ar != expected_ar &&
+	    (ldt || ar != (expected_ar | AR_ACCESSED))) {
		printf("[FAIL]\t%s entry %hu has AR 0x%08X but expected 0x%08X\n",
		       (ldt ? "LDT" : "GDT"), index, ar, expected_ar);
		nerrs++;
@@ -129,30 +137,51 @@ static void check_valid_segment(uint16_t index, int ldt,
	}
}

-static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,
-			       bool oldmode)
+static bool install_valid_mode(const struct user_desc *d, uint32_t ar,
+			       bool oldmode, bool ldt)
{
-	int ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
-			  desc, sizeof(*desc));
-	if (ret < -1)
-		errno = -ret;
+	struct user_desc desc = *d;
+	int ret;
+
+	if (!ldt) {
+#ifndef __i386__
+		/* No point testing set_thread_area in a 64-bit build */
+		return false;
+#endif
+		if (!gdt_entry_num)
+			return false;
+		desc.entry_number = gdt_entry_num;
+
+		ret = syscall(SYS_set_thread_area, &desc);
+	} else {
+		ret = syscall(SYS_modify_ldt, oldmode ? 1 : 0x11,
+			      &desc, sizeof(desc));
+
+		if (ret < -1)
+			errno = -ret;
+
+		if (ret != 0 && errno == ENOSYS) {
+			printf("[OK]\tmodify_ldt returned -ENOSYS\n");
+			return false;
+		}
+	}
+
	if (ret == 0) {
-		uint32_t limit = desc->limit;
-		if (desc->limit_in_pages)
+		uint32_t limit = desc.limit;
+		if (desc.limit_in_pages)
			limit = (limit << 12) + 4095;
-		check_valid_segment(desc->entry_number, 1, ar, limit, true);
+		check_valid_segment(desc.entry_number, ldt, ar, limit, true);
		return true;
-	} else if (errno == ENOSYS) {
-		printf("[OK]\tmodify_ldt returned -ENOSYS\n");
-		return false;
	} else {
-		if (desc->seg_32bit) {
-			printf("[FAIL]\tUnexpected modify_ldt failure %d\n",
+		if (desc.seg_32bit) {
+			printf("[FAIL]\tUnexpected %s failure %d\n",
+			       ldt ? "modify_ldt" : "set_thread_area",
			       errno);
			nerrs++;
			return false;
		} else {
-			printf("[OK]\tmodify_ldt rejected 16 bit segment\n");
+			printf("[OK]\t%s rejected 16 bit segment\n",
+			       ldt ? "modify_ldt" : "set_thread_area");
			return false;
		}
	}
@@ -160,7 +189,15 @@ static bool install_valid_mode(const struct user_desc *desc, uint32_t ar,

static bool install_valid(const struct user_desc *desc, uint32_t ar)
{
-	return install_valid_mode(desc, ar, false);
+	bool ret = install_valid_mode(desc, ar, false, true);
+
+	if (desc->contents <= 1 && desc->seg_32bit &&
+	    !desc->seg_not_present) {
+		/* Should work in the GDT, too. */
+		install_valid_mode(desc, ar, false, false);
+	}
+
+	return ret;
}

static void install_invalid(const struct user_desc *desc, bool oldmode)
@@ -367,9 +404,24 @@ static void do_simple_tests(void)
	install_invalid(&desc, false);

	desc.seg_not_present = 0;
-	desc.read_exec_only = 0;
	desc.seg_32bit = 1;
+	desc.read_exec_only = 0;
+	desc.limit = 0xfffff;
	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB);

+	desc.limit_in_pages = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA | AR_S | AR_P | AR_DB | AR_G);
+	desc.read_exec_only = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA | AR_S | AR_P | AR_DB | AR_G);
+	desc.contents = 1;
+	desc.read_exec_only = 0;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RWDATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
+	desc.read_exec_only = 1;
+	install_valid(&desc, AR_DPL3 | AR_TYPE_RODATA_EXPDOWN | AR_S | AR_P | AR_DB | AR_G);
+
+	desc.limit = 0;
	install_invalid(&desc, true);
}
...
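The AR words compared above follow the x86 descriptor access-rights layout, which also explains the ar &= ~0xF0000 mask: bits 19:16 of an LAR result are undefined. The selftest's macros decode roughly as follows (bit positions assumed from the architectural format; they are not shown in this hunk):

	#define AR_ACCESSED	(1 <<  8)	/* set by the CPU on first use */
	#define AR_TYPE_RWDATA	(1 <<  9)	/* type field occupies bits 11:9 */
	#define AR_S		(1 << 12)	/* code/data segment, not system */
	#define AR_DPL3		(3 << 13)	/* user privilege level */
	#define AR_P		(1 << 15)	/* segment present */
	#define AR_DB		(1 << 22)	/* 32-bit default operand size */
	#define AR_G		(1 << 23)	/* limit counted in 4 KiB pages */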
@@ -189,17 +189,29 @@ void lots_o_noops_around_write(int *write_to_me)
#define u64 uint64_t

#ifdef __i386__
-#define SYS_mprotect_key 380
-#define SYS_pkey_alloc	 381
-#define SYS_pkey_free	 382
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 380
+#endif
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc	 381
+# define SYS_pkey_free	 382
+#endif
+
#define REG_IP_IDX REG_EIP
#define si_pkey_offset 0x14
+
#else
-#define SYS_mprotect_key 329
-#define SYS_pkey_alloc	 330
-#define SYS_pkey_free	 331
+
+#ifndef SYS_mprotect_key
+# define SYS_mprotect_key 329
+#endif
+#ifndef SYS_pkey_alloc
+# define SYS_pkey_alloc	 330
+# define SYS_pkey_free	 331
+#endif
+
#define REG_IP_IDX REG_RIP
#define si_pkey_offset 0x20
+
#endif

void dump_mem(void *dumpme, int len_bytes)
...
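With the guards above in place, the rest of the selftest can reach these syscalls whether or not the libc headers define the numbers. A minimal sketch of the kind of thin wrapper involved (the file's actual wrappers are assumed to add debug logging on top of this):

	static int sys_pkey_alloc(unsigned long flags, unsigned long init_val)
	{
		return syscall(SYS_pkey_alloc, flags, init_val);
	}

	static int sys_pkey_free(unsigned long pkey)
	{
		return syscall(SYS_pkey_free, pkey);
	}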