提交 5cef8c2a 编写于 作者: L Linus Torvalds

Merge branch 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 boot updates from Ingo Molnar:

 - Centaur CPU updates (David Wang)

 - AMD and other CPU topology enumeration improvements and fixes
   (Borislav Petkov, Thomas Gleixner, Suravee Suthikulpanit)

 - Continued 5-level paging work (Kirill A. Shutemov)

* 'x86-boot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/mm: Mark __pgtable_l5_enabled __initdata
  x86/mm: Mark p4d_offset() __always_inline
  x86/mm: Introduce the 'no5lvl' kernel parameter
  x86/mm: Stop pretending pgtable_l5_enabled is a variable
  x86/mm: Unify pgtable_l5_enabled usage in early boot code
  x86/boot/compressed/64: Fix trampoline page table address calculation
  x86/CPU: Move x86_cpuinfo::x86_max_cores assignment to detect_num_cpu_cores()
  x86/Centaur: Report correct CPU/cache topology
  x86/CPU: Move cpu_detect_cache_sizes() into init_intel_cacheinfo()
  x86/CPU: Make intel_num_cpu_cores() generic
  x86/CPU: Move cpu local function declarations to local header
  x86/CPU/AMD: Derive CPU topology from CPUID function 0xB when available
  x86/CPU: Modify detect_extended_topology() to return result
  x86/CPU/AMD: Calculate last level cache ID from number of sharing threads
  x86/CPU: Rename intel_cacheinfo.c to cacheinfo.c
  perf/events/amd/uncore: Fix amd_uncore_llc ID to use pre-defined cpu_llc_id
  x86/CPU/AMD: Have smp_num_siblings and cpu_llc_id always be present
  x86/Centaur: Initialize supported CPU features properly
......@@ -2608,6 +2608,9 @@
emulation library even if a 387 maths coprocessor
is present.
no5lvl [X86-64] Disable 5-level paging mode. Forces
kernel to use 4-level paging instead.
no_console_suspend
[HW] Never suspend the console
Disable suspending of consoles during suspend and
......
// SPDX-License-Identifier: GPL-2.0
#include "misc.h"
#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE
#if CONFIG_EARLY_PRINTK || CONFIG_RANDOMIZE_BASE || CONFIG_X86_5LEVEL
static unsigned long fs;
static inline void set_fs(unsigned long seg)
......
......@@ -365,6 +365,7 @@ ENTRY(startup_64)
* this function call.
*/
pushq %rsi
movq %rsi, %rdi /* real mode address */
call paging_prepare
popq %rsi
......
......@@ -47,7 +47,7 @@
#include <linux/decompress/mm.h>
#ifdef CONFIG_X86_5LEVEL
unsigned int pgtable_l5_enabled __ro_after_init;
unsigned int __pgtable_l5_enabled;
unsigned int pgdir_shift __ro_after_init = 39;
unsigned int ptrs_per_p4d __ro_after_init = 1;
#endif
......@@ -734,7 +734,7 @@ void choose_random_location(unsigned long input,
#ifdef CONFIG_X86_5LEVEL
if (__read_cr4() & X86_CR4_LA57) {
pgtable_l5_enabled = 1;
__pgtable_l5_enabled = 1;
pgdir_shift = 48;
ptrs_per_p4d = 512;
}
......
......@@ -12,10 +12,8 @@
#undef CONFIG_PARAVIRT_SPINLOCKS
#undef CONFIG_KASAN
#ifdef CONFIG_X86_5LEVEL
/* cpu_feature_enabled() cannot be used that early */
#define pgtable_l5_enabled __pgtable_l5_enabled
#endif
/* cpu_feature_enabled() cannot be used this early */
#define USE_EARLY_PGTABLE_L5
#include <linux/linkage.h>
#include <linux/screen_info.h>
......
......@@ -31,16 +31,23 @@ static char trampoline_save[TRAMPOLINE_32BIT_SIZE];
*/
unsigned long *trampoline_32bit __section(.data);
struct paging_config paging_prepare(void)
extern struct boot_params *boot_params;
int cmdline_find_option_bool(const char *option);
struct paging_config paging_prepare(void *rmode)
{
struct paging_config paging_config = {};
unsigned long bios_start, ebda_start;
/* Initialize boot_params. Required for cmdline_find_option_bool(). */
boot_params = rmode;
/*
* Check if LA57 is desired and supported.
*
* There are two parts to the check:
* There are several parts to the check:
* - if the kernel supports 5-level paging: CONFIG_X86_5LEVEL=y
* - if user asked to disable 5-level paging: no5lvl in cmdline
* - if the machine supports 5-level paging:
* + CPUID leaf 7 is supported
* + the leaf has the feature bit set
......@@ -48,6 +55,7 @@ struct paging_config paging_prepare(void)
* That's substitute for boot_cpu_has() in early boot code.
*/
if (IS_ENABLED(CONFIG_X86_5LEVEL) &&
!cmdline_find_option_bool("no5lvl") &&
native_cpuid_eax(0) >= 7 &&
(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31)))) {
paging_config.l5_required = 1;
......@@ -130,7 +138,7 @@ void cleanup_trampoline(void *pgtable)
{
void *trampoline_pgtable;
trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET;
trampoline_pgtable = trampoline_32bit + TRAMPOLINE_32BIT_PGTABLE_OFFSET / sizeof(unsigned long);
/*
* Move the top level page table out of trampoline memory,
......
......@@ -19,6 +19,7 @@
#include <asm/cpufeature.h>
#include <asm/perf_event.h>
#include <asm/msr.h>
#include <asm/smp.h>
#define NUM_COUNTERS_NB 4
#define NUM_COUNTERS_L2 4
......@@ -399,26 +400,8 @@ static int amd_uncore_cpu_starting(unsigned int cpu)
}
if (amd_uncore_llc) {
unsigned int apicid = cpu_data(cpu).apicid;
unsigned int nshared, subleaf, prev_eax = 0;
uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
/*
* Iterate over Cache Topology Definition leaves until no
* more cache descriptions are available.
*/
for (subleaf = 0; subleaf < 5; subleaf++) {
cpuid_count(0x8000001d, subleaf, &eax, &ebx, &ecx, &edx);
/* EAX[0:4] gives type of cache */
if (!(eax & 0x1f))
break;
prev_eax = eax;
}
nshared = ((prev_eax >> 14) & 0xfff) + 1;
uncore->id = apicid - (apicid % nshared);
uncore->id = per_cpu(cpu_llc_id, cpu);
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
*per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
......
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _ASM_X86_CACHEINFO_H
#define _ASM_X86_CACHEINFO_H
void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id);
#endif /* _ASM_X86_CACHEINFO_H */
......@@ -53,7 +53,7 @@
#define __PHYSICAL_MASK_SHIFT 52
#ifdef CONFIG_X86_5LEVEL
#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled ? 56 : 47)
#define __VIRTUAL_MASK_SHIFT (pgtable_l5_enabled() ? 56 : 47)
#else
#define __VIRTUAL_MASK_SHIFT 47
#endif
......
......@@ -574,14 +574,14 @@ static inline void __set_pgd(pgd_t *pgdp, pgd_t pgd)
}
#define set_pgd(pgdp, pgdval) do { \
if (pgtable_l5_enabled) \
if (pgtable_l5_enabled()) \
__set_pgd(pgdp, pgdval); \
else \
set_p4d((p4d_t *)(pgdp), (p4d_t) { (pgdval).pgd }); \
} while (0)
#define pgd_clear(pgdp) do { \
if (pgtable_l5_enabled) \
if (pgtable_l5_enabled()) \
set_pgd(pgdp, __pgd(0)); \
} while (0)
......
......@@ -167,7 +167,7 @@ static inline void __pud_free_tlb(struct mmu_gather *tlb, pud_t *pud,
#if CONFIG_PGTABLE_LEVELS > 4
static inline void pgd_populate(struct mm_struct *mm, pgd_t *pgd, p4d_t *p4d)
{
if (!pgtable_l5_enabled)
if (!pgtable_l5_enabled())
return;
paravirt_alloc_p4d(mm, __pa(p4d) >> PAGE_SHIFT);
set_pgd(pgd, __pgd(_PAGE_TABLE | __pa(p4d)));
......@@ -193,7 +193,7 @@ extern void ___p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d);
static inline void __p4d_free_tlb(struct mmu_gather *tlb, p4d_t *p4d,
unsigned long address)
{
if (pgtable_l5_enabled)
if (pgtable_l5_enabled())
___p4d_free_tlb(tlb, p4d);
}
......
......@@ -65,7 +65,7 @@ extern pmdval_t early_pmd_flags;
#ifndef __PAGETABLE_P4D_FOLDED
#define set_pgd(pgdp, pgd) native_set_pgd(pgdp, pgd)
#define pgd_clear(pgd) (pgtable_l5_enabled ? native_pgd_clear(pgd) : 0)
#define pgd_clear(pgd) (pgtable_l5_enabled() ? native_pgd_clear(pgd) : 0)
#endif
#ifndef set_p4d
......@@ -881,7 +881,7 @@ static inline unsigned long p4d_index(unsigned long address)
#if CONFIG_PGTABLE_LEVELS > 4
static inline int pgd_present(pgd_t pgd)
{
if (!pgtable_l5_enabled)
if (!pgtable_l5_enabled())
return 1;
return pgd_flags(pgd) & _PAGE_PRESENT;
}
......@@ -898,9 +898,9 @@ static inline unsigned long pgd_page_vaddr(pgd_t pgd)
#define pgd_page(pgd) pfn_to_page(pgd_pfn(pgd))
/* to find an entry in a page-table-directory. */
static inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
static __always_inline p4d_t *p4d_offset(pgd_t *pgd, unsigned long address)
{
if (!pgtable_l5_enabled)
if (!pgtable_l5_enabled())
return (p4d_t *)pgd;
return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
}
......@@ -909,7 +909,7 @@ static inline int pgd_bad(pgd_t pgd)
{
unsigned long ignore_flags = _PAGE_USER;
if (!pgtable_l5_enabled)
if (!pgtable_l5_enabled())
return 0;
if (IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
......@@ -920,7 +920,7 @@ static inline int pgd_bad(pgd_t pgd)
static inline int pgd_none(pgd_t pgd)
{
if (!pgtable_l5_enabled)
if (!pgtable_l5_enabled())
return 0;
/*
* There is no need to do a workaround for the KNL stray
......
......@@ -15,7 +15,7 @@
# include <asm/pgtable-2level_types.h>
#endif
#define pgtable_l5_enabled 0
#define pgtable_l5_enabled() 0
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE - 1))
......
......@@ -220,7 +220,7 @@ static inline void native_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
pgd_t pgd;
if (pgtable_l5_enabled || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
if (pgtable_l5_enabled() || !IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
*p4dp = p4d;
return;
}
......
......@@ -22,12 +22,23 @@ typedef struct { pteval_t pte; } pte_t;
#ifdef CONFIG_X86_5LEVEL
extern unsigned int __pgtable_l5_enabled;
#ifndef pgtable_l5_enabled
#define pgtable_l5_enabled cpu_feature_enabled(X86_FEATURE_LA57)
#endif
#ifdef USE_EARLY_PGTABLE_L5
/*
* cpu_feature_enabled() is not available in early boot code.
* Use variable instead.
*/
static inline bool pgtable_l5_enabled(void)
{
return __pgtable_l5_enabled;
}
#else
#define pgtable_l5_enabled 0
#endif
#define pgtable_l5_enabled() cpu_feature_enabled(X86_FEATURE_LA57)
#endif /* USE_EARLY_PGTABLE_L5 */
#else
#define pgtable_l5_enabled() 0
#endif /* CONFIG_X86_5LEVEL */
extern unsigned int pgdir_shift;
extern unsigned int ptrs_per_p4d;
......@@ -102,7 +113,7 @@ extern unsigned int ptrs_per_p4d;
#define LDT_PGD_ENTRY_L4 -3UL
#define LDT_PGD_ENTRY_L5 -112UL
#define LDT_PGD_ENTRY (pgtable_l5_enabled ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
#define LDT_PGD_ENTRY (pgtable_l5_enabled() ? LDT_PGD_ENTRY_L5 : LDT_PGD_ENTRY_L4)
#define LDT_BASE_ADDR (LDT_PGD_ENTRY << PGDIR_SHIFT)
#define __VMALLOC_BASE_L4 0xffffc90000000000UL
......@@ -116,7 +127,7 @@ extern unsigned int ptrs_per_p4d;
#ifdef CONFIG_DYNAMIC_MEMORY_LAYOUT
# define VMALLOC_START vmalloc_base
# define VMALLOC_SIZE_TB (pgtable_l5_enabled ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4)
# define VMALLOC_SIZE_TB (pgtable_l5_enabled() ? VMALLOC_SIZE_TB_L5 : VMALLOC_SIZE_TB_L4)
# define VMEMMAP_START vmemmap_base
#else
# define VMALLOC_START __VMALLOC_BASE_L4
......
......@@ -186,15 +186,6 @@ extern void identify_boot_cpu(void);
extern void identify_secondary_cpu(struct cpuinfo_x86 *);
extern void print_cpu_info(struct cpuinfo_x86 *);
void print_cpu_msr(struct cpuinfo_x86 *);
extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
extern u32 get_scattered_cpuid_leaf(unsigned int level,
unsigned int sub_leaf,
enum cpuid_regs_idx reg);
extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
extern void detect_extended_topology(struct cpuinfo_x86 *c);
extern void detect_ht(struct cpuinfo_x86 *c);
#ifdef CONFIG_X86_32
extern int have_cpuid_p(void);
......
......@@ -171,7 +171,6 @@ static inline int wbinvd_on_all_cpus(void)
wbinvd();
return 0;
}
#define smp_num_siblings 1
#endif /* CONFIG_SMP */
extern unsigned disabled_cpus;
......
......@@ -27,8 +27,8 @@
# endif
#else /* CONFIG_X86_32 */
# define SECTION_SIZE_BITS 27 /* matt - 128 is convenient right now */
# define MAX_PHYSADDR_BITS (pgtable_l5_enabled ? 52 : 44)
# define MAX_PHYSMEM_BITS (pgtable_l5_enabled ? 52 : 46)
# define MAX_PHYSADDR_BITS (pgtable_l5_enabled() ? 52 : 44)
# define MAX_PHYSMEM_BITS (pgtable_l5_enabled() ? 52 : 46)
#endif
#endif /* CONFIG_SPARSEMEM */
......
......@@ -17,7 +17,7 @@ KCOV_INSTRUMENT_perf_event.o := n
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_common.o := $(nostackp)
obj-y := intel_cacheinfo.o scattered.o topology.o
obj-y := cacheinfo.o scattered.o topology.o
obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
......
......@@ -9,6 +9,7 @@
#include <linux/random.h>
#include <asm/processor.h>
#include <asm/apic.h>
#include <asm/cacheinfo.h>
#include <asm/cpu.h>
#include <asm/spec-ctrl.h>
#include <asm/smp.h>
......@@ -298,7 +299,6 @@ static int nearby_node(int apicid)
}
#endif
#ifdef CONFIG_SMP
/*
* Fix up cpu_core_id for pre-F17h systems to be in the
* [0 .. cores_per_node - 1] range. Not really needed but
......@@ -328,6 +328,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
/* get information required for multi-node processors */
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
int err;
u32 eax, ebx, ecx, edx;
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
......@@ -346,21 +347,15 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
}
/*
* We may have multiple LLCs if L3 caches exist, so check if we
* have an L3 cache by looking at the L3 cache CPUID leaf.
* In case leaf B is available, use it to derive
* topology information.
*/
if (cpuid_edx(0x80000006)) {
if (c->x86 == 0x17) {
/*
* LLC is at the core complex level.
* Core complex id is ApicId[3].
*/
per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
} else {
/* LLC is at the node level. */
per_cpu(cpu_llc_id, cpu) = node_id;
}
}
err = detect_extended_topology(c);
if (!err)
c->x86_coreid_bits = get_count_order(c->x86_max_cores);
cacheinfo_amd_init_llc_id(c, cpu, node_id);
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
......@@ -376,7 +371,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
legacy_fixup_core_id(c);
}
}
#endif
/*
* On a AMD dual core setup the lower bits of the APIC id distinguish the cores.
......@@ -384,7 +378,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
*/
static void amd_detect_cmp(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned bits;
int cpu = smp_processor_id();
......@@ -395,17 +388,11 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
c->phys_proc_id = c->initial_apicid >> bits;
/* use socket ID also for last level cache */
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
amd_get_topology(c);
#endif
}
u16 amd_get_nb_id(int cpu)
{
u16 id = 0;
#ifdef CONFIG_SMP
id = per_cpu(cpu_llc_id, cpu);
#endif
return id;
return per_cpu(cpu_llc_id, cpu);
}
EXPORT_SYMBOL_GPL(amd_get_nb_id);
......@@ -864,6 +851,7 @@ static void init_amd(struct cpuinfo_x86 *c)
/* Multi core CPU? */
if (c->extended_cpuid_level >= 0x80000008) {
amd_detect_cmp(c);
amd_get_topology(c);
srat_detect_node(c);
}
......
......@@ -20,6 +20,8 @@
#include <asm/amd_nb.h>
#include <asm/smp.h>
#include "cpu.h"
#define LVL_1_INST 1
#define LVL_1_DATA 2
#define LVL_2 3
......@@ -637,6 +639,45 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c)
return i;
}
void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
{
/*
* We may have multiple LLCs if L3 caches exist, so check if we
* have an L3 cache by looking at the L3 cache CPUID leaf.
*/
if (!cpuid_edx(0x80000006))
return;
if (c->x86 < 0x17) {
/* LLC is at the node level. */
per_cpu(cpu_llc_id, cpu) = node_id;
} else if (c->x86 == 0x17 &&
c->x86_model >= 0 && c->x86_model <= 0x1F) {
/*
* LLC is at the core complex level.
* Core complex ID is ApicId[3] for these processors.
*/
per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
} else {
/*
* LLC ID is calculated from the number of threads sharing the
* cache.
* */
u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
u32 llc_index = find_num_cache_leaves(c) - 1;
cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
if (eax)
num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
if (num_sharing_cache) {
int bits = get_count_order(num_sharing_cache) - 1;
per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
}
}
}
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
......@@ -650,7 +691,7 @@ void init_amd_cacheinfo(struct cpuinfo_x86 *c)
}
}
unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
/* Cache sizes */
unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
......@@ -802,7 +843,8 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
return l2;
if (!l2)
cpu_detect_cache_sizes(c);
}
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
......
......@@ -18,6 +18,13 @@
#define RNG_ENABLED (1 << 3)
#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */
#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000
#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000
#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
static void init_c3(struct cpuinfo_x86 *c)
{
u32 lo, hi;
......@@ -112,6 +119,31 @@ static void early_init_centaur(struct cpuinfo_x86 *c)
}
}
static void centaur_detect_vmx_virtcap(struct cpuinfo_x86 *c)
{
u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
msr_ctl = vmx_msr_high | vmx_msr_low;
if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
set_cpu_cap(c, X86_FEATURE_VNMI);
if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
vmx_msr_low, vmx_msr_high);
msr_ctl2 = vmx_msr_high | vmx_msr_low;
if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
(msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
set_cpu_cap(c, X86_FEATURE_EPT);
if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
set_cpu_cap(c, X86_FEATURE_VPID);
}
}
static void init_centaur(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
......@@ -128,6 +160,24 @@ static void init_centaur(struct cpuinfo_x86 *c)
clear_cpu_cap(c, 0*32+31);
#endif
early_init_centaur(c);
init_intel_cacheinfo(c);
detect_num_cpu_cores(c);
#ifdef CONFIG_X86_32
detect_ht(c);
#endif
if (c->cpuid_level > 9) {
unsigned int eax = cpuid_eax(10);
/*
* Check for version and the number of counters
* Version(eax[7:0]) can't be 0;
* Counters(eax[15:8]) should be greater than 1;
*/
if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1))
set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
}
switch (c->x86) {
#ifdef CONFIG_X86_32
case 5:
......@@ -199,6 +249,9 @@ static void init_centaur(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
#endif
if (cpu_has(c, X86_FEATURE_VMX))
centaur_detect_vmx_virtcap(c);
}
#ifdef CONFIG_X86_32
......
......@@ -66,6 +66,13 @@ cpumask_var_t cpu_callin_mask;
/* representing cpus for which sibling maps can be computed */
cpumask_var_t cpu_sibling_setup_mask;
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);
/* Last level cache ID of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
/* correctly size the local cpu masks */
void __init setup_cpu_local_masks(void)
{
......@@ -577,6 +584,19 @@ static void get_model_name(struct cpuinfo_x86 *c)
*(s + 1) = '\0';
}
void detect_num_cpu_cores(struct cpuinfo_x86 *c)
{
unsigned int eax, ebx, ecx, edx;
c->x86_max_cores = 1;
if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
return;
cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
if (eax & 0x1f)
c->x86_max_cores = (eax >> 26) + 1;
}
void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ebx, ecx, edx, l2size;
......@@ -1044,6 +1064,21 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
*/
setup_clear_cpu_cap(X86_FEATURE_PCID);
#endif
/*
* Later in the boot process pgtable_l5_enabled() relies on
* cpu_feature_enabled(X86_FEATURE_LA57). If 5-level paging is not
* enabled by this point we need to clear the feature bit to avoid
* false-positives at the later stage.
*
* pgtable_l5_enabled() can be false here for several reasons:
* - 5-level paging is disabled compile-time;
* - it's 32-bit kernel;
* - machine doesn't support 5-level paging;
* - user specified 'no5lvl' in kernel command line.
*/
if (!pgtable_l5_enabled())
setup_clear_cpu_cap(X86_FEATURE_LA57);
}
void __init early_cpu_init(void)
......
......@@ -47,6 +47,16 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
extern u32 get_scattered_cpuid_leaf(unsigned int level,
unsigned int sub_leaf,
enum cpuid_regs_idx reg);
extern void init_intel_cacheinfo(struct cpuinfo_x86 *c);
extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
extern void detect_num_cpu_cores(struct cpuinfo_x86 *c);
extern int detect_extended_topology(struct cpuinfo_x86 *c);
extern void detect_ht(struct cpuinfo_x86 *c);
unsigned int aperfmperf_get_khz(int cpu);
......
......@@ -456,24 +456,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
/*
* find out the number of processor cores on the die
*/
static int intel_num_cpu_cores(struct cpuinfo_x86 *c)
{
unsigned int eax, ebx, ecx, edx;
if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
return 1;
/* Intel has a non-standard dependency on %ecx for this CPUID level. */
cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
if (eax & 0x1f)
return (eax >> 26) + 1;
else
return 1;
}
static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
{
/* Intel VMX MSR indicated features */
......@@ -656,8 +638,6 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
static void init_intel(struct cpuinfo_x86 *c)
{
unsigned int l2 = 0;
early_init_intel(c);
intel_workarounds(c);
......@@ -674,19 +654,13 @@ static void init_intel(struct cpuinfo_x86 *c)
* let's use the legacy cpuid vector 0x1 and 0x4 for topology
* detection.
*/
c->x86_max_cores = intel_num_cpu_cores(c);
detect_num_cpu_cores(c);
#ifdef CONFIG_X86_32
detect_ht(c);
#endif
}
l2 = init_intel_cacheinfo(c);
/* Detect legacy cache sizes if init_intel_cacheinfo did not */
if (l2 == 0) {
cpu_detect_cache_sizes(c);
l2 = c->x86_cache_size;
}
init_intel_cacheinfo(c);
if (c->cpuid_level > 9) {
unsigned eax = cpuid_eax(10);
......@@ -699,7 +673,8 @@ static void init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
if (boot_cpu_has(X86_FEATURE_DS)) {
unsigned int l1;
unsigned int l1, l2;
rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
if (!(l1 & (1<<11)))
set_cpu_cap(c, X86_FEATURE_BTS);
......@@ -727,6 +702,7 @@ static void init_intel(struct cpuinfo_x86 *c)
* Dixon is NOT a Celeron.
*/
if (c->x86 == 6) {
unsigned int l2 = c->x86_cache_size;
char *p = NULL;
switch (c->x86_model) {
......
......@@ -27,7 +27,7 @@
* exists, use it for populating initial_apicid and cpu topology
* detection.
*/
void detect_extended_topology(struct cpuinfo_x86 *c)
int detect_extended_topology(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned int eax, ebx, ecx, edx, sub_index;
......@@ -36,7 +36,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
static bool printed;
if (c->cpuid_level < 0xb)
return;
return -1;
cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
......@@ -44,7 +44,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
* check if the cpuid leaf 0xb is actually implemented.
*/
if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
return;
return -1;
set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
......@@ -95,6 +95,6 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
c->cpu_core_id);
printed = 1;
}
return;
#endif
return 0;
}
......@@ -6,6 +6,10 @@
*/
#define DISABLE_BRANCH_PROFILING
/* cpu_feature_enabled() cannot be used this early */
#define USE_EARLY_PGTABLE_L5
#include <linux/init.h>
#include <linux/linkage.h>
#include <linux/types.h>
......@@ -32,11 +36,6 @@
#include <asm/microcode.h>
#include <asm/kasan.h>
#ifdef CONFIG_X86_5LEVEL
#undef pgtable_l5_enabled
#define pgtable_l5_enabled __pgtable_l5_enabled
#endif
/*
* Manage page tables very early on.
*/
......@@ -45,8 +44,7 @@ static unsigned int __initdata next_early_pgt;
pmdval_t early_pmd_flags = __PAGE_KERNEL_LARGE & ~(_PAGE_GLOBAL | _PAGE_NX);
#ifdef CONFIG_X86_5LEVEL
unsigned int __pgtable_l5_enabled __ro_after_init;
EXPORT_SYMBOL(__pgtable_l5_enabled);
unsigned int __pgtable_l5_enabled __initdata;
unsigned int pgdir_shift __ro_after_init = 39;
EXPORT_SYMBOL(pgdir_shift);
unsigned int ptrs_per_p4d __ro_after_init = 1;
......@@ -82,13 +80,14 @@ static unsigned int __head *fixup_int(void *ptr, unsigned long physaddr)
static bool __head check_la57_support(unsigned long physaddr)
{
if (native_cpuid_eax(0) < 7)
return false;
if (!(native_cpuid_ecx(7) & (1 << (X86_FEATURE_LA57 & 31))))
/*
* 5-level paging is detected and enabled at kernel decomression
* stage. Only check if it has been enabled there.
*/
if (!(native_read_cr4() & X86_CR4_LA57))
return false;
*fixup_int(&pgtable_l5_enabled, physaddr) = 1;
*fixup_int(&__pgtable_l5_enabled, physaddr) = 1;
*fixup_int(&pgdir_shift, physaddr) = 48;
*fixup_int(&ptrs_per_p4d, physaddr) = 512;
*fixup_long(&page_offset_base, physaddr) = __PAGE_OFFSET_BASE_L5;
......@@ -281,7 +280,7 @@ int __init __early_make_pgtable(unsigned long address, pmdval_t pmd)
* critical -- __PAGE_OFFSET would point us back into the dynamic
* range and we might end up looping forever...
*/
if (!pgtable_l5_enabled)
if (!pgtable_l5_enabled())
p4d_p = pgd_p;
else if (pgd)
p4d_p = (p4dval_t *)((pgd & PTE_PFN_MASK) + __START_KERNEL_map - phys_base);
......
......@@ -354,7 +354,8 @@ void arch_crash_save_vmcoreinfo(void)
{
VMCOREINFO_NUMBER(phys_base);
VMCOREINFO_SYMBOL(init_top_pgt);
VMCOREINFO_NUMBER(pgtable_l5_enabled);
vmcoreinfo_append_str("NUMBER(pgtable_l5_enabled)=%d\n",
pgtable_l5_enabled());
#ifdef CONFIG_NUMA
VMCOREINFO_SYMBOL(node_data);
......
......@@ -81,13 +81,6 @@
#include <asm/cpu_device_id.h>
#include <asm/spec-ctrl.h>
/* Number of siblings per CPU package */
int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);
/* Last level cache ID of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
/* representing HT siblings of each logical CPU */
DEFINE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_sibling_map);
EXPORT_PER_CPU_SYMBOL(cpu_sibling_map);
......
......@@ -360,7 +360,7 @@ static inline bool kasan_page_table(struct seq_file *m, struct pg_state *st,
void *pt)
{
if (__pa(pt) == __pa(kasan_zero_pmd) ||
(pgtable_l5_enabled && __pa(pt) == __pa(kasan_zero_p4d)) ||
(pgtable_l5_enabled() && __pa(pt) == __pa(kasan_zero_p4d)) ||
__pa(pt) == __pa(kasan_zero_pud)) {
pgprotval_t prot = pte_flags(kasan_zero_pte[0]);
note_page(m, st, __pgprot(prot), 0, 5);
......@@ -476,8 +476,8 @@ static void walk_p4d_level(struct seq_file *m, struct pg_state *st, pgd_t addr,
}
}
#define pgd_large(a) (pgtable_l5_enabled ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
#define pgd_none(a) (pgtable_l5_enabled ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
#define pgd_large(a) (pgtable_l5_enabled() ? pgd_large(a) : p4d_large(__p4d(pgd_val(a))))
#define pgd_none(a) (pgtable_l5_enabled() ? pgd_none(a) : p4d_none(__p4d(pgd_val(a))))
static inline bool is_hypervisor_range(int idx)
{
......
......@@ -440,7 +440,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (pgd_none(*pgd_k))
return -1;
if (pgtable_l5_enabled) {
if (pgtable_l5_enabled()) {
if (pgd_none(*pgd)) {
set_pgd(pgd, *pgd_k);
arch_flush_lazy_mmu_mode();
......@@ -455,7 +455,7 @@ static noinline int vmalloc_fault(unsigned long address)
if (p4d_none(*p4d_k))
return -1;
if (p4d_none(*p4d) && !pgtable_l5_enabled) {
if (p4d_none(*p4d) && !pgtable_l5_enabled()) {
set_p4d(p4d, *p4d_k);
arch_flush_lazy_mmu_mode();
} else {
......
......@@ -123,7 +123,7 @@ int kernel_ident_mapping_init(struct x86_mapping_info *info, pgd_t *pgd_page,
result = ident_p4d_init(info, p4d, addr, next);
if (result)
return result;
if (pgtable_l5_enabled) {
if (pgtable_l5_enabled()) {
set_pgd(pgd, __pgd(__pa(p4d) | info->kernpg_flag));
} else {
/*
......
......@@ -180,7 +180,7 @@ static void sync_global_pgds_l4(unsigned long start, unsigned long end)
*/
void sync_global_pgds(unsigned long start, unsigned long end)
{
if (pgtable_l5_enabled)
if (pgtable_l5_enabled())
sync_global_pgds_l5(start, end);
else
sync_global_pgds_l4(start, end);
......@@ -643,7 +643,7 @@ phys_p4d_init(p4d_t *p4d_page, unsigned long paddr, unsigned long paddr_end,
unsigned long vaddr = (unsigned long)__va(paddr);
int i = p4d_index(vaddr);
if (!pgtable_l5_enabled)
if (!pgtable_l5_enabled())
return phys_pud_init((pud_t *) p4d_page, paddr, paddr_end, page_size_mask);
for (; i < PTRS_PER_P4D; i++, paddr = paddr_next) {
......@@ -723,7 +723,7 @@ kernel_physical_mapping_init(unsigned long paddr_start,
page_size_mask);
spin_lock(&init_mm.page_table_lock);
if (pgtable_l5_enabled)
if (pgtable_l5_enabled())
pgd_populate(&init_mm, pgd, p4d);
else
p4d_populate(&init_mm, p4d_offset(pgd, vaddr), (pud_t *) p4d);
......@@ -1100,7 +1100,7 @@ remove_p4d_table(p4d_t *p4d_start, unsigned long addr, unsigned long end,
* 5-level case we should free them. This code will have to change
* to adapt for boot-time switching between 4 and 5 level page tables.
*/
if (pgtable_l5_enabled)
if (pgtable_l5_enabled())
free_pud_table(pud_base, p4d);
}
......
......@@ -2,10 +2,8 @@
#define DISABLE_BRANCH_PROFILING
#define pr_fmt(fmt) "kasan: " fmt
#ifdef CONFIG_X86_5LEVEL
/* Too early to use cpu_feature_enabled() */
#define pgtable_l5_enabled __pgtable_l5_enabled
#endif
/* cpu_feature_enabled() cannot be used this early */
#define USE_EARLY_PGTABLE_L5
#include <linux/bootmem.h>
#include <linux/kasan.h>
......@@ -182,7 +180,7 @@ static void __init clear_pgds(unsigned long start,
* With folded p4d, pgd_clear() is nop, use p4d_clear()
* instead.
*/
if (pgtable_l5_enabled)
if (pgtable_l5_enabled())
pgd_clear(pgd);
else
p4d_clear(p4d_offset(pgd, start));
......@@ -197,7 +195,7 @@ static inline p4d_t *early_p4d_offset(pgd_t *pgd, unsigned long addr)
{
unsigned long p4d;
if (!pgtable_l5_enabled)
if (!pgtable_l5_enabled())
return (p4d_t *)pgd;
p4d = __pa_nodebug(pgd_val(*pgd)) & PTE_PFN_MASK;
......@@ -284,7 +282,7 @@ void __init kasan_early_init(void)
for (i = 0; i < PTRS_PER_PUD; i++)
kasan_zero_pud[i] = __pud(pud_val);
for (i = 0; pgtable_l5_enabled && i < PTRS_PER_P4D; i++)
for (i = 0; pgtable_l5_enabled() && i < PTRS_PER_P4D; i++)
kasan_zero_p4d[i] = __p4d(p4d_val);
kasan_map_early_shadow(early_top_pgt);
......@@ -315,7 +313,7 @@ void __init kasan_init(void)
* bunch of things like kernel code, modules, EFI mapping, etc.
* We need to take extra steps to not overwrite them.
*/
if (pgtable_l5_enabled) {
if (pgtable_l5_enabled()) {
void *ptr;
ptr = (void *)pgd_page_vaddr(*pgd_offset_k(KASAN_SHADOW_END));
......
......@@ -78,7 +78,7 @@ void __init kernel_randomize_memory(void)
struct rnd_state rand_state;
unsigned long remain_entropy;
vaddr_start = pgtable_l5_enabled ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
vaddr_start = pgtable_l5_enabled() ? __PAGE_OFFSET_BASE_L5 : __PAGE_OFFSET_BASE_L4;
vaddr = vaddr_start;
/*
......@@ -124,7 +124,7 @@ void __init kernel_randomize_memory(void)
*/
entropy = remain_entropy / (ARRAY_SIZE(kaslr_regions) - i);
prandom_bytes_state(&rand_state, &rand, sizeof(rand));
if (pgtable_l5_enabled)
if (pgtable_l5_enabled())
entropy = (rand % (entropy + 1)) & P4D_MASK;
else
entropy = (rand % (entropy + 1)) & PUD_MASK;
......@@ -136,7 +136,7 @@ void __init kernel_randomize_memory(void)
* randomization alignment.
*/
vaddr += get_padding(&kaslr_regions[i]);
if (pgtable_l5_enabled)
if (pgtable_l5_enabled())
vaddr = round_up(vaddr + 1, P4D_SIZE);
else
vaddr = round_up(vaddr + 1, PUD_SIZE);
......@@ -212,7 +212,7 @@ void __meminit init_trampoline(void)
return;
}
if (pgtable_l5_enabled)
if (pgtable_l5_enabled())
init_trampoline_p4d();
else
init_trampoline_pud();
......
......@@ -157,7 +157,7 @@ static void sync_current_stack_to_mm(struct mm_struct *mm)
unsigned long sp = current_stack_pointer;
pgd_t *pgd = pgd_offset(mm, sp);
if (pgtable_l5_enabled) {
if (pgtable_l5_enabled()) {
if (unlikely(pgd_none(*pgd))) {
pgd_t *pgd_ref = pgd_offset_k(sp);
......
......@@ -225,7 +225,7 @@ int __init efi_alloc_page_tables(void)
pud = pud_alloc(&init_mm, p4d, EFI_VA_END);
if (!pud) {
if (pgtable_l5_enabled)
if (pgtable_l5_enabled())
free_page((unsigned long) pgd_page_vaddr(*pgd));
free_pages((unsigned long)efi_pgd, PGD_ALLOCATION_ORDER);
return -ENOMEM;
......
......@@ -72,7 +72,7 @@ static int set_up_temporary_text_mapping(pgd_t *pgd)
* tables used by the image kernel.
*/
if (pgtable_l5_enabled) {
if (pgtable_l5_enabled()) {
p4d = (p4d_t *)get_safe_page(GFP_ATOMIC);
if (!p4d)
return -ENOMEM;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册