diff --git a/Documentation/x86/topology.txt b/Documentation/x86/topology.txt new file mode 100644 index 0000000000000000000000000000000000000000..06afac252f5b5c0877746c2ac406b6aa84af8f8e --- /dev/null +++ b/Documentation/x86/topology.txt @@ -0,0 +1,208 @@ +x86 Topology +============ + +This documents and clarifies the main aspects of x86 topology modelling and +representation in the kernel. Update/change when doing changes to the +respective code. + +The architecture-agnostic topology definitions are in +Documentation/cputopology.txt. This file holds x86-specific +differences/specialities which must not necessarily apply to the generic +definitions. Thus, the way to read up on Linux topology on x86 is to start +with the generic one and look at this one in parallel for the x86 specifics. + +Needless to say, code should use the generic functions - this file is *only* +here to *document* the inner workings of x86 topology. + +Started by Thomas Gleixner and Borislav Petkov . + +The main aim of the topology facilities is to present adequate interfaces to +code which needs to know/query/use the structure of the running system wrt +threads, cores, packages, etc. + +The kernel does not care about the concept of physical sockets because a +socket has no relevance to software. It's an electromechanical component. In +the past a socket always contained a single package (see below), but with the +advent of Multi Chip Modules (MCM) a socket can hold more than one package. So +there might be still references to sockets in the code, but they are of +historical nature and should be cleaned up. + +The topology of a system is described in the units of: + + - packages + - cores + - threads + +* Package: + + Packages contain a number of cores plus shared resources, e.g. DRAM + controller, shared caches etc. + + AMD nomenclature for package is 'Node'. + + Package-related topology information in the kernel: + + - cpuinfo_x86.x86_max_cores: + + The number of cores in a package. This information is retrieved via CPUID. + + - cpuinfo_x86.phys_proc_id: + + The physical ID of the package. This information is retrieved via CPUID + and deduced from the APIC IDs of the cores in the package. + + - cpuinfo_x86.logical_id: + + The logical ID of the package. As we do not trust BIOSes to enumerate the + packages in a consistent way, we introduced the concept of logical package + ID so we can sanely calculate the number of maximum possible packages in + the system and have the packages enumerated linearly. + + - topology_max_packages(): + + The maximum possible number of packages in the system. Helpful for per + package facilities to preallocate per package information. + + +* Cores: + + A core consists of 1 or more threads. It does not matter whether the threads + are SMT- or CMT-type threads. + + AMDs nomenclature for a CMT core is "Compute Unit". The kernel always uses + "core". + + Core-related topology information in the kernel: + + - smp_num_siblings: + + The number of threads in a core. The number of threads in a package can be + calculated by: + + threads_per_package = cpuinfo_x86.x86_max_cores * smp_num_siblings + + +* Threads: + + A thread is a single scheduling unit. It's the equivalent to a logical Linux + CPU. + + AMDs nomenclature for CMT threads is "Compute Unit Core". The kernel always + uses "thread". + + Thread-related topology information in the kernel: + + - topology_core_cpumask(): + + The cpumask contains all online threads in the package to which a thread + belongs. + + The number of online threads is also printed in /proc/cpuinfo "siblings." + + - topology_sibling_mask(): + + The cpumask contains all online threads in the core to which a thread + belongs. + + - topology_logical_package_id(): + + The logical package ID to which a thread belongs. + + - topology_physical_package_id(): + + The physical package ID to which a thread belongs. + + - topology_core_id(); + + The ID of the core to which a thread belongs. It is also printed in /proc/cpuinfo + "core_id." + + + +System topology examples + +Note: + +The alternative Linux CPU enumeration depends on how the BIOS enumerates the +threads. Many BIOSes enumerate all threads 0 first and then all threads 1. +That has the "advantage" that the logical Linux CPU numbers of threads 0 stay +the same whether threads are enabled or not. That's merely an implementation +detail and has no practical impact. + +1) Single Package, Single Core + + [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0 + +2) Single Package, Dual Core + + a) One thread per core + + [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0 + -> [core 1] -> [thread 0] -> Linux CPU 1 + + b) Two threads per core + + [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0 + -> [thread 1] -> Linux CPU 1 + -> [core 1] -> [thread 0] -> Linux CPU 2 + -> [thread 1] -> Linux CPU 3 + + Alternative enumeration: + + [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0 + -> [thread 1] -> Linux CPU 2 + -> [core 1] -> [thread 0] -> Linux CPU 1 + -> [thread 1] -> Linux CPU 3 + + AMD nomenclature for CMT systems: + + [node 0] -> [Compute Unit 0] -> [Compute Unit Core 0] -> Linux CPU 0 + -> [Compute Unit Core 1] -> Linux CPU 1 + -> [Compute Unit 1] -> [Compute Unit Core 0] -> Linux CPU 2 + -> [Compute Unit Core 1] -> Linux CPU 3 + +4) Dual Package, Dual Core + + a) One thread per core + + [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0 + -> [core 1] -> [thread 0] -> Linux CPU 1 + + [package 1] -> [core 0] -> [thread 0] -> Linux CPU 2 + -> [core 1] -> [thread 0] -> Linux CPU 3 + + b) Two threads per core + + [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0 + -> [thread 1] -> Linux CPU 1 + -> [core 1] -> [thread 0] -> Linux CPU 2 + -> [thread 1] -> Linux CPU 3 + + [package 1] -> [core 0] -> [thread 0] -> Linux CPU 4 + -> [thread 1] -> Linux CPU 5 + -> [core 1] -> [thread 0] -> Linux CPU 6 + -> [thread 1] -> Linux CPU 7 + + Alternative enumeration: + + [package 0] -> [core 0] -> [thread 0] -> Linux CPU 0 + -> [thread 1] -> Linux CPU 4 + -> [core 1] -> [thread 0] -> Linux CPU 1 + -> [thread 1] -> Linux CPU 5 + + [package 1] -> [core 0] -> [thread 0] -> Linux CPU 2 + -> [thread 1] -> Linux CPU 6 + -> [core 1] -> [thread 0] -> Linux CPU 3 + -> [thread 1] -> Linux CPU 7 + + AMD nomenclature for CMT systems: + + [node 0] -> [Compute Unit 0] -> [Compute Unit Core 0] -> Linux CPU 0 + -> [Compute Unit Core 1] -> Linux CPU 1 + -> [Compute Unit 1] -> [Compute Unit Core 0] -> Linux CPU 2 + -> [Compute Unit Core 1] -> Linux CPU 3 + + [node 1] -> [Compute Unit 0] -> [Compute Unit Core 0] -> Linux CPU 4 + -> [Compute Unit Core 1] -> Linux CPU 5 + -> [Compute Unit 1] -> [Compute Unit Core 0] -> Linux CPU 6 + -> [Compute Unit Core 1] -> Linux CPU 7 diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index 049ada8d4e9c98b9c187b62528a55d9206a3d7cc..86a9bec18dab57950ea9af4201a670b99fe1db71 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -369,7 +369,7 @@ static int amd_pmu_cpu_prepare(int cpu) WARN_ON_ONCE(cpuc->amd_nb); - if (boot_cpu_data.x86_max_cores < 2) + if (!x86_pmu.amd_nb_constraints) return NOTIFY_OK; cpuc->amd_nb = amd_alloc_nb(cpu); @@ -388,7 +388,7 @@ static void amd_pmu_cpu_starting(int cpu) cpuc->perf_ctr_virt_mask = AMD64_EVENTSEL_HOSTONLY; - if (boot_cpu_data.x86_max_cores < 2) + if (!x86_pmu.amd_nb_constraints) return; nb_id = amd_get_nb_id(cpu); @@ -414,7 +414,7 @@ static void amd_pmu_cpu_dead(int cpu) { struct cpu_hw_events *cpuhw; - if (boot_cpu_data.x86_max_cores < 2) + if (!x86_pmu.amd_nb_constraints) return; cpuhw = &per_cpu(cpu_hw_events, cpu); @@ -648,6 +648,8 @@ static __initconst const struct x86_pmu amd_pmu = { .cpu_prepare = amd_pmu_cpu_prepare, .cpu_starting = amd_pmu_cpu_starting, .cpu_dead = amd_pmu_cpu_dead, + + .amd_nb_constraints = 1, }; static int __init amd_core_pmu_init(void) @@ -674,6 +676,11 @@ static int __init amd_core_pmu_init(void) x86_pmu.eventsel = MSR_F15H_PERF_CTL; x86_pmu.perfctr = MSR_F15H_PERF_CTR; x86_pmu.num_counters = AMD64_NUM_COUNTERS_CORE; + /* + * AMD Core perfctr has separate MSRs for the NB events, see + * the amd/uncore.c driver. + */ + x86_pmu.amd_nb_constraints = 0; pr_cont("core perfctr, "); return 0; @@ -693,6 +700,14 @@ __init int amd_pmu_init(void) if (ret) return ret; + if (num_possible_cpus() == 1) { + /* + * No point in allocating data structures to serialize + * against other CPUs, when there is only the one CPU. + */ + x86_pmu.amd_nb_constraints = 0; + } + /* Events are common for all AMDs */ memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids)); diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index ba6ef18528c906444049587506535115687ce01c..716d0482f5db38e2cf2c3c1095e1cbc85d30296d 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -607,6 +607,11 @@ struct x86_pmu { */ atomic_t lbr_exclusive[x86_lbr_exclusive_max]; + /* + * AMD bits + */ + unsigned int amd_nb_constraints : 1; + /* * Extra registers for events */ diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 2da46ac16e3750441294d983d82f359fdccdb0a4..426e946ed0c0155d011c942dd28aca32b0b85f2f 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -190,6 +190,7 @@ #define MSR_PP1_ENERGY_STATUS 0x00000641 #define MSR_PP1_POLICY 0x00000642 +/* Config TDP MSRs */ #define MSR_CONFIG_TDP_NOMINAL 0x00000648 #define MSR_CONFIG_TDP_LEVEL_1 0x00000649 #define MSR_CONFIG_TDP_LEVEL_2 0x0000064A @@ -210,13 +211,6 @@ #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 -/* Config TDP MSRs */ -#define MSR_CONFIG_TDP_NOMINAL 0x00000648 -#define MSR_CONFIG_TDP_LEVEL1 0x00000649 -#define MSR_CONFIG_TDP_LEVEL2 0x0000064A -#define MSR_CONFIG_TDP_CONTROL 0x0000064B -#define MSR_TURBO_ACTIVATION_RATIO 0x0000064C - /* Hardware P state interface */ #define MSR_PPERF 0x0000064e #define MSR_PERF_LIMIT_REASONS 0x0000064f diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 983738ac014c6508f499b26650d44b6f5b3b1b2b..9264476f3d578e8fa346411aad4900e85afcb41a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -132,8 +132,6 @@ struct cpuinfo_x86 { u16 logical_proc_id; /* Core id: */ u16 cpu_core_id; - /* Compute unit id */ - u8 compute_unit_id; /* Index into per_cpu list: */ u16 cpu_index; u32 microcode; diff --git a/arch/x86/include/asm/smp.h b/arch/x86/include/asm/smp.h index 20a3de5cb3b0dd5e3362833baebd752c1142ae4e..66b057306f404718c233c18c5603ec30e9e535ba 100644 --- a/arch/x86/include/asm/smp.h +++ b/arch/x86/include/asm/smp.h @@ -155,6 +155,7 @@ static inline int wbinvd_on_all_cpus(void) wbinvd(); return 0; } +#define smp_num_siblings 1 #endif /* CONFIG_SMP */ extern unsigned disabled_cpus; diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index 82866697fcf186ac7f63f1f6b5f1f77385dc8ab4..ffae84df8a9313cd3cc7b12953c0c24a809cd792 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -276,11 +276,9 @@ static inline bool is_ia32_task(void) */ #define force_iret() set_thread_flag(TIF_NOTIFY_RESUME) -#endif /* !__ASSEMBLY__ */ - -#ifndef __ASSEMBLY__ extern void arch_task_cache_init(void); extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src); extern void arch_release_task_struct(struct task_struct *tsk); -#endif +#endif /* !__ASSEMBLY__ */ + #endif /* _ASM_X86_THREAD_INFO_H */ diff --git a/arch/x86/kernel/amd_nb.c b/arch/x86/kernel/amd_nb.c index 29fa475ec51823e61a9e94f34142e37341a1208b..a147e676fc7b3439d156534472d63be5c2d10f4a 100644 --- a/arch/x86/kernel/amd_nb.c +++ b/arch/x86/kernel/amd_nb.c @@ -170,15 +170,13 @@ int amd_get_subcaches(int cpu) { struct pci_dev *link = node_to_amd_nb(amd_get_nb_id(cpu))->link; unsigned int mask; - int cuid; if (!amd_nb_has_feature(AMD_NB_L3_PARTITIONING)) return 0; pci_read_config_dword(link, 0x1d4, &mask); - cuid = cpu_data(cpu).compute_unit_id; - return (mask >> (4 * cuid)) & 0xf; + return (mask >> (4 * cpu_data(cpu).cpu_core_id)) & 0xf; } int amd_set_subcaches(int cpu, unsigned long mask) @@ -204,7 +202,7 @@ int amd_set_subcaches(int cpu, unsigned long mask) pci_write_config_dword(nb->misc, 0x1b8, reg & ~0x180000); } - cuid = cpu_data(cpu).compute_unit_id; + cuid = cpu_data(cpu).cpu_core_id; mask <<= 4 * cuid; mask |= (0xf ^ (1 << cuid)) << 26; diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 6e47e3a916f12a03069a7c38ef21e9b2df5edcdf..7b76eb67a9b3dcb84bb8e6cd6e40945924d32938 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -300,7 +300,6 @@ static int nearby_node(int apicid) #ifdef CONFIG_SMP static void amd_get_topology(struct cpuinfo_x86 *c) { - u32 cores_per_cu = 1; u8 node_id; int cpu = smp_processor_id(); @@ -313,8 +312,8 @@ static void amd_get_topology(struct cpuinfo_x86 *c) /* get compute unit information */ smp_num_siblings = ((ebx >> 8) & 3) + 1; - c->compute_unit_id = ebx & 0xff; - cores_per_cu += ((ebx >> 8) & 3); + c->x86_max_cores /= smp_num_siblings; + c->cpu_core_id = ebx & 0xff; } else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) { u64 value; @@ -325,19 +324,16 @@ static void amd_get_topology(struct cpuinfo_x86 *c) /* fixup multi-node processor information */ if (nodes_per_socket > 1) { - u32 cores_per_node; u32 cus_per_node; set_cpu_cap(c, X86_FEATURE_AMD_DCM); - cores_per_node = c->x86_max_cores / nodes_per_socket; - cus_per_node = cores_per_node / cores_per_cu; + cus_per_node = c->x86_max_cores / nodes_per_socket; /* store NodeID, use llc_shared_map to store sibling info */ per_cpu(cpu_llc_id, cpu) = node_id; /* core id has to be in the [0 .. cores_per_node - 1] range */ - c->cpu_core_id %= cores_per_node; - c->compute_unit_id %= cus_per_node; + c->cpu_core_id %= cus_per_node; } } #endif diff --git a/arch/x86/kernel/cpu/powerflags.c b/arch/x86/kernel/cpu/powerflags.c index 31f0f335ed2241d6d561e6b4edc98f8e03216e96..1dd8294fd7301c979744deedc1922f226b0e0346 100644 --- a/arch/x86/kernel/cpu/powerflags.c +++ b/arch/x86/kernel/cpu/powerflags.c @@ -18,4 +18,6 @@ const char *const x86_power_flags[32] = { "", /* tsc invariant mapped to constant_tsc */ "cpb", /* core performance boost */ "eff_freq_ro", /* Readonly aperf/mperf */ + "proc_feedback", /* processor feedback interface */ + "acc_power", /* accumulated power mechanism */ }; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index b2c99f811c3ff437ddfb45c7c4a56ed73101eccd..a2065d3b3b396f4503f4e4f42acc2af2bd4b307b 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -422,7 +422,7 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) if (c->phys_proc_id == o->phys_proc_id && per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) && - c->compute_unit_id == o->compute_unit_id) + c->cpu_core_id == o->cpu_core_id) return topology_sane(c, o, "smt"); } else if (c->phys_proc_id == o->phys_proc_id && diff --git a/arch/x86/ras/mce_amd_inj.c b/arch/x86/ras/mce_amd_inj.c index 55d38cfa46c2626c6d2f85587da27c05ca3e6bf7..9e02dcaef68311ed376f8fcd0579d6c207e80103 100644 --- a/arch/x86/ras/mce_amd_inj.c +++ b/arch/x86/ras/mce_amd_inj.c @@ -20,6 +20,7 @@ #include #include +#include #include #include @@ -206,7 +207,7 @@ static u32 get_nbc_for_node(int node_id) struct cpuinfo_x86 *c = &boot_cpu_data; u32 cores_per_node; - cores_per_node = c->x86_max_cores / amd_get_nodes_per_socket(); + cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket(); return cores_per_node * node_id; }