The topology IDs which identify the LLC and L2 domains clearly belong to
the per CPU topology information.
Move them into cpuinfo_x86::cpuinfo_topo and get rid of the extra per CPU
data and the related exports.
This also paves the way to do proper topology evaluation during early boot
because it removes the only per CPU dependency for that.
No functional change.
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Tested-by: Juergen Gross <jgross@suse.com>
Tested-by: Sohil Mehta <sohil.mehta@intel.com>
Tested-by: Michael Kelley <mikelley@microsoft.com>
Tested-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Tested-by: Zhang Rui <rui.zhang@intel.com>
Reviewed-by: Arjan van de Ven <arjan@linux.intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lore.kernel.org/r/20230814085112.803864641@linutronix.de
13 files changed:
The maximum possible number of packages in the system. Helpful for per
package facilities to preallocate per package information.
The maximum possible number of packages in the system. Helpful for per
package facilities to preallocate per package information.
- - cpu_llc_id:
-
- A per-CPU variable containing:
+ - cpuinfo_x86.topo.llc_id:
- On Intel, the first APIC ID of the list of CPUs sharing the Last Level
Cache
- On Intel, the first APIC ID of the list of CPUs sharing the Last Level
Cache
if (amd_uncore_llc) {
uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
if (amd_uncore_llc) {
uncore = *per_cpu_ptr(amd_uncore_llc, cpu);
- uncore->id = get_llc_id(cpu);
+ uncore->id = per_cpu_llc_id(cpu);
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
*per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc);
*per_cpu_ptr(amd_uncore_llc, cpu) = uncore;
#define CACHE_MTRR 0x01
#define CACHE_PAT 0x02
#define CACHE_MTRR 0x01
#define CACHE_PAT 0x02
-void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu);
-void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu);
-
void cache_disable(void);
void cache_enable(void);
void set_cache_aps_delayed_init(bool val);
void cache_disable(void);
void cache_enable(void);
void set_cache_aps_delayed_init(bool val);
// Logical ID mappings
u32 logical_pkg_id;
u32 logical_die_id;
// Logical ID mappings
u32 logical_pkg_id;
u32 logical_die_id;
+
+ // Cache level topology IDs
+ u32 llc_id;
+ u32 l2c_id;
DECLARE_PER_CPU(u64, msr_misc_features_shadow);
DECLARE_PER_CPU(u64, msr_misc_features_shadow);
-extern u16 get_llc_id(unsigned int cpu);
+static inline u16 per_cpu_llc_id(unsigned int cpu)
+{
+ return per_cpu(cpu_info.topo.llc_id, cpu);
+}
+
+static inline u16 per_cpu_l2c_id(unsigned int cpu)
+{
+ return per_cpu(cpu_info.topo.l2c_id, cpu);
+}
#ifdef CONFIG_CPU_SUP_AMD
extern u32 amd_get_nodes_per_socket(void);
#ifdef CONFIG_CPU_SUP_AMD
extern u32 amd_get_nodes_per_socket(void);
/* cpus sharing the last level cache: */
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
/* cpus sharing the last level cache: */
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_llc_shared_map);
DECLARE_PER_CPU_READ_MOSTLY(cpumask_var_t, cpu_l2c_shared_map);
-DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id);
-DECLARE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u16, x86_cpu_to_apicid);
DECLARE_EARLY_PER_CPU_READ_MOSTLY(u32, x86_cpu_to_acpiid);
extern unsigned int __max_die_per_package;
#ifdef CONFIG_SMP
extern unsigned int __max_die_per_package;
#ifdef CONFIG_SMP
-#define topology_cluster_id(cpu) (per_cpu(cpu_l2c_id, cpu))
+#define topology_cluster_id(cpu) (cpu_data(cpu).topo.l2c_id)
#define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu))
#define topology_cluster_cpumask(cpu) (cpu_clustergroup_mask(cpu))
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
#define topology_die_cpumask(cpu) (per_cpu(cpu_die_map, cpu))
#define topology_cluster_cpumask(cpu) (cpu_clustergroup_mask(cpu))
#define topology_core_cpumask(cpu) (per_cpu(cpu_core_map, cpu))
- this_cpu_write(cpu_llc_id, node);
/* Account for nodes per socket in multi-core-module processors */
if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
/* Account for nodes per socket in multi-core-module processors */
if (boot_cpu_has(X86_FEATURE_NODEID_MSR)) {
*/
static void amd_get_topology(struct cpuinfo_x86 *c)
{
*/
static void amd_get_topology(struct cpuinfo_x86 *c)
{
- int cpu = smp_processor_id();
-
/* get information required for multi-node processors */
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
int err;
/* get information required for multi-node processors */
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
int err;
if (!err)
c->x86_coreid_bits = get_count_order(c->x86_max_cores);
if (!err)
c->x86_coreid_bits = get_count_order(c->x86_max_cores);
- cacheinfo_amd_init_llc_id(c, cpu);
+ cacheinfo_amd_init_llc_id(c);
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
rdmsrl(MSR_FAM10H_NODE_ID, value);
c->topo.die_id = value & 7;
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
rdmsrl(MSR_FAM10H_NODE_ID, value);
c->topo.die_id = value & 7;
-
- per_cpu(cpu_llc_id, cpu) = c->topo.die_id;
+ c->topo.llc_id = c->topo.die_id;
static void amd_detect_cmp(struct cpuinfo_x86 *c)
{
unsigned bits;
static void amd_detect_cmp(struct cpuinfo_x86 *c)
{
unsigned bits;
- int cpu = smp_processor_id();
bits = c->x86_coreid_bits;
/* Low order bits define the core id (index of core in socket) */
bits = c->x86_coreid_bits;
/* Low order bits define the core id (index of core in socket) */
/* Convert the initial APIC ID into the socket ID */
c->topo.pkg_id = c->topo.initial_apicid >> bits;
/* use socket ID also for last level cache */
/* Convert the initial APIC ID into the socket ID */
c->topo.pkg_id = c->topo.initial_apicid >> bits;
/* use socket ID also for last level cache */
- per_cpu(cpu_llc_id, cpu) = c->topo.die_id = c->topo.pkg_id;
+ c->topo.llc_id = c->topo.die_id = c->topo.pkg_id;
}
u32 amd_get_nodes_per_socket(void)
}
u32 amd_get_nodes_per_socket(void)
node = numa_cpu_node(cpu);
if (node == NUMA_NO_NODE)
node = numa_cpu_node(cpu);
if (node == NUMA_NO_NODE)
- node = get_llc_id(cpu);
+ node = per_cpu_llc_id(cpu);
/*
* On multi-fabric platform (e.g. Numascale NumaChip) a
/*
* On multi-fabric platform (e.g. Numascale NumaChip) a
-void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu)
+void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c)
{
/*
* We may have multiple LLCs if L3 caches exist, so check if we
{
/*
* We may have multiple LLCs if L3 caches exist, so check if we
if (c->x86 < 0x17) {
/* LLC is at the node level. */
if (c->x86 < 0x17) {
/* LLC is at the node level. */
- per_cpu(cpu_llc_id, cpu) = c->topo.die_id;
+ c->topo.llc_id = c->topo.die_id;
} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
/*
* LLC is at the core complex level.
* Core complex ID is ApicId[3] for these processors.
*/
} else if (c->x86 == 0x17 && c->x86_model <= 0x1F) {
/*
* LLC is at the core complex level.
* Core complex ID is ApicId[3] for these processors.
*/
- per_cpu(cpu_llc_id, cpu) = c->topo.apicid >> 3;
+ c->topo.llc_id = c->topo.apicid >> 3;
} else {
/*
* LLC ID is calculated from the number of threads sharing the
} else {
/*
* LLC ID is calculated from the number of threads sharing the
if (num_sharing_cache) {
int bits = get_count_order(num_sharing_cache);
if (num_sharing_cache) {
int bits = get_count_order(num_sharing_cache);
- per_cpu(cpu_llc_id, cpu) = c->topo.apicid >> bits;
+ c->topo.llc_id = c->topo.apicid >> bits;
-void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c, int cpu)
+void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c)
{
/*
* We may have multiple LLCs if L3 caches exist, so check if we
{
/*
* We may have multiple LLCs if L3 caches exist, so check if we
* LLC is at the core complex level.
* Core complex ID is ApicId[3] for these processors.
*/
* LLC is at the core complex level.
* Core complex ID is ApicId[3] for these processors.
*/
- per_cpu(cpu_llc_id, cpu) = c->topo.apicid >> 3;
+ c->topo.llc_id = c->topo.apicid >> 3;
}
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
}
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
unsigned int new_l1d = 0, new_l1i = 0; /* Cache sizes from cpuid(4) */
unsigned int new_l2 = 0, new_l3 = 0, i; /* Cache sizes from cpuid(4) */
unsigned int l2_id = 0, l3_id = 0, num_threads_sharing, index_msb;
-#ifdef CONFIG_SMP
- unsigned int cpu = c->cpu_index;
-#endif
if (c->cpuid_level > 3) {
static int is_initialized;
if (c->cpuid_level > 3) {
static int is_initialized;
if (new_l2) {
l2 = new_l2;
if (new_l2) {
l2 = new_l2;
-#ifdef CONFIG_SMP
- per_cpu(cpu_llc_id, cpu) = l2_id;
- per_cpu(cpu_l2c_id, cpu) = l2_id;
-#endif
+ c->topo.llc_id = l2_id;
+ c->topo.l2c_id = l2_id;
}
if (new_l3) {
l3 = new_l3;
}
if (new_l3) {
l3 = new_l3;
-#ifdef CONFIG_SMP
- per_cpu(cpu_llc_id, cpu) = l3_id;
-#endif
+ c->topo.llc_id = l3_id;
- * If cpu_llc_id is not yet set, this means cpuid_level < 4 which in
+ * If llc_id is not yet set, this means cpuid_level < 4 which in
* turns means that the only possibility is SMT (as indicated in
* cpuid1). Since cpuid2 doesn't specify shared caches, and we know
* that SMT shares all caches, we can unconditionally set cpu_llc_id to
* c->topo.pkg_id.
*/
* turns means that the only possibility is SMT (as indicated in
* cpuid1). Since cpuid2 doesn't specify shared caches, and we know
* that SMT shares all caches, we can unconditionally set cpu_llc_id to
* c->topo.pkg_id.
*/
- if (per_cpu(cpu_llc_id, cpu) == BAD_APICID)
- per_cpu(cpu_llc_id, cpu) = c->topo.pkg_id;
-#endif
+ if (c->topo.llc_id == BAD_APICID)
+ c->topo.llc_id = c->topo.pkg_id;
c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);
int smp_num_siblings = 1;
EXPORT_SYMBOL(smp_num_siblings);
-/* Last level cache ID of each logical CPU */
-DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
-
-u16 get_llc_id(unsigned int cpu)
-{
- return per_cpu(cpu_llc_id, cpu);
-}
-EXPORT_SYMBOL_GPL(get_llc_id);
-
-/* L2 cache ID of each logical CPU */
-DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_l2c_id) = BAD_APICID;
-
static struct ppin_info {
int feature;
int msr_ppin_ctl;
static struct ppin_info {
int feature;
int msr_ppin_ctl;
c->x86_max_cores = 1;
c->x86_coreid_bits = 0;
c->topo.cu_id = 0xff;
c->x86_max_cores = 1;
c->x86_coreid_bits = 0;
c->topo.cu_id = 0xff;
+ c->topo.llc_id = BAD_APICID;
+ c->topo.l2c_id = BAD_APICID;
#ifdef CONFIG_X86_64
c->x86_clflush_size = 64;
c->x86_phys_bits = 36;
#ifdef CONFIG_X86_64
c->x86_clflush_size = 64;
c->x86_phys_bits = 36;
extern void detect_ht(struct cpuinfo_x86 *c);
extern void check_null_seg_clears_base(struct cpuinfo_x86 *c);
extern void detect_ht(struct cpuinfo_x86 *c);
extern void check_null_seg_clears_base(struct cpuinfo_x86 *c);
+void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c);
+void cacheinfo_hygon_init_llc_id(struct cpuinfo_x86 *c);
+
unsigned int aperfmperf_get_khz(int cpu);
void cpu_select_mitigations(void);
unsigned int aperfmperf_get_khz(int cpu);
void cpu_select_mitigations(void);
*/
static void hygon_get_topology(struct cpuinfo_x86 *c)
{
*/
static void hygon_get_topology(struct cpuinfo_x86 *c)
{
- int cpu = smp_processor_id();
-
/* get information required for multi-node processors */
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
int err;
/* get information required for multi-node processors */
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
int err;
if (!boot_cpu_has(X86_FEATURE_HYPERVISOR) && c->x86_model <= 0x3)
c->topo.pkg_id = c->topo.apicid >> APICID_SOCKET_ID_BIT;
if (!boot_cpu_has(X86_FEATURE_HYPERVISOR) && c->x86_model <= 0x3)
c->topo.pkg_id = c->topo.apicid >> APICID_SOCKET_ID_BIT;
- cacheinfo_hygon_init_llc_id(c, cpu);
+ cacheinfo_hygon_init_llc_id(c);
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
rdmsrl(MSR_FAM10H_NODE_ID, value);
c->topo.die_id = value & 7;
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
rdmsrl(MSR_FAM10H_NODE_ID, value);
c->topo.die_id = value & 7;
-
- per_cpu(cpu_llc_id, cpu) = c->topo.die_id;
+ c->topo.llc_id = c->topo.die_id;
static void hygon_detect_cmp(struct cpuinfo_x86 *c)
{
unsigned int bits;
static void hygon_detect_cmp(struct cpuinfo_x86 *c)
{
unsigned int bits;
- int cpu = smp_processor_id();
bits = c->x86_coreid_bits;
/* Low order bits define the core id (index of core in socket) */
c->topo.core_id = c->topo.initial_apicid & ((1 << bits)-1);
/* Convert the initial APIC ID into the socket ID */
c->topo.pkg_id = c->topo.initial_apicid >> bits;
bits = c->x86_coreid_bits;
/* Low order bits define the core id (index of core in socket) */
c->topo.core_id = c->topo.initial_apicid & ((1 << bits)-1);
/* Convert the initial APIC ID into the socket ID */
c->topo.pkg_id = c->topo.initial_apicid >> bits;
- /* use socket ID also for last level cache */
- per_cpu(cpu_llc_id, cpu) = c->topo.die_id = c->topo.pkg_id;
+ /* Use package ID also for last level cache */
+ c->topo.llc_id = c->topo.die_id = c->topo.pkg_id;
}
static void srat_detect_node(struct cpuinfo_x86 *c)
}
static void srat_detect_node(struct cpuinfo_x86 *c)
node = numa_cpu_node(cpu);
if (node == NUMA_NO_NODE)
node = numa_cpu_node(cpu);
if (node == NUMA_NO_NODE)
- node = per_cpu(cpu_llc_id, cpu);
/*
* On multi-fabric platform (e.g. Numascale NumaChip) a
/*
* On multi-fabric platform (e.g. Numascale NumaChip) a
if (c->topo.pkg_id == o->topo.pkg_id &&
c->topo.die_id == o->topo.die_id &&
if (c->topo.pkg_id == o->topo.pkg_id &&
c->topo.die_id == o->topo.die_id &&
- per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) {
+ per_cpu_llc_id(cpu1) == per_cpu_llc_id(cpu2)) {
if (c->topo.core_id == o->topo.core_id)
return topology_sane(c, o, "smt");
if (c->topo.core_id == o->topo.core_id)
return topology_sane(c, o, "smt");
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
/* If the arch didn't set up l2c_id, fall back to SMT */
int cpu1 = c->cpu_index, cpu2 = o->cpu_index;
/* If the arch didn't set up l2c_id, fall back to SMT */
- if (per_cpu(cpu_l2c_id, cpu1) == BAD_APICID)
+ if (per_cpu_l2c_id(cpu1) == BAD_APICID)
return match_smt(c, o);
/* Do not match if L2 cache id does not match: */
return match_smt(c, o);
/* Do not match if L2 cache id does not match: */
- if (per_cpu(cpu_l2c_id, cpu1) != per_cpu(cpu_l2c_id, cpu2))
+ if (per_cpu_l2c_id(cpu1) != per_cpu_l2c_id(cpu2))
return false;
return topology_sane(c, o, "l2c");
return false;
return topology_sane(c, o, "l2c");
bool intel_snc = id && id->driver_data;
/* Do not match if we do not have a valid APICID for cpu: */
bool intel_snc = id && id->driver_data;
/* Do not match if we do not have a valid APICID for cpu: */
- if (per_cpu(cpu_llc_id, cpu1) == BAD_APICID)
+ if (per_cpu_llc_id(cpu1) == BAD_APICID)
return false;
/* Do not match if LLC id does not match: */
return false;
/* Do not match if LLC id does not match: */
- if (per_cpu(cpu_llc_id, cpu1) != per_cpu(cpu_llc_id, cpu2))
+ if (per_cpu_llc_id(cpu1) != per_cpu_llc_id(cpu2))