diff --git a/include/asm-generic/topology.h b/include/asm-generic/topology.h index 510df36dd5d44f8b40e0ed048bbb7c8fa1ef506f..fd60700503c8545795a031e15147e466154f0051 100644 --- a/include/asm-generic/topology.h +++ b/include/asm-generic/topology.h @@ -34,6 +34,9 @@ #ifndef cpu_to_node #define cpu_to_node(cpu) ((void)(cpu),0) #endif +#ifndef cpu_to_mem +#define cpu_to_mem(cpu) ((void)(cpu),0) +#endif #ifndef parent_node #define parent_node(node) ((void)(node),0) #endif diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0fa491326c4a165eceeef43f24c0f30ac3834832..b4d109e389b806f205aa1575135001e09a864bf5 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -671,6 +671,12 @@ void memory_present(int nid, unsigned long start, unsigned long end); static inline void memory_present(int nid, unsigned long start, unsigned long end) {} #endif +#ifdef CONFIG_HAVE_MEMORYLESS_NODES +int local_memory_node(int node_id); +#else +static inline int local_memory_node(int node_id) { return node_id; }; +#endif + #ifdef CONFIG_NEED_NODE_MEMMAP_SIZE unsigned long __init node_memmap_size_bytes(int, unsigned long, unsigned long); #endif diff --git a/include/linux/topology.h b/include/linux/topology.h index 2e5518f46571647f1721a2088e117168a5e8aaa7..c44df50a05ab5a81e42b1a37299fb9e790298fc3 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -251,6 +251,67 @@ static inline int numa_node_id(void) #endif /* [!]CONFIG_USE_PERCPU_NUMA_NODE_ID */ +#ifdef CONFIG_HAVE_MEMORYLESS_NODES + +/* + * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly. + * It will not be defined when CONFIG_HAVE_MEMORYLESS_NODES is not defined. + * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem(). + */ +DECLARE_PER_CPU(int, _numa_mem_); + +#ifndef set_numa_mem +static inline void set_numa_mem(int node) +{ + percpu_write(_numa_mem_, node); +} +#endif + +#ifndef numa_mem_id +/* Returns the number of the nearest Node with memory */ +static inline int numa_mem_id(void) +{ + return __this_cpu_read(_numa_mem_); +} +#endif + +#ifndef cpu_to_mem +static inline int cpu_to_mem(int cpu) +{ + return per_cpu(_numa_mem_, cpu); +} +#endif + +#ifndef set_cpu_numa_mem +static inline void set_cpu_numa_mem(int cpu, int node) +{ + per_cpu(_numa_mem_, cpu) = node; +} +#endif + +#else /* !CONFIG_HAVE_MEMORYLESS_NODES */ + +static inline void set_numa_mem(int node) {} + +static inline void set_cpu_numa_mem(int cpu, int node) {} + +#ifndef numa_mem_id +/* Returns the number of the nearest Node with memory */ +static inline int numa_mem_id(void) +{ + return numa_node_id(); +} +#endif + +#ifndef cpu_to_mem +static inline int cpu_to_mem(int cpu) +{ + return cpu_to_node(cpu); +} +#endif + +#endif /* [!]CONFIG_HAVE_MEMORYLESS_NODES */ + #ifndef topology_physical_package_id #define topology_physical_package_id(cpu) ((void)(cpu), -1) #endif diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 6fe1b65ee1a81a761c0261456dfe4e01dab81c21..431214b941acec39f68e171e468a59f74b9b749e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -62,6 +62,17 @@ DEFINE_PER_CPU(int, numa_node); EXPORT_PER_CPU_SYMBOL(numa_node); #endif +#ifdef CONFIG_HAVE_MEMORYLESS_NODES +/* + * N.B., Do NOT reference the '_numa_mem_' per cpu variable directly. + * It will not be defined when CONFIG_HAVE_MEMORYLESS_NODES is not defined. + * Use the accessor functions set_numa_mem(), numa_mem_id() and cpu_to_mem() + * defined in . + */ +DEFINE_PER_CPU(int, _numa_mem_); /* Kernel "local memory" node */ +EXPORT_PER_CPU_SYMBOL(_numa_mem_); +#endif + /* * Array of node states. */ @@ -2861,6 +2872,24 @@ static void build_zonelist_cache(pg_data_t *pgdat) zlc->z_to_n[z - zonelist->_zonerefs] = zonelist_node_idx(z); } +#ifdef CONFIG_HAVE_MEMORYLESS_NODES +/* + * Return node id of node used for "local" allocations. + * I.e., first node id of first zone in arg node's generic zonelist. + * Used for initializing percpu 'numa_mem', which is used primarily + * for kernel allocations, so use GFP_KERNEL flags to locate zonelist. + */ +int local_memory_node(int node) +{ + struct zone *zone; + + (void)first_zones_zonelist(node_zonelist(node, GFP_KERNEL), + gfp_zone(GFP_KERNEL), + NULL, + &zone); + return zone->node; +} +#endif #else /* CONFIG_NUMA */ @@ -2975,9 +3004,23 @@ static __init_refok int __build_all_zonelists(void *data) * needs the percpu allocator in order to allocate its pagesets * (a chicken-egg dilemma). */ - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { setup_pageset(&per_cpu(boot_pageset, cpu), 0); +#ifdef CONFIG_HAVE_MEMORYLESS_NODES + /* + * We now know the "local memory node" for each node-- + * i.e., the node of the first zone in the generic zonelist. + * Set up numa_mem percpu variable for on-line cpus. During + * boot, only the boot cpu should be on-line; we'll init the + * secondary cpus' numa_mem as they come on-line. During + * node/memory hotplug, we'll fixup all on-line cpus. + */ + if (cpu_online(cpu)) + set_cpu_numa_mem(cpu, local_memory_node(cpu_to_node(cpu))); +#endif + } + return 0; }