diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 736d73801cb6a222f01636d665e6156f76d4860e..1feebf16ab08239102823abb9c8bbd9fd92e2a2f 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -20,7 +20,7 @@ extern void cpuset_fork(struct task_struct *p); extern void cpuset_exit(struct task_struct *p); extern cpumask_t cpuset_cpus_allowed(const struct task_struct *p); void cpuset_init_current_mems_allowed(void); -void cpuset_update_current_mems_allowed(void); +void cpuset_update_task_memory_state(void); #define cpuset_nodes_subset_current_mems_allowed(nodes) \ nodes_subset((nodes), current->mems_allowed) int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl); @@ -51,7 +51,7 @@ static inline cpumask_t cpuset_cpus_allowed(struct task_struct *p) } static inline void cpuset_init_current_mems_allowed(void) {} -static inline void cpuset_update_current_mems_allowed(void) {} +static inline void cpuset_update_task_memory_state(void) {} #define cpuset_nodes_subset_current_mems_allowed(nodes) (1) static inline int cpuset_zonelist_valid_mems_allowed(struct zonelist *zl) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index d9349cc48b95d2b0273a7b4b120c58e083b69bd7..e9917d71628a039d5f0e157cc603f08ef8bb4959 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -584,13 +584,26 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) BUG_ON(!nodes_intersects(*pmask, node_online_map)); } -/* - * Refresh current tasks mems_allowed and mems_generation from current - * tasks cpuset. +/** + * cpuset_update_task_memory_state - update task memory placement * - * Call without callback_sem or task_lock() held. May be called with - * or without manage_sem held. Will acquire task_lock() and might - * acquire callback_sem during call. + * If the current tasks cpusets mems_allowed changed behind our + * backs, update current->mems_allowed, mems_generation and task NUMA + * mempolicy to the new value. + * + * Task mempolicy is updated by rebinding it relative to the + * current->cpuset if a task has its memory placement changed. + * Do not call this routine if in_interrupt(). + * + * Call without callback_sem or task_lock() held. May be called + * with or without manage_sem held. Except in early boot or + * an exiting task, when tsk->cpuset is NULL, this routine will + * acquire task_lock(). We don't need to use task_lock to guard + * against another task changing a non-NULL cpuset pointer to NULL, + * as that is only done by a task on itself, and if the current task + * is here, it is not simultaneously in the exit code NULL'ing its + * cpuset pointer. This routine also might acquire callback_sem and + * current->mm->mmap_sem during call. * * The task_lock() is required to dereference current->cpuset safely. * Without it, we could pick up the pointer value of current->cpuset @@ -605,32 +618,36 @@ static void guarantee_online_mems(const struct cpuset *cs, nodemask_t *pmask) * task has been modifying its cpuset. */ -static void refresh_mems(void) +void cpuset_update_task_memory_state() { int my_cpusets_mem_gen; + struct task_struct *tsk = current; + struct cpuset *cs = tsk->cpuset; - task_lock(current); - my_cpusets_mem_gen = current->cpuset->mems_generation; - task_unlock(current); + if (unlikely(!cs)) + return; + + task_lock(tsk); + my_cpusets_mem_gen = cs->mems_generation; + task_unlock(tsk); - if (current->cpuset_mems_generation != my_cpusets_mem_gen) { - struct cpuset *cs; - nodemask_t oldmem = current->mems_allowed; + if (my_cpusets_mem_gen != tsk->cpuset_mems_generation) { + nodemask_t oldmem = tsk->mems_allowed; int migrate; down(&callback_sem); - task_lock(current); - cs = current->cpuset; + task_lock(tsk); + cs = tsk->cpuset; /* Maybe changed when task not locked */ migrate = is_memory_migrate(cs); - guarantee_online_mems(cs, ¤t->mems_allowed); - current->cpuset_mems_generation = cs->mems_generation; - task_unlock(current); + guarantee_online_mems(cs, &tsk->mems_allowed); + tsk->cpuset_mems_generation = cs->mems_generation; + task_unlock(tsk); up(&callback_sem); - if (!nodes_equal(oldmem, current->mems_allowed)) { - numa_policy_rebind(&oldmem, ¤t->mems_allowed); + numa_policy_rebind(&oldmem, &tsk->mems_allowed); + if (!nodes_equal(oldmem, tsk->mems_allowed)) { if (migrate) { - do_migrate_pages(current->mm, &oldmem, - ¤t->mems_allowed, + do_migrate_pages(tsk->mm, &oldmem, + &tsk->mems_allowed, MPOL_MF_MOVE_ALL); } } @@ -1630,7 +1647,7 @@ static long cpuset_create(struct cpuset *parent, const char *name, int mode) return -ENOMEM; down(&manage_sem); - refresh_mems(); + cpuset_update_task_memory_state(); cs->flags = 0; if (notify_on_release(parent)) set_bit(CS_NOTIFY_ON_RELEASE, &cs->flags); @@ -1688,7 +1705,7 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry) /* the vfs holds both inode->i_sem already */ down(&manage_sem); - refresh_mems(); + cpuset_update_task_memory_state(); if (atomic_read(&cs->count) > 0) { up(&manage_sem); return -EBUSY; @@ -1872,36 +1889,6 @@ void cpuset_init_current_mems_allowed(void) current->mems_allowed = NODE_MASK_ALL; } -/** - * cpuset_update_current_mems_allowed - update mems parameters to new values - * - * If the current tasks cpusets mems_allowed changed behind our backs, - * update current->mems_allowed and mems_generation to the new value. - * Do not call this routine if in_interrupt(). - * - * Call without callback_sem or task_lock() held. May be called - * with or without manage_sem held. Unless exiting, it will acquire - * task_lock(). Also might acquire callback_sem during call to - * refresh_mems(). - */ - -void cpuset_update_current_mems_allowed(void) -{ - struct cpuset *cs; - int need_to_refresh = 0; - - task_lock(current); - cs = current->cpuset; - if (!cs) - goto done; - if (current->cpuset_mems_generation != cs->mems_generation) - need_to_refresh = 1; -done: - task_unlock(current); - if (need_to_refresh) - refresh_mems(); -} - /** * cpuset_zonelist_valid_mems_allowed - check zonelist vs. curremt mems_allowed * @zl: the zonelist to be checked diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 9dea2b8a7d485b43b7251f182a55e7e968e58655..515bfeee027e1cdecad2618a5c61a17ef1962d5f 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -387,7 +387,7 @@ static int contextualize_policy(int mode, nodemask_t *nodes) if (!nodes) return 0; - cpuset_update_current_mems_allowed(); + cpuset_update_task_memory_state(); if (!cpuset_nodes_subset_current_mems_allowed(*nodes)) return -EINVAL; return mpol_check_policy(mode, nodes); @@ -461,7 +461,7 @@ long do_get_mempolicy(int *policy, nodemask_t *nmask, struct vm_area_struct *vma = NULL; struct mempolicy *pol = current->mempolicy; - cpuset_update_current_mems_allowed(); + cpuset_update_task_memory_state(); if (flags & ~(unsigned long)(MPOL_F_NODE|MPOL_F_ADDR)) return -EINVAL; if (flags & MPOL_F_ADDR) { @@ -1089,7 +1089,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) { struct mempolicy *pol = get_vma_policy(current, vma, addr); - cpuset_update_current_mems_allowed(); + cpuset_update_task_memory_state(); if (unlikely(pol->policy == MPOL_INTERLEAVE)) { unsigned nid; @@ -1115,7 +1115,7 @@ alloc_page_vma(gfp_t gfp, struct vm_area_struct *vma, unsigned long addr) * interrupt context and apply the current process NUMA policy. * Returns NULL when no page can be allocated. * - * Don't call cpuset_update_current_mems_allowed() unless + * Don't call cpuset_update_task_memory_state() unless * 1) it's ok to take cpuset_sem (can WAIT), and * 2) allocating for current task (not interrupt). */ @@ -1124,7 +1124,7 @@ struct page *alloc_pages_current(gfp_t gfp, unsigned order) struct mempolicy *pol = current->mempolicy; if ((gfp & __GFP_WAIT) && !in_interrupt()) - cpuset_update_current_mems_allowed(); + cpuset_update_task_memory_state(); if (!pol || in_interrupt()) pol = &default_policy; if (pol->policy == MPOL_INTERLEAVE)