diff --git a/Documentation/cpusets.txt b/Documentation/cpusets.txt
index b875d231ac748c64129865b8b4c130c929b474c1..ec9de6917f01f34c088cd612121caaadc3edf08b 100644
--- a/Documentation/cpusets.txt
+++ b/Documentation/cpusets.txt
@@ -87,9 +87,6 @@ This can be especially valuable on:
       and a database), or
     * NUMA systems running large HPC applications with demanding
       performance characteristics.
-    * Also cpu_exclusive cpusets are useful for servers running orthogonal
-      workloads such as RT applications requiring low latency and HPC
-      applications that are throughput sensitive
 
 These subsets, or "soft partitions" must be able to be dynamically
 adjusted, as the job mix changes, without impacting other concurrently
@@ -132,8 +129,6 @@ Cpusets extends these two mechanisms as follows:
  - A cpuset may be marked exclusive, which ensures that no other
    cpuset (except direct ancestors and descendents) may contain
    any overlapping CPUs or Memory Nodes.
-   Also a cpu_exclusive cpuset would be associated with a sched
-   domain.
  - You can list all the tasks (by pid) attached to any cpuset.
 
 The implementation of cpusets requires a few, simple hooks
@@ -145,9 +140,6 @@ into the rest of the kernel, none in performance critical paths:
    allowed in that tasks cpuset.
  - in sched.c migrate_all_tasks(), to keep migrating tasks within
    the CPUs allowed by their cpuset, if possible.
- - in sched.c, a new API partition_sched_domains for handling
-   sched domain changes associated with cpu_exclusive cpusets
-   and related changes in both sched.c and arch/ia64/kernel/domain.c
  - in the mbind and set_mempolicy system calls, to mask the requested
    Memory Nodes by what's allowed in that tasks cpuset.
  - in page_alloc.c, to restrict memory to allowed nodes.
@@ -232,15 +224,6 @@ If a cpuset is cpu or mem exclusive, no other cpuset, other than a direct
 ancestor or descendent, may share any of the same CPUs or Memory Nodes.
 
-A cpuset that is cpu_exclusive has a scheduler (sched) domain
-associated with it. The sched domain consists of all CPUs in the
-current cpuset that are not part of any exclusive child cpusets.
-This ensures that the scheduler load balancing code only balances
-against the CPUs that are in the sched domain as defined above and
-not all of the CPUs in the system. This removes any overhead due to
-load balancing code trying to pull tasks outside of the cpu_exclusive
-cpuset only to be prevented by the tasks' cpus_allowed mask.
-
 A cpuset that is mem_exclusive restricts kernel allocations for
 page, buffer and other data commonly shared by the kernel across
 multiple users. All cpusets, whether mem_exclusive or not, restrict
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 285ee4827a3c98573119f1ab801d8b5e7634897d..592e3a55f818fe328222a6051d6a51c5c15ecfae 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -729,9 +729,6 @@ struct sched_domain {
 #endif
 };
 
-extern int partition_sched_domains(cpumask_t *partition1,
-				    cpumask_t *partition2);
-
 #endif	/* CONFIG_SMP */
 
 /*
diff --git a/kernel/cpuset.c b/kernel/cpuset.c
index e196510aa40f6975310655450030c9d28a4b1e12..0864f4097930e1195450c4359c144e96369786e9 100644
--- a/kernel/cpuset.c
+++ b/kernel/cpuset.c
@@ -754,61 +754,6 @@ static int validate_change(const struct cpuset *cur, const struct cpuset *trial)
 	return 0;
 }
 
-/*
- * For a given cpuset cur, partition the system as follows
- * a. All cpus in the parent cpuset's cpus_allowed that are not part of any
- *    exclusive child cpusets
- * b. All cpus in the current cpuset's cpus_allowed that are not part of any
- *    exclusive child cpusets
- * Build these two partitions by calling partition_sched_domains
- *
- * Call with manage_mutex held. May nest a call to the
- * lock_cpu_hotplug()/unlock_cpu_hotplug() pair.
- * Must not be called holding callback_mutex, because we must
- * not call lock_cpu_hotplug() while holding callback_mutex.
- */
-
-static void update_cpu_domains(struct cpuset *cur)
-{
-	struct cpuset *c, *par = cur->parent;
-	cpumask_t pspan, cspan;
-
-	if (par == NULL || cpus_empty(cur->cpus_allowed))
-		return;
-
-	/*
-	 * Get all cpus from parent's cpus_allowed not part of exclusive
-	 * children
-	 */
-	pspan = par->cpus_allowed;
-	list_for_each_entry(c, &par->children, sibling) {
-		if (is_cpu_exclusive(c))
-			cpus_andnot(pspan, pspan, c->cpus_allowed);
-	}
-	if (!is_cpu_exclusive(cur)) {
-		cpus_or(pspan, pspan, cur->cpus_allowed);
-		if (cpus_equal(pspan, cur->cpus_allowed))
-			return;
-		cspan = CPU_MASK_NONE;
-	} else {
-		if (cpus_empty(pspan))
-			return;
-		cspan = cur->cpus_allowed;
-		/*
-		 * Get all cpus from current cpuset's cpus_allowed not part
-		 * of exclusive children
-		 */
-		list_for_each_entry(c, &cur->children, sibling) {
-			if (is_cpu_exclusive(c))
-				cpus_andnot(cspan, cspan, c->cpus_allowed);
-		}
-	}
-
-	lock_cpu_hotplug();
-	partition_sched_domains(&pspan, &cspan);
-	unlock_cpu_hotplug();
-}
-
 /*
  * Call with manage_mutex held. May take callback_mutex during call.
  */
@@ -816,7 +761,7 @@ static void update_cpu_domains(struct cpuset *cur)
 static int update_cpumask(struct cpuset *cs, char *buf)
 {
 	struct cpuset trialcs;
-	int retval, cpus_unchanged;
+	int retval;
 
 	/* top_cpuset.cpus_allowed tracks cpu_online_map; it's read-only */
 	if (cs == &top_cpuset)
@@ -843,12 +788,9 @@ static int update_cpumask(struct cpuset *cs, char *buf)
 	retval = validate_change(cs, &trialcs);
 	if (retval < 0)
 		return retval;
-	cpus_unchanged = cpus_equal(cs->cpus_allowed, trialcs.cpus_allowed);
 	mutex_lock(&callback_mutex);
 	cs->cpus_allowed = trialcs.cpus_allowed;
 	mutex_unlock(&callback_mutex);
-	if (is_cpu_exclusive(cs) && !cpus_unchanged)
-		update_cpu_domains(cs);
 	return 0;
 }
 
@@ -1085,7 +1027,7 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 {
 	int turning_on;
 	struct cpuset trialcs;
-	int err, cpu_exclusive_changed;
+	int err;
 
 	turning_on = (simple_strtoul(buf, NULL, 10) != 0);
 
@@ -1098,14 +1040,10 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, char *buf)
 	err = validate_change(cs, &trialcs);
 	if (err < 0)
 		return err;
-	cpu_exclusive_changed =
-		(is_cpu_exclusive(cs) != is_cpu_exclusive(&trialcs));
 	mutex_lock(&callback_mutex);
 	cs->flags = trialcs.flags;
 	mutex_unlock(&callback_mutex);
 
-	if (cpu_exclusive_changed)
-		update_cpu_domains(cs);
 	return 0;
 }
 
@@ -1965,17 +1903,6 @@ static int cpuset_mkdir(struct inode *dir, struct dentry *dentry, int mode)
 	return cpuset_create(c_parent, dentry->d_name.name, mode | S_IFDIR);
 }
 
-/*
- * Locking note on the strange update_flag() call below:
- *
- * If the cpuset being removed is marked cpu_exclusive, then simulate
- * turning cpu_exclusive off, which will call update_cpu_domains().
- * The lock_cpu_hotplug() call in update_cpu_domains() must not be
- * made while holding callback_mutex. Elsewhere the kernel nests
- * callback_mutex inside lock_cpu_hotplug() calls. So the reverse
- * nesting would risk an ABBA deadlock.
- */
-
 static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 {
 	struct cpuset *cs = dentry->d_fsdata;
@@ -1995,13 +1922,6 @@ static int cpuset_rmdir(struct inode *unused_dir, struct dentry *dentry)
 		mutex_unlock(&manage_mutex);
 		return -EBUSY;
 	}
-	if (is_cpu_exclusive(cs)) {
-		int retval = update_flag(CS_CPU_EXCLUSIVE, cs, "0");
-		if (retval < 0) {
-			mutex_unlock(&manage_mutex);
-			return retval;
-		}
-	}
 	parent = cs->parent;
 	mutex_lock(&callback_mutex);
 	set_bit(CS_REMOVED, &cs->flags);
diff --git a/kernel/sched.c b/kernel/sched.c
index 78c8fbd373a39046304c908d4d35d29ad130c25f..0da2b2635c54b0257ceb3dcbdc49a8b9a5840762 100644
--- a/kernel/sched.c
+++ b/kernel/sched.c
@@ -6348,35 +6348,6 @@ static void detach_destroy_domains(const cpumask_t *cpu_map)
 	arch_destroy_sched_domains(cpu_map);
 }
 
-/*
- * Partition sched domains as specified by the cpumasks below.
- * This attaches all cpus from the cpumasks to the NULL domain,
- * waits for a RCU quiescent period, recalculates sched
- * domain information and then attaches them back to the
- * correct sched domains
- * Call with hotplug lock held
- */
-int partition_sched_domains(cpumask_t *partition1, cpumask_t *partition2)
-{
-	cpumask_t change_map;
-	int err = 0;
-
-	cpus_and(*partition1, *partition1, cpu_online_map);
-	cpus_and(*partition2, *partition2, cpu_online_map);
-	cpus_or(change_map, *partition1, *partition2);
-
-	/* Detach sched domains from all of the affected cpus */
-	detach_destroy_domains(&change_map);
-	if (!cpus_empty(*partition1))
-		err = build_sched_domains(partition1);
-	if (!err && !cpus_empty(*partition2))
-		err = build_sched_domains(partition2);
-
-	register_sched_domain_sysctl();
-
-	return err;
-}
-
 #if defined(CONFIG_SCHED_MC) || defined(CONFIG_SCHED_SMT)
 static int arch_reinit_sched_domains(void)
 {
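
For readers tracing the kernel/cpuset.c and kernel/sched.c removals above, here is a
minimal, hypothetical user-space sketch of the partition arithmetic that the removed
update_cpu_domains() performed before handing two masks to partition_sched_domains().
It is not part of this patch: plain unsigned long bitmasks stand in for cpumask_t, and
the 8-CPU layout is invented.

/* Illustrative sketch only -- not part of the patch above. */
#include <stdio.h>

int main(void)
{
	unsigned long parent_cpus = 0xff;	/* parent cpuset: CPUs 0-7 */
	unsigned long cur_cpus    = 0xf0;	/* cpu_exclusive child: CPUs 4-7 */

	/* pspan: parent's CPUs minus those owned by exclusive children,
	 * the cpus_andnot() step in the removed update_cpu_domains() */
	unsigned long pspan = parent_cpus & ~cur_cpus;	/* CPUs 0-3 */

	/* cspan: the exclusive cpuset's own CPUs; exclusive grandchildren
	 * (none in this example) would be subtracted the same way */
	unsigned long cspan = cur_cpus;			/* CPUs 4-7 */

	printf("pspan = 0x%lx, cspan = 0x%lx\n", pspan, cspan);
	return 0;
}

From these two masks the scheduler domains were rebuilt as independent balancing
groups (CPUs 0-3 and CPUs 4-7 in this example), which is exactly the behaviour the
patch removes.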