提交 9a9594ef 编写于 作者: L Linus Torvalds

Merge branch 'smp-hotplug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull SMP hotplug updates from Thomas Gleixner:
 "This update is primarily a cleanup of the CPU hotplug locking code.

  The hotplug locking mechanism is an open coded RWSEM, which allows
  recursive locking. The main problem with that is the recursive nature
  as it evades the full lockdep coverage and hides potential deadlocks.

  The rework replaces the open coded RWSEM with a percpu RWSEM and
  establishes full lockdep coverage that way.

  The bulk of the changes fix up recursive locking issues and address
  the now fully reported potential deadlocks all over the place. Some of
  these deadlocks have been observed in the RT tree, but on mainline the
  probability was low enough to hide them away."

* 'smp-hotplug-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (37 commits)
  cpu/hotplug: Constify attribute_group structures
  powerpc: Only obtain cpu_hotplug_lock if called by rtasd
  ARM/hw_breakpoint: Fix possible recursive locking for arch_hw_breakpoint_init
  cpu/hotplug: Remove unused check_for_tasks() function
  perf/core: Don't release cred_guard_mutex if not taken
  cpuhotplug: Link lock stacks for hotplug callbacks
  acpi/processor: Prevent cpu hotplug deadlock
  sched: Provide is_percpu_thread() helper
  cpu/hotplug: Convert hotplug locking to percpu rwsem
  s390: Prevent hotplug rwsem recursion
  arm: Prevent hotplug rwsem recursion
  arm64: Prevent cpu hotplug rwsem recursion
  kprobes: Cure hotplug lock ordering issues
  jump_label: Reorder hotplug lock and jump_label_lock
  perf/tracing/cpuhotplug: Fix locking order
  ACPI/processor: Use cpu_hotplug_disable() instead of get_online_cpus()
  PCI: Replace the racy recursion prevention
  PCI: Use cpu_hotplug_disable() instead of get_online_cpus()
  perf/x86/intel: Drop get_online_cpus() in intel_snb_check_microcode()
  x86/perf: Drop EXPORT of perf_check_microcode
  ...
......@@ -1090,7 +1090,7 @@ static int __init arch_hw_breakpoint_init(void)
* driven low on this core and there isn't an architected way to
* determine that.
*/
get_online_cpus();
cpus_read_lock();
register_undef_hook(&debug_reg_hook);
/*
......@@ -1098,15 +1098,16 @@ static int __init arch_hw_breakpoint_init(void)
* assume that a halting debugger will leave the world in a nice state
* for us.
*/
ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, "arm/hw_breakpoint:online",
ret = cpuhp_setup_state_cpuslocked(CPUHP_AP_ONLINE_DYN,
"arm/hw_breakpoint:online",
dbg_reset_online, NULL);
unregister_undef_hook(&debug_reg_hook);
if (WARN_ON(ret < 0) || !cpumask_empty(&debug_err_mask)) {
core_num_brps = 0;
core_num_wrps = 0;
if (ret > 0)
cpuhp_remove_state_nocalls(ret);
put_online_cpus();
cpuhp_remove_state_nocalls_cpuslocked(ret);
cpus_read_unlock();
return 0;
}
......@@ -1124,7 +1125,7 @@ static int __init arch_hw_breakpoint_init(void)
TRAP_HWBKPT, "watchpoint debug exception");
hook_ifault_code(FAULT_CODE_DEBUG, hw_breakpoint_pending, SIGTRAP,
TRAP_HWBKPT, "breakpoint debug exception");
put_online_cpus();
cpus_read_unlock();
/* Register PM notifiers. */
pm_init();
......
......@@ -124,5 +124,5 @@ void __kprobes patch_text(void *addr, unsigned int insn)
.insn = insn,
};
stop_machine(patch_text_stop_machine, &patch, NULL);
stop_machine_cpuslocked(patch_text_stop_machine, &patch, NULL);
}
......@@ -182,7 +182,8 @@ void __kprobes kprobes_remove_breakpoint(void *addr, unsigned int insn)
.addr = addr,
.insn = insn,
};
stop_machine(__kprobes_remove_breakpoint, &p, cpu_online_mask);
stop_machine_cpuslocked(__kprobes_remove_breakpoint, &p,
cpu_online_mask);
}
void __kprobes arch_disarm_kprobe(struct kprobe *p)
......
......@@ -433,7 +433,6 @@ u32 aarch64_set_branch_offset(u32 insn, s32 offset);
bool aarch64_insn_hotpatch_safe(u32 old_insn, u32 new_insn);
int aarch64_insn_patch_text_nosync(void *addr, u32 insn);
int aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt);
int aarch64_insn_patch_text(void *addrs[], u32 insns[], int cnt);
s32 aarch64_insn_adrp_get_offset(u32 insn);
......
......@@ -255,6 +255,7 @@ static int __kprobes aarch64_insn_patch_text_cb(void *arg)
return ret;
}
static
int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt)
{
struct aarch64_insn_patch patch = {
......@@ -267,7 +268,7 @@ int __kprobes aarch64_insn_patch_text_sync(void *addrs[], u32 insns[], int cnt)
if (cnt <= 0)
return -EINVAL;
return stop_machine(aarch64_insn_patch_text_cb, &patch,
return stop_machine_cpuslocked(aarch64_insn_patch_text_cb, &patch,
cpu_online_mask);
}
......
......@@ -58,7 +58,6 @@ void arch_jump_label_transform(struct jump_entry *e,
insn.word = 0; /* nop */
}
get_online_cpus();
mutex_lock(&text_mutex);
if (IS_ENABLED(CONFIG_CPU_MICROMIPS)) {
insn_p->halfword[0] = insn.word >> 16;
......@@ -70,7 +69,6 @@ void arch_jump_label_transform(struct jump_entry *e,
(unsigned long)insn_p + sizeof(*insn_p));
mutex_unlock(&text_mutex);
put_online_cpus();
}
#endif /* HAVE_JUMP_LABEL */
......@@ -43,6 +43,7 @@ extern void __init dump_numa_cpu_topology(void);
extern int sysfs_add_device_to_node(struct device *dev, int nid);
extern void sysfs_remove_device_from_node(struct device *dev, int nid);
extern int numa_update_cpu_topology(bool cpus_locked);
static inline int early_cpu_to_node(int cpu)
{
......@@ -71,6 +72,11 @@ static inline void sysfs_remove_device_from_node(struct device *dev,
int nid)
{
}
static inline int numa_update_cpu_topology(bool cpus_locked)
{
return 0;
}
#endif /* CONFIG_NUMA */
#if defined(CONFIG_NUMA) && defined(CONFIG_PPC_SPLPAR)
......
......@@ -283,7 +283,7 @@ static void prrn_work_fn(struct work_struct *work)
* the RTAS event.
*/
pseries_devicetree_update(-prrn_update_scope);
arch_update_cpu_topology();
numa_update_cpu_topology(false);
}
static DECLARE_WORK(prrn_work, prrn_work_fn);
......
......@@ -3368,7 +3368,7 @@ void kvmppc_alloc_host_rm_ops(void)
return;
}
get_online_cpus();
cpus_read_lock();
for (cpu = 0; cpu < nr_cpu_ids; cpu += threads_per_core) {
if (!cpu_online(cpu))
......@@ -3390,17 +3390,17 @@ void kvmppc_alloc_host_rm_ops(void)
l_ops = (unsigned long) ops;
if (cmpxchg64((unsigned long *)&kvmppc_host_rm_ops_hv, 0, l_ops)) {
put_online_cpus();
cpus_read_unlock();
kfree(ops->rm_core);
kfree(ops);
return;
}
cpuhp_setup_state_nocalls(CPUHP_KVM_PPC_BOOK3S_PREPARE,
cpuhp_setup_state_nocalls_cpuslocked(CPUHP_KVM_PPC_BOOK3S_PREPARE,
"ppc/kvm_book3s:prepare",
kvmppc_set_host_core,
kvmppc_clear_host_core);
put_online_cpus();
cpus_read_unlock();
}
void kvmppc_free_host_rm_ops(void)
......
......@@ -1311,8 +1311,10 @@ static int update_lookup_table(void *data)
/*
* Update the node maps and sysfs entries for each cpu whose home node
* has changed. Returns 1 when the topology has changed, and 0 otherwise.
*
* cpus_locked says whether we already hold cpu_hotplug_lock.
*/
int arch_update_cpu_topology(void)
int numa_update_cpu_topology(bool cpus_locked)
{
unsigned int cpu, sibling, changed = 0;
struct topology_update_data *updates, *ud;
......@@ -1400,6 +1402,10 @@ int arch_update_cpu_topology(void)
if (!cpumask_weight(&updated_cpus))
goto out;
if (cpus_locked)
stop_machine_cpuslocked(update_cpu_topology, &updates[0],
&updated_cpus);
else
stop_machine(update_cpu_topology, &updates[0], &updated_cpus);
/*
......@@ -1407,6 +1413,10 @@ int arch_update_cpu_topology(void)
* offline CPUs. It is best to perform this update from the stop-
* machine context.
*/
if (cpus_locked)
stop_machine_cpuslocked(update_lookup_table, &updates[0],
cpumask_of(raw_smp_processor_id()));
else
stop_machine(update_lookup_table, &updates[0],
cpumask_of(raw_smp_processor_id()));
......@@ -1426,6 +1436,12 @@ int arch_update_cpu_topology(void)
return changed;
}
int arch_update_cpu_topology(void)
{
lockdep_assert_cpus_held();
return numa_update_cpu_topology(true);
}
static void topology_work_fn(struct work_struct *work)
{
rebuild_sched_domains();
......
......@@ -348,7 +348,7 @@ static int set_subcores_per_core(int new_mode)
state->master = 0;
}
get_online_cpus();
cpus_read_lock();
/* This cpu will update the globals before exiting stop machine */
this_cpu_ptr(&split_state)->master = 1;
......@@ -356,9 +356,10 @@ static int set_subcores_per_core(int new_mode)
/* Ensure state is consistent before we call the other cpus */
mb();
stop_machine(cpu_update_split_mode, &new_mode, cpu_online_mask);
stop_machine_cpuslocked(cpu_update_split_mode, &new_mode,
cpu_online_mask);
put_online_cpus();
cpus_read_unlock();
return 0;
}
......
......@@ -93,7 +93,7 @@ void arch_jump_label_transform(struct jump_entry *entry,
args.entry = entry;
args.type = type;
stop_machine(__sm_arch_jump_label_transform, &args, NULL);
stop_machine_cpuslocked(__sm_arch_jump_label_transform, &args, NULL);
}
void arch_jump_label_transform_static(struct jump_entry *entry,
......
......@@ -196,7 +196,7 @@ void arch_arm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 1};
stop_machine(swap_instruction, &args, NULL);
stop_machine_cpuslocked(swap_instruction, &args, NULL);
}
NOKPROBE_SYMBOL(arch_arm_kprobe);
......@@ -204,7 +204,7 @@ void arch_disarm_kprobe(struct kprobe *p)
{
struct swap_insn_args args = {.p = p, .arm_kprobe = 0};
stop_machine(swap_instruction, &args, NULL);
stop_machine_cpuslocked(swap_instruction, &args, NULL);
}
NOKPROBE_SYMBOL(arch_disarm_kprobe);
......
......@@ -636,10 +636,10 @@ static void stp_work_fn(struct work_struct *work)
goto out_unlock;
memset(&stp_sync, 0, sizeof(stp_sync));
get_online_cpus();
cpus_read_lock();
atomic_set(&stp_sync.cpus, num_online_cpus() - 1);
stop_machine(stp_sync_clock, &stp_sync, cpu_online_mask);
put_online_cpus();
stop_machine_cpuslocked(stp_sync_clock, &stp_sync, cpu_online_mask);
cpus_read_unlock();
if (!check_sync_clock())
/*
......
......@@ -41,12 +41,10 @@ void arch_jump_label_transform(struct jump_entry *entry,
val = 0x01000000;
}
get_online_cpus();
mutex_lock(&text_mutex);
*insn = val;
flushi(insn);
mutex_unlock(&text_mutex);
put_online_cpus();
}
#endif
......@@ -45,14 +45,12 @@ static void __jump_label_transform(struct jump_entry *e,
void arch_jump_label_transform(struct jump_entry *e,
enum jump_label_type type)
{
get_online_cpus();
mutex_lock(&text_mutex);
__jump_label_transform(e, type);
flush_icache_range(e->code, e->code + sizeof(tilegx_bundle_bits));
mutex_unlock(&text_mutex);
put_online_cpus();
}
__init_or_module void arch_jump_label_transform_static(struct jump_entry *e,
......
......@@ -2233,7 +2233,6 @@ void perf_check_microcode(void)
if (x86_pmu.check_microcode)
x86_pmu.check_microcode();
}
EXPORT_SYMBOL_GPL(perf_check_microcode);
static struct pmu pmu = {
.pmu_enable = x86_pmu_enable,
......
......@@ -3425,12 +3425,10 @@ static void intel_snb_check_microcode(void)
int pebs_broken = 0;
int cpu;
get_online_cpus();
for_each_online_cpu(cpu) {
if ((pebs_broken = intel_snb_pebs_broken(cpu)))
break;
}
put_online_cpus();
if (pebs_broken == x86_pmu.pebs_broken)
return;
......@@ -3503,7 +3501,9 @@ static bool check_msr(unsigned long msr, u64 mask)
static __init void intel_sandybridge_quirk(void)
{
x86_pmu.check_microcode = intel_snb_check_microcode;
cpus_read_lock();
intel_snb_check_microcode();
cpus_read_unlock();
}
static const struct { int id; char *name; } intel_arch_events_map[] __initconst = {
......@@ -4175,13 +4175,12 @@ static __init int fixup_ht_bug(void)
lockup_detector_resume();
get_online_cpus();
cpus_read_lock();
for_each_online_cpu(c) {
for_each_online_cpu(c)
free_excl_cntrs(c);
}
put_online_cpus();
cpus_read_unlock();
pr_info("PMU erratum BJ122, BV98, HSD29 workaround disabled, HT off\n");
return 0;
}
......
......@@ -1682,7 +1682,7 @@ static int __init intel_cqm_init(void)
*
* Also, check that the scales match on all cpus.
*/
get_online_cpus();
cpus_read_lock();
for_each_online_cpu(cpu) {
struct cpuinfo_x86 *c = &cpu_data(cpu);
......@@ -1746,14 +1746,14 @@ static int __init intel_cqm_init(void)
* Setup the hot cpu notifier once we are sure cqm
* is enabled to avoid notifier leak.
*/
cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_STARTING,
cpuhp_setup_state_cpuslocked(CPUHP_AP_PERF_X86_CQM_STARTING,
"perf/x86/cqm:starting",
intel_cqm_cpu_starting, NULL);
cpuhp_setup_state(CPUHP_AP_PERF_X86_CQM_ONLINE, "perf/x86/cqm:online",
cpuhp_setup_state_cpuslocked(CPUHP_AP_PERF_X86_CQM_ONLINE,
"perf/x86/cqm:online",
NULL, intel_cqm_cpu_exit);
out:
put_online_cpus();
cpus_read_unlock();
if (ret) {
kfree(str);
......
......@@ -807,10 +807,8 @@ void mtrr_save_state(void)
if (!mtrr_enabled())
return;
get_online_cpus();
first_cpu = cpumask_first(cpu_online_mask);
smp_call_function_single(first_cpu, mtrr_save_fixed_ranges, NULL, 1);
put_online_cpus();
}
void set_mtrr_aps_delayed_init(void)
......
......@@ -105,11 +105,9 @@ static void __jump_label_transform(struct jump_entry *entry,
void arch_jump_label_transform(struct jump_entry *entry,
enum jump_label_type type)
{
get_online_cpus();
mutex_lock(&text_mutex);
__jump_label_transform(entry, type, NULL, 0);
mutex_unlock(&text_mutex);
put_online_cpus();
}
static enum {
......
......@@ -268,9 +268,9 @@ static int acpi_processor_start(struct device *dev)
return -ENODEV;
/* Protect against concurrent CPU hotplug operations */
get_online_cpus();
cpu_hotplug_disable();
ret = __acpi_processor_start(device);
put_online_cpus();
cpu_hotplug_enable();
return ret;
}
......
......@@ -909,6 +909,13 @@ static long __acpi_processor_get_throttling(void *data)
return pr->throttling.acpi_processor_get_throttling(pr);
}
static int call_on_cpu(int cpu, long (*fn)(void *), void *arg, bool direct)
{
if (direct || (is_percpu_thread() && cpu == smp_processor_id()))
return fn(arg);
return work_on_cpu(cpu, fn, arg);
}
static int acpi_processor_get_throttling(struct acpi_processor *pr)
{
if (!pr)
......@@ -926,7 +933,7 @@ static int acpi_processor_get_throttling(struct acpi_processor *pr)
if (!cpu_online(pr->id))
return -ENODEV;
return work_on_cpu(pr->id, __acpi_processor_get_throttling, pr);
return call_on_cpu(pr->id, __acpi_processor_get_throttling, pr, false);
}
static int acpi_processor_get_fadt_info(struct acpi_processor *pr)
......@@ -1076,13 +1083,6 @@ static long acpi_processor_throttling_fn(void *data)
arg->target_state, arg->force);
}
static int call_on_cpu(int cpu, long (*fn)(void *), void *arg, bool direct)
{
if (direct)
return fn(arg);
return work_on_cpu(cpu, fn, arg);
}
static int __acpi_processor_set_throttling(struct acpi_processor *pr,
int state, bool force, bool direct)
{
......
......@@ -887,7 +887,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
struct freq_attr *fattr = to_attr(attr);
ssize_t ret = -EINVAL;
get_online_cpus();
cpus_read_lock();
if (cpu_online(policy->cpu)) {
down_write(&policy->rwsem);
......@@ -895,7 +895,7 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr,
up_write(&policy->rwsem);
}
put_online_cpus();
cpus_read_unlock();
return ret;
}
......@@ -2441,7 +2441,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
pr_debug("trying to register driver %s\n", driver_data->name);
/* Protect against concurrent CPU online/offline. */
get_online_cpus();
cpus_read_lock();
write_lock_irqsave(&cpufreq_driver_lock, flags);
if (cpufreq_driver) {
......@@ -2474,7 +2474,8 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
goto err_if_unreg;
}
ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN, "cpufreq:online",
ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
"cpufreq:online",
cpuhp_cpufreq_online,
cpuhp_cpufreq_offline);
if (ret < 0)
......@@ -2494,7 +2495,7 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data)
cpufreq_driver = NULL;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
out:
put_online_cpus();
cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(cpufreq_register_driver);
......@@ -2517,17 +2518,17 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver)
pr_debug("unregistering driver %s\n", driver->name);
/* Protect against concurrent cpu hotplug */
get_online_cpus();
cpus_read_lock();
subsys_interface_unregister(&cpufreq_interface);
remove_boost_sysfs_file();
cpuhp_remove_state_nocalls(hp_online);
cpuhp_remove_state_nocalls_cpuslocked(hp_online);
write_lock_irqsave(&cpufreq_driver_lock, flags);
cpufreq_driver = NULL;
write_unlock_irqrestore(&cpufreq_driver_lock, flags);
put_online_cpus();
cpus_read_unlock();
return 0;
}
......
......@@ -587,7 +587,7 @@ static void etm_disable_sysfs(struct coresight_device *csdev)
* after cpu online mask indicates the cpu is offline but before the
* DYING hotplug callback is serviced by the ETM driver.
*/
get_online_cpus();
cpus_read_lock();
spin_lock(&drvdata->spinlock);
/*
......@@ -597,7 +597,7 @@ static void etm_disable_sysfs(struct coresight_device *csdev)
smp_call_function_single(drvdata->cpu, etm_disable_hw, drvdata, 1);
spin_unlock(&drvdata->spinlock);
put_online_cpus();
cpus_read_unlock();
dev_info(drvdata->dev, "ETM tracing disabled\n");
}
......@@ -795,7 +795,7 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id)
drvdata->cpu = pdata ? pdata->cpu : 0;
get_online_cpus();
cpus_read_lock();
etmdrvdata[drvdata->cpu] = drvdata;
if (smp_call_function_single(drvdata->cpu,
......@@ -803,17 +803,17 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id)
dev_err(dev, "ETM arch init failed\n");
if (!etm_count++) {
cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING,
cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ARM_CORESIGHT_STARTING,
"arm/coresight:starting",
etm_starting_cpu, etm_dying_cpu);
ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
"arm/coresight:online",
etm_online_cpu, NULL);
if (ret < 0)
goto err_arch_supported;
hp_online = ret;
}
put_online_cpus();
cpus_read_unlock();
if (etm_arch_supported(drvdata->arch) == false) {
ret = -EINVAL;
......
......@@ -371,7 +371,7 @@ static void etm4_disable_sysfs(struct coresight_device *csdev)
* after cpu online mask indicates the cpu is offline but before the
* DYING hotplug callback is serviced by the ETM driver.
*/
get_online_cpus();
cpus_read_lock();
spin_lock(&drvdata->spinlock);
/*
......@@ -381,7 +381,7 @@ static void etm4_disable_sysfs(struct coresight_device *csdev)
smp_call_function_single(drvdata->cpu, etm4_disable_hw, drvdata, 1);
spin_unlock(&drvdata->spinlock);
put_online_cpus();
cpus_read_unlock();
dev_info(drvdata->dev, "ETM tracing disabled\n");
}
......@@ -982,7 +982,7 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
drvdata->cpu = pdata ? pdata->cpu : 0;
get_online_cpus();
cpus_read_lock();
etmdrvdata[drvdata->cpu] = drvdata;
if (smp_call_function_single(drvdata->cpu,
......@@ -990,10 +990,10 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
dev_err(dev, "ETM arch init failed\n");
if (!etm4_count++) {
cpuhp_setup_state_nocalls(CPUHP_AP_ARM_CORESIGHT_STARTING,
cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ARM_CORESIGHT_STARTING,
"arm/coresight4:starting",
etm4_starting_cpu, etm4_dying_cpu);
ret = cpuhp_setup_state_nocalls(CPUHP_AP_ONLINE_DYN,
ret = cpuhp_setup_state_nocalls_cpuslocked(CPUHP_AP_ONLINE_DYN,
"arm/coresight4:online",
etm4_online_cpu, NULL);
if (ret < 0)
......@@ -1001,7 +1001,7 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
hp_online = ret;
}
put_online_cpus();
cpus_read_unlock();
if (etm4_arch_supported(drvdata->arch) == false) {
ret = -EINVAL;
......
......@@ -320,10 +320,19 @@ static long local_pci_probe(void *_ddi)
return 0;
}
static bool pci_physfn_is_probed(struct pci_dev *dev)
{
#ifdef CONFIG_PCI_IOV
return dev->is_virtfn && dev->physfn->is_probed;
#else
return false;
#endif
}
static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
const struct pci_device_id *id)
{
int error, node;
int error, node, cpu;
struct drv_dev_and_id ddi = { drv, dev, id };
/*
......@@ -332,33 +341,27 @@ static int pci_call_probe(struct pci_driver *drv, struct pci_dev *dev,
* on the right node.
*/
node = dev_to_node(&dev->dev);
dev->is_probed = 1;
cpu_hotplug_disable();
/*
* On NUMA systems, we are likely to call a PF probe function using
* work_on_cpu(). If that probe calls pci_enable_sriov() (which
* adds the VF devices via pci_bus_add_device()), we may re-enter
* this function to call the VF probe function. Calling
* work_on_cpu() again will cause a lockdep warning. Since VFs are
* always on the same node as the PF, we can work around this by
* avoiding work_on_cpu() when we're already on the correct node.
*
* Preemption is enabled, so it's theoretically unsafe to use
* numa_node_id(), but even if we run the probe function on the
* wrong node, it should be functionally correct.
* Prevent nesting work_on_cpu() for the case where a Virtual Function
* device is probed from work_on_cpu() of the Physical device.
*/
if (node >= 0 && node != numa_node_id()) {
int cpu;
get_online_cpus();
if (node < 0 || node >= MAX_NUMNODES || !node_online(node) ||
pci_physfn_is_probed(dev))
cpu = nr_cpu_ids;
else
cpu = cpumask_any_and(cpumask_of_node(node), cpu_online_mask);
if (cpu < nr_cpu_ids)
error = work_on_cpu(cpu, local_pci_probe, &ddi);
else
error = local_pci_probe(&ddi);
put_online_cpus();
} else
error = local_pci_probe(&ddi);
dev->is_probed = 0;
cpu_hotplug_enable();
return error;
}
......
......@@ -99,12 +99,11 @@ static inline void cpu_maps_update_done(void)
extern struct bus_type cpu_subsys;
#ifdef CONFIG_HOTPLUG_CPU
/* Stop CPUs going up and down. */
extern void cpu_hotplug_begin(void);
extern void cpu_hotplug_done(void);
extern void get_online_cpus(void);
extern void put_online_cpus(void);
extern void cpus_write_lock(void);
extern void cpus_write_unlock(void);
extern void cpus_read_lock(void);
extern void cpus_read_unlock(void);
extern void lockdep_assert_cpus_held(void);
extern void cpu_hotplug_disable(void);
extern void cpu_hotplug_enable(void);
void clear_tasks_mm_cpumask(int cpu);
......@@ -112,13 +111,20 @@ int cpu_down(unsigned int cpu);
#else /* CONFIG_HOTPLUG_CPU */
static inline void cpu_hotplug_begin(void) {}
static inline void cpu_hotplug_done(void) {}
#define get_online_cpus() do { } while (0)
#define put_online_cpus() do { } while (0)
#define cpu_hotplug_disable() do { } while (0)
#define cpu_hotplug_enable() do { } while (0)
#endif /* CONFIG_HOTPLUG_CPU */
static inline void cpus_write_lock(void) { }
static inline void cpus_write_unlock(void) { }
static inline void cpus_read_lock(void) { }
static inline void cpus_read_unlock(void) { }
static inline void lockdep_assert_cpus_held(void) { }
static inline void cpu_hotplug_disable(void) { }
static inline void cpu_hotplug_enable(void) { }
#endif /* !CONFIG_HOTPLUG_CPU */
/* Wrappers which go away once all code is converted */
static inline void cpu_hotplug_begin(void) { cpus_write_lock(); }
static inline void cpu_hotplug_done(void) { cpus_write_unlock(); }
static inline void get_online_cpus(void) { cpus_read_lock(); }
static inline void put_online_cpus(void) { cpus_read_unlock(); }
#ifdef CONFIG_PM_SLEEP_SMP
extern int freeze_secondary_cpus(int primary);
......
......@@ -153,6 +153,11 @@ int __cpuhp_setup_state(enum cpuhp_state state, const char *name, bool invoke,
int (*startup)(unsigned int cpu),
int (*teardown)(unsigned int cpu), bool multi_instance);
int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state, const char *name,
bool invoke,
int (*startup)(unsigned int cpu),
int (*teardown)(unsigned int cpu),
bool multi_instance);
/**
* cpuhp_setup_state - Setup hotplug state callbacks with calling the callbacks
* @state: The state for which the calls are installed
......@@ -171,6 +176,15 @@ static inline int cpuhp_setup_state(enum cpuhp_state state,
return __cpuhp_setup_state(state, name, true, startup, teardown, false);
}
static inline int cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
const char *name,
int (*startup)(unsigned int cpu),
int (*teardown)(unsigned int cpu))
{
return __cpuhp_setup_state_cpuslocked(state, name, true, startup,
teardown, false);
}
/**
* cpuhp_setup_state_nocalls - Setup hotplug state callbacks without calling the
* callbacks
......@@ -191,6 +205,15 @@ static inline int cpuhp_setup_state_nocalls(enum cpuhp_state state,
false);
}
static inline int cpuhp_setup_state_nocalls_cpuslocked(enum cpuhp_state state,
const char *name,
int (*startup)(unsigned int cpu),
int (*teardown)(unsigned int cpu))
{
return __cpuhp_setup_state_cpuslocked(state, name, false, startup,
teardown, false);
}
/**
* cpuhp_setup_state_multi - Add callbacks for multi state
* @state: The state for which the calls are installed
......@@ -217,6 +240,8 @@ static inline int cpuhp_setup_state_multi(enum cpuhp_state state,
int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
bool invoke);
int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
struct hlist_node *node, bool invoke);
/**
* cpuhp_state_add_instance - Add an instance for a state and invoke startup
......@@ -249,7 +274,15 @@ static inline int cpuhp_state_add_instance_nocalls(enum cpuhp_state state,
return __cpuhp_state_add_instance(state, node, false);
}
static inline int
cpuhp_state_add_instance_nocalls_cpuslocked(enum cpuhp_state state,
struct hlist_node *node)
{
return __cpuhp_state_add_instance_cpuslocked(state, node, false);
}
void __cpuhp_remove_state(enum cpuhp_state state, bool invoke);
void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke);
/**
* cpuhp_remove_state - Remove hotplug state callbacks and invoke the teardown
......@@ -273,6 +306,11 @@ static inline void cpuhp_remove_state_nocalls(enum cpuhp_state state)
__cpuhp_remove_state(state, false);
}
static inline void cpuhp_remove_state_nocalls_cpuslocked(enum cpuhp_state state)
{
__cpuhp_remove_state_cpuslocked(state, false);
}
/**
* cpuhp_remove_multi_state - Remove hotplug multi state callback
* @state: The state for which the calls are removed
......
......@@ -166,9 +166,6 @@ struct padata_instance {
extern struct padata_instance *padata_alloc_possible(
struct workqueue_struct *wq);
extern struct padata_instance *padata_alloc(struct workqueue_struct *wq,
const struct cpumask *pcpumask,
const struct cpumask *cbcpumask);
extern void padata_free(struct padata_instance *pinst);
extern int padata_do_parallel(struct padata_instance *pinst,
struct padata_priv *padata, int cb_cpu);
......
......@@ -376,6 +376,7 @@ struct pci_dev {
unsigned int irq_managed:1;
unsigned int has_secondary_link:1;
unsigned int non_compliant_bars:1; /* broken BARs; ignore them */
unsigned int is_probed:1; /* device probing in progress */
pci_dev_flags_t dev_flags;
atomic_t enable_cnt; /* pci_enable_device has been called */
......
......@@ -801,6 +801,8 @@ struct perf_cpu_context {
struct list_head sched_cb_entry;
int sched_cb_usage;
int online;
};
struct perf_output_handle {
......
......@@ -1281,6 +1281,16 @@ extern struct pid *cad_pid;
#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
#define used_math() tsk_used_math(current)
static inline bool is_percpu_thread(void)
{
#ifdef CONFIG_SMP
return (current->flags & PF_NO_SETAFFINITY) &&
(current->nr_cpus_allowed == 1);
#else
return true;
#endif
}
/* Per-process atomic flags. */
#define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */
#define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */
......
......@@ -116,14 +116,28 @@ static inline int try_stop_cpus(const struct cpumask *cpumask,
* @fn() runs.
*
* This can be thought of as a very heavy write lock, equivalent to
* grabbing every spinlock in the kernel. */
* grabbing every spinlock in the kernel.
*
* Protects against CPU hotplug.
*/
int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
/**
* stop_machine_cpuslocked: freeze the machine on all CPUs and run this function
* @fn: the function to run
* @data: the data ptr for the @fn()
* @cpus: the cpus to run the @fn() on (NULL = any online cpu)
*
* Same as above. Must be called from with in a cpus_read_lock() protected
* region. Avoids nested calls to cpus_read_lock().
*/
int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus);
int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus);
#else /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
static inline int stop_machine(cpu_stop_fn_t fn, void *data,
static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus)
{
unsigned long flags;
......@@ -134,6 +148,12 @@ static inline int stop_machine(cpu_stop_fn_t fn, void *data,
return ret;
}
static inline int stop_machine(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus)
{
return stop_machine_cpuslocked(fn, data, cpus);
}
static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus)
{
......
......@@ -27,6 +27,7 @@
#include <linux/smpboot.h>
#include <linux/relay.h>
#include <linux/slab.h>
#include <linux/percpu-rwsem.h>
#include <trace/events/power.h>
#define CREATE_TRACE_POINTS
......@@ -65,6 +66,12 @@ struct cpuhp_cpu_state {
static DEFINE_PER_CPU(struct cpuhp_cpu_state, cpuhp_state);
#if defined(CONFIG_LOCKDEP) && defined(CONFIG_SMP)
static struct lock_class_key cpuhp_state_key;
static struct lockdep_map cpuhp_state_lock_map =
STATIC_LOCKDEP_MAP_INIT("cpuhp_state", &cpuhp_state_key);
#endif
/**
* cpuhp_step - Hotplug state machine step
* @name: Name of the step
......@@ -196,121 +203,41 @@ void cpu_maps_update_done(void)
mutex_unlock(&cpu_add_remove_lock);
}
/* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
/*
* If set, cpu_up and cpu_down will return -EBUSY and do nothing.
* Should always be manipulated under cpu_add_remove_lock
*/
static int cpu_hotplug_disabled;
#ifdef CONFIG_HOTPLUG_CPU
static struct {
struct task_struct *active_writer;
/* wait queue to wake up the active_writer */
wait_queue_head_t wq;
/* verifies that no writer will get active while readers are active */
struct mutex lock;
/*
* Also blocks the new readers during
* an ongoing cpu hotplug operation.
*/
atomic_t refcount;
#ifdef CONFIG_DEBUG_LOCK_ALLOC
struct lockdep_map dep_map;
#endif
} cpu_hotplug = {
.active_writer = NULL,
.wq = __WAIT_QUEUE_HEAD_INITIALIZER(cpu_hotplug.wq),
.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
#ifdef CONFIG_DEBUG_LOCK_ALLOC
.dep_map = STATIC_LOCKDEP_MAP_INIT("cpu_hotplug.dep_map", &cpu_hotplug.dep_map),
#endif
};
/* Lockdep annotations for get/put_online_cpus() and cpu_hotplug_begin/end() */
#define cpuhp_lock_acquire_read() lock_map_acquire_read(&cpu_hotplug.dep_map)
#define cpuhp_lock_acquire_tryread() \
lock_map_acquire_tryread(&cpu_hotplug.dep_map)
#define cpuhp_lock_acquire() lock_map_acquire(&cpu_hotplug.dep_map)
#define cpuhp_lock_release() lock_map_release(&cpu_hotplug.dep_map)
DEFINE_STATIC_PERCPU_RWSEM(cpu_hotplug_lock);
void get_online_cpus(void)
void cpus_read_lock(void)
{
might_sleep();
if (cpu_hotplug.active_writer == current)
return;
cpuhp_lock_acquire_read();
mutex_lock(&cpu_hotplug.lock);
atomic_inc(&cpu_hotplug.refcount);
mutex_unlock(&cpu_hotplug.lock);
percpu_down_read(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(get_online_cpus);
EXPORT_SYMBOL_GPL(cpus_read_lock);
void put_online_cpus(void)
void cpus_read_unlock(void)
{
int refcount;
if (cpu_hotplug.active_writer == current)
return;
refcount = atomic_dec_return(&cpu_hotplug.refcount);
if (WARN_ON(refcount < 0)) /* try to fix things up */
atomic_inc(&cpu_hotplug.refcount);
if (refcount <= 0 && waitqueue_active(&cpu_hotplug.wq))
wake_up(&cpu_hotplug.wq);
cpuhp_lock_release();
percpu_up_read(&cpu_hotplug_lock);
}
EXPORT_SYMBOL_GPL(put_online_cpus);
EXPORT_SYMBOL_GPL(cpus_read_unlock);
/*
* This ensures that the hotplug operation can begin only when the
* refcount goes to zero.
*
* Note that during a cpu-hotplug operation, the new readers, if any,
* will be blocked by the cpu_hotplug.lock
*
* Since cpu_hotplug_begin() is always called after invoking
* cpu_maps_update_begin(), we can be sure that only one writer is active.
*
* Note that theoretically, there is a possibility of a livelock:
* - Refcount goes to zero, last reader wakes up the sleeping
* writer.
* - Last reader unlocks the cpu_hotplug.lock.
* - A new reader arrives at this moment, bumps up the refcount.
* - The writer acquires the cpu_hotplug.lock finds the refcount
* non zero and goes to sleep again.
*
* However, this is very difficult to achieve in practice since
* get_online_cpus() not an api which is called all that often.
*
*/
void cpu_hotplug_begin(void)
void cpus_write_lock(void)
{
DEFINE_WAIT(wait);
cpu_hotplug.active_writer = current;
cpuhp_lock_acquire();
percpu_down_write(&cpu_hotplug_lock);
}
for (;;) {
mutex_lock(&cpu_hotplug.lock);
prepare_to_wait(&cpu_hotplug.wq, &wait, TASK_UNINTERRUPTIBLE);
if (likely(!atomic_read(&cpu_hotplug.refcount)))
break;
mutex_unlock(&cpu_hotplug.lock);
schedule();
}
finish_wait(&cpu_hotplug.wq, &wait);
void cpus_write_unlock(void)
{
percpu_up_write(&cpu_hotplug_lock);
}
void cpu_hotplug_done(void)
void lockdep_assert_cpus_held(void)
{
cpu_hotplug.active_writer = NULL;
mutex_unlock(&cpu_hotplug.lock);
cpuhp_lock_release();
percpu_rwsem_assert_held(&cpu_hotplug_lock);
}
/*
......@@ -344,8 +271,6 @@ void cpu_hotplug_enable(void)
EXPORT_SYMBOL_GPL(cpu_hotplug_enable);
#endif /* CONFIG_HOTPLUG_CPU */
/* Notifier wrappers for transitioning to state machine */
static int bringup_wait_for_ap(unsigned int cpu)
{
struct cpuhp_cpu_state *st = per_cpu_ptr(&cpuhp_state, cpu);
......@@ -484,6 +409,7 @@ static void cpuhp_thread_fun(unsigned int cpu)
st->should_run = false;
lock_map_acquire(&cpuhp_state_lock_map);
/* Single callback invocation for [un]install ? */
if (st->single) {
if (st->cb_state < CPUHP_AP_ONLINE) {
......@@ -510,6 +436,7 @@ static void cpuhp_thread_fun(unsigned int cpu)
else if (st->state > st->target)
ret = cpuhp_ap_offline(cpu, st);
}
lock_map_release(&cpuhp_state_lock_map);
st->result = ret;
complete(&st->done);
}
......@@ -524,6 +451,9 @@ cpuhp_invoke_ap_callback(int cpu, enum cpuhp_state state, bool bringup,
if (!cpu_online(cpu))
return 0;
lock_map_acquire(&cpuhp_state_lock_map);
lock_map_release(&cpuhp_state_lock_map);
/*
* If we are up and running, use the hotplug thread. For early calls
* we invoke the thread function directly.
......@@ -567,6 +497,8 @@ static int cpuhp_kick_ap_work(unsigned int cpu)
enum cpuhp_state state = st->state;
trace_cpuhp_enter(cpu, st->target, state, cpuhp_kick_ap_work);
lock_map_acquire(&cpuhp_state_lock_map);
lock_map_release(&cpuhp_state_lock_map);
__cpuhp_kick_ap_work(st);
wait_for_completion(&st->done);
trace_cpuhp_exit(cpu, st->state, state, st->result);
......@@ -630,30 +562,6 @@ void clear_tasks_mm_cpumask(int cpu)
rcu_read_unlock();
}
static inline void check_for_tasks(int dead_cpu)
{
struct task_struct *g, *p;
read_lock(&tasklist_lock);
for_each_process_thread(g, p) {
if (!p->on_rq)
continue;
/*
* We do the check with unlocked task_rq(p)->lock.
* Order the reading to do not warn about a task,
* which was running on this cpu in the past, and
* it's just been woken on another cpu.
*/
rmb();
if (task_cpu(p) != dead_cpu)
continue;
pr_warn("Task %s (pid=%d) is on cpu %d (state=%ld, flags=%x)\n",
p->comm, task_pid_nr(p), dead_cpu, p->state, p->flags);
}
read_unlock(&tasklist_lock);
}
/* Take this CPU down. */
static int take_cpu_down(void *_param)
{
......@@ -701,7 +609,7 @@ static int takedown_cpu(unsigned int cpu)
/*
* So now all preempt/rcu users must observe !cpu_active().
*/
err = stop_machine(take_cpu_down, NULL, cpumask_of(cpu));
err = stop_machine_cpuslocked(take_cpu_down, NULL, cpumask_of(cpu));
if (err) {
/* CPU refused to die */
irq_unlock_sparse();
......@@ -773,7 +681,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
if (!cpu_present(cpu))
return -EINVAL;
cpu_hotplug_begin();
cpus_write_lock();
cpuhp_tasks_frozen = tasks_frozen;
......@@ -811,7 +719,7 @@ static int __ref _cpu_down(unsigned int cpu, int tasks_frozen,
}
out:
cpu_hotplug_done();
cpus_write_unlock();
return ret;
}
......@@ -893,7 +801,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
struct task_struct *idle;
int ret = 0;
cpu_hotplug_begin();
cpus_write_lock();
if (!cpu_present(cpu)) {
ret = -EINVAL;
......@@ -941,7 +849,7 @@ static int _cpu_up(unsigned int cpu, int tasks_frozen, enum cpuhp_state target)
target = min((int)target, CPUHP_BRINGUP_CPU);
ret = cpuhp_up_callbacks(cpu, st, target);
out:
cpu_hotplug_done();
cpus_write_unlock();
return ret;
}
......@@ -1418,18 +1326,20 @@ static void cpuhp_rollback_install(int failedcpu, enum cpuhp_state state,
}
}
int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
int __cpuhp_state_add_instance_cpuslocked(enum cpuhp_state state,
struct hlist_node *node,
bool invoke)
{
struct cpuhp_step *sp;
int cpu;
int ret;
lockdep_assert_cpus_held();
sp = cpuhp_get_step(state);
if (sp->multi_instance == false)
return -EINVAL;
get_online_cpus();
mutex_lock(&cpuhp_state_mutex);
if (!invoke || !sp->startup.multi)
......@@ -1458,13 +1368,23 @@ int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
hlist_add_head(node, &sp->list);
unlock:
mutex_unlock(&cpuhp_state_mutex);
put_online_cpus();
return ret;
}
int __cpuhp_state_add_instance(enum cpuhp_state state, struct hlist_node *node,
bool invoke)
{
int ret;
cpus_read_lock();
ret = __cpuhp_state_add_instance_cpuslocked(state, node, invoke);
cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
/**
* __cpuhp_setup_state - Setup the callbacks for an hotplug machine state
* __cpuhp_setup_state_cpuslocked - Setup the callbacks for an hotplug machine state
* @state: The state to setup
* @invoke: If true, the startup function is invoked for cpus where
* cpu state >= @state
......@@ -1473,13 +1393,14 @@ EXPORT_SYMBOL_GPL(__cpuhp_state_add_instance);
* @multi_instance: State is set up for multiple instances which get
* added afterwards.
*
* The caller needs to hold cpus read locked while calling this function.
* Returns:
* On success:
* Positive state number if @state is CPUHP_AP_ONLINE_DYN
* 0 for all other states
* On failure: proper (negative) error code
*/
int __cpuhp_setup_state(enum cpuhp_state state,
int __cpuhp_setup_state_cpuslocked(enum cpuhp_state state,
const char *name, bool invoke,
int (*startup)(unsigned int cpu),
int (*teardown)(unsigned int cpu),
......@@ -1488,10 +1409,11 @@ int __cpuhp_setup_state(enum cpuhp_state state,
int cpu, ret = 0;
bool dynstate;
lockdep_assert_cpus_held();
if (cpuhp_cb_check(state) || !name)
return -EINVAL;
get_online_cpus();
mutex_lock(&cpuhp_state_mutex);
ret = cpuhp_store_callbacks(state, name, startup, teardown,
......@@ -1527,7 +1449,6 @@ int __cpuhp_setup_state(enum cpuhp_state state,
}
out:
mutex_unlock(&cpuhp_state_mutex);
put_online_cpus();
/*
* If the requested state is CPUHP_AP_ONLINE_DYN, return the
* dynamically allocated state in case of success.
......@@ -1536,6 +1457,22 @@ int __cpuhp_setup_state(enum cpuhp_state state,
return state;
return ret;
}
EXPORT_SYMBOL(__cpuhp_setup_state_cpuslocked);
int __cpuhp_setup_state(enum cpuhp_state state,
const char *name, bool invoke,
int (*startup)(unsigned int cpu),
int (*teardown)(unsigned int cpu),
bool multi_instance)
{
int ret;
cpus_read_lock();
ret = __cpuhp_setup_state_cpuslocked(state, name, invoke, startup,
teardown, multi_instance);
cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL(__cpuhp_setup_state);
int __cpuhp_state_remove_instance(enum cpuhp_state state,
......@@ -1549,7 +1486,7 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
if (!sp->multi_instance)
return -EINVAL;
get_online_cpus();
cpus_read_lock();
mutex_lock(&cpuhp_state_mutex);
if (!invoke || !cpuhp_get_teardown_cb(state))
......@@ -1570,29 +1507,30 @@ int __cpuhp_state_remove_instance(enum cpuhp_state state,
remove:
hlist_del(node);
mutex_unlock(&cpuhp_state_mutex);
put_online_cpus();
cpus_read_unlock();
return 0;
}
EXPORT_SYMBOL_GPL(__cpuhp_state_remove_instance);
/**
* __cpuhp_remove_state - Remove the callbacks for an hotplug machine state
* __cpuhp_remove_state_cpuslocked - Remove the callbacks for an hotplug machine state
* @state: The state to remove
* @invoke: If true, the teardown function is invoked for cpus where
* cpu state >= @state
*
* The caller needs to hold cpus read locked while calling this function.
* The teardown callback is currently not allowed to fail. Think
* about module removal!
*/
void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
void __cpuhp_remove_state_cpuslocked(enum cpuhp_state state, bool invoke)
{
struct cpuhp_step *sp = cpuhp_get_step(state);
int cpu;
BUG_ON(cpuhp_cb_check(state));
get_online_cpus();
lockdep_assert_cpus_held();
mutex_lock(&cpuhp_state_mutex);
if (sp->multi_instance) {
......@@ -1620,7 +1558,14 @@ void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
remove:
cpuhp_store_callbacks(state, NULL, NULL, NULL, false);
mutex_unlock(&cpuhp_state_mutex);
put_online_cpus();
}
EXPORT_SYMBOL(__cpuhp_remove_state_cpuslocked);
void __cpuhp_remove_state(enum cpuhp_state state, bool invoke)
{
cpus_read_lock();
__cpuhp_remove_state_cpuslocked(state, invoke);
cpus_read_unlock();
}
EXPORT_SYMBOL(__cpuhp_remove_state);
......@@ -1689,7 +1634,7 @@ static struct attribute *cpuhp_cpu_attrs[] = {
NULL
};
static struct attribute_group cpuhp_cpu_attr_group = {
static const struct attribute_group cpuhp_cpu_attr_group = {
.attrs = cpuhp_cpu_attrs,
.name = "hotplug",
NULL
......@@ -1721,7 +1666,7 @@ static struct attribute *cpuhp_cpu_root_attrs[] = {
NULL
};
static struct attribute_group cpuhp_cpu_root_attr_group = {
static const struct attribute_group cpuhp_cpu_root_attr_group = {
.attrs = cpuhp_cpu_root_attrs,
.name = "hotplug",
NULL
......
......@@ -389,6 +389,7 @@ static atomic_t nr_switch_events __read_mostly;
static LIST_HEAD(pmus);
static DEFINE_MUTEX(pmus_lock);
static struct srcu_struct pmus_srcu;
static cpumask_var_t perf_online_mask;
/*
* perf event paranoia level:
......@@ -3807,14 +3808,6 @@ find_get_context(struct pmu *pmu, struct task_struct *task,
if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN))
return ERR_PTR(-EACCES);
/*
* We could be clever and allow to attach a event to an
* offline CPU and activate it when the CPU comes up, but
* that's for later.
*/
if (!cpu_online(cpu))
return ERR_PTR(-ENODEV);
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
ctx = &cpuctx->ctx;
get_ctx(ctx);
......@@ -7723,7 +7716,8 @@ static int swevent_hlist_get_cpu(int cpu)
int err = 0;
mutex_lock(&swhash->hlist_mutex);
if (!swevent_hlist_deref(swhash) && cpu_online(cpu)) {
if (!swevent_hlist_deref(swhash) &&
cpumask_test_cpu(cpu, perf_online_mask)) {
struct swevent_hlist *hlist;
hlist = kzalloc(sizeof(*hlist), GFP_KERNEL);
......@@ -7744,7 +7738,7 @@ static int swevent_hlist_get(void)
{
int err, cpu, failed_cpu;
get_online_cpus();
mutex_lock(&pmus_lock);
for_each_possible_cpu(cpu) {
err = swevent_hlist_get_cpu(cpu);
if (err) {
......@@ -7752,8 +7746,7 @@ static int swevent_hlist_get(void)
goto fail;
}
}
put_online_cpus();
mutex_unlock(&pmus_lock);
return 0;
fail:
for_each_possible_cpu(cpu) {
......@@ -7761,8 +7754,7 @@ static int swevent_hlist_get(void)
break;
swevent_hlist_put_cpu(cpu);
}
put_online_cpus();
mutex_unlock(&pmus_lock);
return err;
}
......@@ -8940,7 +8932,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
pmu->hrtimer_interval_ms = timer;
/* update all cpuctx for this PMU */
get_online_cpus();
cpus_read_lock();
for_each_online_cpu(cpu) {
struct perf_cpu_context *cpuctx;
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
......@@ -8949,7 +8941,7 @@ perf_event_mux_interval_ms_store(struct device *dev,
cpu_function_call(cpu,
(remote_function_f)perf_mux_hrtimer_restart, cpuctx);
}
put_online_cpus();
cpus_read_unlock();
mutex_unlock(&mux_interval_mutex);
return count;
......@@ -9079,6 +9071,7 @@ int perf_pmu_register(struct pmu *pmu, const char *name, int type)
lockdep_set_class(&cpuctx->ctx.mutex, &cpuctx_mutex);
lockdep_set_class(&cpuctx->ctx.lock, &cpuctx_lock);
cpuctx->ctx.pmu = pmu;
cpuctx->online = cpumask_test_cpu(cpu, perf_online_mask);
__perf_mux_hrtimer_init(cpuctx, cpu);
}
......@@ -9903,12 +9896,10 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_task;
}
get_online_cpus();
if (task) {
err = mutex_lock_interruptible(&task->signal->cred_guard_mutex);
if (err)
goto err_cpus;
goto err_task;
/*
* Reuse ptrace permission checks for now.
......@@ -10094,6 +10085,23 @@ SYSCALL_DEFINE5(perf_event_open,
goto err_locked;
}
if (!task) {
/*
* Check if the @cpu we're creating an event for is online.
*
* We use the perf_cpu_context::ctx::mutex to serialize against
* the hotplug notifiers. See perf_event_{init,exit}_cpu().
*/
struct perf_cpu_context *cpuctx =
container_of(ctx, struct perf_cpu_context, ctx);
if (!cpuctx->online) {
err = -ENODEV;
goto err_locked;
}
}
/*
* Must be under the same ctx::mutex as perf_install_in_context(),
* because we need to serialize with concurrent event creation.
......@@ -10183,8 +10191,6 @@ SYSCALL_DEFINE5(perf_event_open,
put_task_struct(task);
}
put_online_cpus();
mutex_lock(&current->perf_event_mutex);
list_add_tail(&event->owner_entry, &current->perf_event_list);
mutex_unlock(&current->perf_event_mutex);
......@@ -10218,8 +10224,6 @@ SYSCALL_DEFINE5(perf_event_open,
err_cred:
if (task)
mutex_unlock(&task->signal->cred_guard_mutex);
err_cpus:
put_online_cpus();
err_task:
if (task)
put_task_struct(task);
......@@ -10274,6 +10278,21 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
goto err_unlock;
}
if (!task) {
/*
* Check if the @cpu we're creating an event for is online.
*
* We use the perf_cpu_context::ctx::mutex to serialize against
* the hotplug notifiers. See perf_event_{init,exit}_cpu().
*/
struct perf_cpu_context *cpuctx =
container_of(ctx, struct perf_cpu_context, ctx);
if (!cpuctx->online) {
err = -ENODEV;
goto err_unlock;
}
}
if (!exclusive_event_installable(event, ctx)) {
err = -EBUSY;
goto err_unlock;
......@@ -10941,6 +10960,8 @@ static void __init perf_event_init_all_cpus(void)
struct swevent_htable *swhash;
int cpu;
zalloc_cpumask_var(&perf_online_mask, GFP_KERNEL);
for_each_possible_cpu(cpu) {
swhash = &per_cpu(swevent_htable, cpu);
mutex_init(&swhash->hlist_mutex);
......@@ -10956,7 +10977,7 @@ static void __init perf_event_init_all_cpus(void)
}
}
int perf_event_init_cpu(unsigned int cpu)
void perf_swevent_init_cpu(unsigned int cpu)
{
struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu);
......@@ -10969,7 +10990,6 @@ int perf_event_init_cpu(unsigned int cpu)
rcu_assign_pointer(swhash->swevent_hlist, hlist);
}
mutex_unlock(&swhash->hlist_mutex);
return 0;
}
#if defined CONFIG_HOTPLUG_CPU || defined CONFIG_KEXEC_CORE
......@@ -10987,19 +11007,22 @@ static void __perf_event_exit_context(void *__info)
static void perf_event_exit_cpu_context(int cpu)
{
struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
struct pmu *pmu;
int idx;
idx = srcu_read_lock(&pmus_srcu);
list_for_each_entry_rcu(pmu, &pmus, entry) {
ctx = &per_cpu_ptr(pmu->pmu_cpu_context, cpu)->ctx;
mutex_lock(&pmus_lock);
list_for_each_entry(pmu, &pmus, entry) {
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
ctx = &cpuctx->ctx;
mutex_lock(&ctx->mutex);
smp_call_function_single(cpu, __perf_event_exit_context, ctx, 1);
cpuctx->online = 0;
mutex_unlock(&ctx->mutex);
}
srcu_read_unlock(&pmus_srcu, idx);
cpumask_clear_cpu(cpu, perf_online_mask);
mutex_unlock(&pmus_lock);
}
#else
......@@ -11007,6 +11030,29 @@ static void perf_event_exit_cpu_context(int cpu) { }
#endif
int perf_event_init_cpu(unsigned int cpu)
{
struct perf_cpu_context *cpuctx;
struct perf_event_context *ctx;
struct pmu *pmu;
perf_swevent_init_cpu(cpu);
mutex_lock(&pmus_lock);
cpumask_set_cpu(cpu, perf_online_mask);
list_for_each_entry(pmu, &pmus, entry) {
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
ctx = &cpuctx->ctx;
mutex_lock(&ctx->mutex);
cpuctx->online = 1;
mutex_unlock(&ctx->mutex);
}
mutex_unlock(&pmus_lock);
return 0;
}
int perf_event_exit_cpu(unsigned int cpu)
{
perf_event_exit_cpu_context(cpu);
......
......@@ -15,6 +15,7 @@
#include <linux/static_key.h>
#include <linux/jump_label_ratelimit.h>
#include <linux/bug.h>
#include <linux/cpu.h>
#ifdef HAVE_JUMP_LABEL
......@@ -124,6 +125,7 @@ void static_key_slow_inc(struct static_key *key)
return;
}
cpus_read_lock();
jump_label_lock();
if (atomic_read(&key->enabled) == 0) {
atomic_set(&key->enabled, -1);
......@@ -133,12 +135,14 @@ void static_key_slow_inc(struct static_key *key)
atomic_inc(&key->enabled);
}
jump_label_unlock();
cpus_read_unlock();
}
EXPORT_SYMBOL_GPL(static_key_slow_inc);
static void __static_key_slow_dec(struct static_key *key,
unsigned long rate_limit, struct delayed_work *work)
{
cpus_read_lock();
/*
* The negative count check is valid even when a negative
* key->enabled is in use by static_key_slow_inc(); a
......@@ -149,6 +153,7 @@ static void __static_key_slow_dec(struct static_key *key,
if (!atomic_dec_and_mutex_lock(&key->enabled, &jump_label_mutex)) {
WARN(atomic_read(&key->enabled) < 0,
"jump label: negative count!\n");
cpus_read_unlock();
return;
}
......@@ -159,6 +164,7 @@ static void __static_key_slow_dec(struct static_key *key,
jump_label_update(key);
}
jump_label_unlock();
cpus_read_unlock();
}
static void jump_label_update_timeout(struct work_struct *work)
......@@ -334,6 +340,7 @@ void __init jump_label_init(void)
if (static_key_initialized)
return;
cpus_read_lock();
jump_label_lock();
jump_label_sort_entries(iter_start, iter_stop);
......@@ -353,6 +360,7 @@ void __init jump_label_init(void)
}
static_key_initialized = true;
jump_label_unlock();
cpus_read_unlock();
}
#ifdef CONFIG_MODULES
......@@ -590,28 +598,28 @@ jump_label_module_notify(struct notifier_block *self, unsigned long val,
struct module *mod = data;
int ret = 0;
cpus_read_lock();
jump_label_lock();
switch (val) {
case MODULE_STATE_COMING:
jump_label_lock();
ret = jump_label_add_module(mod);
if (ret) {
WARN(1, "Failed to allocatote memory: jump_label may not work properly.\n");
jump_label_del_module(mod);
}
jump_label_unlock();
break;
case MODULE_STATE_GOING:
jump_label_lock();
jump_label_del_module(mod);
jump_label_unlock();
break;
case MODULE_STATE_LIVE:
jump_label_lock();
jump_label_invalidate_module_init(mod);
jump_label_unlock();
break;
}
jump_label_unlock();
cpus_read_unlock();
return notifier_from_errno(ret);
}
......
......@@ -483,11 +483,6 @@ static DECLARE_DELAYED_WORK(optimizing_work, kprobe_optimizer);
*/
static void do_optimize_kprobes(void)
{
/* Optimization never be done when disarmed */
if (kprobes_all_disarmed || !kprobes_allow_optimization ||
list_empty(&optimizing_list))
return;
/*
* The optimization/unoptimization refers online_cpus via
* stop_machine() and cpu-hotplug modifies online_cpus.
......@@ -495,14 +490,19 @@ static void do_optimize_kprobes(void)
* This combination can cause a deadlock (cpu-hotplug try to lock
* text_mutex but stop_machine can not be done because online_cpus
* has been changed)
* To avoid this deadlock, we need to call get_online_cpus()
* To avoid this deadlock, caller must have locked cpu hotplug
* for preventing cpu-hotplug outside of text_mutex locking.
*/
get_online_cpus();
lockdep_assert_cpus_held();
/* Optimization never be done when disarmed */
if (kprobes_all_disarmed || !kprobes_allow_optimization ||
list_empty(&optimizing_list))
return;
mutex_lock(&text_mutex);
arch_optimize_kprobes(&optimizing_list);
mutex_unlock(&text_mutex);
put_online_cpus();
}
/*
......@@ -513,12 +513,13 @@ static void do_unoptimize_kprobes(void)
{
struct optimized_kprobe *op, *tmp;
/* See comment in do_optimize_kprobes() */
lockdep_assert_cpus_held();
/* Unoptimization must be done anytime */
if (list_empty(&unoptimizing_list))
return;
/* Ditto to do_optimize_kprobes */
get_online_cpus();
mutex_lock(&text_mutex);
arch_unoptimize_kprobes(&unoptimizing_list, &freeing_list);
/* Loop free_list for disarming */
......@@ -537,7 +538,6 @@ static void do_unoptimize_kprobes(void)
list_del_init(&op->list);
}
mutex_unlock(&text_mutex);
put_online_cpus();
}
/* Reclaim all kprobes on the free_list */
......@@ -562,6 +562,7 @@ static void kick_kprobe_optimizer(void)
static void kprobe_optimizer(struct work_struct *work)
{
mutex_lock(&kprobe_mutex);
cpus_read_lock();
/* Lock modules while optimizing kprobes */
mutex_lock(&module_mutex);
......@@ -587,6 +588,7 @@ static void kprobe_optimizer(struct work_struct *work)
do_free_cleaned_kprobes();
mutex_unlock(&module_mutex);
cpus_read_unlock();
mutex_unlock(&kprobe_mutex);
/* Step 5: Kick optimizer again if needed */
......@@ -650,9 +652,8 @@ static void optimize_kprobe(struct kprobe *p)
/* Short cut to direct unoptimizing */
static void force_unoptimize_kprobe(struct optimized_kprobe *op)
{
get_online_cpus();
lockdep_assert_cpus_held();
arch_unoptimize_kprobe(op);
put_online_cpus();
if (kprobe_disabled(&op->kp))
arch_disarm_kprobe(&op->kp);
}
......@@ -791,6 +792,7 @@ static void try_to_optimize_kprobe(struct kprobe *p)
return;
/* For preparing optimization, jump_label_text_reserved() is called */
cpus_read_lock();
jump_label_lock();
mutex_lock(&text_mutex);
......@@ -812,6 +814,7 @@ static void try_to_optimize_kprobe(struct kprobe *p)
out:
mutex_unlock(&text_mutex);
jump_label_unlock();
cpus_read_unlock();
}
#ifdef CONFIG_SYSCTL
......@@ -826,6 +829,7 @@ static void optimize_all_kprobes(void)
if (kprobes_allow_optimization)
goto out;
cpus_read_lock();
kprobes_allow_optimization = true;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
......@@ -833,6 +837,7 @@ static void optimize_all_kprobes(void)
if (!kprobe_disabled(p))
optimize_kprobe(p);
}
cpus_read_unlock();
printk(KERN_INFO "Kprobes globally optimized\n");
out:
mutex_unlock(&kprobe_mutex);
......@@ -851,6 +856,7 @@ static void unoptimize_all_kprobes(void)
return;
}
cpus_read_lock();
kprobes_allow_optimization = false;
for (i = 0; i < KPROBE_TABLE_SIZE; i++) {
head = &kprobe_table[i];
......@@ -859,6 +865,7 @@ static void unoptimize_all_kprobes(void)
unoptimize_kprobe(p, false);
}
}
cpus_read_unlock();
mutex_unlock(&kprobe_mutex);
/* Wait for unoptimizing completion */
......@@ -1010,14 +1017,11 @@ static void arm_kprobe(struct kprobe *kp)
arm_kprobe_ftrace(kp);
return;
}
/*
* Here, since __arm_kprobe() doesn't use stop_machine(),
* this doesn't cause deadlock on text_mutex. So, we don't
* need get_online_cpus().
*/
cpus_read_lock();
mutex_lock(&text_mutex);
__arm_kprobe(kp);
mutex_unlock(&text_mutex);
cpus_read_unlock();
}
/* Disarm a kprobe with text_mutex */
......@@ -1027,10 +1031,12 @@ static void disarm_kprobe(struct kprobe *kp, bool reopt)
disarm_kprobe_ftrace(kp);
return;
}
/* Ditto */
cpus_read_lock();
mutex_lock(&text_mutex);
__disarm_kprobe(kp, reopt);
mutex_unlock(&text_mutex);
cpus_read_unlock();
}
/*
......@@ -1298,13 +1304,10 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
int ret = 0;
struct kprobe *ap = orig_p;
cpus_read_lock();
/* For preparing optimization, jump_label_text_reserved() is called */
jump_label_lock();
/*
* Get online CPUs to avoid text_mutex deadlock.with stop machine,
* which is invoked by unoptimize_kprobe() in add_new_kprobe()
*/
get_online_cpus();
mutex_lock(&text_mutex);
if (!kprobe_aggrprobe(orig_p)) {
......@@ -1352,8 +1355,8 @@ static int register_aggr_kprobe(struct kprobe *orig_p, struct kprobe *p)
out:
mutex_unlock(&text_mutex);
put_online_cpus();
jump_label_unlock();
cpus_read_unlock();
if (ret == 0 && kprobe_disabled(ap) && !kprobe_disabled(p)) {
ap->flags &= ~KPROBE_FLAG_DISABLED;
......@@ -1555,9 +1558,12 @@ int register_kprobe(struct kprobe *p)
goto out;
}
mutex_lock(&text_mutex); /* Avoiding text modification */
cpus_read_lock();
/* Prevent text modification */
mutex_lock(&text_mutex);
ret = prepare_kprobe(p);
mutex_unlock(&text_mutex);
cpus_read_unlock();
if (ret)
goto out;
......@@ -1570,7 +1576,6 @@ int register_kprobe(struct kprobe *p)
/* Try to optimize kprobe */
try_to_optimize_kprobe(p);
out:
mutex_unlock(&kprobe_mutex);
......
......@@ -933,19 +933,6 @@ static struct kobj_type padata_attr_type = {
.release = padata_sysfs_release,
};
/**
* padata_alloc_possible - Allocate and initialize padata instance.
* Use the cpu_possible_mask for serial and
* parallel workers.
*
* @wq: workqueue to use for the allocated padata instance
*/
struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
{
return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
}
EXPORT_SYMBOL(padata_alloc_possible);
/**
* padata_alloc - allocate and initialize a padata instance and specify
* cpumasks for serial and parallel workers.
......@@ -953,8 +940,10 @@ EXPORT_SYMBOL(padata_alloc_possible);
* @wq: workqueue to use for the allocated padata instance
* @pcpumask: cpumask that will be used for padata parallelization
* @cbcpumask: cpumask that will be used for padata serialization
*
* Must be called from a cpus_read_lock() protected region
*/
struct padata_instance *padata_alloc(struct workqueue_struct *wq,
static struct padata_instance *padata_alloc(struct workqueue_struct *wq,
const struct cpumask *pcpumask,
const struct cpumask *cbcpumask)
{
......@@ -965,7 +954,6 @@ struct padata_instance *padata_alloc(struct workqueue_struct *wq,
if (!pinst)
goto err;
get_online_cpus();
if (!alloc_cpumask_var(&pinst->cpumask.pcpu, GFP_KERNEL))
goto err_free_inst;
if (!alloc_cpumask_var(&pinst->cpumask.cbcpu, GFP_KERNEL)) {
......@@ -989,14 +977,12 @@ struct padata_instance *padata_alloc(struct workqueue_struct *wq,
pinst->flags = 0;
put_online_cpus();
BLOCKING_INIT_NOTIFIER_HEAD(&pinst->cpumask_change_notifier);
kobject_init(&pinst->kobj, &padata_attr_type);
mutex_init(&pinst->lock);
#ifdef CONFIG_HOTPLUG_CPU
cpuhp_state_add_instance_nocalls(hp_online, &pinst->node);
cpuhp_state_add_instance_nocalls_cpuslocked(hp_online, &pinst->node);
#endif
return pinst;
......@@ -1005,11 +991,26 @@ struct padata_instance *padata_alloc(struct workqueue_struct *wq,
free_cpumask_var(pinst->cpumask.cbcpu);
err_free_inst:
kfree(pinst);
put_online_cpus();
err:
return NULL;
}
/**
* padata_alloc_possible - Allocate and initialize padata instance.
* Use the cpu_possible_mask for serial and
* parallel workers.
*
* @wq: workqueue to use for the allocated padata instance
*
* Must be called from a cpus_read_lock() protected region
*/
struct padata_instance *padata_alloc_possible(struct workqueue_struct *wq)
{
lockdep_assert_cpus_held();
return padata_alloc(wq, cpu_possible_mask, cpu_possible_mask);
}
EXPORT_SYMBOL(padata_alloc_possible);
/**
* padata_free - free a padata instance
*
......
......@@ -552,7 +552,8 @@ static int __init cpu_stop_init(void)
}
early_initcall(cpu_stop_init);
static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
const struct cpumask *cpus)
{
struct multi_stop_data msdata = {
.fn = fn,
......@@ -561,6 +562,8 @@ static int __stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cp
.active_cpus = cpus,
};
lockdep_assert_cpus_held();
if (!stop_machine_initialized) {
/*
* Handle the case where stop_machine() is called
......@@ -590,9 +593,9 @@ int stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
int ret;
/* No CPUs can come up or down during this. */
get_online_cpus();
ret = __stop_machine(fn, data, cpus);
put_online_cpus();
cpus_read_lock();
ret = stop_machine_cpuslocked(fn, data, cpus);
cpus_read_unlock();
return ret;
}
EXPORT_SYMBOL_GPL(stop_machine);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册
新手
引导
客服 返回
顶部