提交 4057adaf 编写于 作者: L Linus Torvalds

Merge branch 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull RCU updates from Ingo Molnar:

 - updates to the handling of expedited grace periods

 - updates to reduce lock contention in the rcu_node combining tree

   [ These are in preparation for the consolidation of RCU-bh,
     RCU-preempt, and RCU-sched into a single flavor, which was
     requested by Linus in response to a security flaw whose root cause
     included confusion between the multiple flavors of RCU ]

 - torture-test updates that save their users some time and effort

 - miscellaneous fixes

* 'core-rcu-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (44 commits)
  rcu/x86: Provide early rcu_cpu_starting() callback
  torture: Make kvm-find-errors.sh find build warnings
  rcutorture: Abbreviate kvm.sh summary lines
  rcutorture: Print end-of-test state in kvm.sh summary
  rcutorture: Print end-of-test state
  torture: Fold parse-torture.sh into parse-console.sh
  torture: Add a script to edit output from failed runs
  rcu: Update list of rcu_future_grace_period() trace events
  rcu: Drop early GP request check from rcu_gp_kthread()
  rcu: Simplify and inline cpu_needs_another_gp()
  rcu: The rcu_gp_cleanup() function does not need cpu_needs_another_gp()
  rcu: Make rcu_start_this_gp() check for out-of-range requests
  rcu: Add funnel locking to rcu_start_this_gp()
  rcu: Make rcu_start_future_gp() caller select grace period
  rcu: Inline rcu_start_gp_advanced() into rcu_start_future_gp()
  rcu: Clear request other than RCU_GP_FLAG_INIT at GP end
  rcu: Cleanup, don't put ->completed into an int
  rcu: Switch __rcu_process_callbacks() to rcu_accelerate_cbs()
  rcu: Avoid __call_rcu_core() root rcu_node ->lock acquisition
  rcu: Make rcu_migrate_callbacks wake GP kthread when needed
  ...
What is RCU? -- "Read, Copy, Update"
Please note that the "What is RCU?" LWN series is an excellent place
to start learning about RCU:
......
......@@ -46,6 +46,7 @@
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/syscore_ops.h>
#include <linux/rcupdate.h>
#include <asm/cpufeature.h>
#include <asm/e820/api.h>
......@@ -793,6 +794,9 @@ void mtrr_ap_init(void)
if (!use_intel() || mtrr_aps_delayed_init)
return;
rcu_cpu_starting(smp_processor_id());
/*
* Ideally we should hold mtrr_mutex here to avoid mtrr entries
* changed, but this routine will be called in cpu boot time,
......
......@@ -357,7 +357,7 @@ static void nvme_free_ns_head(struct kref *ref)
nvme_mpath_remove_disk(head);
ida_simple_remove(&head->subsys->ns_ida, head->instance);
list_del_init(&head->entry);
cleanup_srcu_struct(&head->srcu);
cleanup_srcu_struct_quiesced(&head->srcu);
nvme_put_subsystem(head->subsys);
kfree(head);
}
......
......@@ -108,7 +108,6 @@ void rcu_sched_qs(void);
void rcu_bh_qs(void);
void rcu_check_callbacks(int user);
void rcu_report_dead(unsigned int cpu);
void rcu_cpu_starting(unsigned int cpu);
void rcutree_migrate_callbacks(int cpu);
#ifdef CONFIG_RCU_STALL_COMMON
......@@ -188,13 +187,13 @@ static inline void exit_tasks_rcu_finish(void) { }
#endif /* #else #ifdef CONFIG_TASKS_RCU */
/**
* cond_resched_rcu_qs - Report potential quiescent states to RCU
* cond_resched_tasks_rcu_qs - Report potential quiescent states to RCU
*
* This macro resembles cond_resched(), except that it is defined to
* report potential quiescent states to RCU-tasks even if the cond_resched()
* machinery were to be shut off, as some advocate for PREEMPT kernels.
*/
#define cond_resched_rcu_qs() \
#define cond_resched_tasks_rcu_qs() \
do { \
if (!cond_resched()) \
rcu_note_voluntary_context_switch_lite(current); \
......
......@@ -132,5 +132,6 @@ static inline void rcu_all_qs(void) { barrier(); }
#define rcutree_offline_cpu NULL
#define rcutree_dead_cpu NULL
#define rcutree_dying_cpu NULL
static inline void rcu_cpu_starting(unsigned int cpu) { }
#endif /* __LINUX_RCUTINY_H */
......@@ -74,6 +74,7 @@ static inline void synchronize_rcu_bh_expedited(void)
void rcu_barrier(void);
void rcu_barrier_bh(void);
void rcu_barrier_sched(void);
bool rcu_eqs_special_set(int cpu);
unsigned long get_state_synchronize_rcu(void);
void cond_synchronize_rcu(unsigned long oldstate);
unsigned long get_state_synchronize_sched(void);
......@@ -100,5 +101,6 @@ int rcutree_online_cpu(unsigned int cpu);
int rcutree_offline_cpu(unsigned int cpu);
int rcutree_dead_cpu(unsigned int cpu);
int rcutree_dying_cpu(unsigned int cpu);
void rcu_cpu_starting(unsigned int cpu);
#endif /* __LINUX_RCUTREE_H */
......@@ -1661,7 +1661,6 @@ static inline int test_tsk_need_resched(struct task_struct *tsk)
* explicit rescheduling in places that are safe. The return
* value indicates whether a reschedule was done in fact.
* cond_resched_lock() will drop the spinlock before scheduling,
* cond_resched_softirq() will enable bhs before scheduling.
*/
#ifndef CONFIG_PREEMPT
extern int _cond_resched(void);
......@@ -1681,13 +1680,6 @@ extern int __cond_resched_lock(spinlock_t *lock);
__cond_resched_lock(lock); \
})
extern int __cond_resched_softirq(void);
#define cond_resched_softirq() ({ \
___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET); \
__cond_resched_softirq(); \
})
static inline void cond_resched_rcu(void)
{
#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
......
......@@ -69,11 +69,45 @@ struct srcu_struct { };
void call_srcu(struct srcu_struct *sp, struct rcu_head *head,
void (*func)(struct rcu_head *head));
void cleanup_srcu_struct(struct srcu_struct *sp);
void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced);
int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp);
void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp);
void synchronize_srcu(struct srcu_struct *sp);
/**
* cleanup_srcu_struct - deconstruct a sleep-RCU structure
* @sp: structure to clean up.
*
* Must invoke this after you are finished using a given srcu_struct that
* was initialized via init_srcu_struct(), else you leak memory.
*/
static inline void cleanup_srcu_struct(struct srcu_struct *sp)
{
_cleanup_srcu_struct(sp, false);
}
/**
* cleanup_srcu_struct_quiesced - deconstruct a quiesced sleep-RCU structure
* @sp: structure to clean up.
*
* Must invoke this after you are finished using a given srcu_struct that
* was initialized via init_srcu_struct(), else you leak memory. Also,
* all grace-period processing must have completed.
*
* "Completed" means that the last synchronize_srcu() and
* synchronize_srcu_expedited() calls must have returned before the call
* to cleanup_srcu_struct_quiesced(). It also means that the callback
* from the last call_srcu() must have been invoked before the call to
* cleanup_srcu_struct_quiesced(), but you can use srcu_barrier() to help
* with this last. Violating these rules will get you a WARN_ON() splat
* (with high probability, anyway), and will also cause the srcu_struct
* to be leaked.
*/
static inline void cleanup_srcu_struct_quiesced(struct srcu_struct *sp)
{
_cleanup_srcu_struct(sp, true);
}
#ifdef CONFIG_DEBUG_LOCK_ALLOC
/**
......
......@@ -84,20 +84,21 @@ TRACE_EVENT(rcu_grace_period,
);
/*
* Tracepoint for future grace-period events, including those for no-callbacks
* CPUs. The caller should pull the data from the rcu_node structure,
* other than rcuname, which comes from the rcu_state structure, and event,
* which is one of the following:
* Tracepoint for future grace-period events. The caller should pull
* the data from the rcu_node structure, other than rcuname, which comes
* from the rcu_state structure, and event, which is one of the following:
*
* "Startleaf": Request a nocb grace period based on leaf-node data.
* "Startleaf": Request a grace period based on leaf-node data.
* "Prestarted": Someone beat us to the request
* "Startedleaf": Leaf-node start proved sufficient.
* "Startedleafroot": Leaf-node start proved sufficient after checking root.
* "Startedroot": Requested a nocb grace period based on root-node data.
* "NoGPkthread": The RCU grace-period kthread has not yet started.
* "StartWait": Start waiting for the requested grace period.
* "ResumeWait": Resume waiting after signal.
* "EndWait": Complete wait.
* "Cleanup": Clean up rcu_node structure after previous GP.
* "CleanupMore": Clean up, and another no-CB GP is needed.
* "CleanupMore": Clean up, and another GP is needed.
*/
TRACE_EVENT(rcu_future_grace_period,
......
......@@ -270,6 +270,12 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
}
}
/* Returns first leaf rcu_node of the specified RCU flavor. */
#define rcu_first_leaf_node(rsp) ((rsp)->level[rcu_num_lvls - 1])
/* Is this rcu_node a leaf? */
#define rcu_is_leaf_node(rnp) ((rnp)->level == rcu_num_lvls - 1)
/*
* Do a full breadth-first scan of the rcu_node structures for the
* specified rcu_state structure.
......@@ -284,8 +290,7 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
* rcu_node tree with but one rcu_node structure, this loop is a no-op.
*/
#define rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) \
for ((rnp) = &(rsp)->node[0]; \
(rnp) < (rsp)->level[rcu_num_lvls - 1]; (rnp)++)
for ((rnp) = &(rsp)->node[0]; !rcu_is_leaf_node(rsp, rnp); (rnp)++)
/*
* Scan the leaves of the rcu_node hierarchy for the specified rcu_state
......@@ -294,7 +299,7 @@ static inline void rcu_init_levelspread(int *levelspread, const int *levelcnt)
* It is still a leaf node, even if it is also the root node.
*/
#define rcu_for_each_leaf_node(rsp, rnp) \
for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
for ((rnp) = rcu_first_leaf_node(rsp); \
(rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)
/*
......@@ -486,6 +491,7 @@ void rcu_force_quiescent_state(void);
void rcu_bh_force_quiescent_state(void);
void rcu_sched_force_quiescent_state(void);
extern struct workqueue_struct *rcu_gp_wq;
extern struct workqueue_struct *rcu_par_gp_wq;
#endif /* #else #ifdef CONFIG_TINY_RCU */
#ifdef CONFIG_RCU_NOCB_CPU
......
......@@ -403,24 +403,6 @@ bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq)
return true;
}
/*
* Scan the specified rcu_segcblist structure for callbacks that need
* a grace period later than the one specified by "seq". We don't look
* at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't
* have a grace-period sequence number.
*/
bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
unsigned long seq)
{
int i;
for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++)
if (rsclp->tails[i - 1] != rsclp->tails[i] &&
ULONG_CMP_LT(seq, rsclp->gp_seq[i]))
return true;
return false;
}
/*
* Merge the source rcu_segcblist structure into the destination
* rcu_segcblist structure, then initialize the source. Any pending
......
......@@ -134,7 +134,5 @@ void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp,
struct rcu_cblist *rclp);
void rcu_segcblist_advance(struct rcu_segcblist *rsclp, unsigned long seq);
bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq);
bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp,
unsigned long seq);
void rcu_segcblist_merge(struct rcu_segcblist *dst_rsclp,
struct rcu_segcblist *src_rsclp);
......@@ -369,7 +369,7 @@ static bool __maybe_unused torturing_tasks(void)
*/
static void rcu_perf_wait_shutdown(void)
{
cond_resched_rcu_qs();
cond_resched_tasks_rcu_qs();
if (atomic_read(&n_rcu_perf_writer_finished) < nrealwriters)
return;
while (!torture_must_stop())
......
......@@ -593,7 +593,12 @@ static void srcu_torture_init(void)
static void srcu_torture_cleanup(void)
{
cleanup_srcu_struct(&srcu_ctld);
static DEFINE_TORTURE_RANDOM(rand);
if (torture_random(&rand) & 0x800)
cleanup_srcu_struct(&srcu_ctld);
else
cleanup_srcu_struct_quiesced(&srcu_ctld);
srcu_ctlp = &srcu_ctl; /* In case of a later rcutorture run. */
}
......@@ -1609,6 +1614,9 @@ static enum cpuhp_state rcutor_hp;
static void
rcu_torture_cleanup(void)
{
int flags = 0;
unsigned long gpnum = 0;
unsigned long completed = 0;
int i;
rcutorture_record_test_transition();
......@@ -1639,6 +1647,11 @@ rcu_torture_cleanup(void)
fakewriter_tasks = NULL;
}
rcutorture_get_gp_data(cur_ops->ttype, &flags, &gpnum, &completed);
srcutorture_get_gp_data(cur_ops->ttype, srcu_ctlp,
&flags, &gpnum, &completed);
pr_alert("%s: End-test grace-period state: g%lu c%lu f%#x\n",
cur_ops->name, gpnum, completed, flags);
torture_stop_kthread(rcu_torture_stats, stats_task);
torture_stop_kthread(rcu_torture_fqs, fqs_task);
for (i = 0; i < ncbflooders; i++)
......
......@@ -86,16 +86,19 @@ EXPORT_SYMBOL_GPL(init_srcu_struct);
* Must invoke this after you are finished using a given srcu_struct that
* was initialized via init_srcu_struct(), else you leak memory.
*/
void cleanup_srcu_struct(struct srcu_struct *sp)
void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)
{
WARN_ON(sp->srcu_lock_nesting[0] || sp->srcu_lock_nesting[1]);
flush_work(&sp->srcu_work);
if (quiesced)
WARN_ON(work_pending(&sp->srcu_work));
else
flush_work(&sp->srcu_work);
WARN_ON(sp->srcu_gp_running);
WARN_ON(sp->srcu_gp_waiting);
WARN_ON(sp->srcu_cb_head);
WARN_ON(&sp->srcu_cb_head != sp->srcu_cb_tail);
}
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);
/*
* Removes the count for the old reader from the appropriate element of
......
......@@ -366,24 +366,28 @@ static unsigned long srcu_get_delay(struct srcu_struct *sp)
return SRCU_INTERVAL;
}
/**
* cleanup_srcu_struct - deconstruct a sleep-RCU structure
* @sp: structure to clean up.
*
* Must invoke this after you are finished using a given srcu_struct that
* was initialized via init_srcu_struct(), else you leak memory.
*/
void cleanup_srcu_struct(struct srcu_struct *sp)
/* Helper for cleanup_srcu_struct() and cleanup_srcu_struct_quiesced(). */
void _cleanup_srcu_struct(struct srcu_struct *sp, bool quiesced)
{
int cpu;
if (WARN_ON(!srcu_get_delay(sp)))
return; /* Leakage unless caller handles error. */
return; /* Just leak it! */
if (WARN_ON(srcu_readers_active(sp)))
return; /* Leakage unless caller handles error. */
flush_delayed_work(&sp->work);
return; /* Just leak it! */
if (quiesced) {
if (WARN_ON(delayed_work_pending(&sp->work)))
return; /* Just leak it! */
} else {
flush_delayed_work(&sp->work);
}
for_each_possible_cpu(cpu)
flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
if (quiesced) {
if (WARN_ON(delayed_work_pending(&per_cpu_ptr(sp->sda, cpu)->work)))
return; /* Just leak it! */
} else {
flush_delayed_work(&per_cpu_ptr(sp->sda, cpu)->work);
}
if (WARN_ON(rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)) != SRCU_STATE_IDLE) ||
WARN_ON(srcu_readers_active(sp))) {
pr_info("%s: Active srcu_struct %p state: %d\n", __func__, sp, rcu_seq_state(READ_ONCE(sp->srcu_gp_seq)));
......@@ -392,7 +396,7 @@ void cleanup_srcu_struct(struct srcu_struct *sp)
free_percpu(sp->sda);
sp->sda = NULL;
}
EXPORT_SYMBOL_GPL(cleanup_srcu_struct);
EXPORT_SYMBOL_GPL(_cleanup_srcu_struct);
/*
* Counts the new reader in the appropriate per-CPU element of the
......
此差异已折叠。
......@@ -58,6 +58,14 @@ struct rcu_dynticks {
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
};
/* Communicate arguments to a workqueue handler. */
struct rcu_exp_work {
smp_call_func_t rew_func;
struct rcu_state *rew_rsp;
unsigned long rew_s;
struct work_struct rew_work;
};
/* RCU's kthread states for tracing. */
#define RCU_KTHREAD_STOPPED 0
#define RCU_KTHREAD_RUNNING 1
......@@ -150,15 +158,32 @@ struct rcu_node {
struct swait_queue_head nocb_gp_wq[2];
/* Place for rcu_nocb_kthread() to wait GP. */
#endif /* #ifdef CONFIG_RCU_NOCB_CPU */
int need_future_gp[2];
/* Counts of upcoming no-CB GP requests. */
u8 need_future_gp[4]; /* Counts of upcoming GP requests. */
raw_spinlock_t fqslock ____cacheline_internodealigned_in_smp;
spinlock_t exp_lock ____cacheline_internodealigned_in_smp;
unsigned long exp_seq_rq;
wait_queue_head_t exp_wq[4];
struct rcu_exp_work rew;
bool exp_need_flush; /* Need to flush workitem? */
} ____cacheline_internodealigned_in_smp;
/* Accessors for ->need_future_gp[] array. */
#define need_future_gp_mask() \
(ARRAY_SIZE(((struct rcu_node *)NULL)->need_future_gp) - 1)
#define need_future_gp_element(rnp, c) \
((rnp)->need_future_gp[(c) & need_future_gp_mask()])
#define need_any_future_gp(rnp) \
({ \
int __i; \
bool __nonzero = false; \
\
for (__i = 0; __i < ARRAY_SIZE((rnp)->need_future_gp); __i++) \
__nonzero = __nonzero || \
READ_ONCE((rnp)->need_future_gp[__i]); \
__nonzero; \
})
/*
* Bitmasks in an rcu_node cover the interval [grplo, grphi] of CPU IDs, and
* are indexed relative to this interval rather than the global CPU ID space.
......@@ -224,10 +249,6 @@ struct rcu_data {
#ifdef CONFIG_RCU_FAST_NO_HZ
struct rcu_head oom_head;
#endif /* #ifdef CONFIG_RCU_FAST_NO_HZ */
atomic_long_t exp_workdone0; /* # done by workqueue. */
atomic_long_t exp_workdone1; /* # done by others #1. */
atomic_long_t exp_workdone2; /* # done by others #2. */
atomic_long_t exp_workdone3; /* # done by others #3. */
int exp_dynticks_snap; /* Double-check need for IPI. */
/* 6) Callback offloading. */
......@@ -408,7 +429,6 @@ extern struct rcu_state rcu_preempt_state;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
int rcu_dynticks_snap(struct rcu_dynticks *rdtp);
bool rcu_eqs_special_set(int cpu);
#ifdef CONFIG_RCU_BOOST
DECLARE_PER_CPU(unsigned int, rcu_cpu_kthread_status);
......@@ -438,7 +458,6 @@ static void rcu_preempt_boost_start_gp(struct rcu_node *rnp);
static void invoke_rcu_callbacks_kthread(void);
static bool rcu_is_callbacks_kthread(void);
#ifdef CONFIG_RCU_BOOST
static void rcu_preempt_do_callbacks(void);
static int rcu_spawn_one_boost_kthread(struct rcu_state *rsp,
struct rcu_node *rnp);
#endif /* #ifdef CONFIG_RCU_BOOST */
......@@ -454,7 +473,6 @@ static void print_cpu_stall_info_end(void);
static void zero_cpu_stall_ticks(struct rcu_data *rdp);
static void increment_cpu_stall_ticks(void);
static bool rcu_nocb_cpu_needs_barrier(struct rcu_state *rsp, int cpu);
static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq);
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp);
static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq);
static void rcu_init_one_nocb(struct rcu_node *rnp);
......
......@@ -20,6 +20,8 @@
* Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
*/
#include <linux/lockdep.h>
/*
* Record the start of an expedited grace period.
*/
......@@ -154,14 +156,34 @@ static void __maybe_unused sync_exp_reset_tree(struct rcu_state *rsp)
* for the current expedited grace period. Works only for preemptible
* RCU -- other RCU implementation use other means.
*
* Caller must hold the rcu_state's exp_mutex.
* Caller must hold the specificed rcu_node structure's ->lock
*/
static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
{
raw_lockdep_assert_held_rcu_node(rnp);
return rnp->exp_tasks == NULL &&
READ_ONCE(rnp->expmask) == 0;
}
/*
* Like sync_rcu_preempt_exp_done(), but this function assumes the caller
* doesn't hold the rcu_node's ->lock, and will acquire and release the lock
* itself
*/
static bool sync_rcu_preempt_exp_done_unlocked(struct rcu_node *rnp)
{
unsigned long flags;
bool ret;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
ret = sync_rcu_preempt_exp_done(rnp);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
return ret;
}
/*
* Report the exit from RCU read-side critical section for the last task
* that queued itself during or before the current expedited preemptible-RCU
......@@ -170,8 +192,7 @@ static bool sync_rcu_preempt_exp_done(struct rcu_node *rnp)
* recursively up the tree. (Calm down, calm down, we do the recursion
* iteratively!)
*
* Caller must hold the rcu_state's exp_mutex and the specified rcu_node
* structure's ->lock.
* Caller must hold the specified rcu_node structure's ->lock.
*/
static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
bool wake, unsigned long flags)
......@@ -207,8 +228,6 @@ static void __rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
/*
* Report expedited quiescent state for specified node. This is a
* lock-acquisition wrapper function for __rcu_report_exp_rnp().
*
* Caller must hold the rcu_state's exp_mutex.
*/
static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
struct rcu_node *rnp, bool wake)
......@@ -221,8 +240,7 @@ static void __maybe_unused rcu_report_exp_rnp(struct rcu_state *rsp,
/*
* Report expedited quiescent state for multiple CPUs, all covered by the
* specified leaf rcu_node structure. Caller must hold the rcu_state's
* exp_mutex.
* specified leaf rcu_node structure.
*/
static void rcu_report_exp_cpu_mult(struct rcu_state *rsp, struct rcu_node *rnp,
unsigned long mask, bool wake)
......@@ -248,14 +266,12 @@ static void rcu_report_exp_rdp(struct rcu_state *rsp, struct rcu_data *rdp,
}
/* Common code for synchronize_{rcu,sched}_expedited() work-done checking. */
static bool sync_exp_work_done(struct rcu_state *rsp, atomic_long_t *stat,
unsigned long s)
static bool sync_exp_work_done(struct rcu_state *rsp, unsigned long s)
{
if (rcu_exp_gp_seq_done(rsp, s)) {
trace_rcu_exp_grace_period(rsp->name, s, TPS("done"));
/* Ensure test happens before caller kfree(). */
smp_mb__before_atomic(); /* ^^^ */
atomic_long_inc(stat);
return true;
}
return false;
......@@ -289,7 +305,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
* promoting locality and is not strictly needed for correctness.
*/
for (; rnp != NULL; rnp = rnp->parent) {
if (sync_exp_work_done(rsp, &rdp->exp_workdone1, s))
if (sync_exp_work_done(rsp, s))
return true;
/* Work not done, either wait here or go up. */
......@@ -302,8 +318,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
rnp->grplo, rnp->grphi,
TPS("wait"));
wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
sync_exp_work_done(rsp,
&rdp->exp_workdone2, s));
sync_exp_work_done(rsp, s));
return true;
}
rnp->exp_seq_rq = s; /* Followers can wait on us. */
......@@ -313,7 +328,7 @@ static bool exp_funnel_lock(struct rcu_state *rsp, unsigned long s)
}
mutex_lock(&rsp->exp_mutex);
fastpath:
if (sync_exp_work_done(rsp, &rdp->exp_workdone3, s)) {
if (sync_exp_work_done(rsp, s)) {
mutex_unlock(&rsp->exp_mutex);
return true;
}
......@@ -362,93 +377,129 @@ static void sync_sched_exp_online_cleanup(int cpu)
}
/*
* Select the nodes that the upcoming expedited grace period needs
* to wait for.
* Select the CPUs within the specified rcu_node that the upcoming
* expedited grace period needs to wait for.
*/
static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
smp_call_func_t func)
static void sync_rcu_exp_select_node_cpus(struct work_struct *wp)
{
int cpu;
unsigned long flags;
smp_call_func_t func;
unsigned long mask_ofl_test;
unsigned long mask_ofl_ipi;
int ret;
struct rcu_node *rnp;
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
sync_exp_reset_tree(rsp);
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
rcu_for_each_leaf_node(rsp, rnp) {
raw_spin_lock_irqsave_rcu_node(rnp, flags);
struct rcu_exp_work *rewp =
container_of(wp, struct rcu_exp_work, rew_work);
struct rcu_node *rnp = container_of(rewp, struct rcu_node, rew);
struct rcu_state *rsp = rewp->rew_rsp;
/* Each pass checks a CPU for identity, offline, and idle. */
mask_ofl_test = 0;
for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
int snap;
func = rewp->rew_func;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if (raw_smp_processor_id() == cpu ||
!(rnp->qsmaskinitnext & mask)) {
/* Each pass checks a CPU for identity, offline, and idle. */
mask_ofl_test = 0;
for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_dynticks *rdtp = per_cpu_ptr(&rcu_dynticks, cpu);
int snap;
if (raw_smp_processor_id() == cpu ||
!(rnp->qsmaskinitnext & mask)) {
mask_ofl_test |= mask;
} else {
snap = rcu_dynticks_snap(rdtp);
if (rcu_dynticks_in_eqs(snap))
mask_ofl_test |= mask;
} else {
snap = rcu_dynticks_snap(rdtp);
if (rcu_dynticks_in_eqs(snap))
mask_ofl_test |= mask;
else
rdp->exp_dynticks_snap = snap;
}
else
rdp->exp_dynticks_snap = snap;
}
mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
/*
* Need to wait for any blocked tasks as well. Note that
* additional blocking tasks will also block the expedited
* GP until such time as the ->expmask bits are cleared.
*/
if (rcu_preempt_has_tasks(rnp))
rnp->exp_tasks = rnp->blkd_tasks.next;
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
mask_ofl_ipi = rnp->expmask & ~mask_ofl_test;
/* IPI the remaining CPUs for expedited quiescent state. */
for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
/*
* Need to wait for any blocked tasks as well. Note that
* additional blocking tasks will also block the expedited GP
* until such time as the ->expmask bits are cleared.
*/
if (rcu_preempt_has_tasks(rnp))
rnp->exp_tasks = rnp->blkd_tasks.next;
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
if (!(mask_ofl_ipi & mask))
continue;
/* IPI the remaining CPUs for expedited quiescent state. */
for_each_leaf_node_cpu_mask(rnp, cpu, rnp->expmask) {
unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
if (!(mask_ofl_ipi & mask))
continue;
retry_ipi:
if (rcu_dynticks_in_eqs_since(rdp->dynticks,
rdp->exp_dynticks_snap)) {
mask_ofl_test |= mask;
continue;
}
ret = smp_call_function_single(cpu, func, rsp, 0);
if (!ret) {
mask_ofl_ipi &= ~mask;
continue;
}
/* Failed, raced with CPU hotplug operation. */
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if ((rnp->qsmaskinitnext & mask) &&
(rnp->expmask & mask)) {
/* Online, so delay for a bit and try again. */
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
schedule_timeout_uninterruptible(1);
goto retry_ipi;
}
/* CPU really is offline, so we can ignore it. */
if (!(rnp->expmask & mask))
mask_ofl_ipi &= ~mask;
if (rcu_dynticks_in_eqs_since(rdp->dynticks,
rdp->exp_dynticks_snap)) {
mask_ofl_test |= mask;
continue;
}
ret = smp_call_function_single(cpu, func, rsp, 0);
if (!ret) {
mask_ofl_ipi &= ~mask;
continue;
}
/* Failed, raced with CPU hotplug operation. */
raw_spin_lock_irqsave_rcu_node(rnp, flags);
if ((rnp->qsmaskinitnext & mask) &&
(rnp->expmask & mask)) {
/* Online, so delay for a bit and try again. */
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("selectofl"));
schedule_timeout_uninterruptible(1);
goto retry_ipi;
}
/* CPU really is offline, so we can ignore it. */
if (!(rnp->expmask & mask))
mask_ofl_ipi &= ~mask;
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
}
/* Report quiescent states for those that went offline. */
mask_ofl_test |= mask_ofl_ipi;
if (mask_ofl_test)
rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
}
/*
* Select the nodes that the upcoming expedited grace period needs
* to wait for.
*/
static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
smp_call_func_t func)
{
struct rcu_node *rnp;
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("reset"));
sync_exp_reset_tree(rsp);
trace_rcu_exp_grace_period(rsp->name, rcu_exp_gp_seq_endval(rsp), TPS("select"));
/* Schedule work for each leaf rcu_node structure. */
rcu_for_each_leaf_node(rsp, rnp) {
rnp->exp_need_flush = false;
if (!READ_ONCE(rnp->expmask))
continue; /* Avoid early boot non-existent wq. */
rnp->rew.rew_func = func;
rnp->rew.rew_rsp = rsp;
if (!READ_ONCE(rcu_par_gp_wq) ||
rcu_scheduler_active != RCU_SCHEDULER_RUNNING) {
/* No workqueues yet. */
sync_rcu_exp_select_node_cpus(&rnp->rew.rew_work);
continue;
}
/* Report quiescent states for those that went offline. */
mask_ofl_test |= mask_ofl_ipi;
if (mask_ofl_test)
rcu_report_exp_cpu_mult(rsp, rnp, mask_ofl_test, false);
INIT_WORK(&rnp->rew.rew_work, sync_rcu_exp_select_node_cpus);
queue_work_on(rnp->grplo, rcu_par_gp_wq, &rnp->rew.rew_work);
rnp->exp_need_flush = true;
}
/* Wait for workqueue jobs (if any) to complete. */
rcu_for_each_leaf_node(rsp, rnp)
if (rnp->exp_need_flush)
flush_work(&rnp->rew.rew_work);
}
static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
......@@ -469,9 +520,9 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
for (;;) {
ret = swait_event_timeout(
rsp->expedited_wq,
sync_rcu_preempt_exp_done(rnp_root),
sync_rcu_preempt_exp_done_unlocked(rnp_root),
jiffies_stall);
if (ret > 0 || sync_rcu_preempt_exp_done(rnp_root))
if (ret > 0 || sync_rcu_preempt_exp_done_unlocked(rnp_root))
return;
WARN_ON(ret < 0); /* workqueues should not be signaled. */
if (rcu_cpu_stall_suppress)
......@@ -504,7 +555,7 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
rcu_for_each_node_breadth_first(rsp, rnp) {
if (rnp == rnp_root)
continue; /* printed unconditionally */
if (sync_rcu_preempt_exp_done(rnp))
if (sync_rcu_preempt_exp_done_unlocked(rnp))
continue;
pr_cont(" l=%u:%d-%d:%#lx/%c",
rnp->level, rnp->grplo, rnp->grphi,
......@@ -560,14 +611,6 @@ static void rcu_exp_wait_wake(struct rcu_state *rsp, unsigned long s)
mutex_unlock(&rsp->exp_wake_mutex);
}
/* Let the workqueue handler know what it is supposed to do. */
struct rcu_exp_work {
smp_call_func_t rew_func;
struct rcu_state *rew_rsp;
unsigned long rew_s;
struct work_struct rew_work;
};
/*
* Common code to drive an expedited grace period forward, used by
* workqueues and mid-boot-time tasks.
......@@ -633,7 +676,7 @@ static void _synchronize_rcu_expedited(struct rcu_state *rsp,
rdp = per_cpu_ptr(rsp->rda, raw_smp_processor_id());
rnp = rcu_get_root(rsp);
wait_event(rnp->exp_wq[rcu_seq_ctr(s) & 0x3],
sync_exp_work_done(rsp, &rdp->exp_workdone0, s));
sync_exp_work_done(rsp, s));
smp_mb(); /* Workqueue actions happen before return. */
/* Let the next expedited grace period start. */
......
......@@ -182,7 +182,7 @@ static void rcu_preempt_ctxt_queue(struct rcu_node *rnp, struct rcu_data *rdp)
raw_lockdep_assert_held_rcu_node(rnp);
WARN_ON_ONCE(rdp->mynode != rnp);
WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1);
WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
/*
* Decide where to queue the newly blocked task. In theory,
......@@ -383,6 +383,50 @@ static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp)
return rnp->gp_tasks != NULL;
}
/*
* Preemptible RCU implementation for rcu_read_lock().
* Just increment ->rcu_read_lock_nesting, shared state will be updated
* if we block.
*/
void __rcu_read_lock(void)
{
current->rcu_read_lock_nesting++;
barrier(); /* critical section after entry code. */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);
/*
* Preemptible RCU implementation for rcu_read_unlock().
* Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
* rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
* invoke rcu_read_unlock_special() to clean up after a context switch
* in an RCU read-side critical section and other special cases.
*/
void __rcu_read_unlock(void)
{
struct task_struct *t = current;
if (t->rcu_read_lock_nesting != 1) {
--t->rcu_read_lock_nesting;
} else {
barrier(); /* critical section before exit code. */
t->rcu_read_lock_nesting = INT_MIN;
barrier(); /* assign before ->rcu_read_unlock_special load */
if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
rcu_read_unlock_special(t);
barrier(); /* ->rcu_read_unlock_special load before assign */
t->rcu_read_lock_nesting = 0;
}
#ifdef CONFIG_PROVE_LOCKING
{
int rrln = READ_ONCE(t->rcu_read_lock_nesting);
WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
}
#endif /* #ifdef CONFIG_PROVE_LOCKING */
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
/*
* Advance a ->blkd_tasks-list pointer to the next entry, instead
* returning NULL if at the end of the list.
......@@ -489,7 +533,7 @@ void rcu_read_unlock_special(struct task_struct *t)
rnp = t->rcu_blocked_node;
raw_spin_lock_rcu_node(rnp); /* irqs already disabled. */
WARN_ON_ONCE(rnp != t->rcu_blocked_node);
WARN_ON_ONCE(rnp->level != rcu_num_lvls - 1);
WARN_ON_ONCE(!rcu_is_leaf_node(rnp));
empty_norm = !rcu_preempt_blocked_readers_cgp(rnp);
empty_exp = sync_rcu_preempt_exp_done(rnp);
smp_mb(); /* ensure expedited fastpath sees end of RCU c-s. */
......@@ -685,15 +729,6 @@ static void rcu_preempt_check_callbacks(void)
t->rcu_read_unlock_special.b.need_qs = true;
}
#ifdef CONFIG_RCU_BOOST
static void rcu_preempt_do_callbacks(void)
{
rcu_do_batch(rcu_state_p, this_cpu_ptr(rcu_data_p));
}
#endif /* #ifdef CONFIG_RCU_BOOST */
/**
* call_rcu() - Queue an RCU callback for invocation after a grace period.
* @head: structure to be used for queueing the RCU updates.
......@@ -1140,7 +1175,7 @@ static void rcu_kthread_do_work(void)
{
rcu_do_batch(&rcu_sched_state, this_cpu_ptr(&rcu_sched_data));
rcu_do_batch(&rcu_bh_state, this_cpu_ptr(&rcu_bh_data));
rcu_preempt_do_callbacks();
rcu_do_batch(&rcu_preempt_state, this_cpu_ptr(&rcu_preempt_data));
}
static void rcu_cpu_kthread_setup(unsigned int cpu)
......@@ -1607,7 +1642,7 @@ static int rcu_oom_notify(struct notifier_block *self,
for_each_online_cpu(cpu) {
smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1);
cond_resched_rcu_qs();
cond_resched_tasks_rcu_qs();
}
/* Unconditionally decrement: no need to wake ourselves up. */
......@@ -1780,19 +1815,6 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
swake_up_all(sq);
}
/*
* Set the root rcu_node structure's ->need_future_gp field
* based on the sum of those of all rcu_node structures. This does
* double-count the root rcu_node structure's requests, but this
* is necessary to handle the possibility of a rcu_nocb_kthread()
* having awakened during the time that the rcu_node structures
* were being updated for the end of the previous grace period.
*/
static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
{
rnp->need_future_gp[(rnp->completed + 1) & 0x1] += nrq;
}
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
{
return &rnp->nocb_gp_wq[rnp->completed & 0x1];
......@@ -1966,7 +1988,7 @@ static void __call_rcu_nocb_enqueue(struct rcu_data *rdp,
trace_rcu_nocb_wake(rdp->rsp->name, rdp->cpu,
TPS("WakeOvf"));
} else {
wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE,
wake_nocb_leader_defer(rdp, RCU_NOCB_WAKE_FORCE,
TPS("WakeOvfIsDeferred"));
}
rdp->qlen_last_fqs_check = LONG_MAX / 2;
......@@ -2048,7 +2070,8 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
struct rcu_node *rnp = rdp->mynode;
raw_spin_lock_irqsave_rcu_node(rnp, flags);
needwake = rcu_start_future_gp(rnp, rdp, &c);
c = rcu_cbs_completed(rdp->rsp, rnp);
needwake = rcu_start_this_gp(rnp, rdp, c);
raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
if (needwake)
rcu_gp_kthread_wake(rdp->rsp);
......@@ -2057,7 +2080,7 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
* Wait for the grace period. Do so interruptibly to avoid messing
* up the load average.
*/
trace_rcu_future_gp(rnp, rdp, c, TPS("StartWait"));
trace_rcu_this_gp(rnp, rdp, c, TPS("StartWait"));
for (;;) {
swait_event_interruptible(
rnp->nocb_gp_wq[c & 0x1],
......@@ -2065,9 +2088,9 @@ static void rcu_nocb_wait_gp(struct rcu_data *rdp)
if (likely(d))
break;
WARN_ON(signal_pending(current));
trace_rcu_future_gp(rnp, rdp, c, TPS("ResumeWait"));
trace_rcu_this_gp(rnp, rdp, c, TPS("ResumeWait"));
}
trace_rcu_future_gp(rnp, rdp, c, TPS("EndWait"));
trace_rcu_this_gp(rnp, rdp, c, TPS("EndWait"));
smp_mb(); /* Ensure that CB invocation happens after GP end. */
}
......@@ -2236,7 +2259,7 @@ static int rcu_nocb_kthread(void *arg)
cl++;
c++;
local_bh_enable();
cond_resched_rcu_qs();
cond_resched_tasks_rcu_qs();
list = next;
}
trace_rcu_batch_end(rdp->rsp->name, c, !!list, 0, 0, 1);
......@@ -2292,7 +2315,7 @@ static void do_nocb_deferred_wakeup(struct rcu_data *rdp)
void __init rcu_init_nohz(void)
{
int cpu;
bool need_rcu_nocb_mask = true;
bool need_rcu_nocb_mask = false;
struct rcu_state *rsp;
#if defined(CONFIG_NO_HZ_FULL)
......@@ -2315,7 +2338,7 @@ void __init rcu_init_nohz(void)
#endif /* #if defined(CONFIG_NO_HZ_FULL) */
if (!cpumask_subset(rcu_nocb_mask, cpu_possible_mask)) {
pr_info("\tNote: kernel parameter 'rcu_nocbs=' contains nonexistent CPUs.\n");
pr_info("\tNote: kernel parameter 'rcu_nocbs=', 'nohz_full', or 'isolcpus=' contains nonexistent CPUs.\n");
cpumask_and(rcu_nocb_mask, cpu_possible_mask,
rcu_nocb_mask);
}
......@@ -2495,10 +2518,6 @@ static void rcu_nocb_gp_cleanup(struct swait_queue_head *sq)
{
}
static void rcu_nocb_gp_set(struct rcu_node *rnp, int nrq)
{
}
static struct swait_queue_head *rcu_nocb_gp_get(struct rcu_node *rnp)
{
return NULL;
......@@ -2587,8 +2606,7 @@ static bool rcu_nohz_full_cpu(struct rcu_state *rsp)
}
/*
* Bind the grace-period kthread for the sysidle flavor of RCU to the
* timekeeping CPU.
* Bind the RCU grace-period kthreads to the housekeeping CPU.
*/
static void rcu_bind_gp_kthread(void)
{
......
......@@ -226,54 +226,6 @@ core_initcall(rcu_set_runtime_mode);
#endif /* #if !defined(CONFIG_TINY_RCU) || defined(CONFIG_SRCU) */
#ifdef CONFIG_PREEMPT_RCU
/*
* Preemptible RCU implementation for rcu_read_lock().
* Just increment ->rcu_read_lock_nesting, shared state will be updated
* if we block.
*/
void __rcu_read_lock(void)
{
current->rcu_read_lock_nesting++;
barrier(); /* critical section after entry code. */
}
EXPORT_SYMBOL_GPL(__rcu_read_lock);
/*
* Preemptible RCU implementation for rcu_read_unlock().
* Decrement ->rcu_read_lock_nesting. If the result is zero (outermost
* rcu_read_unlock()) and ->rcu_read_unlock_special is non-zero, then
* invoke rcu_read_unlock_special() to clean up after a context switch
* in an RCU read-side critical section and other special cases.
*/
void __rcu_read_unlock(void)
{
struct task_struct *t = current;
if (t->rcu_read_lock_nesting != 1) {
--t->rcu_read_lock_nesting;
} else {
barrier(); /* critical section before exit code. */
t->rcu_read_lock_nesting = INT_MIN;
barrier(); /* assign before ->rcu_read_unlock_special load */
if (unlikely(READ_ONCE(t->rcu_read_unlock_special.s)))
rcu_read_unlock_special(t);
barrier(); /* ->rcu_read_unlock_special load before assign */
t->rcu_read_lock_nesting = 0;
}
#ifdef CONFIG_PROVE_LOCKING
{
int rrln = READ_ONCE(t->rcu_read_lock_nesting);
WARN_ON_ONCE(rrln < 0 && rrln > INT_MIN / 2);
}
#endif /* #ifdef CONFIG_PROVE_LOCKING */
}
EXPORT_SYMBOL_GPL(__rcu_read_unlock);
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static struct lock_class_key rcu_lock_key;
struct lockdep_map rcu_lock_map =
......@@ -624,7 +576,7 @@ EXPORT_SYMBOL_GPL(call_rcu_tasks);
* grace period has elapsed, in other words after all currently
* executing rcu-tasks read-side critical sections have elapsed. These
* read-side critical sections are delimited by calls to schedule(),
* cond_resched_rcu_qs(), idle execution, userspace execution, calls
* cond_resched_tasks_rcu_qs(), idle execution, userspace execution, calls
* to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched().
*
* This is a very specialized primitive, intended only for a few uses in
......
......@@ -5025,20 +5025,6 @@ int __cond_resched_lock(spinlock_t *lock)
}
EXPORT_SYMBOL(__cond_resched_lock);
int __sched __cond_resched_softirq(void)
{
BUG_ON(!in_softirq());
if (should_resched(SOFTIRQ_DISABLE_OFFSET)) {
local_bh_enable();
preempt_schedule_common();
local_bh_disable();
return 1;
}
return 0;
}
EXPORT_SYMBOL(__cond_resched_softirq);
/**
* yield - yield the current processor to other threads.
*
......
......@@ -145,8 +145,7 @@ static void __local_bh_enable(unsigned int cnt)
}
/*
* Special-case - softirqs can safely be enabled in
* cond_resched_softirq(), or by __do_softirq(),
* Special-case - softirqs can safely be enabled by __do_softirq(),
* without processing still-pending softirqs:
*/
void _local_bh_enable(void)
......
......@@ -574,7 +574,7 @@ void stutter_wait(const char *title)
{
int spt;
cond_resched_rcu_qs();
cond_resched_tasks_rcu_qs();
spt = READ_ONCE(stutter_pause_test);
for (; spt; spt = READ_ONCE(stutter_pause_test)) {
if (spt == 1) {
......
......@@ -159,13 +159,13 @@ static int benchmark_event_kthread(void *arg)
* wants to run, schedule in, but if the CPU is idle,
* we'll keep burning cycles.
*
* Note the _rcu_qs() version of cond_resched() will
* Note the tasks_rcu_qs() version of cond_resched() will
* notify synchronize_rcu_tasks() that this thread has
* passed a quiescent state for rcu_tasks. Otherwise
* this thread will never voluntarily schedule which would
* block synchronize_rcu_tasks() indefinitely.
*/
cond_resched();
cond_resched_tasks_rcu_qs();
}
return 0;
......
#!/bin/sh
#
# Invoke a text editor on all console.log files for all runs with diagnostics,
# that is, on all such files having a console.log.diags counterpart.
# Note that both console.log.diags and console.log are passed to the
# editor (currently defaulting to "vi"), allowing the user to get an
# idea of what to search for in the console.log file.
#
# Usage: kvm-find-errors.sh directory
#
# The "directory" above should end with the date/time directory, for example,
# "tools/testing/selftests/rcutorture/res/2018.02.25-14:27:27".
rundir="${1}"
if test -z "$rundir" -o ! -d "$rundir"
then
echo Usage: $0 directory
fi
editor=${EDITOR-vi}
# Find builds with errors
files=
for i in ${rundir}/*/Make.out
do
if egrep -q "error:|warning:" < $i
then
egrep "error:|warning:" < $i > $i.diags
files="$files $i.diags $i"
fi
done
if test -n "$files"
then
$editor $files
else
echo No build errors.
fi
if grep -q -e "--buildonly" < ${rundir}/log
then
echo Build-only run, no console logs to check.
fi
# Find console logs with errors
files=
for i in ${rundir}/*/console.log
do
if test -r $i.diags
then
files="$files $i.diags $i"
fi
done
if test -n "$files"
then
$editor $files
else
echo No errors in console logs.
fi
......@@ -34,11 +34,15 @@ fi
configfile=`echo $i | sed -e 's/^.*\///'`
ngps=`grep ver: $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* ver: //' -e 's/ .*$//'`
stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
tr -d '\012\015'`"
if test -z "$ngps"
then
echo "$configfile -------"
echo "$configfile ------- " $stopstate
else
title="$configfile ------- $ngps grace periods"
title="$configfile ------- $ngps GPs"
dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
if test -z "$dur"
then
......@@ -46,9 +50,9 @@ else
else
ngpsps=`awk -v ngps=$ngps -v dur=$dur '
BEGIN { print ngps / dur }' < /dev/null`
title="$title ($ngpsps per second)"
title="$title ($ngpsps/s)"
fi
echo $title
echo $title $stopstate
nclosecalls=`grep --binary-files=text 'torture: Reader Batch' $i/console.log | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
if test -z "$nclosecalls"
then
......
......@@ -48,10 +48,6 @@ do
cat $i/Make.oldconfig.err
fi
parse-build.sh $i/Make.out $configfile
if test "$TORTURE_SUITE" != rcuperf
then
parse-torture.sh $i/console.log $configfile
fi
parse-console.sh $i/console.log $configfile
if test -r $i/Warnings
then
......
......@@ -267,5 +267,4 @@ then
echo Unknown PID, cannot kill qemu command
fi
parse-torture.sh $resdir/console.log $title
parse-console.sh $resdir/console.log $title
......@@ -24,57 +24,146 @@
#
# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
T=${TMPDIR-/tmp}/parse-console.sh.$$
file="$1"
title="$2"
trap 'rm -f $T.seq $T.diags' 0
. functions.sh
# Check for presence and readability of console output file
if test -f "$file" -a -r "$file"
then
:
else
echo $title unreadable console output file: $file
exit 1
fi
if grep -Pq '\x00' < $file
then
print_warning Console output contains nul bytes, old qemu still running?
fi
egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file | grep -v 'ODEBUG: ' | grep -v 'Warning: unable to open an initial console' > $1.diags
if test -s $1.diags
cat /dev/null > $file.diags
# Check for proper termination, except that rcuperf runs don't indicate this.
if test "$TORTURE_SUITE" != rcuperf
then
print_warning Assertion failure in $file $title
# cat $1.diags
# check for abject failure
if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
then
nerrs=`grep --binary-files=text '!!!' $file |
tail -1 |
awk '
{
for (i=NF-8;i<=NF;i++)
sum+=$i;
}
END { print sum }'`
print_bug $title FAILURE, $nerrs instances
exit
fi
grep --binary-files=text 'torture:.*ver:' $file |
egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
awk '
BEGIN {
ver = 0;
badseq = 0;
}
{
if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
badseqno1 = ver;
badseqno2 = $5;
badseqnr = NR;
badseq = 1;
}
ver = $5
}
END {
if (badseq) {
if (badseqno1 == badseqno2 && badseqno2 == ver)
print "GP HANG at " ver " torture stat " badseqnr;
else
print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
}
}' > $T.seq
if grep -q SUCCESS $file
then
if test -s $T.seq
then
print_warning $title `cat $T.seq`
echo " " $file
exit 2
fi
else
if grep -q "_HOTPLUG:" $file
then
print_warning HOTPLUG FAILURES $title `cat $T.seq`
echo " " $file
exit 3
fi
echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
if test -s $T.seq
then
print_warning $title `cat $T.seq`
fi
exit 2
fi
fi | tee -a $file.diags
egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
grep -v 'ODEBUG: ' |
grep -v 'Warning: unable to open an initial console' > $T.diags
if test -s $T.diags
then
print_warning "Assertion failure in $file $title"
# cat $T.diags
summary=""
n_badness=`grep -c Badness $1`
n_badness=`grep -c Badness $file`
if test "$n_badness" -ne 0
then
summary="$summary Badness: $n_badness"
fi
n_warn=`grep -v 'Warning: unable to open an initial console' $1 | egrep -c 'WARNING:|Warn'`
n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'`
if test "$n_warn" -ne 0
then
summary="$summary Warnings: $n_warn"
fi
n_bugs=`egrep -c 'BUG|Oops:' $1`
n_bugs=`egrep -c 'BUG|Oops:' $file`
if test "$n_bugs" -ne 0
then
summary="$summary Bugs: $n_bugs"
fi
n_calltrace=`grep -c 'Call Trace:' $1`
n_calltrace=`grep -c 'Call Trace:' $file`
if test "$n_calltrace" -ne 0
then
summary="$summary Call Traces: $n_calltrace"
fi
n_lockdep=`grep -c =========== $1`
n_lockdep=`grep -c =========== $file`
if test "$n_badness" -ne 0
then
summary="$summary lockdep: $n_badness"
fi
n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $1`
n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
if test "$n_stalls" -ne 0
then
summary="$summary Stalls: $n_stalls"
fi
n_starves=`grep -c 'rcu_.*kthread starved for' $1`
n_starves=`grep -c 'rcu_.*kthread starved for' $file`
if test "$n_starves" -ne 0
then
summary="$summary Starves: $n_starves"
fi
print_warning Summary: $summary
else
rm $1.diags
cat $T.diags >> $file.diags
fi
if ! test -s $file.diags
then
rm -f $file.diags
fi
#!/bin/bash
#
# Check the console output from a torture run for goodness.
# The "file" is a pathname on the local system, and "title" is
# a text string for error-message purposes.
#
# The file must contain torture output, but can be interspersed
# with other dmesg text, as in console-log output.
#
# Usage: parse-torture.sh file title
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, you can access it online at
# http://www.gnu.org/licenses/gpl-2.0.html.
#
# Copyright (C) IBM Corporation, 2011
#
# Authors: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
T=${TMPDIR-/tmp}/parse-torture.sh.$$
file="$1"
title="$2"
trap 'rm -f $T.seq' 0
. functions.sh
# check for presence of torture output file.
if test -f "$file" -a -r "$file"
then
:
else
echo $title unreadable torture output file: $file
exit 1
fi
# check for abject failure
if grep -q FAILURE $file || grep -q -e '-torture.*!!!' $file
then
nerrs=`grep --binary-files=text '!!!' $file | tail -1 | awk '{for (i=NF-8;i<=NF;i++) sum+=$i; } END {print sum}'`
print_bug $title FAILURE, $nerrs instances
echo " " $url
exit
fi
grep --binary-files=text 'torture:.*ver:' $file | egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' | sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
awk '
BEGIN {
ver = 0;
badseq = 0;
}
{
if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
badseqno1 = ver;
badseqno2 = $5;
badseqnr = NR;
badseq = 1;
}
ver = $5
}
END {
if (badseq) {
if (badseqno1 == badseqno2 && badseqno2 == ver)
print "GP HANG at " ver " torture stat " badseqnr;
else
print "BAD SEQ " badseqno1 ":" badseqno2 " last:" ver " version " badseqnr;
}
}' > $T.seq
if grep -q SUCCESS $file
then
if test -s $T.seq
then
print_warning $title $title `cat $T.seq`
echo " " $file
exit 2
fi
else
if grep -q "_HOTPLUG:" $file
then
print_warning HOTPLUG FAILURES $title `cat $T.seq`
echo " " $file
exit 3
fi
echo $title no success message, `grep --binary-files=text 'ver:' $file | wc -l` successful version messages
if test -s $T.seq
then
print_warning $title `cat $T.seq`
fi
exit 2
fi
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册