diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 5f68d0a391cee8506f8e0d94cda72d8bd357b10f..1c0cf3102fdc327a4474d51f6da13d9ee80b1b20 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -233,9 +233,83 @@ static inline void raw_write_seqcount_end(seqcount_t *s) s->sequence++; } -/* +/** * raw_write_seqcount_latch - redirect readers to even/odd copy * @s: pointer to seqcount_t + * + * The latch technique is a multiversion concurrency control method that allows + * queries during non-atomic modifications. If you can guarantee queries never + * interrupt the modification -- e.g. the concurrency is strictly between CPUs + * -- you most likely do not need this. + * + * Where the traditional RCU/lockless data structures rely on atomic + * modifications to ensure queries observe either the old or the new state the + * latch allows the same for non-atomic updates. The trade-off is doubling the + * cost of storage; we have to maintain two copies of the entire data + * structure. + * + * Very simply put: we first modify one copy and then the other. This ensures + * there is always one copy in a stable state, ready to give us an answer. + * + * The basic form is a data structure like: + * + * struct latch_struct { + * seqcount_t seq; + * struct data_struct data[2]; + * }; + * + * Where a modification, which is assumed to be externally serialized, does the + * following: + * + * void latch_modify(struct latch_struct *latch, ...) + * { + * smp_wmb(); <- Ensure that the last data[1] update is visible + * latch->seq++; + * smp_wmb(); <- Ensure that the seqcount update is visible + * + * modify(latch->data[0], ...); + * + * smp_wmb(); <- Ensure that the data[0] update is visible + * latch->seq++; + * smp_wmb(); <- Ensure that the seqcount update is visible + * + * modify(latch->data[1], ...); + * } + * + * The query will have a form like: + * + * struct entry *latch_query(struct latch_struct *latch, ...) + * { + * struct entry *entry; + * unsigned seq, idx; + * + * do { + * seq = latch->seq; + * smp_rmb(); + * + * idx = seq & 0x01; + * entry = data_query(latch->data[idx], ...); + * + * smp_rmb(); + * } while (seq != latch->seq); + * + * return entry; + * } + * + * So during the modification, queries are first redirected to data[1]. Then we + * modify data[0]. When that is complete, we redirect queries back to data[0] + * and we can modify data[1]. + * + * NOTE: The non-requirement for atomic modifications does _NOT_ include + * the publishing of new entries in the case where data is a dynamic + * data structure. + * + * An iteration might start in data[0] and get suspended long enough + * to miss an entire modification sequence, once it resumes it might + * observe the new entry. + * + * NOTE: When data is a dynamic data structure; one should use regular RCU + * patterns to manage the lifetimes of the objects within. */ static inline void raw_write_seqcount_latch(seqcount_t *s) { diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 946acb72179facb1c173e54592b3c1c3637f8abd..cbfedddbf0cb07be2c474e3881777ca033580415 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -330,32 +330,7 @@ static inline s64 timekeeping_get_ns(struct tk_read_base *tkr) * We want to use this from any context including NMI and tracing / * instrumenting the timekeeping code itself. * - * So we handle this differently than the other timekeeping accessor - * functions which retry when the sequence count has changed. The - * update side does: - * - * smp_wmb(); <- Ensure that the last base[1] update is visible - * tkf->seq++; - * smp_wmb(); <- Ensure that the seqcount update is visible - * update(tkf->base[0], tkr); - * smp_wmb(); <- Ensure that the base[0] update is visible - * tkf->seq++; - * smp_wmb(); <- Ensure that the seqcount update is visible - * update(tkf->base[1], tkr); - * - * The reader side does: - * - * do { - * seq = tkf->seq; - * smp_rmb(); - * idx = seq & 0x01; - * now = now(tkf->base[idx]); - * smp_rmb(); - * } while (seq != tkf->seq) - * - * As long as we update base[0] readers are forced off to - * base[1]. Once base[0] is updated readers are redirected to base[0] - * and the base[1] update takes place. + * Employ the latch technique; see @raw_write_seqcount_latch. * * So if a NMI hits the update of base[0] then it will use base[1] * which is still consistent. In the worst case this can result is a