Unverified · Commit 623763f1 authored by openeuler-ci-bot, committed by Gitee

!850 Fix race condition in __percpu_counter_sum() during CPU hotplug

Merge Pull Request from: @henryze 
 
A dying CPU has already been removed from the online mask, but its hotplug dead notifier may not yet have been called to fold the percpu count into the global counter sum.
This race condition is avoided by including the dying CPU in the iteration mask.
 
Link: https://gitee.com/openeuler/kernel/pulls/850

Reviewed-by: Wei Li <liwei391@huawei.com> 
Reviewed-by: Zheng Zengkai <zhengzengkai@huawei.com> 
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com> 
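
For readers skimming the diff, the heart of the change is a one-line iterator swap in __percpu_counter_sum() (lib/percpu_counter.c). A simplified before/after sketch (condensed from the hunks below; the real loop goes through a local pcount pointer):

	/* Before: a CPU that has left cpu_online_mask, but whose CPUHP
	 * dead callback has not yet folded its count back, is skipped. */
	for_each_online_cpu(cpu)
		ret += *per_cpu_ptr(fbc->counters, cpu);

	/* After: dying CPUs stay visible via cpu_dying_mask until folded. */
	for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask)
		ret += *per_cpu_ptr(fbc->counters, cpu);

The supporting pieces — cpu_dying_mask itself, the find_next_or_bit() bitmap primitive, and the for_each_cpu_or() iterator built on it — are pulled in by the hunks that follow.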
@@ -97,4 +97,42 @@ extern unsigned long find_next_clump8(unsigned long *clump,
#define find_first_clump8(clump, bits, size) \
	find_next_clump8((clump), (bits), (size), 0)

unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2,
				unsigned long nbits, unsigned long start);

#ifndef find_next_or_bit
/**
 * find_next_or_bit - find the next set bit in either memory region
 * @addr1: The first address to base the search on
 * @addr2: The second address to base the search on
 * @size: The bitmap size in bits
 * @offset: The bitnumber to start searching at
 *
 * Returns the bit number for the next set bit
 * If no bits are set, returns @size.
 */
static inline
unsigned long find_next_or_bit(const unsigned long *addr1,
		const unsigned long *addr2, unsigned long size,
		unsigned long offset)
{
	if (small_const_nbits(size)) {
		unsigned long val;

		if (unlikely(offset >= size))
			return size;

		val = (*addr1 | *addr2) & GENMASK(size - 1, offset);
		return val ? __ffs(val) : size;
	}

	return _find_next_or_bit(addr1, addr2, size, offset);
}
#endif

#define for_each_or_bit(bit, addr1, addr2, size)				\
	for ((bit) = 0;								\
	     (bit) = find_next_or_bit((addr1), (addr2), (size), (bit)), (bit) < (size);\
	     (bit)++)

#endif /* _ASM_GENERIC_BITOPS_FIND_H_ */
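
As a usage illustration of the new iterator (not part of this patch; the bitmap names and handle() are hypothetical), walking the union of two plain bitmaps in kernel context looks like:

	/* Sketch, assumes kernel headers: visit every bit set in either bitmap. */
	DECLARE_BITMAP(pending, 64);
	DECLARE_BITMAP(active, 64);
	unsigned long bit;

	for_each_or_bit(bit, pending, active, 64)
		handle(bit);	/* handle() is a placeholder callback */

Because 64 is a compile-time constant no larger than BITS_PER_LONG on 64-bit builds, small_const_nbits() steers this through the single-word inline path above rather than the out-of-line _find_next_or_bit().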
@@ -23,4 +23,16 @@
#define BITS_PER_LONG_LONG 64
#endif

/*
 * small_const_nbits(n) is true precisely when it is known at compile-time
 * that BITMAP_SIZE(n) is 1, i.e. 1 <= n <= BITS_PER_LONG. This allows
 * various bit/bitmap APIs to provide a fast inline implementation. Bitmaps
 * of size 0 are very rare, and a compile-time-known-size 0 is most likely
 * a sign of error. They will be handled correctly by the bit/bitmap APIs,
 * but using the out-of-line functions, so that the inline implementations
 * can unconditionally dereference the pointer(s).
 */
#define small_const_nbits(nbits)	\
	(__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0)

#endif /* __ASM_GENERIC_BITS_PER_LONG */
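
To make the compile-time test concrete, a few sample evaluations (illustrative only, assuming a 64-bit build where BITS_PER_LONG == 64):

	small_const_nbits(1)	/* true:  constant, fits one word -> inline fast path */
	small_const_nbits(64)	/* true:  exactly one word */
	small_const_nbits(65)	/* false: needs a second word */
	small_const_nbits(0)	/* false: size 0 falls back to the out-of-line versions */
	small_const_nbits(n)	/* false: a runtime variable is never __builtin_constant_p */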
@@ -234,14 +234,6 @@ extern int bitmap_print_to_pagebuf(bool list, char *buf,
#define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1)))
#define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1)))

/*
 * The static inlines below do not handle constant nbits==0 correctly,
 * so make such users (should any ever turn up) call the out-of-line
 * versions.
 */
#define small_const_nbits(nbits) \
	(__builtin_constant_p(nbits) && (nbits) <= BITS_PER_LONG && (nbits) > 0)

static inline void bitmap_zero(unsigned long *dst, unsigned int nbits)
{
	unsigned int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long);
@@ -91,10 +91,12 @@ extern struct cpumask __cpu_possible_mask;
extern struct cpumask __cpu_online_mask;
extern struct cpumask __cpu_present_mask;
extern struct cpumask __cpu_active_mask;
extern struct cpumask __cpu_dying_mask;

#define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask)
#define cpu_online_mask   ((const struct cpumask *)&__cpu_online_mask)
#define cpu_present_mask  ((const struct cpumask *)&__cpu_present_mask)
#define cpu_active_mask   ((const struct cpumask *)&__cpu_active_mask)
#define cpu_dying_mask    ((const struct cpumask *)&__cpu_dying_mask)

extern atomic_t __num_online_cpus;

@@ -118,6 +120,11 @@ static inline unsigned int num_online_cpus(void)
#define cpu_possible(cpu)	cpumask_test_cpu((cpu), cpu_possible_mask)
#define cpu_present(cpu)	cpumask_test_cpu((cpu), cpu_present_mask)
#define cpu_active(cpu)		cpumask_test_cpu((cpu), cpu_active_mask)

static inline int cpumask_test_cpu(int cpu, const struct cpumask *cpumask);

static inline bool cpu_dying(unsigned int cpu)
{
	return cpumask_test_cpu(cpu, cpu_dying_mask);
}

#else
#define num_online_cpus()	1U
#define num_possible_cpus()	1U

@@ -127,6 +134,10 @@ static inline unsigned int num_online_cpus(void)
#define cpu_possible(cpu)	((cpu) == 0)
#define cpu_present(cpu)	((cpu) == 0)
#define cpu_active(cpu)		((cpu) == 0)

static inline bool cpu_dying(unsigned int cpu)
{
	return false;
}
#endif
extern cpumask_t cpus_booted_once_mask;
@@ -314,6 +325,58 @@ extern int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool
	     (cpu) < nr_cpu_ids;)
#endif /* SMP */

/*
 * We have several different "preferred sizes" for the cpumask
 * operations, depending on operation.
 *
 * For example, the bitmap scanning and operating operations have
 * optimized routines that work for the single-word case, but only when
 * the size is constant. So if NR_CPUS fits in one single word, we are
 * better off using that small constant, in order to trigger the
 * optimized bit finding. That is 'small_cpumask_size'.
 *
 * The clearing and copying operations will similarly perform better
 * with a constant size, but we limit that size arbitrarily to four
 * words. We call this 'large_cpumask_size'.
 *
 * Finally, some operations just want the exact limit, either because
 * they set bits or just don't have any faster fixed-sized versions. We
 * call this just 'nr_cpumask_bits'.
 *
 * Note that these optional constants are always guaranteed to be at
 * least as big as 'nr_cpu_ids' itself is, and all our cpumask
 * allocations are at least that size (see cpumask_size()). The
 * optimization comes from being able to potentially use a compile-time
 * constant instead of a run-time generated exact number of CPUs.
 */
#if NR_CPUS <= BITS_PER_LONG
  #define small_cpumask_bits ((unsigned int)NR_CPUS)
  #define large_cpumask_bits ((unsigned int)NR_CPUS)
#elif NR_CPUS <= 4*BITS_PER_LONG
  #define small_cpumask_bits nr_cpu_ids
  #define large_cpumask_bits ((unsigned int)NR_CPUS)
#else
  #define small_cpumask_bits nr_cpu_ids
  #define large_cpumask_bits nr_cpu_ids
#endif
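
Reading that table with concrete configurations (values illustrative, not from this patch):

	/* 64-bit kernel, BITS_PER_LONG == 64:
	 *
	 *   CONFIG_NR_CPUS=64   -> small_cpumask_bits == 64 (constant),
	 *                          large_cpumask_bits == 64 (constant)
	 *   CONFIG_NR_CPUS=128  -> small_cpumask_bits == nr_cpu_ids (runtime),
	 *                          large_cpumask_bits == 128 (constant)
	 *   CONFIG_NR_CPUS=1024 -> small_cpumask_bits == nr_cpu_ids,
	 *                          large_cpumask_bits == nr_cpu_ids
	 */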
/**
 * for_each_cpu_or - iterate over every cpu present in either mask
 * @cpu: the (optionally unsigned) integer iterator
 * @mask1: the first cpumask pointer
 * @mask2: the second cpumask pointer
 *
 * This saves a temporary CPU mask in many places. It is equivalent to:
 *	struct cpumask tmp;
 *	cpumask_or(&tmp, &mask1, &mask2);
 *	for_each_cpu(cpu, &tmp)
 *		...
 *
 * After the loop, cpu is >= nr_cpu_ids.
 */
#define for_each_cpu_or(cpu, mask1, mask2)				\
	for_each_or_bit(cpu, cpumask_bits(mask1), cpumask_bits(mask2), small_cpumask_bits)
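
A usage sketch mirroring this PR's consumer (simplified from the lib/percpu_counter.c hunk at the end of this diff; 'counter' is a hypothetical per-cpu variable):

	DEFINE_PER_CPU(s32, counter);	/* hypothetical per-cpu counter */
	unsigned int cpu;
	s64 sum = 0;

	/* Sum over CPUs that are online or still draining on their way down. */
	for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask)
		sum += per_cpu(counter, cpu);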
#define CPU_BITS_NONE						\
{								\
	[0 ... BITS_TO_LONGS(NR_CPUS)-1] = 0UL			\
@@ -851,6 +914,14 @@ set_cpu_active(unsigned int cpu, bool active)
		cpumask_clear_cpu(cpu, &__cpu_active_mask);
}

static inline void
set_cpu_dying(unsigned int cpu, bool dying)
{
	if (dying)
		cpumask_set_cpu(cpu, &__cpu_dying_mask);
	else
		cpumask_clear_cpu(cpu, &__cpu_dying_mask);
}

/**
 * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask *
@@ -157,6 +157,9 @@ static int cpuhp_invoke_callback(unsigned int cpu, enum cpuhp_state state,
	int (*cb)(unsigned int cpu);
	int ret, cnt;

	if (cpu_dying(cpu) != !bringup)
		set_cpu_dying(cpu, !bringup);

	if (st->fail == state) {
		st->fail = CPUHP_INVALID;
@@ -2502,6 +2505,9 @@ EXPORT_SYMBOL(__cpu_present_mask);
struct cpumask __cpu_active_mask __read_mostly;
EXPORT_SYMBOL(__cpu_active_mask);

struct cpumask __cpu_dying_mask __read_mostly;
EXPORT_SYMBOL(__cpu_dying_mask);

atomic_t __num_online_cpus __read_mostly;
EXPORT_SYMBOL(__num_online_cpus);
@@ -81,6 +81,43 @@ unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
EXPORT_SYMBOL(find_next_bit);
#endif

/*
 * Common helper for find_next_bit() function family
 * @FETCH: The expression that fetches and pre-processes each word of bitmap(s)
 * @MUNGE: The expression that post-processes a word containing found bit (may be empty)
 * @size: The bitmap size in bits
 * @start: The bitnumber to start searching at
 */
#define FIND_NEXT_BIT(FETCH, MUNGE, size, start)			\
({									\
	unsigned long mask, idx, tmp, sz = (size), __start = (start);	\
									\
	if (unlikely(__start >= sz))					\
		goto out;						\
									\
	mask = MUNGE(BITMAP_FIRST_WORD_MASK(__start));			\
	idx = __start / BITS_PER_LONG;					\
									\
	for (tmp = (FETCH) & mask; !tmp; tmp = (FETCH)) {		\
		if ((idx + 1) * BITS_PER_LONG >= sz)			\
			goto out;					\
		idx++;							\
	}								\
									\
	sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(tmp)), sz);		\
out:									\
	sz;								\
})

#ifndef find_next_or_bit
unsigned long _find_next_or_bit(const unsigned long *addr1, const unsigned long *addr2,
				unsigned long nbits, unsigned long start)
{
	return FIND_NEXT_BIT(addr1[idx] | addr2[idx], /* nop */, nbits, start);
}
EXPORT_SYMBOL(_find_next_or_bit);
#endif
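
/*
 * Aside (illustration only, not part of this patch): the FETCH argument is
 * what lets this single helper serve the whole find_*_bit() family; an
 * AND-style search over two bitmaps differs only in the fetched word.
 * The function name here is hypothetical.
 */
static unsigned long example_find_next_and_bit(const unsigned long *addr1,
				const unsigned long *addr2,
				unsigned long nbits, unsigned long start)
{
	return FIND_NEXT_BIT(addr1[idx] & addr2[idx], /* nop */, nbits, start);
}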
#ifndef find_next_zero_bit
unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
				 unsigned long offset)
@@ -119,7 +119,15 @@ EXPORT_SYMBOL(percpu_counter_sync);
/*
 * Add up all the per-cpu counts, return the result. This is a more accurate
 * but much slower version of percpu_counter_read_positive().
 *
 * We use the cpu mask of (cpu_online_mask | cpu_dying_mask) to capture sums
 * from CPUs that are in the process of being taken offline. Dying CPUs have
 * been removed from the online mask, but may not have had the hotplug dead
 * notifier called to fold the percpu count back into the global counter sum.
 * By including dying CPUs in the iteration mask, we avoid this race condition
 * so __percpu_counter_sum() just does the right thing when CPUs are being
 * taken offline.
 */
s64 __percpu_counter_sum(struct percpu_counter *fbc)
{

@@ -129,7 +137,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
	raw_spin_lock_irqsave(&fbc->lock, flags);
	ret = fbc->count;
	for_each_cpu_or(cpu, cpu_online_mask, cpu_dying_mask) {
		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
		ret += *pcount;
	}