// SPDX-License-Identifier: GPL-2.0
/*
 * A fast, small, non-recursive O(n log n) sort for the Linux kernel
 *
 * This performs n*log2(n) + 0.37*n + o(n) comparisons on average,
 * and 1.5*n*log2(n) + O(n) in the (very contrived) worst case.
 *
 * Glibc qsort() manages n*log2(n) - 1.26*n for random inputs (1.63*n
 * better) at the expense of stack usage and much larger code to avoid
 * quicksort's O(n^2) worst case.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/types.h>
#include <linux/export.h>
#include <linux/sort.h>

/**
 * is_aligned - is this pointer & size okay for word-wide copying?
 * @base: pointer to data
 * @size: size of each element
 * @align: required alignment (typically 4 or 8)
 *
 * Returns true if elements can be copied using word loads and stores.
 * The size must be a multiple of the alignment, and the base address must
 * be aligned as well unless we have CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS.
 *
 * For some reason, gcc doesn't know to optimize "if (a & mask || b & mask)"
 * to "if ((a | b) & mask)", so we do that by hand.
 */
__attribute_const__ __always_inline
static bool is_aligned(const void *base, size_t size, unsigned char align)
{
	unsigned char lsbits = (unsigned char)size;

	(void)base;
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
	lsbits |= (unsigned char)(uintptr_t)base;
#endif
	return (lsbits & (align - 1)) == 0;
}
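
/*
 * Illustrative note (added, not part of the original source): with
 * align = 4, size = 6 always fails the check (6 & 3 == 2), so 6-byte
 * elements fall back to byte-at-a-time copying.  size = 8 passes, but
 * without CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS a misaligned base
 * address such as 0x1002 (0x1002 & 3 == 2) still forces the fallback.
 */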

/**
 * swap_words_32 - swap two elements in 32-bit chunks
 * @a, @b: pointers to the elements
 * @size: element size (must be a multiple of 4)
 *
 * Exchange the two objects in memory.  This exploits base+index addressing,
 * which basically all CPUs have, to minimize loop overhead computations.
 *
 * For some reason, on x86 gcc 7.3.0 adds a redundant test of n at the
 * bottom of the loop, even though the zero flag is still valid from the
 * subtract (since the intervening mov instructions don't alter the flags).
 * Gcc 8.1.0 doesn't have that problem.
 */
static void swap_words_32(void *a, void *b, int size)
{
	size_t n = (unsigned int)size;

	do {
		u32 t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;
	} while (n);
}

/**
 * swap_words_64 - swap two elements in 64-bit chunks
 * @a, @b: pointers to the elements
 * @size: element size (must be a multiple of 8)
 *
 * Exchange the two objects in memory.  This exploits base+index
 * addressing, which basically all CPUs have, to minimize loop overhead
 * computations.
 *
 * We'd like to use 64-bit loads if possible.  If they're not, emulating
 * one requires base+index+4 addressing which x86 has but most other
 * processors do not.  If CONFIG_64BIT, we definitely have 64-bit loads,
 * but it's possible to have 64-bit loads without 64-bit pointers (e.g.
 * x32 ABI).  Are there any cases the kernel needs to worry about?
 */
static void swap_words_64(void *a, void *b, int size)
{
	size_t n = (unsigned int)size;

	do {
#ifdef CONFIG_64BIT
		u64 t = *(u64 *)(a + (n -= 8));
		*(u64 *)(a + n) = *(u64 *)(b + n);
		*(u64 *)(b + n) = t;
#else
		/* Use two 32-bit transfers to avoid base+index+4 addressing */
		u32 t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;

		t = *(u32 *)(a + (n -= 4));
		*(u32 *)(a + n) = *(u32 *)(b + n);
		*(u32 *)(b + n) = t;
#endif
	} while (n);
}

/**
 * swap_bytes - swap two elements a byte at a time
 * @a, @b: pointers to the elements
 * @size: element size
 *
 * This is the fallback if alignment doesn't allow using larger chunks.
 */
static void swap_bytes(void *a, void *b, int size)
{
	size_t n = (unsigned int)size;

	do {
		char t = ((char *)a)[--n];
		((char *)a)[n] = ((char *)b)[n];
		((char *)b)[n] = t;
	} while (n);
}

/**
 * parent - given the offset of the child, find the offset of the parent.
 * @i: the offset of the heap element whose parent is sought.  Non-zero.
 * @lsbit: a precomputed 1-bit mask, equal to "size & -size"
 * @size: size of each element
 *
 * In terms of array indexes, the parent of element j = @i/@size is simply
 * (j-1)/2.  But when working in byte offsets, we can't use implicit
 * truncation of integer divides.
 *
 * Fortunately, we only need one bit of the quotient, not the full divide.
 * @size has a least significant bit.  That bit will be clear if @i is
 * an even multiple of @size, and set if it's an odd multiple.
 *
 * Logically, we're doing "if (i & lsbit) i -= size;", but since the
 * branch is unpredictable, it's done with a bit of clever branch-free
 * code instead.
 */
__attribute_const__ __always_inline
static size_t parent(size_t i, unsigned int lsbit, size_t size)
{
	i -= size;
	i -= size & -(i & lsbit);
	return i / 2;
}
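
/*
 * Worked example (added for illustration, not in the original source):
 * take size = 12, so lsbit = 4.  The child at byte offset i = 36 is
 * array index 3; its parent is index (3-1)/2 = 1, i.e. byte offset 12.
 * In parent(36, 4, 12), i -= size leaves 24, an even multiple of 12,
 * so 24 & 4 == 0 and nothing more is subtracted; 24/2 == 12.  For
 * i = 24 (index 2, parent index 0), i -= size leaves 12, an odd
 * multiple, so 12 & 4 != 0 and size is subtracted again, leaving 0;
 * 0/2 == 0, the byte offset of index 0.
 */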

/**
 * sort - sort an array of elements
 * @base: pointer to data to sort
 * @num: number of elements
 * @size: size of each element
 * @cmp_func: pointer to comparison function
 * @swap_func: pointer to swap function or NULL
 *
 * This function does a heapsort on the given array.  You may provide
 * a swap_func function if you need to do something more than a memory
 * copy (e.g. fix up pointers or auxiliary data), but the built-in swap
 * isn't usually a bottleneck.
 *
 * Sorting time is O(n log n) both on average and worst-case. While
 * quicksort is slightly faster on average, it suffers from exploitable
 * O(n*n) worst-case behavior and extra memory requirements that make
 * it less suitable for kernel use.
 */
void sort(void *base, size_t num, size_t size,
	  int (*cmp_func)(const void *, const void *),
	  void (*swap_func)(void *, void *, int size))
{
	/* pre-scale counters for performance */
	size_t n = num * size, a = (num/2) * size;
	const unsigned int lsbit = size & -size;  /* Used to find parent */

	if (!a)		/* num < 2 || size == 0 */
		return;

	if (!swap_func) {
		if (is_aligned(base, size, 8))
			swap_func = swap_words_64;
		else if (is_aligned(base, size, 4))
			swap_func = swap_words_32;
		else
			swap_func = swap_bytes;
	}

	/*
	 * Loop invariants:
	 * 1. elements [a,n) satisfy the heap property (compare greater than
	 *    all of their children),
	 * 2. elements [n,num*size) are sorted, and
	 * 3. a <= b <= c <= d <= n (whenever they are valid).
	 */
	for (;;) {
		size_t b, c, d;

		if (a)			/* Building heap: sift down --a */
			a -= size;
		else if (n -= size)	/* Sorting: Extract root to --n */
			swap_func(base, base + n, size);
		else			/* Sort complete */
			break;

		/*
		 * Sift element at "a" down into heap.  This is the
		 * "bottom-up" variant, which significantly reduces
		 * calls to cmp_func(): we find the sift-down path all
		 * the way to the leaves (one compare per level), then
		 * backtrack to find where to insert the target element.
		 *
		 * Because elements tend to sift down close to the leaves,
		 * this uses fewer compares than doing two per level
		 * on the way down.  (A bit more than half as many on
		 * average, 3/4 worst-case.)
		 */
		for (b = a; c = 2*b + size, (d = c + size) < n;)
			b = cmp_func(base + c, base + d) >= 0 ? c : d;
		if (d == n)	/* Special case last leaf with no sibling */
			b = c;

		/* Now backtrack from "b" to the correct location for "a" */
		while (b != a && cmp_func(base + a, base + b) >= 0)
			b = parent(b, lsbit, size);
		c = b;			/* Where "a" belongs */
		while (b != a) {	/* Shift it into place */
			b = parent(b, lsbit, size);
			swap_func(base + b, base + c, size);
		}
	}
}
EXPORT_SYMBOL(sort);
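
/*
 * Minimal usage sketch (illustrative only; cmp_int and example_sort are
 * hypothetical names, and ARRAY_SIZE comes from <linux/kernel.h>).  The
 * comparison callback returns <0, 0, or >0, like memcmp(); passing NULL
 * as swap_func lets sort() pick the fastest built-in swap for the data.
 */
static int cmp_int(const void *a, const void *b)
{
	int x = *(const int *)a, y = *(const int *)b;

	return (x > y) - (x < y);	/* branchless; avoids x - y overflow */
}

static void example_sort(void)
{
	int v[] = { 3, 1, 4, 1, 5, 9, 2, 6 };

	sort(v, ARRAY_SIZE(v), sizeof(v[0]), cmp_int, NULL);
	/* v is now { 1, 1, 2, 3, 4, 5, 6, 9 } */
}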