rhashtable.h 12.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20
/*
 * Resizable, Scalable, Concurrent Hash Table
 *
 * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch>
 * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net>
 *
 * Based on the following paper by Josh Triplett, Paul E. McKenney
 * and Jonathan Walpole:
 * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf
 *
 * Code partially derived from nft_hash
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

#ifndef _LINUX_RHASHTABLE_H
#define _LINUX_RHASHTABLE_H

21
#include <linux/compiler.h>
22
#include <linux/list_nulls.h>
23
#include <linux/workqueue.h>
Y
Ying Xue 已提交
24
#include <linux/mutex.h>
25

26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
/*
 * The end of each chain is marked with a special nulls marker which has
 * the following format:
 *
 * +-------+-----------------------------------------------------+-+
 * | Base  |                      Hash                           |1|
 * +-------+-----------------------------------------------------+-+
 *
 * Base (4 bits) : Reserved to distinguish between multiple tables.
 *                 Specified via &struct rhashtable_params.nulls_base.
 * Hash (27 bits): Full hash (unmasked) of first element added to bucket
 * 1 (1 bit)     : Nulls marker (always set)
 *
 * The remaining bits of the next pointer remain unused for now.
 */
/* Width of the Base field of the nulls marker. */
#define RHT_BASE_BITS		4
/* Width of the Hash field of the nulls marker. */
#define RHT_HASH_BITS		27
/* The Base field starts right above the Hash field. */
#define RHT_BASE_SHIFT		RHT_HASH_BITS

45
struct rhash_head {
46
	struct rhash_head __rcu		*next;
47 48
};

49 50 51 52 53 54 55
/**
 * struct bucket_table - Table of hash buckets
 * @size: Number of hash buckets
 * @locks_mask: Mask to apply before accessing locks[]
 * @locks: Array of spinlocks protecting individual buckets
 * @buckets: size * hash buckets
 */
56 57
struct bucket_table {
	size_t				size;
58 59
	unsigned int			locks_mask;
	spinlock_t			*locks;
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
	struct rhash_head __rcu		*buckets[];
};

/* Callback type of &struct rhashtable_params.hashfn (function to hash a key). */
typedef u32 (*rht_hashfn_t)(const void *data, u32 len, u32 seed);
/* Callback type of &struct rhashtable_params.obj_hashfn (function to hash an object). */
typedef u32 (*rht_obj_hashfn_t)(const void *data, u32 seed);

/*
 * Forward declaration: the grow/shrink decision callbacks in
 * struct rhashtable_params take a pointer to struct rhashtable, which is
 * only defined further down.
 */
struct rhashtable;

/**
 * struct rhashtable_params - Hash table construction parameters
 * @nelem_hint: Hint on number of elements, should be 75% of desired size
 * @key_len: Length of key
 * @key_offset: Offset of key in struct to be hashed
 * @head_offset: Offset of rhash_head in struct to be hashed
 * @hash_rnd: Seed to use while hashing
 * @max_shift: Maximum number of shifts while expanding
76
 * @min_shift: Minimum number of shifts while shrinking
77
 * @nulls_base: Base value to generate nulls marker
78
 * @locks_mul: Number of bucket locks to allocate per cpu (default: 128)
79 80 81 82
 * @hashfn: Function to hash key
 * @obj_hashfn: Function to hash object
 * @grow_decision: If defined, may return true if table should expand
 * @shrink_decision: If defined, may return true if table should shrink
83 84 85 86
 *
 * Note: when implementing the grow and shrink decision function, min/max
 * shift must be enforced, otherwise, resizing watermarks they set may be
 * useless.
87 88 89 90 91 92 93 94
 */
struct rhashtable_params {
	size_t			nelem_hint;
	size_t			key_len;
	size_t			key_offset;
	size_t			head_offset;
	u32			hash_rnd;
	size_t			max_shift;
95
	size_t			min_shift;
96
	u32			nulls_base;
97
	size_t			locks_mul;
98 99 100 101 102 103 104 105 106 107 108
	rht_hashfn_t		hashfn;
	rht_obj_hashfn_t	obj_hashfn;
	bool			(*grow_decision)(const struct rhashtable *ht,
						 size_t new_size);
	bool			(*shrink_decision)(const struct rhashtable *ht,
						   size_t new_size);
};

/**
 * struct rhashtable - Hash table handle
 * @tbl: Bucket table
 * @future_tbl: Table under construction during expansion/shrinking
 * @nelems: Number of elements in table
 * @shift: Current size (1 << shift)
 * @p: Configuration parameters
 * @run_work: Deferred worker to expand/shrink asynchronously
 * @mutex: Mutex to protect current/future table swapping
 * @walkers: List of active walkers
 * @being_destroyed: True if table is set up for destruction
 */
struct rhashtable {
	struct bucket_table __rcu	*tbl;
	struct bucket_table __rcu	*future_tbl;
	atomic_t			nelems;
	atomic_t			shift;
	struct rhashtable_params	p;
	struct work_struct		run_work;
	struct mutex			mutex;
	struct list_head		walkers;
	bool				being_destroyed;
};

130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155
/**
 * struct rhashtable_walker - Hash table walker
 * @list: List entry on list of walkers
 * @resize: Resize event occurred
 */
struct rhashtable_walker {
	struct list_head list;
	bool resize;
};

/**
 * struct rhashtable_iter - Hash table iterator, fits into netlink cb
 * @ht: Table to iterate through
 * @p: Current pointer
 * @walker: Associated rhashtable walker
 * @slot: Current slot
 * @skip: Number of entries to skip in slot
 *
 * This is the iterator type taken by the rhashtable_walk_*() functions
 * declared in this header.
 */
struct rhashtable_iter {
	struct rhashtable *ht;
	struct rhash_head *p;
	struct rhashtable_walker *walker;
	unsigned int slot;
	unsigned int skip;
};

156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
/**
 * rht_marker - build the nulls marker for a bucket
 * @ht:		hash table supplying the nulls base (@ht->p.nulls_base)
 * @hash:	hash value to encode in the marker
 *
 * Returns NULLS_MARKER() applied to the sum of the table's nulls base
 * and @hash.
 */
static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash)
{
	const u32 nulls_value = ht->p.nulls_base + hash;

	return NULLS_MARKER(nulls_value);
}

/*
 * Store the nulls marker for (@ht, @hash) into @ptr, cast to @ptr's own
 * type.  @ptr is evaluated more than once (assignment target and typeof).
 */
#define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \
	((ptr) = (typeof(ptr)) rht_marker(ht, hash))

/**
 * rht_is_a_nulls - test whether a chain pointer is a nulls marker
 * @ptr:	chain pointer to inspect
 *
 * Returns true if the lowest bit of @ptr is set, false otherwise.
 */
static inline bool rht_is_a_nulls(const struct rhash_head *ptr)
{
	const unsigned long bits = (unsigned long)ptr;

	return (bits & 1UL) != 0;
}

/**
 * rht_get_nulls_value - extract the value stored in a nulls marker
 * @ptr:	chain pointer holding a nulls marker
 *
 * Returns the pointer bits shifted right by one, i.e. with the lowest
 * (marker) bit dropped.
 */
static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr)
{
	const unsigned long raw = (unsigned long)ptr;

	return raw >> 1;
}

174
#ifdef CONFIG_PROVE_LOCKING
175
int lockdep_rht_mutex_is_held(struct rhashtable *ht);
176
int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash);
177
#else
178
static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht)
179 180 181
{
	return 1;
}
182 183 184 185 186 187

static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl,
					     u32 hash)
{
	return 1;
}
188 189 190 191
#endif /* CONFIG_PROVE_LOCKING */

/* Table set-up from a caller-provided &struct rhashtable_params. */
int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params);

void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node);
bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node);

/*
 * These two have the signature of the grow_decision/shrink_decision
 * callbacks in &struct rhashtable_params.
 */
bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size);
bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size);

int rhashtable_expand(struct rhashtable *ht);
int rhashtable_shrink(struct rhashtable *ht);

void *rhashtable_lookup(struct rhashtable *ht, const void *key);
void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key,
				bool (*compare)(void *, void *), void *arg);

bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj);
bool rhashtable_lookup_compare_insert(struct rhashtable *ht,
				      struct rhash_head *obj,
				      bool (*compare)(void *, void *),
				      void *arg);

/*
 * Table walker interface operating on &struct rhashtable_iter.
 * rhashtable_walk_start() is annotated as acquiring RCU and
 * rhashtable_walk_stop() as releasing it.
 */
int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter);
void rhashtable_walk_exit(struct rhashtable_iter *iter);
int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU);
void *rhashtable_walk_next(struct rhashtable_iter *iter);
void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU);

void rhashtable_destroy(struct rhashtable *ht);
218 219 220 221 222 223 224

/*
 * Dereference @p via rcu_dereference_protected(), with "the mutex of @ht
 * is held" (lockdep_rht_mutex_is_held()) as the lockdep condition.
 */
#define rht_dereference(p, ht) \
	rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht))

/*
 * Dereference @p via rcu_dereference_check(), passing "the mutex of @ht
 * is held" (lockdep_rht_mutex_is_held()) as the condition argument.
 */
#define rht_dereference_rcu(p, ht) \
	rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht))

225 226
/*
 * Dereference @p via rcu_dereference_protected(), with "the bucket lock
 * for @hash in @tbl is held" (lockdep_rht_bucket_is_held()) as the
 * lockdep condition.
 */
#define rht_dereference_bucket(p, tbl, hash) \
	rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash))

/*
 * Dereference @p via rcu_dereference_check(), passing "the bucket lock
 * for @hash in @tbl is held" (lockdep_rht_bucket_is_held()) as the
 * condition argument.
 */
#define rht_dereference_bucket_rcu(p, tbl, hash) \
	rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash))

/*
 * Point @tpos at the structure that embeds @pos as @member (via
 * container_of()).  The statement expression always evaluates to 1 so
 * that it can be chained with && inside a loop condition.
 */
#define rht_entry(tpos, pos, member) \
	({ tpos = container_of(pos, typeof(*tpos), member); 1; })
233 234

/**
 * rht_for_each_continue - continue iterating over hash chain
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the previous &struct rhash_head to continue from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * Iteration stops once @pos holds a nulls marker.
 */
#define rht_for_each_continue(pos, head, tbl, hash) \
	for (pos = rht_dereference_bucket(head, tbl, hash); \
	     !rht_is_a_nulls(pos); \
	     pos = rht_dereference_bucket((pos)->next, tbl, hash))

/**
 * rht_for_each - iterate over hash chain
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * Starts at the bucket head (@tbl)->buckets[@hash] and otherwise behaves
 * like rht_for_each_continue().
 */
#define rht_for_each(pos, tbl, hash) \
	rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash)

/**
 * rht_for_each_entry_continue - continue iterating over hash chain
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the previous &struct rhash_head to continue from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * @tpos is updated through rht_entry() on every iteration for which @pos
 * is not a nulls marker.
 */
#define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member)	\
	for (pos = rht_dereference_bucket(head, tbl, hash);		\
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	\
	     pos = rht_dereference_bucket((pos)->next, tbl, hash))
268 269 270

/**
 * rht_for_each_entry - iterate over hash chain of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * Starts at the bucket head (@tbl)->buckets[@hash].
 */
#define rht_for_each_entry(tpos, pos, tbl, hash, member)		\
	rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash],	\
				    tbl, hash, member)
280 281 282

/**
 * rht_for_each_entry_safe - safely iterate over hash chain of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @next:	the &struct rhash_head to use as next in loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive allows for the looped code to
 * remove the loop cursor from the list: the successor of @pos is loaded
 * into @next before the loop body runs.  @next is set to NULL once @pos
 * is a nulls marker.
 */
#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member)	      \
	for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash),   \
	     next = !rht_is_a_nulls(pos) ?				      \
		       rht_dereference_bucket((pos)->next, tbl, hash) : NULL; \
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	      \
	     pos = next,						      \
	     next = !rht_is_a_nulls(pos) ?				      \
		       rht_dereference_bucket((pos)->next, tbl, hash) : NULL)
301 302 303 304 305 306 307 308 309 310 311 312 313 314 315

/**
 * rht_for_each_rcu_continue - continue iterating over rcu hash chain
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the previous &struct rhash_head to continue from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 *
 * A compiler barrier() is issued before @head is loaded; subsequent links
 * are followed with rcu_dereference_raw().
 */
#define rht_for_each_rcu_continue(pos, head, tbl, hash)			\
	for (({barrier(); }),						\
	     pos = rht_dereference_bucket_rcu(head, tbl, hash);		\
	     !rht_is_a_nulls(pos);					\
	     pos = rcu_dereference_raw((pos)->next))
318 319 320

/**
 * rht_for_each_rcu - iterate over rcu hash chain
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 *
 * Starts at the bucket head (@tbl)->buckets[@hash].
 */
#define rht_for_each_rcu(pos, tbl, hash)				\
	rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash)

/**
 * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @head:	the previous &struct rhash_head to continue from
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 *
 * A compiler barrier() is issued before @head is loaded.
 */
#define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \
	for (({barrier(); }),						    \
	     pos = rht_dereference_bucket_rcu(head, tbl, hash);		    \
	     (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member);	    \
	     pos = rht_dereference_bucket_rcu((pos)->next, tbl, hash))
350 351 352

/**
 * rht_for_each_entry_rcu - iterate over rcu hash chain of given type
 * @tpos:	the type * to use as a loop cursor.
 * @pos:	the &struct rhash_head to use as a loop cursor.
 * @tbl:	the &struct bucket_table
 * @hash:	the hash value / bucket index
 * @member:	name of the &struct rhash_head within the hashable struct.
 *
 * This hash chain list-traversal primitive may safely run concurrently with
 * the _rcu mutation primitives such as rhashtable_insert() as long as the
 * traversal is guarded by rcu_read_lock().
 *
 * Starts at the bucket head (@tbl)->buckets[@hash].
 */
#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member)		\
	rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\
					tbl, hash, member)
366 367

#endif /* _LINUX_RHASHTABLE_H */