closure.h 16.2 KB
Newer Older
K
Kent Overstreet 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
#ifndef _LINUX_CLOSURE_H
#define _LINUX_CLOSURE_H

#include <linux/llist.h>
#include <linux/sched.h>
#include <linux/workqueue.h>

/*
 * Closure is perhaps the most overused and abused term in computer science, but
 * since I've been unable to come up with anything better you're stuck with it
 * again.
 *
 * What are closures?
 *
 * They embed a refcount. The basic idea is they count "things that are in
 * progress" - in flight bios, some other thread that's doing something else -
 * anything you might want to wait on.
 *
 * The refcount may be manipulated with closure_get() and closure_put().
 * closure_put() is where many of the interesting things happen, when it causes
 * the refcount to go to 0.
 *
 * Closures can be used to wait on things both synchronously and asynchronously,
 * and synchronous and asynchronous use can be mixed without restriction. To
 * wait synchronously, use closure_sync() - you will sleep until your closure's
 * refcount hits 1.
 *
 * To wait asynchronously, use
 *   continue_at(cl, next_function, workqueue);
 *
 * passing it, as you might expect, the function to run when nothing is pending
 * and the workqueue to run that function out of.
 *
 * continue_at() also, critically, is a macro that returns the calling function.
 * There's good reason for this.
 *
 * To use safely closures asynchronously, they must always have a refcount while
 * they are running owned by the thread that is running them. Otherwise, suppose
 * you submit some bios and wish to have a function run when they all complete:
 *
 * foo_endio(struct bio *bio, int error)
 * {
 *	closure_put(cl);
 * }
 *
 * closure_init(cl);
 *
 * do_stuff();
 * closure_get(cl);
 * bio1->bi_endio = foo_endio;
 * bio_submit(bio1);
 *
 * do_more_stuff();
 * closure_get(cl);
 * bio2->bi_endio = foo_endio;
 * bio_submit(bio2);
 *
 * continue_at(cl, complete_some_read, system_wq);
 *
 * If closure's refcount started at 0, complete_some_read() could run before the
 * second bio was submitted - which is almost always not what you want! More
 * importantly, it wouldn't be possible to say whether the original thread or
 * complete_some_read()'s thread owned the closure - and whatever state it was
 * associated with!
 *
 * So, closure_init() initializes a closure's refcount to 1 - and when a
 * closure_fn is run, the refcount will be reset to 1 first.
 *
 * Then, the rule is - if you got the refcount with closure_get(), release it
 * with closure_put() (i.e, in a bio->bi_endio function). If you have a refcount
 * on a closure because you called closure_init() or you were run out of a
 * closure - _always_ use continue_at(). Doing so consistently will help
 * eliminate an entire class of particularly pernicious races.
 *
 * For a closure to wait on an arbitrary event, we need to introduce waitlists:
 *
 * struct closure_waitlist list;
 * closure_wait_event(list, cl, condition);
 * closure_wake_up(wait_list);
 *
 * These work analagously to wait_event() and wake_up() - except that instead of
 * operating on the current thread (for wait_event()) and lists of threads, they
 * operate on an explicit closure and lists of closures.
 *
 * Because it's a closure we can now wait either synchronously or
 * asynchronously. closure_wait_event() returns the current value of the
 * condition, and if it returned false continue_at() or closure_sync() can be
 * used to wait for it to become true.
 *
 * It's useful for waiting on things when you can't sleep in the context in
 * which you must check the condition (perhaps a spinlock held, or you might be
 * beneath generic_make_request() - in which case you can't sleep on IO).
 *
 * closure_wait_event() will wait either synchronously or asynchronously,
 * depending on whether the closure is in blocking mode or not. You can pick a
 * mode explicitly with closure_wait_event_sync() and
 * closure_wait_event_async(), which do just what you might expect.
 *
 * Lastly, you might have a wait list dedicated to a specific event, and have no
 * need for specifying the condition - you just want to wait until someone runs
 * closure_wake_up() on the appropriate wait list. In that case, just use
 * closure_wait(). It will return either true or false, depending on whether the
 * closure was already on a wait list or not - a closure can only be on one wait
 * list at a time.
 *
 * Parents:
 *
 * closure_init() takes two arguments - it takes the closure to initialize, and
 * a (possibly null) parent.
 *
 * If parent is non null, the new closure will have a refcount for its lifetime;
 * a closure is considered to be "finished" when its refcount hits 0 and the
 * function to run is null. Hence
 *
 * continue_at(cl, NULL, NULL);
 *
 * returns up the (spaghetti) stack of closures, precisely like normal return
 * returns up the C stack. continue_at() with non null fn is better thought of
 * as doing a tail call.
 *
 * All this implies that a closure should typically be embedded in a particular
 * struct (which its refcount will normally control the lifetime of), and that
 * struct can very much be thought of as a stack frame.
 *
 * Locking:
 *
 * Closures are based on work items but they can be thought of as more like
 * threads - in that like threads and unlike work items they have a well
 * defined lifetime; they are created (with closure_init()) and eventually
 * complete after a continue_at(cl, NULL, NULL).
 *
 * Suppose you've got some larger structure with a closure embedded in it that's
 * used for periodically doing garbage collection. You only want one garbage
 * collection happening at a time, so the natural thing to do is protect it with
 * a lock. However, it's difficult to use a lock protecting a closure correctly
 * because the unlock should come after the last continue_to() (additionally, if
 * you're using the closure asynchronously a mutex won't work since a mutex has
 * to be unlocked by the same process that locked it).
 *
 * So to make it less error prone and more efficient, we also have the ability
 * to use closures as locks:
 *
 * closure_init_unlocked();
 * closure_trylock();
 *
 * That's all we need for trylock() - the last closure_put() implicitly unlocks
 * it for you.  But for closure_lock(), we also need a wait list:
 *
 * struct closure_with_waitlist frobnicator_cl;
 *
 * closure_init_unlocked(&frobnicator_cl);
 * closure_lock(&frobnicator_cl);
 *
 * A closure_with_waitlist embeds a closure and a wait list - much like struct
 * delayed_work embeds a work item and a timer_list. The important thing is, use
 * it exactly like you would a regular closure and closure_put() will magically
 * handle everything for you.
 */

struct closure;
typedef void (closure_fn) (struct closure *);

struct closure_waitlist {
	struct llist_head	list;
};

enum closure_type {
	TYPE_closure				= 0,
	TYPE_closure_with_waitlist		= 1,
K
Kent Overstreet 已提交
170
	MAX_CLOSURE_TYPE			= 1,
K
Kent Overstreet 已提交
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
};

enum closure_state {
	/*
	 * CLOSURE_WAITING: Set iff the closure is on a waitlist. Must be set by
	 * the thread that owns the closure, and cleared by the thread that's
	 * waking up the closure.
	 *
	 * CLOSURE_SLEEPING: Must be set before a thread uses a closure to sleep
	 * - indicates that cl->task is valid and closure_put() may wake it up.
	 * Only set or cleared by the thread that owns the closure.
	 *
	 * The rest are for debugging and don't affect behaviour:
	 *
	 * CLOSURE_RUNNING: Set when a closure is running (i.e. by
	 * closure_init() and when closure_put() runs then next function), and
	 * must be cleared before remaining hits 0. Primarily to help guard
	 * against incorrect usage and accidentally transferring references.
	 * continue_at() and closure_return() clear it for you, if you're doing
	 * something unusual you can use closure_set_dead() which also helps
	 * annotate where references are being transferred.
	 *
	 * CLOSURE_STACK: Sanity check - remaining should never hit 0 on a
	 * closure with this flag set
	 */

K
Kent Overstreet 已提交
197 198 199 200
	CLOSURE_BITS_START	= (1 << 23),
	CLOSURE_DESTRUCTOR	= (1 << 23),
	CLOSURE_WAITING		= (1 << 25),
	CLOSURE_SLEEPING	= (1 << 27),
K
Kent Overstreet 已提交
201 202 203 204 205
	CLOSURE_RUNNING		= (1 << 29),
	CLOSURE_STACK		= (1 << 31),
};

#define CLOSURE_GUARD_MASK					\
K
Kent Overstreet 已提交
206 207
	((CLOSURE_DESTRUCTOR|CLOSURE_WAITING|CLOSURE_SLEEPING|	\
	  CLOSURE_RUNNING|CLOSURE_STACK) << 1)
K
Kent Overstreet 已提交
208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269

#define CLOSURE_REMAINING_MASK		(CLOSURE_BITS_START - 1)
#define CLOSURE_REMAINING_INITIALIZER	(1|CLOSURE_RUNNING)

struct closure {
	union {
		struct {
			struct workqueue_struct *wq;
			struct task_struct	*task;
			struct llist_node	list;
			closure_fn		*fn;
		};
		struct work_struct	work;
	};

	struct closure		*parent;

	atomic_t		remaining;

	enum closure_type	type;

#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
#define CLOSURE_MAGIC_DEAD	0xc054dead
#define CLOSURE_MAGIC_ALIVE	0xc054a11e

	unsigned		magic;
	struct list_head	all;
	unsigned long		ip;
	unsigned long		waiting_on;
#endif
};

struct closure_with_waitlist {
	struct closure		cl;
	struct closure_waitlist	wait;
};

extern unsigned invalid_closure_type(void);

#define __CLOSURE_TYPE(cl, _t)						\
	  __builtin_types_compatible_p(typeof(cl), struct _t)		\
		? TYPE_ ## _t :						\

#define __closure_type(cl)						\
(									\
	__CLOSURE_TYPE(cl, closure)					\
	__CLOSURE_TYPE(cl, closure_with_waitlist)			\
	invalid_closure_type()						\
)

void closure_sub(struct closure *cl, int v);
void closure_put(struct closure *cl);
void __closure_wake_up(struct closure_waitlist *list);
bool closure_wait(struct closure_waitlist *list, struct closure *cl);
void closure_sync(struct closure *cl);

bool closure_trylock(struct closure *cl, struct closure *parent);
void __closure_lock(struct closure *cl, struct closure *parent,
		    struct closure_waitlist *wait_list);

#ifdef CONFIG_BCACHE_CLOSURES_DEBUG

270
void closure_debug_init(void);
K
Kent Overstreet 已提交
271 272 273 274 275
void closure_debug_create(struct closure *cl);
void closure_debug_destroy(struct closure *cl);

#else

276
static inline void closure_debug_init(void) {}
K
Kent Overstreet 已提交
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356
static inline void closure_debug_create(struct closure *cl) {}
static inline void closure_debug_destroy(struct closure *cl) {}

#endif

static inline void closure_set_ip(struct closure *cl)
{
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
	cl->ip = _THIS_IP_;
#endif
}

static inline void closure_set_ret_ip(struct closure *cl)
{
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
	cl->ip = _RET_IP_;
#endif
}

static inline void closure_get(struct closure *cl)
{
#ifdef CONFIG_BCACHE_CLOSURES_DEBUG
	BUG_ON((atomic_inc_return(&cl->remaining) &
		CLOSURE_REMAINING_MASK) <= 1);
#else
	atomic_inc(&cl->remaining);
#endif
}

static inline void closure_set_stopped(struct closure *cl)
{
	atomic_sub(CLOSURE_RUNNING, &cl->remaining);
}

static inline bool closure_is_unlocked(struct closure *cl)
{
	return atomic_read(&cl->remaining) == -1;
}

static inline void do_closure_init(struct closure *cl, struct closure *parent,
				   bool running)
{
	cl->parent = parent;
	if (parent)
		closure_get(parent);

	if (running) {
		closure_debug_create(cl);
		atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER);
	} else
		atomic_set(&cl->remaining, -1);

	closure_set_ip(cl);
}

/*
 * Hack to get at the embedded closure if there is one, by doing an unsafe cast:
 * the result of __closure_type() is thrown away, it's used merely for type
 * checking.
 */
#define __to_internal_closure(cl)				\
({								\
	BUILD_BUG_ON(__closure_type(*cl) > MAX_CLOSURE_TYPE);	\
	(struct closure *) cl;					\
})

#define closure_init_type(cl, parent, running)			\
do {								\
	struct closure *_cl = __to_internal_closure(cl);	\
	_cl->type = __closure_type(*(cl));			\
	do_closure_init(_cl, parent, running);			\
} while (0)

/**
 * closure_init() - Initialize a closure, setting the refcount to 1
 * @cl:		closure to initialize
 * @parent:	parent of the new closure. cl will take a refcount on it for its
 *		lifetime; may be NULL.
 */
#define closure_init(cl, parent)				\
K
Kent Overstreet 已提交
357
	closure_init_type(cl, parent, true)
K
Kent Overstreet 已提交
358 359 360 361

static inline void closure_init_stack(struct closure *cl)
{
	memset(cl, 0, sizeof(struct closure));
K
Kent Overstreet 已提交
362
	atomic_set(&cl->remaining, CLOSURE_REMAINING_INITIALIZER|CLOSURE_STACK);
K
Kent Overstreet 已提交
363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440
}

/**
 * closure_init_unlocked() - Initialize a closure but leave it unlocked.
 * @cl:		closure to initialize
 *
 * For when the closure will be used as a lock. The closure may not be used
 * until after a closure_lock() or closure_trylock().
 */
#define closure_init_unlocked(cl)				\
do {								\
	memset((cl), 0, sizeof(*(cl)));				\
	closure_init_type(cl, NULL, false);			\
} while (0)

/**
 * closure_lock() - lock and initialize a closure.
 * @cl:		the closure to lock
 * @parent:	the new parent for this closure
 *
 * The closure must be of one of the types that has a waitlist (otherwise we
 * wouldn't be able to sleep on contention).
 *
 * @parent has exactly the same meaning as in closure_init(); if non null, the
 * closure will take a reference on @parent which will be released when it is
 * unlocked.
 */
#define closure_lock(cl, parent)				\
	__closure_lock(__to_internal_closure(cl), parent, &(cl)->wait)

static inline void __closure_end_sleep(struct closure *cl)
{
	__set_current_state(TASK_RUNNING);

	if (atomic_read(&cl->remaining) & CLOSURE_SLEEPING)
		atomic_sub(CLOSURE_SLEEPING, &cl->remaining);
}

static inline void __closure_start_sleep(struct closure *cl)
{
	closure_set_ip(cl);
	cl->task = current;
	set_current_state(TASK_UNINTERRUPTIBLE);

	if (!(atomic_read(&cl->remaining) & CLOSURE_SLEEPING))
		atomic_add(CLOSURE_SLEEPING, &cl->remaining);
}

/**
 * closure_wake_up() - wake up all closures on a wait list.
 */
static inline void closure_wake_up(struct closure_waitlist *list)
{
	smp_mb();
	__closure_wake_up(list);
}

/*
 * Wait on an event, synchronously or asynchronously - analogous to wait_event()
 * but for closures.
 *
 * The loop is oddly structured so as to avoid a race; we must check the
 * condition again after we've added ourself to the waitlist. We know if we were
 * already on the waitlist because closure_wait() returns false; thus, we only
 * schedule or break if closure_wait() returns false. If it returns true, we
 * just loop again - rechecking the condition.
 *
 * The __closure_wake_up() is necessary because we may race with the event
 * becoming true; i.e. we see event false -> wait -> recheck condition, but the
 * thread that made the event true may have called closure_wake_up() before we
 * added ourself to the wait list.
 *
 * We have to call closure_sync() at the end instead of just
 * __closure_end_sleep() because a different thread might've called
 * closure_wake_up() before us and gotten preempted before they dropped the
 * refcount on our closure. If this was a stack allocated closure, that would be
 * bad.
 */
K
Kent Overstreet 已提交
441
#define closure_wait_event(list, cl, condition)				\
K
Kent Overstreet 已提交
442 443 444 445 446 447 448
({									\
	typeof(condition) ret;						\
									\
	while (1) {							\
		ret = (condition);					\
		if (ret) {						\
			__closure_wake_up(list);			\
K
Kent Overstreet 已提交
449
			closure_sync(cl);				\
K
Kent Overstreet 已提交
450 451 452
			break;						\
		}							\
									\
K
Kent Overstreet 已提交
453
		__closure_start_sleep(cl);				\
K
Kent Overstreet 已提交
454
									\
K
Kent Overstreet 已提交
455
		if (!closure_wait(list, cl))				\
K
Kent Overstreet 已提交
456 457 458 459 460 461
			schedule();					\
	}								\
									\
	ret;								\
})

K
Kent Overstreet 已提交
462 463 464 465 466 467 468 469 470
static inline void closure_queue(struct closure *cl)
{
	struct workqueue_struct *wq = cl->wq;
	if (wq) {
		INIT_WORK(&cl->work, cl->work.func);
		BUG_ON(!queue_work(wq, &cl->work));
	} else
		cl->fn(cl);
}
K
Kent Overstreet 已提交
471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494

static inline void set_closure_fn(struct closure *cl, closure_fn *fn,
				  struct workqueue_struct *wq)
{
	BUG_ON(object_is_on_stack(cl));
	closure_set_ip(cl);
	cl->fn = fn;
	cl->wq = wq;
	/* between atomic_dec() in closure_put() */
	smp_mb__before_atomic_dec();
}

#define continue_at(_cl, _fn, _wq)					\
do {									\
	set_closure_fn(_cl, _fn, _wq);					\
	closure_sub(_cl, CLOSURE_RUNNING + 1);				\
	return;								\
} while (0)

#define closure_return(_cl)	continue_at((_cl), NULL, NULL)

#define continue_at_nobarrier(_cl, _fn, _wq)				\
do {									\
	set_closure_fn(_cl, _fn, _wq);					\
495
	closure_queue(_cl);						\
K
Kent Overstreet 已提交
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522
	return;								\
} while (0)

#define closure_return_with_destructor(_cl, _destructor)		\
do {									\
	set_closure_fn(_cl, _destructor, NULL);				\
	closure_sub(_cl, CLOSURE_RUNNING - CLOSURE_DESTRUCTOR + 1);	\
	return;								\
} while (0)

static inline void closure_call(struct closure *cl, closure_fn fn,
				struct workqueue_struct *wq,
				struct closure *parent)
{
	closure_init(cl, parent);
	continue_at_nobarrier(cl, fn, wq);
}

static inline void closure_trylock_call(struct closure *cl, closure_fn fn,
					struct workqueue_struct *wq,
					struct closure *parent)
{
	if (closure_trylock(cl, parent))
		continue_at_nobarrier(cl, fn, wq);
}

#endif /* _LINUX_CLOSURE_H */