// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/buffer_head.h>
#include <linux/delay.h>
#include <linux/sort.h>
#include <linux/hash.h>
#include <linux/jhash.h>
#include <linux/kallsyms.h>
#include <linux/gfs2_ondisk.h>
#include <linux/list.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/uaccess.h>
#include <linux/seq_file.h>
#include <linux/debugfs.h>
#include <linux/kthread.h>
#include <linux/freezer.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>
#include <linux/rcupdate.h>
#include <linux/rculist_bl.h>
#include <linux/bit_spinlock.h>
#include <linux/percpu.h>
#include <linux/list_sort.h>
#include <linux/lockref.h>
#include <linux/rhashtable.h>

#include "gfs2.h"
#include "incore.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "lops.h"
#include "meta_io.h"
#include "quota.h"
#include "super.h"
#include "util.h"
#include "bmap.h"
#define CREATE_TRACE_POINTS
#include "trace_gfs2.h"

struct gfs2_glock_iter {
	struct gfs2_sbd *sdp;		/* incore superblock           */
	struct rhashtable_iter hti;	/* rhashtable iterator         */
	struct gfs2_glock *gl;		/* current glock struct        */
	loff_t last_pos;		/* last position               */
};

typedef void (*glock_examiner) (struct gfs2_glock * gl);

static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target);

static struct dentry *gfs2_root;
static struct workqueue_struct *glock_workqueue;
struct workqueue_struct *gfs2_delete_workqueue;
static LIST_HEAD(lru_list);
static atomic_t lru_count = ATOMIC_INIT(0);
static DEFINE_SPINLOCK(lru_lock);

#define GFS2_GL_HASH_SHIFT      15
#define GFS2_GL_HASH_SIZE       BIT(GFS2_GL_HASH_SHIFT)

static const struct rhashtable_params ht_parms = {
	.nelem_hint = GFS2_GL_HASH_SIZE * 3 / 4,
	.key_len = offsetofend(struct lm_lockname, ln_type),
	.key_offset = offsetof(struct gfs2_glock, gl_name),
	.head_offset = offsetof(struct gfs2_glock, gl_node),
};

static struct rhashtable gl_hash_table;

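/*
 * Wait table used to serialize glock lookup against teardown: a lookup that
 * finds a dying glock in the rhashtable waits here, hashed by lock name,
 * until gfs2_glock_free() has removed that glock.  Only waiters for the
 * same (superblock, number, type) name are woken.
 */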
#define GLOCK_WAIT_TABLE_BITS 12
#define GLOCK_WAIT_TABLE_SIZE (1 << GLOCK_WAIT_TABLE_BITS)
static wait_queue_head_t glock_wait_table[GLOCK_WAIT_TABLE_SIZE] __cacheline_aligned;

struct wait_glock_queue {
	struct lm_lockname *name;
	wait_queue_entry_t wait;
};

static int glock_wake_function(wait_queue_entry_t *wait, unsigned int mode,
			       int sync, void *key)
{
	struct wait_glock_queue *wait_glock =
		container_of(wait, struct wait_glock_queue, wait);
	struct lm_lockname *wait_name = wait_glock->name;
	struct lm_lockname *wake_name = key;

	if (wake_name->ln_sbd != wait_name->ln_sbd ||
	    wake_name->ln_number != wait_name->ln_number ||
	    wake_name->ln_type != wait_name->ln_type)
		return 0;
	return autoremove_wake_function(wait, mode, sync, key);
}

static wait_queue_head_t *glock_waitqueue(struct lm_lockname *name)
{
	u32 hash = jhash2((u32 *)name, ht_parms.key_len / 4, 0);

	return glock_wait_table + hash_32(hash, GLOCK_WAIT_TABLE_BITS);
}

/**
 * wake_up_glock  -  Wake up waiters on a glock
 * @gl: the glock
 */
static void wake_up_glock(struct gfs2_glock *gl)
{
	wait_queue_head_t *wq = glock_waitqueue(&gl->gl_name);

	if (waitqueue_active(wq))
		__wake_up(wq, TASK_NORMAL, 1, &gl->gl_name);
}

static void gfs2_glock_dealloc(struct rcu_head *rcu)
{
	struct gfs2_glock *gl = container_of(rcu, struct gfs2_glock, gl_rcu);

	kfree(gl->gl_lksb.sb_lvbptr);
	if (gl->gl_ops->go_flags & GLOF_ASPACE)
		kmem_cache_free(gfs2_glock_aspace_cachep, gl);
	else
		kmem_cache_free(gfs2_glock_cachep, gl);
}

/**
 * glock_blocked_by_withdraw - determine if we can still use a glock
 * @gl: the glock
 *
 * We need to allow some glocks to be enqueued, dequeued, promoted, and demoted
 * when we're withdrawn. For example, to maintain metadata integrity, we should
 * disallow the use of inode and rgrp glocks when withdrawn. Other glocks, like
 * iopen or the transaction glocks, may be safely used because none of their
 * metadata goes through the journal. So in general, we should disallow all
 * glocks that are journaled, and allow all the others. One exception is:
 * we need to allow our active journal to be promoted and demoted so others
 * may recover it and we can reacquire it when they're done.
 */
static bool glock_blocked_by_withdraw(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	if (likely(!gfs2_withdrawn(sdp)))
		return false;
	if (gl->gl_ops->go_flags & GLOF_NONDISK)
		return false;
	if (!sdp->sd_jdesc ||
	    gl->gl_name.ln_number == sdp->sd_jdesc->jd_no_addr)
		return false;
	return true;
}

void gfs2_glock_free(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	gfs2_glock_assert_withdraw(gl, atomic_read(&gl->gl_revokes) == 0);
	rhashtable_remove_fast(&gl_hash_table, &gl->gl_node, ht_parms);
	smp_mb();
	wake_up_glock(gl);
	call_rcu(&gl->gl_rcu, gfs2_glock_dealloc);
	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
		wake_up(&sdp->sd_glock_wait);
}

/**
 * gfs2_glock_hold() - increment reference count on glock
 * @gl: The glock to hold
 *
 */

void gfs2_glock_hold(struct gfs2_glock *gl)
{
	GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
	lockref_get(&gl->gl_lockref);
}

/**
 * demote_ok - Check to see if it's ok to unlock a glock
 * @gl: the glock
 *
 * Returns: 1 if it's ok
 */

static int demote_ok(const struct gfs2_glock *gl)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;

	if (gl->gl_state == LM_ST_UNLOCKED)
		return 0;
	if (!list_empty(&gl->gl_holders))
		return 0;
	if (glops->go_demote_ok)
		return glops->go_demote_ok(gl);
	return 1;
}


void gfs2_glock_add_to_lru(struct gfs2_glock *gl)
{
	if (!(gl->gl_ops->go_flags & GLOF_LRU))
		return;

	spin_lock(&lru_lock);

	list_del(&gl->gl_lru);
	list_add_tail(&gl->gl_lru, &lru_list);

	if (!test_bit(GLF_LRU, &gl->gl_flags)) {
		set_bit(GLF_LRU, &gl->gl_flags);
		atomic_inc(&lru_count);
	}

	spin_unlock(&lru_lock);
}

static void gfs2_glock_remove_from_lru(struct gfs2_glock *gl)
{
	if (!(gl->gl_ops->go_flags & GLOF_LRU))
		return;

	spin_lock(&lru_lock);
	if (test_bit(GLF_LRU, &gl->gl_flags)) {
		list_del_init(&gl->gl_lru);
		atomic_dec(&lru_count);
		clear_bit(GLF_LRU, &gl->gl_flags);
	}
	spin_unlock(&lru_lock);
}

/*
 * Enqueue the glock on the work queue.  Passes one glock reference on to the
 * work queue.
 */
static void __gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
	if (!queue_delayed_work(glock_workqueue, &gl->gl_work, delay)) {
		/*
		 * We are holding the lockref spinlock, and the work was still
		 * queued above.  The queued work (glock_work_func) takes that
		 * spinlock before dropping its glock reference(s), so it
		 * cannot have dropped them in the meantime.
		 */
		GLOCK_BUG_ON(gl, gl->gl_lockref.count < 2);
		gl->gl_lockref.count--;
	}
}

static void gfs2_glock_queue_work(struct gfs2_glock *gl, unsigned long delay) {
	spin_lock(&gl->gl_lockref.lock);
	__gfs2_glock_queue_work(gl, delay);
	spin_unlock(&gl->gl_lockref.lock);
}

static void __gfs2_glock_put(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct address_space *mapping = gfs2_glock2aspace(gl);

	lockref_mark_dead(&gl->gl_lockref);

	gfs2_glock_remove_from_lru(gl);
	spin_unlock(&gl->gl_lockref.lock);
	GLOCK_BUG_ON(gl, !list_empty(&gl->gl_holders));
	if (mapping) {
		truncate_inode_pages_final(mapping);
		if (!gfs2_withdrawn(sdp))
			GLOCK_BUG_ON(gl, !mapping_empty(mapping));
	}
	trace_gfs2_glock_put(gl);
	sdp->sd_lockstruct.ls_ops->lm_put_lock(gl);
}

/*
 * Cause the glock to be put in work queue context.
 */
void gfs2_glock_queue_put(struct gfs2_glock *gl)
{
	gfs2_glock_queue_work(gl, 0);
}

/**
 * gfs2_glock_put() - Decrement reference count on glock
 * @gl: The glock to put
 *
 */

void gfs2_glock_put(struct gfs2_glock *gl)
{
	if (lockref_put_or_lock(&gl->gl_lockref))
		return;

	__gfs2_glock_put(gl);
}

/**
 * may_grant - check if it's ok to grant a new lock
 * @gl: The glock
 * @gh: The lock request which we wish to grant
 *
 * Returns: true if it's ok to grant the lock
 */

static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh)
{
	const struct gfs2_holder *gh_head = list_first_entry(&gl->gl_holders, const struct gfs2_holder, gh_list);

	if (gh != gh_head) {
		/*
		 * Here we make a special exception to grant holders who agree
		 * to share the EX lock with other holders who also have the
		 * bit set. If the original holder has the LM_FLAG_NODE_SCOPE
		 * bit set, we grant more holders with the bit set.
		 */
		if (gh_head->gh_state == LM_ST_EXCLUSIVE &&
		    (gh_head->gh_flags & LM_FLAG_NODE_SCOPE) &&
		    gh->gh_state == LM_ST_EXCLUSIVE &&
		    (gh->gh_flags & LM_FLAG_NODE_SCOPE))
			return 1;
		if ((gh->gh_state == LM_ST_EXCLUSIVE ||
		     gh_head->gh_state == LM_ST_EXCLUSIVE))
			return 0;
	}
	if (gl->gl_state == gh->gh_state)
		return 1;
	if (gh->gh_flags & GL_EXACT)
		return 0;
	if (gl->gl_state == LM_ST_EXCLUSIVE) {
		if (gh->gh_state == LM_ST_SHARED && gh_head->gh_state == LM_ST_SHARED)
			return 1;
		if (gh->gh_state == LM_ST_DEFERRED && gh_head->gh_state == LM_ST_DEFERRED)
			return 1;
	}
	if (gl->gl_state != LM_ST_UNLOCKED && (gh->gh_flags & LM_FLAG_ANY))
		return 1;
	return 0;
}

static void gfs2_holder_wake(struct gfs2_holder *gh)
{
	clear_bit(HIF_WAIT, &gh->gh_iflags);
	smp_mb__after_atomic();
	wake_up_bit(&gh->gh_iflags, HIF_WAIT);
	if (gh->gh_flags & GL_ASYNC) {
		struct gfs2_sbd *sdp = gh->gh_gl->gl_name.ln_sbd;

		wake_up(&sdp->sd_async_glock_wait);
	}
}

/**
 * do_error - Something unexpected has happened during a lock request
 * @gl: The glock
 * @ret: The status from the DLM
 */

static void do_error(struct gfs2_glock *gl, const int ret)
{
	struct gfs2_holder *gh, *tmp;

	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			continue;
		if (ret & LM_OUT_ERROR)
			gh->gh_error = -EIO;
		else if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))
			gh->gh_error = GLR_TRYFAILED;
		else
			continue;
		list_del_init(&gh->gh_list);
		trace_gfs2_glock_queue(gh, 0);
		gfs2_holder_wake(gh);
	}
}

/**
 * do_promote - promote as many requests as possible on the current queue
 * @gl: The glock
 * 
 * Returns: 1 if there is a blocked holder at the head of the list, or 2
 *          if a type specific operation is underway.
 */

static int do_promote(struct gfs2_glock *gl)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_holder *gh, *tmp;
	int ret;

restart:
	list_for_each_entry_safe(gh, tmp, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			continue;
		if (may_grant(gl, gh)) {
			if (gh->gh_list.prev == &gl->gl_holders &&
			    glops->go_lock) {
				spin_unlock(&gl->gl_lockref.lock);
				/* FIXME: eliminate this eventually */
				ret = glops->go_lock(gh);
				spin_lock(&gl->gl_lockref.lock);
				if (ret) {
					if (ret == 1)
						return 2;
					gh->gh_error = ret;
					list_del_init(&gh->gh_list);
					trace_gfs2_glock_queue(gh, 0);
					gfs2_holder_wake(gh);
					goto restart;
				}
				set_bit(HIF_HOLDER, &gh->gh_iflags);
				trace_gfs2_promote(gh, 1);
				gfs2_holder_wake(gh);
				goto restart;
			}
			set_bit(HIF_HOLDER, &gh->gh_iflags);
			trace_gfs2_promote(gh, 0);
			gfs2_holder_wake(gh);
			continue;
		}
		if (gh->gh_list.prev == &gl->gl_holders)
			return 1;
		do_error(gl, 0);
		break;
	}
	return 0;
}

/**
 * find_first_waiter - find the first gh that's waiting for the glock
 * @gl: the glock
 */

static inline struct gfs2_holder *find_first_waiter(const struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;

	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (!test_bit(HIF_HOLDER, &gh->gh_iflags))
			return gh;
	}
	return NULL;
}

/**
 * state_change - record that the glock is now in a different state
 * @gl: the glock
 * @new_state: the new state
 */

static void state_change(struct gfs2_glock *gl, unsigned int new_state)
{
	int held1, held2;

	held1 = (gl->gl_state != LM_ST_UNLOCKED);
	held2 = (new_state != LM_ST_UNLOCKED);

	if (held1 != held2) {
		GLOCK_BUG_ON(gl, __lockref_is_dead(&gl->gl_lockref));
		if (held2)
			gl->gl_lockref.count++;
		else
			gl->gl_lockref.count--;
	}
	if (new_state != gl->gl_target)
		/* shorten our minimum hold time */
		gl->gl_hold_time = max(gl->gl_hold_time - GL_GLOCK_HOLD_DECR,
				       GL_GLOCK_MIN_HOLD);
	gl->gl_state = new_state;
	gl->gl_tchange = jiffies;
}

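/*
 * Request a demote: set GLF_DEMOTE and wake up anyone sleeping in
 * gfs2_glock_async_wait().  The barrier orders the flag update before the
 * wake-up so that woken waiters observe the demote request.
 */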
static void gfs2_set_demote(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	set_bit(GLF_DEMOTE, &gl->gl_flags);
	smp_mb();
	wake_up(&sdp->sd_async_glock_wait);
}

static void gfs2_demote_wake(struct gfs2_glock *gl)
{
	gl->gl_demote_state = LM_ST_EXCLUSIVE;
	clear_bit(GLF_DEMOTE, &gl->gl_flags);
	smp_mb__after_atomic();
	wake_up_bit(&gl->gl_flags, GLF_DEMOTE);
}

/**
 * finish_xmote - The DLM has replied to one of our lock requests
 * @gl: The glock
 * @ret: The status from the DLM
 *
 */

static void finish_xmote(struct gfs2_glock *gl, unsigned int ret)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_holder *gh;
	unsigned state = ret & LM_OUT_ST_MASK;
	int rv;

	spin_lock(&gl->gl_lockref.lock);
	trace_gfs2_glock_state_change(gl, state);
	state_change(gl, state);
	gh = find_first_waiter(gl);

	/* Demote to UN request arrived during demote to SH or DF */
	if (test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags) &&
	    state != LM_ST_UNLOCKED && gl->gl_demote_state == LM_ST_UNLOCKED)
		gl->gl_target = LM_ST_UNLOCKED;

	/* Check for state != intended state */
	if (unlikely(state != gl->gl_target)) {
		if (gh && !test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags)) {
			/* move to back of queue and try next entry */
			if (ret & LM_OUT_CANCELED) {
				if ((gh->gh_flags & LM_FLAG_PRIORITY) == 0)
					list_move_tail(&gh->gh_list, &gl->gl_holders);
				gh = find_first_waiter(gl);
				gl->gl_target = gh->gh_state;
				goto retry;
			}
			/* Some error or failed "try lock" - report it */
			if ((ret & LM_OUT_ERROR) ||
			    (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
				gl->gl_target = gl->gl_state;
				do_error(gl, ret);
				goto out;
			}
		}
		switch(state) {
		/* Unlocked due to conversion deadlock, try again */
		case LM_ST_UNLOCKED:
retry:
			do_xmote(gl, gh, gl->gl_target);
			break;
		/* Conversion fails, unlock and try again */
		case LM_ST_SHARED:
		case LM_ST_DEFERRED:
			do_xmote(gl, gh, LM_ST_UNLOCKED);
			break;
		default: /* Everything else */
			fs_err(gl->gl_name.ln_sbd, "wanted %u got %u\n",
			       gl->gl_target, state);
			GLOCK_BUG_ON(gl, 1);
		}
		spin_unlock(&gl->gl_lockref.lock);
		return;
	}

	/* Fast path - we got what we asked for */
	if (test_and_clear_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags))
		gfs2_demote_wake(gl);
	if (state != LM_ST_UNLOCKED) {
		if (glops->go_xmote_bh) {
			spin_unlock(&gl->gl_lockref.lock);
			rv = glops->go_xmote_bh(gl);
			spin_lock(&gl->gl_lockref.lock);
			if (rv) {
				do_error(gl, rv);
				goto out;
			}
		}
		rv = do_promote(gl);
		if (rv == 2)
			goto out_locked;
	}
out:
	clear_bit(GLF_LOCK, &gl->gl_flags);
out_locked:
	spin_unlock(&gl->gl_lockref.lock);
}

static bool is_system_glock(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);

	if (gl == m_ip->i_gl)
		return true;
	return false;
}

/**
 * do_xmote - Calls the DLM to change the state of a lock
 * @gl: The lock state
 * @gh: The holder (only for promotes)
 * @target: The target lock state
 *
 */

static void do_xmote(struct gfs2_glock *gl, struct gfs2_holder *gh, unsigned int target)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	unsigned int lck_flags = (unsigned int)(gh ? gh->gh_flags : 0);
	int ret;

	if (target != LM_ST_UNLOCKED && glock_blocked_by_withdraw(gl) &&
	    gh && !(gh->gh_flags & LM_FLAG_NOEXP))
		return;
	lck_flags &= (LM_FLAG_TRY | LM_FLAG_TRY_1CB | LM_FLAG_NOEXP |
		      LM_FLAG_PRIORITY);
	GLOCK_BUG_ON(gl, gl->gl_state == target);
	GLOCK_BUG_ON(gl, gl->gl_state == gl->gl_target);
	if ((target == LM_ST_UNLOCKED || target == LM_ST_DEFERRED) &&
	    glops->go_inval) {
		/*
		 * If another process is already doing the invalidate, let that
		 * finish first.  The glock state machine will get back to this
		 * holder again later.
		 */
		if (test_and_set_bit(GLF_INVALIDATE_IN_PROGRESS,
				     &gl->gl_flags))
			return;
		do_error(gl, 0); /* Fail queued try locks */
	}
	gl->gl_req = target;
	set_bit(GLF_BLOCKING, &gl->gl_flags);
	if ((gl->gl_req == LM_ST_UNLOCKED) ||
	    (gl->gl_state == LM_ST_EXCLUSIVE) ||
	    (lck_flags & (LM_FLAG_TRY|LM_FLAG_TRY_1CB)))
		clear_bit(GLF_BLOCKING, &gl->gl_flags);
	spin_unlock(&gl->gl_lockref.lock);
	if (glops->go_sync) {
		ret = glops->go_sync(gl);
		/* If we had a problem syncing (due to io errors or whatever),
		 * we should not invalidate the metadata or tell dlm to
		 * release the glock to other nodes.
		 */
		if (ret) {
			if (cmpxchg(&sdp->sd_log_error, 0, ret)) {
				fs_err(sdp, "Error %d syncing glock \n", ret);
				gfs2_dump_glock(NULL, gl, true);
			}
			goto skip_inval;
		}
	}
	if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags)) {
		/*
		 * The call to go_sync should have cleared out the ail list.
		 * If there are still items, we have a problem. We ought to
		 * withdraw, but we can't because the withdraw code also uses
		 * glocks. Warn about the error, dump the glock, then fall
		 * through and wait for logd to do the withdraw for us.
		 */
		if ((atomic_read(&gl->gl_ail_count) != 0) &&
		    (!cmpxchg(&sdp->sd_log_error, 0, -EIO))) {
			gfs2_glock_assert_warn(gl,
					       !atomic_read(&gl->gl_ail_count));
			gfs2_dump_glock(NULL, gl, true);
		}
		glops->go_inval(gl, target == LM_ST_DEFERRED ? 0 : DIO_METADATA);
		clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
	}

skip_inval:
	gfs2_glock_hold(gl);
	/*
	 * Check for an error encountered since we called go_sync and go_inval.
	 * If so, we can't withdraw from the glock code because the withdraw
	 * code itself uses glocks (see function signal_our_withdraw) to
	 * change the mount to read-only. Most importantly, we must not call
	 * dlm to unlock the glock until the journal is in a known good state
	 * (after journal replay) otherwise other nodes may use the object
	 * (rgrp or dinode) and then later, journal replay will corrupt the
	 * file system. The best we can do here is wait for the logd daemon
	 * to see sd_log_error and withdraw, and in the meantime, requeue the
	 * work for later.
	 *
	 * We make a special exception for some system glocks, such as the
	 * system statfs inode glock, which needs to be granted before the
	 * gfs2_quotad daemon can exit, and that exit needs to finish before
	 * we can unmount the withdrawn file system.
	 *
	 * However, if we're just unlocking the lock (say, for unmount, when
	 * gfs2_gl_hash_clear calls clear_glock) and recovery is complete
	 * then it's okay to tell dlm to unlock it.
	 */
	if (unlikely(sdp->sd_log_error && !gfs2_withdrawn(sdp)))
		gfs2_withdraw_delayed(sdp);
	if (glock_blocked_by_withdraw(gl) &&
	    (target != LM_ST_UNLOCKED ||
	     test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags))) {
		if (!is_system_glock(gl)) {
			gfs2_glock_queue_work(gl, GL_GLOCK_DFT_HOLD);
			goto out;
		} else {
			clear_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags);
		}
	}

	if (sdp->sd_lockstruct.ls_ops->lm_lock)	{
		/* lock_dlm */
		ret = sdp->sd_lockstruct.ls_ops->lm_lock(gl, target, lck_flags);
		if (ret == -EINVAL && gl->gl_target == LM_ST_UNLOCKED &&
		    target == LM_ST_UNLOCKED &&
		    test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags)) {
			finish_xmote(gl, target);
			gfs2_glock_queue_work(gl, 0);
		} else if (ret) {
			fs_err(sdp, "lm_lock ret %d\n", ret);
			GLOCK_BUG_ON(gl, !gfs2_withdrawn(sdp));
		}
	} else { /* lock_nolock */
		finish_xmote(gl, target);
		gfs2_glock_queue_work(gl, 0);
	}
out:
	spin_lock(&gl->gl_lockref.lock);
}

/**
 * find_first_holder - find the first "holder" gh
 * @gl: the glock
 */

static inline struct gfs2_holder *find_first_holder(const struct gfs2_glock *gl)
{
	struct gfs2_holder *gh;

	if (!list_empty(&gl->gl_holders)) {
		gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			return gh;
	}
	return NULL;
}

/**
 * run_queue - do all outstanding tasks related to a glock
 * @gl: The glock in question
 * @nonblock: True if we must not block in run_queue
 *
 */

static void run_queue(struct gfs2_glock *gl, const int nonblock)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	struct gfs2_holder *gh = NULL;
	int ret;

	if (test_and_set_bit(GLF_LOCK, &gl->gl_flags))
		return;

	GLOCK_BUG_ON(gl, test_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags));

	if (test_bit(GLF_DEMOTE, &gl->gl_flags) &&
	    gl->gl_demote_state != gl->gl_state) {
		if (find_first_holder(gl))
			goto out_unlock;
		if (nonblock)
			goto out_sched;
		set_bit(GLF_DEMOTE_IN_PROGRESS, &gl->gl_flags);
		GLOCK_BUG_ON(gl, gl->gl_demote_state == LM_ST_EXCLUSIVE);
		gl->gl_target = gl->gl_demote_state;
	} else {
		if (test_bit(GLF_DEMOTE, &gl->gl_flags))
			gfs2_demote_wake(gl);
		ret = do_promote(gl);
		if (ret == 0)
			goto out_unlock;
		if (ret == 2)
			goto out;
		gh = find_first_waiter(gl);
		gl->gl_target = gh->gh_state;
		if (!(gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)))
			do_error(gl, 0); /* Fail queued try locks */
	}
	do_xmote(gl, gh, gl->gl_target);
out:
	return;

out_sched:
	clear_bit(GLF_LOCK, &gl->gl_flags);
	smp_mb__after_atomic();
	gl->gl_lockref.count++;
	__gfs2_glock_queue_work(gl, 0);
	return;

out_unlock:
	clear_bit(GLF_LOCK, &gl->gl_flags);
	smp_mb__after_atomic();
	return;
}

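/*
 * The iopen glock's lock value block (LVB) carries a struct gfs2_inode_lvb.
 * gfs2_inode_remember_delete() records the generation of an inode that has
 * been deleted, and gfs2_inode_already_deleted() lets a node check that
 * record so the same inode is not deleted a second time.
 */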
void gfs2_inode_remember_delete(struct gfs2_glock *gl, u64 generation)
{
	struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;

	if (ri->ri_magic == 0)
		ri->ri_magic = cpu_to_be32(GFS2_MAGIC);
	if (ri->ri_magic == cpu_to_be32(GFS2_MAGIC))
		ri->ri_generation_deleted = cpu_to_be64(generation);
}

bool gfs2_inode_already_deleted(struct gfs2_glock *gl, u64 generation)
{
	struct gfs2_inode_lvb *ri = (void *)gl->gl_lksb.sb_lvbptr;

	if (ri->ri_magic != cpu_to_be32(GFS2_MAGIC))
		return false;
	return generation <= be64_to_cpu(ri->ri_generation_deleted);
}

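/*
 * Briefly try to acquire and release the inode glock with LM_FLAG_TRY_1CB so
 * that the node currently holding it receives a callback without us blocking;
 * used by gfs2_try_evict() below to nudge the remote node.
 */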
static void gfs2_glock_poke(struct gfs2_glock *gl)
{
	int flags = LM_FLAG_TRY_1CB | LM_FLAG_ANY | GL_SKIP;
	struct gfs2_holder gh;
	int error;

	gfs2_holder_init(gl, LM_ST_SHARED, flags, &gh);
	error = gfs2_glock_nq(&gh);
	if (!error)
		gfs2_glock_dq(&gh);
	gfs2_holder_uninit(&gh);
}

static bool gfs2_try_evict(struct gfs2_glock *gl)
{
	struct gfs2_inode *ip;
	bool evicted = false;

	/*
	 * If there is contention on the iopen glock and we have an inode, try
	 * to grab and release the inode so that it can be evicted.  This will
	 * allow the remote node to go ahead and delete the inode without us
	 * having to do it, which will avoid rgrp glock thrashing.
	 *
	 * The remote node is likely still holding the corresponding inode
	 * glock, so it will run before we get to verify that the delete has
	 * happened below.
	 */
	spin_lock(&gl->gl_lockref.lock);
	ip = gl->gl_object;
	if (ip && !igrab(&ip->i_inode))
		ip = NULL;
	spin_unlock(&gl->gl_lockref.lock);
	if (ip) {
		struct gfs2_glock *inode_gl = NULL;

		gl->gl_no_formal_ino = ip->i_no_formal_ino;
		set_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
		d_prune_aliases(&ip->i_inode);
		iput(&ip->i_inode);

		/* If the inode was evicted, gl->gl_object will now be NULL. */
		spin_lock(&gl->gl_lockref.lock);
		ip = gl->gl_object;
		if (ip) {
			inode_gl = ip->i_gl;
			lockref_get(&inode_gl->gl_lockref);
			clear_bit(GIF_DEFERRED_DELETE, &ip->i_flags);
		}
		spin_unlock(&gl->gl_lockref.lock);
		if (inode_gl) {
			gfs2_glock_poke(inode_gl);
			gfs2_glock_put(inode_gl);
		}
		evicted = !ip;
	}
	return evicted;
}

static void delete_work_func(struct work_struct *work)
{
	struct delayed_work *dwork = to_delayed_work(work);
	struct gfs2_glock *gl = container_of(dwork, struct gfs2_glock, gl_delete);
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct inode *inode;
	u64 no_addr = gl->gl_name.ln_number;

	spin_lock(&gl->gl_lockref.lock);
	clear_bit(GLF_PENDING_DELETE, &gl->gl_flags);
	spin_unlock(&gl->gl_lockref.lock);

	if (test_bit(GLF_DEMOTE, &gl->gl_flags)) {
		/*
		 * If we can evict the inode, give the remote node trying to
		 * delete the inode some time before verifying that the delete
		 * has happened.  Otherwise, if we cause contention on the inode glock
		 * immediately, the remote node will think that we still have
		 * the inode in use, and so it will give up waiting.
		 *
		 * If we can't evict the inode, signal to the remote node that
		 * the inode is still in use.  We'll later try to delete the
		 * inode locally in gfs2_evict_inode.
		 *
		 * FIXME: We only need to verify that the remote node has
		 * deleted the inode because nodes before this remote delete
		 * rework won't cooperate.  At a later time, when we no longer
		 * care about compatibility with such nodes, we can skip this
		 * step entirely.
		 */
		if (gfs2_try_evict(gl)) {
			if (gfs2_queue_delete_work(gl, 5 * HZ))
				return;
		}
		goto out;
	}

	inode = gfs2_lookup_by_inum(sdp, no_addr, gl->gl_no_formal_ino,
				    GFS2_BLKST_UNLINKED);
	if (!IS_ERR_OR_NULL(inode)) {
		d_prune_aliases(inode);
		iput(inode);
	}
out:
	gfs2_glock_put(gl);
}

static void glock_work_func(struct work_struct *work)
{
	unsigned long delay = 0;
	struct gfs2_glock *gl = container_of(work, struct gfs2_glock, gl_work.work);
	unsigned int drop_refs = 1;

	if (test_and_clear_bit(GLF_REPLY_PENDING, &gl->gl_flags)) {
		finish_xmote(gl, gl->gl_reply);
		drop_refs++;
	}
	spin_lock(&gl->gl_lockref.lock);
	if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
	    gl->gl_state != LM_ST_UNLOCKED &&
	    gl->gl_demote_state != LM_ST_EXCLUSIVE) {
		unsigned long holdtime, now = jiffies;

		holdtime = gl->gl_tchange + gl->gl_hold_time;
		if (time_before(now, holdtime))
			delay = holdtime - now;

		if (!delay) {
			clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
			gfs2_set_demote(gl);
		}
	}
	run_queue(gl, 0);
	if (delay) {
		/* Keep one glock reference for the work we requeue. */
		drop_refs--;
		if (gl->gl_name.ln_type != LM_TYPE_INODE)
			delay = 0;
		__gfs2_glock_queue_work(gl, delay);
	}
	}

	/*
	 * Drop the remaining glock references manually here. (Mind that
	 * __gfs2_glock_queue_work depends on the lockref spinlock being held
	 * here as well.)
	 */
	gl->gl_lockref.count -= drop_refs;
	if (!gl->gl_lockref.count) {
		__gfs2_glock_put(gl);
		return;
	}
	spin_unlock(&gl->gl_lockref.lock);
}

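/**
 * find_insert_glock - look up a glock by name, optionally inserting a new one
 * @name: the lock name
 * @new: glock to insert if no live glock is found, or NULL for a pure lookup
 *
 * If a matching glock exists but is already dead, wait on the glock wait
 * table for it to be freed and then retry.
 *
 * Returns: the existing glock (with a reference taken), NULL if none was
 *          found (in which case @new, if non-NULL, has been inserted), or
 *          an ERR_PTR from the rhashtable insertion.
 */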
static struct gfs2_glock *find_insert_glock(struct lm_lockname *name,
					    struct gfs2_glock *new)
{
	struct wait_glock_queue wait;
	wait_queue_head_t *wq = glock_waitqueue(name);
	struct gfs2_glock *gl;

	wait.name = name;
	init_wait(&wait.wait);
	wait.wait.func = glock_wake_function;

again:
	prepare_to_wait(wq, &wait.wait, TASK_UNINTERRUPTIBLE);
	rcu_read_lock();
	if (new) {
		gl = rhashtable_lookup_get_insert_fast(&gl_hash_table,
			&new->gl_node, ht_parms);
		if (IS_ERR(gl))
			goto out;
	} else {
		gl = rhashtable_lookup_fast(&gl_hash_table,
			name, ht_parms);
	}
	if (gl && !lockref_get_not_dead(&gl->gl_lockref)) {
		rcu_read_unlock();
		schedule();
		goto again;
	}
out:
	rcu_read_unlock();
	finish_wait(wq, &wait.wait);
	return gl;
}

/**
 * gfs2_glock_get() - Get a glock, or create one if one doesn't exist
 * @sdp: The GFS2 superblock
 * @number: the lock number
 * @glops: The glock_operations to use
 * @create: If 0, don't create the glock if it doesn't exist
 * @glp: the glock is returned here
 *
 * This does not lock a glock, just finds/creates structures for one.
 *
 * Returns: errno
 */

int gfs2_glock_get(struct gfs2_sbd *sdp, u64 number,
		   const struct gfs2_glock_operations *glops, int create,
		   struct gfs2_glock **glp)
{
	struct super_block *s = sdp->sd_vfs;
	struct lm_lockname name = { .ln_number = number,
				    .ln_type = glops->go_type,
				    .ln_sbd = sdp };
	struct gfs2_glock *gl, *tmp;
	struct address_space *mapping;
	struct kmem_cache *cachep;
	int ret = 0;

	gl = find_insert_glock(&name, NULL);
	if (gl) {
		*glp = gl;
		return 0;
	}
	if (!create)
		return -ENOENT;

	if (glops->go_flags & GLOF_ASPACE)
		cachep = gfs2_glock_aspace_cachep;
	else
		cachep = gfs2_glock_cachep;
	gl = kmem_cache_alloc(cachep, GFP_NOFS);
	if (!gl)
		return -ENOMEM;

	memset(&gl->gl_lksb, 0, sizeof(struct dlm_lksb));

	if (glops->go_flags & GLOF_LVB) {
		gl->gl_lksb.sb_lvbptr = kzalloc(GDLM_LVB_SIZE, GFP_NOFS);
		if (!gl->gl_lksb.sb_lvbptr) {
			kmem_cache_free(cachep, gl);
			return -ENOMEM;
		}
	}

	atomic_inc(&sdp->sd_glock_disposal);
	gl->gl_node.next = NULL;
	gl->gl_flags = 0;
	gl->gl_name = name;
	lockdep_set_subclass(&gl->gl_lockref.lock, glops->go_subclass);
	gl->gl_lockref.count = 1;
	gl->gl_state = LM_ST_UNLOCKED;
	gl->gl_target = LM_ST_UNLOCKED;
	gl->gl_demote_state = LM_ST_EXCLUSIVE;
	gl->gl_ops = glops;
	gl->gl_dstamp = 0;
	preempt_disable();
	/* We use the global stats to estimate the initial per-glock stats */
	gl->gl_stats = this_cpu_ptr(sdp->sd_lkstats)->lkstats[glops->go_type];
	preempt_enable();
	gl->gl_stats.stats[GFS2_LKS_DCOUNT] = 0;
	gl->gl_stats.stats[GFS2_LKS_QCOUNT] = 0;
	gl->gl_tchange = jiffies;
	gl->gl_object = NULL;
	gl->gl_hold_time = GL_GLOCK_DFT_HOLD;
	INIT_DELAYED_WORK(&gl->gl_work, glock_work_func);
	if (gl->gl_name.ln_type == LM_TYPE_IOPEN)
		INIT_DELAYED_WORK(&gl->gl_delete, delete_work_func);

	mapping = gfs2_glock2aspace(gl);
	if (mapping) {
		mapping->a_ops = &gfs2_meta_aops;
		mapping->host = s->s_bdev->bd_inode;
		mapping->flags = 0;
		mapping_set_gfp_mask(mapping, GFP_NOFS);
		mapping->private_data = NULL;
		mapping->writeback_index = 0;
	}

	tmp = find_insert_glock(&name, gl);
	if (!tmp) {
		*glp = gl;
		goto out;
	}
	if (IS_ERR(tmp)) {
		ret = PTR_ERR(tmp);
		goto out_free;
	}
	*glp = tmp;

out_free:
	kfree(gl->gl_lksb.sb_lvbptr);
	kmem_cache_free(cachep, gl);
	if (atomic_dec_and_test(&sdp->sd_glock_disposal))
		wake_up(&sdp->sd_glock_wait);

out:
	return ret;
}

/**
 * gfs2_holder_init - initialize a struct gfs2_holder in the default way
 * @gl: the glock
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 */

void gfs2_holder_init(struct gfs2_glock *gl, unsigned int state, u16 flags,
		      struct gfs2_holder *gh)
{
	INIT_LIST_HEAD(&gh->gh_list);
	gh->gh_gl = gl;
	gh->gh_ip = _RET_IP_;
	gh->gh_owner_pid = get_pid(task_pid(current));
	gh->gh_state = state;
	gh->gh_flags = flags;
	gh->gh_error = 0;
	gh->gh_iflags = 0;
	gfs2_glock_hold(gl);
}
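
/*
 * Typical holder life cycle (an illustrative sketch only; the variable
 * names below are examples and not taken from this file):
 *
 *	struct gfs2_holder gh;
 *	int error;
 *
 *	gfs2_holder_init(gl, LM_ST_SHARED, 0, &gh);
 *	error = gfs2_glock_nq(&gh);
 *	if (error) {
 *		gfs2_holder_uninit(&gh);
 *		return error;
 *	}
 *	... access the object protected by the glock ...
 *	gfs2_glock_dq_uninit(&gh);
 */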

/**
 * gfs2_holder_reinit - reinitialize a struct gfs2_holder so we can requeue it
 * @state: the state we're requesting
 * @flags: the modifier flags
 * @gh: the holder structure
 *
 * Don't mess with the glock.
 *
 */

void gfs2_holder_reinit(unsigned int state, u16 flags, struct gfs2_holder *gh)
{
	gh->gh_state = state;
	gh->gh_flags = flags;
	gh->gh_iflags = 0;
	gh->gh_ip = _RET_IP_;
	put_pid(gh->gh_owner_pid);
	gh->gh_owner_pid = get_pid(task_pid(current));
}

/**
 * gfs2_holder_uninit - uninitialize a holder structure (drop glock reference)
 * @gh: the holder structure
 *
 */

void gfs2_holder_uninit(struct gfs2_holder *gh)
{
	put_pid(gh->gh_owner_pid);
	gfs2_glock_put(gh->gh_gl);
	gfs2_holder_mark_uninitialized(gh);
	gh->gh_ip = 0;
}

static void gfs2_glock_update_hold_time(struct gfs2_glock *gl,
					unsigned long start_time)
{
	/* Have we waited longer than a second? */
	if (time_after(jiffies, start_time + HZ)) {
		/* Lengthen the minimum hold time. */
		gl->gl_hold_time = min(gl->gl_hold_time + GL_GLOCK_HOLD_INCR,
				       GL_GLOCK_MAX_HOLD);
	}
}

/**
 * gfs2_glock_wait - wait on a glock acquisition
 * @gh: the glock holder
 *
 * Returns: 0 on success
 */

int gfs2_glock_wait(struct gfs2_holder *gh)
{
	unsigned long start_time = jiffies;

	might_sleep();
	wait_on_bit(&gh->gh_iflags, HIF_WAIT, TASK_UNINTERRUPTIBLE);
	gfs2_glock_update_hold_time(gh->gh_gl, start_time);
	return gh->gh_error;
}

static int glocks_pending(unsigned int num_gh, struct gfs2_holder *ghs)
{
	int i;

	for (i = 0; i < num_gh; i++)
		if (test_bit(HIF_WAIT, &ghs[i].gh_iflags))
			return 1;
	return 0;
}

/**
 * gfs2_glock_async_wait - wait on multiple asynchronous glock acquisitions
 * @num_gh: the number of holders in the array
 * @ghs: the glock holder array
 *
 * Returns: 0 on success, meaning all glocks have been granted and are held.
 *          -ESTALE if the request timed out, meaning all glocks were released,
 *          and the caller should retry the operation.
 */

int gfs2_glock_async_wait(unsigned int num_gh, struct gfs2_holder *ghs)
{
	struct gfs2_sbd *sdp = ghs[0].gh_gl->gl_name.ln_sbd;
	int i, ret = 0, timeout = 0;
	unsigned long start_time = jiffies;
	bool keep_waiting;

	might_sleep();
	/*
	 * Total up the (minimum hold time * 2) of all glocks and use that to
	 * determine the max amount of time we should wait.
	 */
	for (i = 0; i < num_gh; i++)
		timeout += ghs[i].gh_gl->gl_hold_time << 1;

wait_for_dlm:
	if (!wait_event_timeout(sdp->sd_async_glock_wait,
				!glocks_pending(num_gh, ghs), timeout))
		ret = -ESTALE; /* request timed out. */

	/*
	 * If dlm granted all our requests, we need to adjust the glock
	 * minimum hold time values according to how long we waited.
	 *
	 * If our request timed out, we need to repeatedly release any held
	 * glocks we acquired thus far to allow dlm to acquire the remaining
	 * glocks without deadlocking.  We cannot currently cancel outstanding
	 * glock acquisitions.
	 *
	 * The HIF_WAIT bit tells us which requests still need a response from
	 * dlm.
	 *
	 * If dlm sent us any errors, we return the first error we find.
	 */
	keep_waiting = false;
	for (i = 0; i < num_gh; i++) {
		/* Skip holders we have already dequeued below. */
		if (!gfs2_holder_queued(&ghs[i]))
			continue;
		/* Skip holders with a pending DLM response. */
		if (test_bit(HIF_WAIT, &ghs[i].gh_iflags)) {
			keep_waiting = true;
			continue;
		}

		if (test_bit(HIF_HOLDER, &ghs[i].gh_iflags)) {
			if (ret == -ESTALE)
				gfs2_glock_dq(&ghs[i]);
			else
				gfs2_glock_update_hold_time(ghs[i].gh_gl,
							    start_time);
		}
		if (!ret)
			ret = ghs[i].gh_error;
	}

	if (keep_waiting)
		goto wait_for_dlm;

	/*
	 * At this point, we've either acquired all locks or released them all.
	 */
	return ret;
}
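
/*
 * Callers typically initialize each holder with GL_ASYNC set, queue them all
 * with gfs2_glock_nq(), and then call gfs2_glock_async_wait() on the array;
 * on -ESTALE every holder that had been granted has already been dequeued
 * and the whole operation should be retried.  (Illustrative summary only.)
 */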

/**
 * handle_callback - process a demote request
 * @gl: the glock
 * @state: the state the caller wants us to change to
 * @delay: zero to demote immediately; otherwise pending demote
 * @remote: true if this came from a different cluster node
 *
 * There are only two requests that we are going to see in actual
 * practice: LM_ST_SHARED and LM_ST_UNLOCKED
 */

static void handle_callback(struct gfs2_glock *gl, unsigned int state,
			    unsigned long delay, bool remote)
{
	if (delay)
		set_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);
	else
		gfs2_set_demote(gl);
	if (gl->gl_demote_state == LM_ST_EXCLUSIVE) {
		gl->gl_demote_state = state;
		gl->gl_demote_time = jiffies;
	} else if (gl->gl_demote_state != LM_ST_UNLOCKED &&
			gl->gl_demote_state != state) {
		gl->gl_demote_state = LM_ST_UNLOCKED;
	}
	if (gl->gl_ops->go_callback)
		gl->gl_ops->go_callback(gl, remote);
	trace_gfs2_demote_rq(gl, remote);
}

void gfs2_print_dbg(struct seq_file *seq, const char *fmt, ...)
{
	struct va_format vaf;
	va_list args;

	va_start(args, fmt);

	if (seq) {
		seq_vprintf(seq, fmt, args);
	} else {
		vaf.fmt = fmt;
		vaf.va = &args;

		pr_err("%pV", &vaf);
	}

	va_end(args);
}

/**
 * add_to_queue - Add a holder to the wait queue (but look for recursion)
 * @gh: the holder structure to add
 *
 * Eventually we should move the recursive locking trap to a
 * debugging option or something like that. This is the fast
 * path and needs to have the minimum number of distractions.
 * 
 */

static inline void add_to_queue(struct gfs2_holder *gh)
__releases(&gl->gl_lockref.lock)
__acquires(&gl->gl_lockref.lock)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct list_head *insert_pt = NULL;
	struct gfs2_holder *gh2;
	int try_futile = 0;

	GLOCK_BUG_ON(gl, gh->gh_owner_pid == NULL);
	if (test_and_set_bit(HIF_WAIT, &gh->gh_iflags))
		GLOCK_BUG_ON(gl, true);

	if (gh->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB)) {
		if (test_bit(GLF_LOCK, &gl->gl_flags))
			try_futile = !may_grant(gl, gh);
		if (test_bit(GLF_INVALIDATE_IN_PROGRESS, &gl->gl_flags))
			goto fail;
	}

	list_for_each_entry(gh2, &gl->gl_holders, gh_list) {
		if (unlikely(gh2->gh_owner_pid == gh->gh_owner_pid &&
		    (gh->gh_gl->gl_ops->go_type != LM_TYPE_FLOCK)))
			goto trap_recursive;
		if (try_futile &&
		    !(gh2->gh_flags & (LM_FLAG_TRY | LM_FLAG_TRY_1CB))) {
fail:
			gh->gh_error = GLR_TRYFAILED;
			gfs2_holder_wake(gh);
			return;
		}
		if (test_bit(HIF_HOLDER, &gh2->gh_iflags))
			continue;
		if (unlikely((gh->gh_flags & LM_FLAG_PRIORITY) && !insert_pt))
			insert_pt = &gh2->gh_list;
	}
	trace_gfs2_glock_queue(gh, 1);
	gfs2_glstats_inc(gl, GFS2_LKS_QCOUNT);
	gfs2_sbstats_inc(gl, GFS2_LKS_QCOUNT);
	if (likely(insert_pt == NULL)) {
		list_add_tail(&gh->gh_list, &gl->gl_holders);
		if (unlikely(gh->gh_flags & LM_FLAG_PRIORITY))
			goto do_cancel;
		return;
	}
	list_add_tail(&gh->gh_list, insert_pt);
do_cancel:
	gh = list_first_entry(&gl->gl_holders, struct gfs2_holder, gh_list);
	if (!(gh->gh_flags & LM_FLAG_PRIORITY)) {
		spin_unlock(&gl->gl_lockref.lock);
		if (sdp->sd_lockstruct.ls_ops->lm_cancel)
			sdp->sd_lockstruct.ls_ops->lm_cancel(gl);
		spin_lock(&gl->gl_lockref.lock);
	}
	return;

trap_recursive:
	fs_err(sdp, "original: %pSR\n", (void *)gh2->gh_ip);
	fs_err(sdp, "pid: %d\n", pid_nr(gh2->gh_owner_pid));
	fs_err(sdp, "lock type: %d req lock state : %d\n",
	       gh2->gh_gl->gl_name.ln_type, gh2->gh_state);
	fs_err(sdp, "new: %pSR\n", (void *)gh->gh_ip);
	fs_err(sdp, "pid: %d\n", pid_nr(gh->gh_owner_pid));
	fs_err(sdp, "lock type: %d req lock state : %d\n",
	       gh->gh_gl->gl_name.ln_type, gh->gh_state);
	gfs2_dump_glock(NULL, gl, true);
	BUG();
}

/**
 * gfs2_glock_nq - enqueue a struct gfs2_holder onto a glock (acquire a glock)
 * @gh: the holder structure
 *
 * if (gh->gh_flags & GL_ASYNC), this never returns an error
 *
 * Returns: 0, GLR_TRYFAILED, or errno on failure
 */

int gfs2_glock_nq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	int error = 0;

	if (glock_blocked_by_withdraw(gl) && !(gh->gh_flags & LM_FLAG_NOEXP))
		return -EIO;

	if (test_bit(GLF_LRU, &gl->gl_flags))
		gfs2_glock_remove_from_lru(gl);

	spin_lock(&gl->gl_lockref.lock);
	add_to_queue(gh);
	if (unlikely((LM_FLAG_NOEXP & gh->gh_flags) &&
		     test_and_clear_bit(GLF_FROZEN, &gl->gl_flags))) {
		set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
		gl->gl_lockref.count++;
		__gfs2_glock_queue_work(gl, 0);
	}
	run_queue(gl, 1);
	spin_unlock(&gl->gl_lockref.lock);

	if (!(gh->gh_flags & GL_ASYNC))
		error = gfs2_glock_wait(gh);

	return error;
}

/**
 * gfs2_glock_poll - poll to see if an async request has been completed
 * @gh: the holder
 *
 * Returns: 1 if the request is ready to be gfs2_glock_wait()ed on
 */

int gfs2_glock_poll(struct gfs2_holder *gh)
{
	return test_bit(HIF_WAIT, &gh->gh_iflags) ? 0 : 1;
}

/**
 * gfs2_glock_dq - dequeue a struct gfs2_holder from a glock (release a glock)
 * @gh: the glock holder
 *
 */

void gfs2_glock_dq(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	unsigned delay = 0;
	int fast_path = 0;

	spin_lock(&gl->gl_lockref.lock);
	/*
	 * If we're in the process of file system withdraw, we cannot just
	 * dequeue any glocks until our journal is recovered, lest we
	 * introduce file system corruption. We need two exceptions to this
	 * rule: We need to allow unlocking of nondisk glocks and the glock
	 * for our own journal that needs recovery.
	 */
	if (test_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags) &&
	    glock_blocked_by_withdraw(gl) &&
	    gh->gh_gl != sdp->sd_jinode_gl) {
		sdp->sd_glock_dqs_held++;
		spin_unlock(&gl->gl_lockref.lock);
		might_sleep();
		wait_on_bit(&sdp->sd_flags, SDF_WITHDRAW_RECOVERY,
			    TASK_UNINTERRUPTIBLE);
		spin_lock(&gl->gl_lockref.lock);
	}
	if (gh->gh_flags & GL_NOCACHE)
		handle_callback(gl, LM_ST_UNLOCKED, 0, false);

	list_del_init(&gh->gh_list);
	clear_bit(HIF_HOLDER, &gh->gh_iflags);
	if (find_first_holder(gl) == NULL) {
		if (list_empty(&gl->gl_holders) &&
		    !test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
		    !test_bit(GLF_DEMOTE, &gl->gl_flags))
			fast_path = 1;
	}
	if (!test_bit(GLF_LFLUSH, &gl->gl_flags) && demote_ok(gl))
		gfs2_glock_add_to_lru(gl);

	trace_gfs2_glock_queue(gh, 0);
	if (unlikely(!fast_path)) {
		gl->gl_lockref.count++;
		if (test_bit(GLF_PENDING_DEMOTE, &gl->gl_flags) &&
		    !test_bit(GLF_DEMOTE, &gl->gl_flags) &&
		    gl->gl_name.ln_type == LM_TYPE_INODE)
			delay = gl->gl_hold_time;
		__gfs2_glock_queue_work(gl, delay);
	}
	spin_unlock(&gl->gl_lockref.lock);
}

void gfs2_glock_dq_wait(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	gfs2_glock_dq(gh);
	might_sleep();
	wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
}

/**
 * gfs2_glock_dq_uninit - dequeue a holder from a glock and initialize it
 * @gh: the holder structure
 *
 */

void gfs2_glock_dq_uninit(struct gfs2_holder *gh)
{
	gfs2_glock_dq(gh);
	gfs2_holder_uninit(gh);
}

/**
 * gfs2_glock_nq_num - acquire a glock based on lock number
 * @sdp: the filesystem
 * @number: the lock number
 * @glops: the glock operations for the type of glock
 * @state: the state to acquire the glock in
 * @flags: modifier flags for the acquisition
 * @gh: the struct gfs2_holder
 *
 * Returns: errno
 */

int gfs2_glock_nq_num(struct gfs2_sbd *sdp, u64 number,
		      const struct gfs2_glock_operations *glops,
		      unsigned int state, u16 flags, struct gfs2_holder *gh)
{
	struct gfs2_glock *gl;
	int error;

	error = gfs2_glock_get(sdp, number, glops, CREATE, &gl);
	if (!error) {
		error = gfs2_glock_nq_init(gl, state, flags, gh);
		gfs2_glock_put(gl);
	}

	return error;
}

/**
 * glock_compare - Compare two struct gfs2_glock structures for sorting
 * @arg_a: the first structure
 * @arg_b: the second structure
 *
 */

static int glock_compare(const void *arg_a, const void *arg_b)
{
	const struct gfs2_holder *gh_a = *(const struct gfs2_holder **)arg_a;
	const struct gfs2_holder *gh_b = *(const struct gfs2_holder **)arg_b;
	const struct lm_lockname *a = &gh_a->gh_gl->gl_name;
	const struct lm_lockname *b = &gh_b->gh_gl->gl_name;

	if (a->ln_number > b->ln_number)
		return 1;
	if (a->ln_number < b->ln_number)
		return -1;
	BUG_ON(gh_a->gh_gl->gl_ops->go_type == gh_b->gh_gl->gl_ops->go_type);
	return 0;
}

/**
 * nq_m_sync - synchronously acquire more than one glock in deadlock-free order
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 * @p: placeholder for the holder structure to pass back
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */

static int nq_m_sync(unsigned int num_gh, struct gfs2_holder *ghs,
		     struct gfs2_holder **p)
{
	unsigned int x;
	int error = 0;

	for (x = 0; x < num_gh; x++)
		p[x] = &ghs[x];

	sort(p, num_gh, sizeof(struct gfs2_holder *), glock_compare, NULL);

	for (x = 0; x < num_gh; x++) {
		p[x]->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);

		error = gfs2_glock_nq(p[x]);
		if (error) {
			while (x--)
				gfs2_glock_dq(p[x]);
			break;
		}
	}

	return error;
}

/**
 * gfs2_glock_nq_m - acquire multiple glocks
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 *
 * Returns: 0 on success (all glocks acquired),
 *          errno on failure (no glocks acquired)
 */

int gfs2_glock_nq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
	struct gfs2_holder *tmp[4];
	struct gfs2_holder **pph = tmp;
	int error = 0;

	switch(num_gh) {
	case 0:
		return 0;
	case 1:
		ghs->gh_flags &= ~(LM_FLAG_TRY | GL_ASYNC);
		return gfs2_glock_nq(ghs);
	default:
		if (num_gh <= 4)
			break;
		pph = kmalloc_array(num_gh, sizeof(struct gfs2_holder *),
				    GFP_NOFS);
		if (!pph)
			return -ENOMEM;
	}

	error = nq_m_sync(num_gh, ghs, pph);

	if (pph != tmp)
		kfree(pph);

	return error;
}

/**
 * gfs2_glock_dq_m - release multiple glocks
 * @num_gh: the number of structures
 * @ghs: an array of struct gfs2_holder structures
 *
 */

void gfs2_glock_dq_m(unsigned int num_gh, struct gfs2_holder *ghs)
{
	while (num_gh--)
		gfs2_glock_dq(&ghs[num_gh]);
}

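/**
 * gfs2_glock_cb - Process a demote request from the lock module
 * @gl: Pointer to the glock
 * @state: The state the remote holder wants us to drop to
 *
 * For an inode glock that still has local holders, the demote is delayed
 * by up to the glock's minimum hold time so the lock is not bounced away
 * immediately after being acquired.
 */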
void gfs2_glock_cb(struct gfs2_glock *gl, unsigned int state)
{
	unsigned long delay = 0;
	unsigned long holdtime;
	unsigned long now = jiffies;

	gfs2_glock_hold(gl);
	spin_lock(&gl->gl_lockref.lock);
	holdtime = gl->gl_tchange + gl->gl_hold_time;
	if (!list_empty(&gl->gl_holders) &&
	    gl->gl_name.ln_type == LM_TYPE_INODE) {
		if (time_before(now, holdtime))
			delay = holdtime - now;
		if (test_bit(GLF_REPLY_PENDING, &gl->gl_flags))
			delay = gl->gl_hold_time;
	}
	handle_callback(gl, state, delay, true);
	__gfs2_glock_queue_work(gl, delay);
	spin_unlock(&gl->gl_lockref.lock);
}

/**
 * gfs2_should_freeze - Figure out if glock should be frozen
 * @gl: The glock in question
 *
 * Glocks are not frozen if (a) the result of the dlm operation is
 * an error, (b) the locking operation was an unlock operation or
 * (c) if there is a "noexp" flagged request anywhere in the queue
 *
 * Returns: 1 if freezing should occur, 0 otherwise
 */

static int gfs2_should_freeze(const struct gfs2_glock *gl)
{
	const struct gfs2_holder *gh;

	if (gl->gl_reply & ~LM_OUT_ST_MASK)
		return 0;
	if (gl->gl_target == LM_ST_UNLOCKED)
		return 0;

	list_for_each_entry(gh, &gl->gl_holders, gh_list) {
		if (test_bit(HIF_HOLDER, &gh->gh_iflags))
			continue;
		if (LM_FLAG_NOEXP & gh->gh_flags)
			return 0;
	}

	return 1;
}

/**
 * gfs2_glock_complete - Callback used by locking
 * @gl: Pointer to the glock
 * @ret: The return value from the dlm
 *
 * The gl_reply field is under the gl_lockref.lock lock so that it is ok
 * to use a bitfield shared with other glock state fields.
 */

void gfs2_glock_complete(struct gfs2_glock *gl, int ret)
{
	struct lm_lockstruct *ls = &gl->gl_name.ln_sbd->sd_lockstruct;

	spin_lock(&gl->gl_lockref.lock);
	gl->gl_reply = ret;

	if (unlikely(test_bit(DFL_BLOCK_LOCKS, &ls->ls_recover_flags))) {
		if (gfs2_should_freeze(gl)) {
			set_bit(GLF_FROZEN, &gl->gl_flags);
			spin_unlock(&gl->gl_lockref.lock);
			return;
		}
	}

	gl->gl_lockref.count++;
	set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
	__gfs2_glock_queue_work(gl, 0);
	spin_unlock(&gl->gl_lockref.lock);
}

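/* list_sort() comparison callback: order glocks by their lock number. */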
static int glock_cmp(void *priv, const struct list_head *a,
		     const struct list_head *b)
{
	struct gfs2_glock *gla, *glb;

	gla = list_entry(a, struct gfs2_glock, gl_lru);
	glb = list_entry(b, struct gfs2_glock, gl_lru);

	if (gla->gl_name.ln_number > glb->gl_name.ln_number)
		return 1;
	if (gla->gl_name.ln_number < glb->gl_name.ln_number)
		return -1;

	return 0;
}

/**
 * gfs2_dispose_glock_lru - Demote a list of glocks
 * @list: The list to dispose of
 *
 * Disposing of glocks may involve disk accesses, so that here we sort
 * the glocks by number (i.e. disk location of the inodes) so that if
 * there are any such accesses, they'll be sent in order (mostly).
 *
 * Must be called under the lru_lock, but may drop and retake this
 * lock. While the lru_lock is dropped, entries may vanish from the
 * list, but no new entries will appear on the list (since it is
 * private)
 */

static void gfs2_dispose_glock_lru(struct list_head *list)
__releases(&lru_lock)
__acquires(&lru_lock)
{
	struct gfs2_glock *gl;

	list_sort(NULL, list, glock_cmp);

	while(!list_empty(list)) {
		gl = list_first_entry(list, struct gfs2_glock, gl_lru);
		list_del_init(&gl->gl_lru);
		if (!spin_trylock(&gl->gl_lockref.lock)) {
add_back_to_lru:
			list_add(&gl->gl_lru, &lru_list);
			set_bit(GLF_LRU, &gl->gl_flags);
			atomic_inc(&lru_count);
			continue;
		}
		if (test_and_set_bit(GLF_LOCK, &gl->gl_flags)) {
			spin_unlock(&gl->gl_lockref.lock);
			goto add_back_to_lru;
		}
		gl->gl_lockref.count++;
		if (demote_ok(gl))
			handle_callback(gl, LM_ST_UNLOCKED, 0, false);
		WARN_ON(!test_and_clear_bit(GLF_LOCK, &gl->gl_flags));
		__gfs2_glock_queue_work(gl, 0);
		spin_unlock(&gl->gl_lockref.lock);
		cond_resched_lock(&lru_lock);
	}
}

/**
 * gfs2_scan_glock_lru - Scan the LRU looking for locks to demote
 * @nr: The number of entries to scan
 *
 * This function selects the entries on the LRU which are able to
 * be demoted, and then kicks off the process by calling
 * gfs2_dispose_glock_lru() above.
 */

static long gfs2_scan_glock_lru(int nr)
{
	struct gfs2_glock *gl;
	LIST_HEAD(skipped);
	LIST_HEAD(dispose);
	long freed = 0;

	spin_lock(&lru_lock);
	while ((nr-- >= 0) && !list_empty(&lru_list)) {
		gl = list_first_entry(&lru_list, struct gfs2_glock, gl_lru);

		/* Test for being demotable */
		if (!test_bit(GLF_LOCK, &gl->gl_flags)) {
			list_move(&gl->gl_lru, &dispose);
			atomic_dec(&lru_count);
			clear_bit(GLF_LRU, &gl->gl_flags);
			freed++;
			continue;
		}

		list_move(&gl->gl_lru, &skipped);
	}
	list_splice(&skipped, &lru_list);
	if (!list_empty(&dispose))
		gfs2_dispose_glock_lru(&dispose);
	spin_unlock(&lru_lock);

	return freed;
}

static unsigned long gfs2_glock_shrink_scan(struct shrinker *shrink,
					    struct shrink_control *sc)
{
	if (!(sc->gfp_mask & __GFP_FS))
		return SHRINK_STOP;
	return gfs2_scan_glock_lru(sc->nr_to_scan);
}

static unsigned long gfs2_glock_shrink_count(struct shrinker *shrink,
					     struct shrink_control *sc)
{
	return vfs_pressure_ratio(atomic_read(&lru_count));
}

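/* Shrinker hooking the glock LRU into memory reclaim; registered in gfs2_glock_init(). */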
static struct shrinker glock_shrinker = {
	.seeks = DEFAULT_SEEKS,
	.count_objects = gfs2_glock_shrink_count,
	.scan_objects = gfs2_glock_shrink_scan,
};

/**
 * glock_hash_walk - Call a function for each glock in the hash table
 * @examiner: the function
 * @sdp: the filesystem
 *
 * Note that the function can be called multiple times on the same
 * object.  So the user must ensure that the function can cope with
 * that.
 */

static void glock_hash_walk(glock_examiner examiner, const struct gfs2_sbd *sdp)
{
	struct gfs2_glock *gl;
	struct rhashtable_iter iter;

	rhashtable_walk_enter(&gl_hash_table, &iter);

	do {
		rhashtable_walk_start(&iter);

		while ((gl = rhashtable_walk_next(&iter)) && !IS_ERR(gl))
			if (gl->gl_name.ln_sbd == sdp &&
			    lockref_get_not_dead(&gl->gl_lockref))
				examiner(gl);

		rhashtable_walk_stop(&iter);
	} while (cond_resched(), gl == ERR_PTR(-EAGAIN));

	rhashtable_walk_exit(&iter);
}

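/*
 * Helpers for the delayed "delete" work that can be queued against an iopen
 * glock; GLF_PENDING_DELETE tracks whether such work is currently queued.
 */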
bool gfs2_queue_delete_work(struct gfs2_glock *gl, unsigned long delay)
{
	bool queued;

	spin_lock(&gl->gl_lockref.lock);
	queued = queue_delayed_work(gfs2_delete_workqueue,
				    &gl->gl_delete, delay);
	if (queued)
		set_bit(GLF_PENDING_DELETE, &gl->gl_flags);
	spin_unlock(&gl->gl_lockref.lock);
	return queued;
}

void gfs2_cancel_delete_work(struct gfs2_glock *gl)
{
	if (cancel_delayed_work_sync(&gl->gl_delete)) {
		clear_bit(GLF_PENDING_DELETE, &gl->gl_flags);
		gfs2_glock_put(gl);
	}
}

bool gfs2_delete_work_queued(const struct gfs2_glock *gl)
{
	return test_bit(GLF_PENDING_DELETE, &gl->gl_flags);
}

static void flush_delete_work(struct gfs2_glock *gl)
{
	if (gl->gl_name.ln_type == LM_TYPE_IOPEN) {
		if (cancel_delayed_work(&gl->gl_delete)) {
			queue_delayed_work(gfs2_delete_workqueue,
					   &gl->gl_delete, 0);
		}
	}
	gfs2_glock_queue_work(gl, 0);
}

void gfs2_flush_delete_work(struct gfs2_sbd *sdp)
{
	glock_hash_walk(flush_delete_work, sdp);
	flush_workqueue(gfs2_delete_workqueue);
}

/**
 * thaw_glock - thaw out a glock which has an unprocessed reply waiting
 * @gl: The glock to thaw
 *
 */

static void thaw_glock(struct gfs2_glock *gl)
{
	if (!test_and_clear_bit(GLF_FROZEN, &gl->gl_flags)) {
		gfs2_glock_put(gl);
		return;
	}
	set_bit(GLF_REPLY_PENDING, &gl->gl_flags);
	gfs2_glock_queue_work(gl, 0);
}

/**
 * clear_glock - look at a glock and see if we can free it from glock cache
 * @gl: the glock to look at
 *
 */

static void clear_glock(struct gfs2_glock *gl)
{
	gfs2_glock_remove_from_lru(gl);

	spin_lock(&gl->gl_lockref.lock);
	if (gl->gl_state != LM_ST_UNLOCKED)
		handle_callback(gl, LM_ST_UNLOCKED, 0, false);
	__gfs2_glock_queue_work(gl, 0);
	spin_unlock(&gl->gl_lockref.lock);
}

/**
 * gfs2_glock_thaw - Thaw any frozen glocks
 * @sdp: The super block
 *
 */

void gfs2_glock_thaw(struct gfs2_sbd *sdp)
{
	glock_hash_walk(thaw_glock, sdp);
}

static void dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
{
	spin_lock(&gl->gl_lockref.lock);
	gfs2_dump_glock(seq, gl, fsid);
	spin_unlock(&gl->gl_lockref.lock);
}

static void dump_glock_func(struct gfs2_glock *gl)
{
	dump_glock(NULL, gl, true);
}

/**
 * gfs2_gl_hash_clear - Empty out the glock hash table
 * @sdp: the filesystem
 *
 * Called when unmounting the filesystem.
 */

void gfs2_gl_hash_clear(struct gfs2_sbd *sdp)
{
	set_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags);
	flush_workqueue(glock_workqueue);
	glock_hash_walk(clear_glock, sdp);
	flush_workqueue(glock_workqueue);
	wait_event_timeout(sdp->sd_glock_wait,
			   atomic_read(&sdp->sd_glock_disposal) == 0,
			   HZ * 600);
	glock_hash_walk(dump_glock_func, sdp);
}

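/* Resume a previously interrupted truncate on @ip, then let the glock state machine run again. */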
void gfs2_glock_finish_truncate(struct gfs2_inode *ip)
{
	struct gfs2_glock *gl = ip->i_gl;
	int ret;

	ret = gfs2_truncatei_resume(ip);
	gfs2_glock_assert_withdraw(gl, ret == 0);

	spin_lock(&gl->gl_lockref.lock);
	clear_bit(GLF_LOCK, &gl->gl_flags);
	run_queue(gl, 1);
	spin_unlock(&gl->gl_lockref.lock);
}

static const char *state2str(unsigned state)
{
	switch(state) {
	case LM_ST_UNLOCKED:
		return "UN";
	case LM_ST_SHARED:
		return "SH";
	case LM_ST_DEFERRED:
		return "DF";
	case LM_ST_EXCLUSIVE:
		return "EX";
	}
	return "??";
}

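/* Build the holder flag string shown in the "f:" field of each "H:" (holder) line. */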
static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
{
	char *p = buf;
	if (flags & LM_FLAG_TRY)
		*p++ = 't';
	if (flags & LM_FLAG_TRY_1CB)
		*p++ = 'T';
	if (flags & LM_FLAG_NOEXP)
		*p++ = 'e';
	if (flags & LM_FLAG_ANY)
		*p++ = 'A';
	if (flags & LM_FLAG_PRIORITY)
		*p++ = 'p';
	if (flags & LM_FLAG_NODE_SCOPE)
		*p++ = 'n';
	if (flags & GL_ASYNC)
		*p++ = 'a';
	if (flags & GL_EXACT)
		*p++ = 'E';
	if (flags & GL_NOCACHE)
		*p++ = 'c';
	if (test_bit(HIF_HOLDER, &iflags))
		*p++ = 'H';
	if (test_bit(HIF_WAIT, &iflags))
		*p++ = 'W';
	if (test_bit(HIF_FIRST, &iflags))
		*p++ = 'F';
	*p = 0;
	return buf;
}

/**
 * dump_holder - print information about a glock holder
 * @seq: the seq_file struct
 * @gh: the glock holder
 * @fs_id_buf: pointer to file system id (if requested)
 *
 */

static void dump_holder(struct seq_file *seq, const struct gfs2_holder *gh,
			const char *fs_id_buf)
{
	struct task_struct *gh_owner = NULL;
	char flags_buf[32];

	rcu_read_lock();
	if (gh->gh_owner_pid)
		gh_owner = pid_task(gh->gh_owner_pid, PIDTYPE_PID);
	gfs2_print_dbg(seq, "%s H: s:%s f:%s e:%d p:%ld [%s] %pS\n",
		       fs_id_buf, state2str(gh->gh_state),
		       hflags2str(flags_buf, gh->gh_flags, gh->gh_iflags),
		       gh->gh_error,
		       gh->gh_owner_pid ? (long)pid_nr(gh->gh_owner_pid) : -1,
		       gh_owner ? gh_owner->comm : "(ended)",
		       (void *)gh->gh_ip);
	rcu_read_unlock();
}

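/* Build the glock flag string shown in the "f:" field of each "G:" (glock) line. */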
static const char *gflags2str(char *buf, const struct gfs2_glock *gl)
{
	const unsigned long *gflags = &gl->gl_flags;
	char *p = buf;

	if (test_bit(GLF_LOCK, gflags))
		*p++ = 'l';
	if (test_bit(GLF_DEMOTE, gflags))
		*p++ = 'D';
	if (test_bit(GLF_PENDING_DEMOTE, gflags))
		*p++ = 'd';
	if (test_bit(GLF_DEMOTE_IN_PROGRESS, gflags))
		*p++ = 'p';
	if (test_bit(GLF_DIRTY, gflags))
		*p++ = 'y';
	if (test_bit(GLF_LFLUSH, gflags))
		*p++ = 'f';
	if (test_bit(GLF_INVALIDATE_IN_PROGRESS, gflags))
		*p++ = 'i';
	if (test_bit(GLF_REPLY_PENDING, gflags))
		*p++ = 'r';
	if (test_bit(GLF_INITIAL, gflags))
		*p++ = 'I';
	if (test_bit(GLF_FROZEN, gflags))
		*p++ = 'F';
	if (!list_empty(&gl->gl_holders))
		*p++ = 'q';
	if (test_bit(GLF_LRU, gflags))
		*p++ = 'L';
	if (gl->gl_object)
		*p++ = 'o';
	if (test_bit(GLF_BLOCKING, gflags))
		*p++ = 'b';
	if (test_bit(GLF_PENDING_DELETE, gflags))
		*p++ = 'P';
	if (test_bit(GLF_FREEING, gflags))
		*p++ = 'x';
	*p = 0;
	return buf;
}

/**
 * gfs2_dump_glock - print information about a glock
 * @seq: The seq_file struct
 * @gl: the glock
 * @fsid: If true, also dump the file system id
 *
 * The file format is as follows:
 * One line per object, capital letters are used to indicate objects
 * G = glock, I = Inode, R = rgrp, H = holder. Glocks are not indented,
 * other objects are indented by a single space and follow the glock to
 * which they are related. Fields are indicated by lower case letters
 * followed by a colon and the field value, except for strings which are in
 * [] so that it's possible to see if they are composed of spaces for
 * example. The fields are: n = number (id of the object), f = flags,
 * t = type, s = state, r = refcount, e = error, p = pid.
 *
 */

void gfs2_dump_glock(struct seq_file *seq, struct gfs2_glock *gl, bool fsid)
{
	const struct gfs2_glock_operations *glops = gl->gl_ops;
	unsigned long long dtime;
	const struct gfs2_holder *gh;
	char gflags_buf[32];
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	char fs_id_buf[sizeof(sdp->sd_fsname) + 7];
	unsigned long nrpages = 0;

	if (gl->gl_ops->go_flags & GLOF_ASPACE) {
		struct address_space *mapping = gfs2_glock2aspace(gl);

		nrpages = mapping->nrpages;
	}
	memset(fs_id_buf, 0, sizeof(fs_id_buf));
	if (fsid && sdp) /* safety precaution */
		sprintf(fs_id_buf, "fsid=%s: ", sdp->sd_fsname);
	dtime = jiffies - gl->gl_demote_time;
	dtime *= 1000000/HZ; /* demote time in uSec */
	if (!test_bit(GLF_DEMOTE, &gl->gl_flags))
		dtime = 0;
	gfs2_print_dbg(seq, "%sG:  s:%s n:%u/%llx f:%s t:%s d:%s/%llu a:%d "
		       "v:%d r:%d m:%ld p:%lu\n",
		       fs_id_buf, state2str(gl->gl_state),
		       gl->gl_name.ln_type,
		       (unsigned long long)gl->gl_name.ln_number,
		       gflags2str(gflags_buf, gl),
		       state2str(gl->gl_target),
		       state2str(gl->gl_demote_state), dtime,
		       atomic_read(&gl->gl_ail_count),
		       atomic_read(&gl->gl_revokes),
		       (int)gl->gl_lockref.count, gl->gl_hold_time, nrpages);

	list_for_each_entry(gh, &gl->gl_holders, gh_list)
		dump_holder(seq, gh, fs_id_buf);

	if (gl->gl_state != LM_ST_UNLOCKED && glops->go_dump)
		glops->go_dump(seq, gl, fs_id_buf);
}

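/* Print one line of per-glock statistics for the "glstats" debugfs file. */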
static int gfs2_glstats_seq_show(struct seq_file *seq, void *iter_ptr)
{
	struct gfs2_glock *gl = iter_ptr;

	seq_printf(seq, "G: n:%u/%llx rtt:%llu/%llu rttb:%llu/%llu irt:%llu/%llu dcnt: %llu qcnt: %llu\n",
		   gl->gl_name.ln_type,
		   (unsigned long long)gl->gl_name.ln_number,
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTT],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVAR],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTB],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SRTTVARB],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRT],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_SIRTVAR],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_DCOUNT],
		   (unsigned long long)gl->gl_stats.stats[GFS2_LKS_QCOUNT]);
	return 0;
}

static const char *gfs2_gltype[] = {
	"type",
	"reserved",
	"nondisk",
	"inode",
	"rgrp",
	"meta",
	"iopen",
	"flock",
	"plock",
	"quota",
	"journal",
};

static const char *gfs2_stype[] = {
	[GFS2_LKS_SRTT]		= "srtt",
	[GFS2_LKS_SRTTVAR]	= "srttvar",
	[GFS2_LKS_SRTTB]	= "srttb",
	[GFS2_LKS_SRTTVARB]	= "srttvarb",
	[GFS2_LKS_SIRT]		= "sirt",
	[GFS2_LKS_SIRTVAR]	= "sirtvar",
	[GFS2_LKS_DCOUNT]	= "dlm",
	[GFS2_LKS_QCOUNT]	= "queue",
};

#define GFS2_NR_SBSTATS (ARRAY_SIZE(gfs2_gltype) * ARRAY_SIZE(gfs2_stype))

static int gfs2_sbstats_seq_show(struct seq_file *seq, void *iter_ptr)
{
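	/*
	 * Each seq position encodes a (glock type, stat) pair: the low three
	 * bits select one of the eight per-type stats, the remaining bits
	 * select the glock type.  Row 0 prints the CPU numbers that head
	 * each column.
	 */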
	struct gfs2_sbd *sdp = seq->private;
	loff_t pos = *(loff_t *)iter_ptr;
	unsigned index = pos >> 3;
	unsigned subindex = pos & 0x07;
	int i;

	if (index == 0 && subindex != 0)
		return 0;

	seq_printf(seq, "%-10s %8s:", gfs2_gltype[index],
		   (index == 0) ? "cpu": gfs2_stype[subindex]);

	for_each_possible_cpu(i) {
		const struct gfs2_pcpu_lkstats *lkstats = per_cpu_ptr(sdp->sd_lkstats, i);

		if (index == 0)
			seq_printf(seq, " %15u", i);
		else
			seq_printf(seq, " %15llu", (unsigned long long)lkstats->
				   lkstats[index - 1].stats[subindex]);
	}
	seq_putc(seq, '\n');
	return 0;
}

int __init gfs2_glock_init(void)
{
	int i, ret;

	ret = rhashtable_init(&gl_hash_table, &ht_parms);
	if (ret < 0)
		return ret;

	glock_workqueue = alloc_workqueue("glock_workqueue", WQ_MEM_RECLAIM |
					  WQ_HIGHPRI | WQ_FREEZABLE, 0);
	if (!glock_workqueue) {
		rhashtable_destroy(&gl_hash_table);
		return -ENOMEM;
	}
	gfs2_delete_workqueue = alloc_workqueue("delete_workqueue",
						WQ_MEM_RECLAIM | WQ_FREEZABLE,
						0);
	if (!gfs2_delete_workqueue) {
		destroy_workqueue(glock_workqueue);
		rhashtable_destroy(&gl_hash_table);
		return -ENOMEM;
	}

	ret = register_shrinker(&glock_shrinker);
	if (ret) {
		destroy_workqueue(gfs2_delete_workqueue);
		destroy_workqueue(glock_workqueue);
		rhashtable_destroy(&gl_hash_table);
		return ret;
	}

	for (i = 0; i < GLOCK_WAIT_TABLE_SIZE; i++)
		init_waitqueue_head(glock_wait_table + i);

	return 0;
}

void gfs2_glock_exit(void)
{
	unregister_shrinker(&glock_shrinker);
	rhashtable_destroy(&gl_hash_table);
	destroy_workqueue(glock_workqueue);
	destroy_workqueue(gfs2_delete_workqueue);
}

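/*
 * Advance the iterator by @n glocks belonging to this filesystem (n == 0
 * keeps the current glock).  A reference is taken on the glock returned in
 * gi->gl and dropped on the one we move away from.
 */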
static void gfs2_glock_iter_next(struct gfs2_glock_iter *gi, loff_t n)
{
	struct gfs2_glock *gl = gi->gl;

	if (gl) {
		if (n == 0)
			return;
		if (!lockref_put_not_zero(&gl->gl_lockref))
			gfs2_glock_queue_put(gl);
	}
	for (;;) {
		gl = rhashtable_walk_next(&gi->hti);
		if (IS_ERR_OR_NULL(gl)) {
			if (gl == ERR_PTR(-EAGAIN)) {
				n = 1;
				continue;
			}
			gl = NULL;
			break;
		}
		if (gl->gl_name.ln_sbd != gi->sdp)
			continue;
		if (n <= 1) {
			if (!lockref_get_not_dead(&gl->gl_lockref))
				continue;
			break;
		} else {
			if (__lockref_is_dead(&gl->gl_lockref))
				continue;
			n--;
		}
	}
	gi->gl = gl;
}

static void *gfs2_glock_seq_start(struct seq_file *seq, loff_t *pos)
	__acquires(RCU)
{
	struct gfs2_glock_iter *gi = seq->private;
	loff_t n;

	/*
	 * We can either stay where we are, skip to the next hash table
	 * entry, or start from the beginning.
	 */
	if (*pos < gi->last_pos) {
		rhashtable_walk_exit(&gi->hti);
		rhashtable_walk_enter(&gl_hash_table, &gi->hti);
		n = *pos + 1;
	} else {
		n = *pos - gi->last_pos;
	}

	rhashtable_walk_start(&gi->hti);

	gfs2_glock_iter_next(gi, n);
	gi->last_pos = *pos;
	return gi->gl;
}

static void *gfs2_glock_seq_next(struct seq_file *seq, void *iter_ptr,
				 loff_t *pos)
{
	struct gfs2_glock_iter *gi = seq->private;

	(*pos)++;
	gi->last_pos = *pos;
	gfs2_glock_iter_next(gi, 1);
	return gi->gl;
}

static void gfs2_glock_seq_stop(struct seq_file *seq, void *iter_ptr)
	__releases(RCU)
{
	struct gfs2_glock_iter *gi = seq->private;

	rhashtable_walk_stop(&gi->hti);
}

static int gfs2_glock_seq_show(struct seq_file *seq, void *iter_ptr)
{
	dump_glock(seq, iter_ptr, false);
	return 0;
}

static void *gfs2_sbstats_seq_start(struct seq_file *seq, loff_t *pos)
{
	preempt_disable();
	if (*pos >= GFS2_NR_SBSTATS)
		return NULL;
	return pos;
}

static void *gfs2_sbstats_seq_next(struct seq_file *seq, void *iter_ptr,
				   loff_t *pos)
{
	(*pos)++;
	if (*pos >= GFS2_NR_SBSTATS)
		return NULL;
	return pos;
}

static void gfs2_sbstats_seq_stop(struct seq_file *seq, void *iter_ptr)
{
	preempt_enable();
}

static const struct seq_operations gfs2_glock_seq_ops = {
	.start = gfs2_glock_seq_start,
	.next  = gfs2_glock_seq_next,
	.stop  = gfs2_glock_seq_stop,
	.show  = gfs2_glock_seq_show,
};

static const struct seq_operations gfs2_glstats_seq_ops = {
	.start = gfs2_glock_seq_start,
	.next  = gfs2_glock_seq_next,
	.stop  = gfs2_glock_seq_stop,
	.show  = gfs2_glstats_seq_show,
};

static const struct seq_operations gfs2_sbstats_sops = {
	.start = gfs2_sbstats_seq_start,
	.next  = gfs2_sbstats_seq_next,
	.stop  = gfs2_sbstats_seq_stop,
	.show  = gfs2_sbstats_seq_show,
};

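/* Preferred seq_file buffer size: big enough that dumping a large glock table rarely needs to reallocate. */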
#define GFS2_SEQ_GOODSIZE min(PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER, 65536UL)

static int __gfs2_glocks_open(struct inode *inode, struct file *file,
			      const struct seq_operations *ops)
{
	int ret = seq_open_private(file, ops, sizeof(struct gfs2_glock_iter));
	if (ret == 0) {
		struct seq_file *seq = file->private_data;
		struct gfs2_glock_iter *gi = seq->private;

		gi->sdp = inode->i_private;
		seq->buf = kmalloc(GFS2_SEQ_GOODSIZE, GFP_KERNEL | __GFP_NOWARN);
		if (seq->buf)
			seq->size = GFS2_SEQ_GOODSIZE;
		/*
		 * Initially, we are "before" the first hash table entry; the
		 * first call to rhashtable_walk_next gets us the first entry.
		 */
		gi->last_pos = -1;
		gi->gl = NULL;
		rhashtable_walk_enter(&gl_hash_table, &gi->hti);
	}
	return ret;
}

static int gfs2_glocks_open(struct inode *inode, struct file *file)
{
	return __gfs2_glocks_open(inode, file, &gfs2_glock_seq_ops);
}

static int gfs2_glocks_release(struct inode *inode, struct file *file)
{
	struct seq_file *seq = file->private_data;
	struct gfs2_glock_iter *gi = seq->private;

	if (gi->gl)
		gfs2_glock_put(gi->gl);
	rhashtable_walk_exit(&gi->hti);
	return seq_release_private(inode, file);
}

static int gfs2_glstats_open(struct inode *inode, struct file *file)
{
	return __gfs2_glocks_open(inode, file, &gfs2_glstats_seq_ops);
}

static const struct file_operations gfs2_glocks_fops = {
	.owner   = THIS_MODULE,
	.open    = gfs2_glocks_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = gfs2_glocks_release,
};

static const struct file_operations gfs2_glstats_fops = {
	.owner   = THIS_MODULE,
	.open    = gfs2_glstats_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = gfs2_glocks_release,
};

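/* Expands to gfs2_sbstats_open() and gfs2_sbstats_fops, wired up to gfs2_sbstats_sops. */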
DEFINE_SEQ_ATTRIBUTE(gfs2_sbstats);

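/* Create the per-filesystem debugfs directory with its "glocks", "glstats" and "sbstats" files. */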
void gfs2_create_debugfs_file(struct gfs2_sbd *sdp)
{
	sdp->debugfs_dir = debugfs_create_dir(sdp->sd_table_name, gfs2_root);

	debugfs_create_file("glocks", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
			    &gfs2_glocks_fops);

	debugfs_create_file("glstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
			    &gfs2_glstats_fops);

	debugfs_create_file("sbstats", S_IFREG | S_IRUGO, sdp->debugfs_dir, sdp,
			    &gfs2_sbstats_fops);
}

void gfs2_delete_debugfs_file(struct gfs2_sbd *sdp)
{
	debugfs_remove_recursive(sdp->debugfs_dir);
	sdp->debugfs_dir = NULL;
}

void gfs2_register_debugfs(void)
{
	gfs2_root = debugfs_create_dir("gfs2", NULL);
}

void gfs2_unregister_debugfs(void)
{
	debugfs_remove(gfs2_root);
	gfs2_root = NULL;
}