glops.c 21.2 KB
Newer Older
1
// SPDX-License-Identifier: GPL-2.0-only
D
David Teigland 已提交
2 3
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
4
 * Copyright (C) 2004-2008 Red Hat, Inc.  All rights reserved.
D
David Teigland 已提交
5 6 7 8 9
 */

#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
10
#include <linux/gfs2_ondisk.h>
11
#include <linux/bio.h>
12
#include <linux/posix_acl.h>
13
#include <linux/security.h>
D
David Teigland 已提交
14 15

#include "gfs2.h"
16
#include "incore.h"
D
David Teigland 已提交
17 18 19 20 21 22 23 24
#include "bmap.h"
#include "glock.h"
#include "glops.h"
#include "inode.h"
#include "log.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
25
#include "util.h"
26
#include "trans.h"
27
#include "dir.h"
A
Abhi Das 已提交
28
#include "lops.h"
D
David Teigland 已提交
29

30 31
struct workqueue_struct *gfs2_freeze_wq;

32 33
extern struct workqueue_struct *gfs2_control_wq;

34 35
/*
 * gfs2_ail_error - complain about a buffer that should not still be on the
 * AIL (dirty/pinned/locked), then withdraw from the filesystem.
 * @gl: the glock whose AIL list the offending buffer was found on
 * @bh: the offending buffer
 *
 * Logs the buffer and glock details for debugging and withdraws, since an
 * unexpected AIL buffer indicates log/journal inconsistency.
 */
static void gfs2_ail_error(struct gfs2_glock *gl, const struct buffer_head *bh)
{
	fs_err(gl->gl_name.ln_sbd,
	       "AIL buffer %p: blocknr %llu state 0x%08lx mapping %p page "
	       "state 0x%lx\n",
	       bh, (unsigned long long)bh->b_blocknr, bh->b_state,
	       bh->b_page->mapping, bh->b_page->flags);
	fs_err(gl->gl_name.ln_sbd, "AIL glock %u:%llu mapping %p\n",
	       gl->gl_name.ln_type, gl->gl_name.ln_number,
	       gfs2_glock2aspace(gl));
	gfs2_lm(gl->gl_name.ln_sbd, "AIL error\n");
	gfs2_withdraw(gl->gl_name.ln_sbd);
}

48
/**
 * __gfs2_ail_flush - remove all buffers for a given lock from the AIL
 * @gl: the glock
 * @fsync: set when called from fsync (not all buffers will be clean)
 * @nr_revokes: maximum number of revokes to issue on this pass
 *
 * None of the buffers should be dirty, locked, or pinned.
 */

static void __gfs2_ail_flush(struct gfs2_glock *gl, bool fsync,
			     unsigned int nr_revokes)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct list_head *head = &gl->gl_ail_list;
	struct gfs2_bufdata *bd, *tmp;
	struct buffer_head *bh;
	/* Buffer states that must not appear here unless called from fsync */
	const unsigned long b_state = (1UL << BH_Dirty)|(1UL << BH_Pinned)|(1UL << BH_Lock);

	/* Lock order: log lock first, then the AIL spin lock */
	gfs2_log_lock(sdp);
	spin_lock(&sdp->sd_ail_lock);
	list_for_each_entry_safe_reverse(bd, tmp, head, bd_ail_gl_list) {
		if (nr_revokes == 0)
			break;
		bh = bd->bd_bh;
		if (bh->b_state & b_state) {
			if (fsync)
				continue;	/* fsync may see in-flight buffers */
			gfs2_ail_error(gl, bh);
		}
		gfs2_trans_add_revoke(sdp, bd);
		nr_revokes--;
	}
	/* Outside fsync, the AIL must now be completely drained */
	GLOCK_BUG_ON(gl, !fsync && atomic_read(&gl->gl_ail_count));
	spin_unlock(&sdp->sd_ail_lock);
	gfs2_log_unlock(sdp);
}


85
static int gfs2_ail_empty_gl(struct gfs2_glock *gl)
S
Steven Whitehouse 已提交
86
{
87
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
S
Steven Whitehouse 已提交
88
	struct gfs2_trans tr;
89
	unsigned int revokes;
90
	int ret;
S
Steven Whitehouse 已提交
91

92
	revokes = atomic_read(&gl->gl_ail_count);
S
Steven Whitehouse 已提交
93

94
	if (!revokes) {
95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117
		bool have_revokes;
		bool log_in_flight;

		/*
		 * We have nothing on the ail, but there could be revokes on
		 * the sdp revoke queue, in which case, we still want to flush
		 * the log and wait for it to finish.
		 *
		 * If the sdp revoke list is empty too, we might still have an
		 * io outstanding for writing revokes, so we should wait for
		 * it before returning.
		 *
		 * If none of these conditions are true, our revokes are all
		 * flushed and we can return.
		 */
		gfs2_log_lock(sdp);
		have_revokes = !list_empty(&sdp->sd_log_revokes);
		log_in_flight = atomic_read(&sdp->sd_log_in_flight);
		gfs2_log_unlock(sdp);
		if (have_revokes)
			goto flush;
		if (log_in_flight)
			log_flush_wait(sdp);
118
		return 0;
119
	}
S
Steven Whitehouse 已提交
120

121 122 123 124 125 126
	memset(&tr, 0, sizeof(tr));
	set_bit(TR_ONSTACK, &tr.tr_flags);
	ret = __gfs2_trans_begin(&tr, sdp, 0, revokes, _RET_IP_);
	if (ret)
		goto flush;
	__gfs2_ail_flush(gl, 0, revokes);
S
Steven Whitehouse 已提交
127
	gfs2_trans_end(sdp);
128

129
flush:
130 131
	gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
		       GFS2_LFC_AIL_EMPTY_GL);
132
	return 0;
S
Steven Whitehouse 已提交
133
}
134

135
/**
 * gfs2_ail_flush - revoke the current AIL buffers of a glock and flush the log
 * @gl: the glock
 * @fsync: set when called from fsync (in-flight buffers are tolerated)
 *
 * Snapshot of gfs2_ail_empty_gl for callers that may race with new AIL
 * additions: only the revokes counted at entry are issued.
 */
void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	unsigned int revokes = atomic_read(&gl->gl_ail_count);
	int ret;

	if (!revokes)
		return;

	ret = gfs2_trans_begin(sdp, 0, revokes);
	if (ret)
		return;
	__gfs2_ail_flush(gl, fsync, revokes);
	gfs2_trans_end(sdp);
	gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
		       GFS2_LFC_AIL_FLUSH);
}
S
Steven Whitehouse 已提交
152

153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
/**
 * gfs2_rgrp_metasync - sync out the metadata of a resource group
 * @gl: the glock protecting the resource group
 *
 * Writes back and waits on the rgrp's metadata pages within the superblock
 * metadata mapping; an I/O error is recorded on the mapping and escalated
 * via gfs2_io_error() unless the fs is already withdrawn.
 *
 * Returns: 0 on success or a negative errno from the writeback wait
 */

static int gfs2_rgrp_metasync(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct address_space *metamapping = &sdp->sd_aspace;
	struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
	const unsigned bsize = sdp->sd_sb.sb_bsize;
	/* Page-align the byte range covered by this rgrp's blocks */
	loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK;
	loff_t end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1;
	int error;

	filemap_fdatawrite_range(metamapping, start, end);
	error = filemap_fdatawait_range(metamapping, start, end);
	WARN_ON_ONCE(error && !gfs2_withdrawn(sdp));
	mapping_set_error(metamapping, error);
	if (error)
		gfs2_io_error(sdp);
	return error;
}

S
Steven Whitehouse 已提交
178
/**
 * rgrp_go_sync - sync out the metadata for this glock
 * @gl: the glock
 *
 * Called when demoting or unlocking an EX glock.  We must flush
 * to disk all dirty buffers/pages relating to this glock, and must not
 * return to caller to demote/unlock the glock until I/O is complete.
 *
 * Returns: 0 on success or a negative errno
 */

static int rgrp_go_sync(struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
	int error;

	/* Nothing to do unless the glock was dirtied under EX */
	if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
		return 0;
	GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);

	/* Order: log flush, then metadata writeback, then drain the AIL */
	gfs2_log_flush(sdp, gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
		       GFS2_LFC_RGRP_GO_SYNC);
	error = gfs2_rgrp_metasync(gl);
	if (!error)
		error = gfs2_ail_empty_gl(gl);
	gfs2_free_clones(rgd);
	return error;
}

/**
 * rgrp_go_inval - invalidate the metadata for this glock
 * @gl: the glock
 * @flags: DIO_* invalidation flags
 *
 * We never used LM_ST_DEFERRED with resource groups, so that we
 * should always see the metadata flag set here.
 *
 */

static void rgrp_go_inval(struct gfs2_glock *gl, int flags)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct address_space *mapping = &sdp->sd_aspace;
	struct gfs2_rgrpd *rgd = gfs2_glock2rgrp(gl);
	const unsigned bsize = sdp->sd_sb.sb_bsize;
	/* Page-aligned byte range of this rgrp within the metadata mapping */
	loff_t start = (rgd->rd_addr * bsize) & PAGE_MASK;
	loff_t end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1;

	gfs2_rgrp_brelse(rgd);
	WARN_ON_ONCE(!(flags & DIO_METADATA));
	truncate_inode_pages_range(mapping, start, end);
	/* Force a re-read of the rgrp header on next use */
	rgd->rd_flags &= ~GFS2_RDF_UPTODATE;
}

231 232 233
/**
 * gfs2_rgrp_go_dump - print information about a resource group glock
 * @seq: the debugfs seq_file iterator (may be NULL, see gfs2_rgrp_dump)
 * @gl: the glock
 * @fs_id_buf: file system id (may be empty)
 */
static void gfs2_rgrp_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
			      const char *fs_id_buf)
{
	struct gfs2_rgrpd *rgd = gl->gl_object;

	if (rgd)
		gfs2_rgrp_dump(seq, rgd, fs_id_buf);
}

240 241 242 243 244 245 246 247 248 249 250 251
/*
 * gfs2_glock2inode - get the inode attached to a glock, marking a glock
 * operation as pending on it.
 * @gl: the glock
 *
 * Sets GIF_GLOP_PENDING under gl_lockref.lock so the inode is not torn
 * down while the glock operation runs; pair with gfs2_clear_glop_pending().
 *
 * Returns: the inode, or NULL if the glock has no attached object
 */
static struct gfs2_inode *gfs2_glock2inode(struct gfs2_glock *gl)
{
	struct gfs2_inode *ip;

	spin_lock(&gl->gl_lockref.lock);
	ip = gl->gl_object;
	if (ip)
		set_bit(GIF_GLOP_PENDING, &ip->i_flags);
	spin_unlock(&gl->gl_lockref.lock);
	return ip;
}

252 253 254 255 256 257 258 259 260 261 262
/*
 * gfs2_glock2rgrp - get the resource group attached to a glock
 * @gl: the glock
 *
 * Reads gl_object under gl_lockref.lock; unlike gfs2_glock2inode() no
 * pending flag is set, so the caller must otherwise guarantee lifetime.
 *
 * Returns: the rgrp, or NULL if the glock has no attached object
 */
struct gfs2_rgrpd *gfs2_glock2rgrp(struct gfs2_glock *gl)
{
	struct gfs2_rgrpd *rgd;

	spin_lock(&gl->gl_lockref.lock);
	rgd = gl->gl_object;
	spin_unlock(&gl->gl_lockref.lock);

	return rgd;
}

263 264 265 266 267 268 269 270 271
/*
 * gfs2_clear_glop_pending - signal that a glock operation on @ip finished
 * @ip: the inode (NULL is tolerated for convenience of callers)
 *
 * Clears GIF_GLOP_PENDING (set by gfs2_glock2inode()) with release
 * semantics and wakes anyone waiting on the bit.
 */
static void gfs2_clear_glop_pending(struct gfs2_inode *ip)
{
	if (!ip)
		return;

	clear_bit_unlock(GIF_GLOP_PENDING, &ip->i_flags);
	wake_up_bit(&ip->i_flags, GIF_GLOP_PENDING);
}

S
Steven Whitehouse 已提交
272
/**
 * gfs2_inode_metasync - sync out the metadata of an inode
 * @gl: the glock protecting the inode
 *
 * Writes back and waits on the inode's metadata address space; an error
 * is escalated via gfs2_io_error().
 *
 * Returns: 0 on success or a negative errno from the writeback wait
 */
int gfs2_inode_metasync(struct gfs2_glock *gl)
{
	struct address_space *metamapping = gfs2_glock2aspace(gl);
	int error;

	filemap_fdatawrite(metamapping);
	error = filemap_fdatawait(metamapping);
	if (error)
		gfs2_io_error(gl->gl_name.ln_sbd);
	return error;
}

/**
 * inode_go_sync - Sync the dirty metadata of an inode
 * @gl: the glock protecting the inode
 *
 * Flushes the log, then writes back data (for regular files) and metadata,
 * and drains the glock's AIL list.  Must complete all I/O before the glock
 * can be demoted.
 *
 * Returns: 0 on success or the first writeback error encountered
 */

static int inode_go_sync(struct gfs2_glock *gl)
{
	struct gfs2_inode *ip = gfs2_glock2inode(gl);
	int isreg = ip && S_ISREG(ip->i_inode.i_mode);
	struct address_space *metamapping = gfs2_glock2aspace(gl);
	int error = 0, ret;

	if (isreg) {
		/* Drop any writable mmap mappings so pages fault in again */
		if (test_and_clear_bit(GIF_SW_PAGED, &ip->i_flags))
			unmap_shared_mapping_range(ip->i_inode.i_mapping, 0, 0);
		inode_dio_wait(&ip->i_inode);
	}
	if (!test_and_clear_bit(GLF_DIRTY, &gl->gl_flags))
		goto out;

	GLOCK_BUG_ON(gl, gl->gl_state != LM_ST_EXCLUSIVE);

	gfs2_log_flush(gl->gl_name.ln_sbd, gl, GFS2_LOG_HEAD_FLUSH_NORMAL |
		       GFS2_LFC_INODE_GO_SYNC);
	filemap_fdatawrite(metamapping);
	if (isreg) {
		struct address_space *mapping = ip->i_inode.i_mapping;
		filemap_fdatawrite(mapping);
		error = filemap_fdatawait(mapping);
		mapping_set_error(mapping, error);
	}
	/* Metadata errors are secondary to the first data error */
	ret = gfs2_inode_metasync(gl);
	if (!error)
		error = ret;
	gfs2_ail_empty_gl(gl);
	/*
	 * Writeback of the data mapping may cause the dirty flag to be set
	 * so we have to clear it again here.
	 */
	smp_mb__before_atomic();
	clear_bit(GLF_DIRTY, &gl->gl_flags);

out:
	gfs2_clear_glop_pending(ip);
	return error;
}

D
David Teigland 已提交
337 338 339 340
/**
 * inode_go_inval - prepare a inode glock to be released
 * @gl: the glock
 * @flags: DIO_* invalidation flags
 *
 * Normally we invalidate everything, but if we are moving into
 * LM_ST_DEFERRED from LM_ST_SHARED or LM_ST_EXCLUSIVE then we
 * can keep hold of the metadata, since it won't have changed.
 *
 */

static void inode_go_inval(struct gfs2_glock *gl, int flags)
{
	struct gfs2_inode *ip = gfs2_glock2inode(gl);

	if (flags & DIO_METADATA) {
		struct address_space *mapping = gfs2_glock2aspace(gl);
		truncate_inode_pages(mapping, 0);
		if (ip) {
			/* Force a dinode re-read on next inode_go_lock() */
			set_bit(GIF_INVALID, &ip->i_flags);
			forget_all_cached_acls(&ip->i_inode);
			security_inode_invalidate_secctx(&ip->i_inode);
			gfs2_dir_hash_inval(ip);
		}
	}

	/* The rindex is special: flush the log and mark it stale */
	if (ip == GFS2_I(gl->gl_name.ln_sbd->sd_rindex)) {
		gfs2_log_flush(gl->gl_name.ln_sbd, NULL,
			       GFS2_LOG_HEAD_FLUSH_NORMAL |
			       GFS2_LFC_INODE_GO_INVAL);
		gl->gl_name.ln_sbd->sd_rindex_uptodate = 0;
	}
	if (ip && S_ISREG(ip->i_inode.i_mode))
		truncate_inode_pages(ip->i_inode.i_mapping, 0);

	gfs2_clear_glop_pending(ip);
}

/**
 * inode_go_demote_ok - Check to see if it's ok to unlock an inode glock
 * @gl: the glock
 *
 * The jindex and rindex inode glocks are pinned: demoting them would force
 * expensive re-reads of core metadata.
 *
 * Returns: 1 if it's ok
 */

static int inode_go_demote_ok(const struct gfs2_glock *gl)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	if (sdp->sd_jindex == gl->gl_object || sdp->sd_rindex == gl->gl_object)
		return 0;

	return 1;
}

392 393 394
/*
 * gfs2_dinode_in - populate the in-core inode from an on-disk dinode
 * @ip: the in-core GFS2 inode to fill in
 * @buf: the raw on-disk dinode block data (big-endian fields)
 *
 * Validates the block number, height and depth fields against the in-core
 * inode and filesystem limits; any mismatch marks the inode inconsistent.
 *
 * Returns: 0 on success or -EIO if the dinode is corrupt
 */
static int gfs2_dinode_in(struct gfs2_inode *ip, const void *buf)
{
	const struct gfs2_dinode *str = buf;
	struct timespec64 atime;
	u16 height, depth;

	if (unlikely(ip->i_no_addr != be64_to_cpu(str->di_num.no_addr)))
		goto corrupt;
	ip->i_no_formal_ino = be64_to_cpu(str->di_num.no_formal_ino);
	ip->i_inode.i_mode = be32_to_cpu(str->di_mode);
	ip->i_inode.i_rdev = 0;
	switch (ip->i_inode.i_mode & S_IFMT) {
	case S_IFBLK:
	case S_IFCHR:
		ip->i_inode.i_rdev = MKDEV(be32_to_cpu(str->di_major),
					   be32_to_cpu(str->di_minor));
		break;
	}

	i_uid_write(&ip->i_inode, be32_to_cpu(str->di_uid));
	i_gid_write(&ip->i_inode, be32_to_cpu(str->di_gid));
	set_nlink(&ip->i_inode, be32_to_cpu(str->di_nlink));
	i_size_write(&ip->i_inode, be64_to_cpu(str->di_size));
	gfs2_set_inode_blocks(&ip->i_inode, be64_to_cpu(str->di_blocks));
	atime.tv_sec = be64_to_cpu(str->di_atime);
	atime.tv_nsec = be32_to_cpu(str->di_atime_nsec);
	/* Keep the newer atime: ours may have advanced past the disk copy */
	if (timespec64_compare(&ip->i_inode.i_atime, &atime) < 0)
		ip->i_inode.i_atime = atime;
	ip->i_inode.i_mtime.tv_sec = be64_to_cpu(str->di_mtime);
	ip->i_inode.i_mtime.tv_nsec = be32_to_cpu(str->di_mtime_nsec);
	ip->i_inode.i_ctime.tv_sec = be64_to_cpu(str->di_ctime);
	ip->i_inode.i_ctime.tv_nsec = be32_to_cpu(str->di_ctime_nsec);

	ip->i_goal = be64_to_cpu(str->di_goal_meta);
	ip->i_generation = be64_to_cpu(str->di_generation);

	ip->i_diskflags = be32_to_cpu(str->di_flags);
	ip->i_eattr = be64_to_cpu(str->di_eattr);
	/* i_diskflags and i_eattr must be set before gfs2_set_inode_flags() */
	gfs2_set_inode_flags(&ip->i_inode);
	height = be16_to_cpu(str->di_height);
	if (unlikely(height > GFS2_MAX_META_HEIGHT))
		goto corrupt;
	ip->i_height = (u8)height;

	depth = be16_to_cpu(str->di_depth);
	if (unlikely(depth > GFS2_DIR_MAX_DEPTH))
		goto corrupt;
	ip->i_depth = (u8)depth;
	ip->i_entries = be32_to_cpu(str->di_entries);

	if (S_ISREG(ip->i_inode.i_mode))
		gfs2_set_aops(&ip->i_inode);

	return 0;
corrupt:
	gfs2_consist_inode(ip);
	return -EIO;
}

/**
 * gfs2_inode_refresh - Refresh the incore copy of the dinode
 * @ip: The GFS2 inode
 *
 * Re-reads the dinode block and repopulates the in-core inode, clearing
 * GIF_INVALID on completion.
 *
 * Returns: errno
 */

int gfs2_inode_refresh(struct gfs2_inode *ip)
{
	struct buffer_head *dibh;
	int error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;

	error = gfs2_dinode_in(ip, dibh->b_data);
	brelse(dibh);
	clear_bit(GIF_INVALID, &ip->i_flags);

	return error;
}

D
David Teigland 已提交
475 476 477 478 479 480 481 482 483 484 485
/**
 * inode_go_lock - operation done after an inode lock is locked by a process
 * @gh: the holder being granted
 *
 * Refreshes a stale in-core inode, waits for direct I/O where required, and
 * hands inodes with an interrupted truncate to the quota daemon to finish.
 *
 * Returns: errno, or 1 when the truncate work has been queued
 */

static int inode_go_lock(struct gfs2_holder *gh)
{
	struct gfs2_glock *gl = gh->gh_gl;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_inode *ip = gl->gl_object;
	int error = 0;

	if (!ip || (gh->gh_flags & GL_SKIP))
		return 0;

	/* Re-read the dinode if the cached copy was invalidated */
	if (test_bit(GIF_INVALID, &ip->i_flags)) {
		error = gfs2_inode_refresh(ip);
		if (error)
			return error;
	}

	if (gh->gh_state != LM_ST_DEFERRED)
		inode_dio_wait(&ip->i_inode);

	/* Resume an interrupted truncate via the quota daemon */
	if ((ip->i_diskflags & GFS2_DIF_TRUNC_IN_PROG) &&
	    (gl->gl_state == LM_ST_EXCLUSIVE) &&
	    (gh->gh_state == LM_ST_EXCLUSIVE)) {
		spin_lock(&sdp->sd_trunc_lock);
		if (list_empty(&ip->i_trunc_list))
			list_add(&ip->i_trunc_list, &sdp->sd_trunc_list);
		spin_unlock(&sdp->sd_trunc_lock);
		wake_up(&sdp->sd_quota_wait);
		return 1;
	}

	return error;
}

516 517 518 519
/**
 * inode_go_dump - print information about an inode
 * @seq: The iterator
 * @ip: the inode
520
 * @fs_id_buf: file system id (may be empty)
521 522 523
 *
 */

524 525
static void inode_go_dump(struct seq_file *seq, struct gfs2_glock *gl,
			  const char *fs_id_buf)
526
{
527 528 529 530
	struct gfs2_inode *ip = gl->gl_object;
	struct inode *inode = &ip->i_inode;
	unsigned long nrpages;

531
	if (ip == NULL)
532
		return;
533 534 535 536 537

	xa_lock_irq(&inode->i_data.i_pages);
	nrpages = inode->i_data.nrpages;
	xa_unlock_irq(&inode->i_data.i_pages);

538 539
	gfs2_print_dbg(seq, "%s I: n:%llu/%llu t:%u f:0x%02lx d:0x%08x s:%llu "
		       "p:%lu\n", fs_id_buf,
540 541
		  (unsigned long long)ip->i_no_formal_ino,
		  (unsigned long long)ip->i_no_addr,
542 543
		  IF2DT(ip->i_inode.i_mode), ip->i_flags,
		  (unsigned int)ip->i_diskflags,
544
		  (unsigned long long)i_size_read(inode), nrpages);
545 546
}

D
David Teigland 已提交
547
/**
 * freeze_go_sync - promote/demote the freeze glock
 * @gl: the glock
 *
 * Returns: always 0 (errors are handled by withdrawing)
 */

static int freeze_go_sync(struct gfs2_glock *gl)
{
	int error = 0;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	/*
	 * We need to check gl_state == LM_ST_SHARED here and not gl_req ==
	 * LM_ST_EXCLUSIVE. That's because when any node does a freeze,
	 * all the nodes should have the freeze glock in SH mode and they all
	 * call do_xmote: One for EX and the others for UN. They ALL must
	 * freeze locally, and they ALL must queue freeze work. The freeze_work
	 * calls freeze_func, which tries to reacquire the freeze glock in SH,
	 * effectively waiting for the thaw on the node who holds it in EX.
	 * Once thawed, the work func acquires the freeze glock in
	 * SH and everybody goes back to thawed.
	 */
	if (gl->gl_state == LM_ST_SHARED && !gfs2_withdrawn(sdp) &&
	    !test_bit(SDF_NORECOVERY, &sdp->sd_flags)) {
		atomic_set(&sdp->sd_freeze_state, SFS_STARTING_FREEZE);
		error = freeze_super(sdp->sd_vfs);
		if (error) {
			fs_info(sdp, "GFS2: couldn't freeze filesystem: %d\n",
				error);
			if (gfs2_withdrawn(sdp)) {
				atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
				return 0;
			}
			/* A freeze failure we can't explain: withdraw */
			gfs2_assert_withdraw(sdp, 0);
		}
		queue_work(gfs2_freeze_wq, &sdp->sd_freeze_work);
		if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
			gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_FREEZE |
				       GFS2_LFC_FREEZE_GO_SYNC);
		else /* read-only mounts */
			atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
	}
	return 0;
}

/**
 * freeze_go_xmote_bh - After promoting/demoting the freeze glock
 * @gl: the glock
 * @gh: the holder (unused here; part of the go_xmote_bh signature)
 *
 * After a freeze/thaw transition, verify the journal was cleanly shut down
 * and re-initialize the log head pointers.
 *
 * Returns: always 0 (journal problems are reported via gfs2_consist)
 */

static int freeze_go_xmote_bh(struct gfs2_glock *gl, struct gfs2_holder *gh)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
	struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
	struct gfs2_glock *j_gl = ip->i_gl;
	struct gfs2_log_header_host head;
	int error;

	if (test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags)) {
		j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);

		error = gfs2_find_jhead(sdp->sd_jdesc, &head, false);
		if (error)
			gfs2_consist(sdp);
		/* The log must end with an unmount record after a freeze */
		if (!(head.lh_flags & GFS2_LOG_HEAD_UNMOUNT))
			gfs2_consist(sdp);

		/*  Initialize some head of the log stuff  */
		if (!gfs2_withdrawn(sdp)) {
			sdp->sd_log_sequence = head.lh_sequence + 1;
			gfs2_log_pointers_init(sdp, head.lh_blkno);
		}
	}
	return 0;
}

626 627 628 629 630 631 632
/**
 * freeze_go_demote_ok - never allow the freeze glock to be demoted
 * @gl: the glock
 *
 * Always returns 0
 */

static int freeze_go_demote_ok(const struct gfs2_glock *gl)
{
	return 0;
}

638 639 640 641
/**
 * iopen_go_callback - schedule the dcache entry for the inode to be deleted
 * @gl: the glock
 * @remote: true if this came from a different cluster node
 *
 * gl_lockref.lock lock is held while calling this
 */
static void iopen_go_callback(struct gfs2_glock *gl, bool remote)
{
	struct gfs2_inode *ip = gl->gl_object;
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	if (!remote || sb_rdonly(sdp->sd_vfs))
		return;

	if (gl->gl_demote_state == LM_ST_UNLOCKED &&
	    gl->gl_state == LM_ST_SHARED && ip) {
		/* Hold a ref for the queued work; drop it if already queued */
		gl->gl_lockref.count++;
		if (!queue_delayed_work(gfs2_delete_workqueue,
					&gl->gl_delete, 0))
			gl->gl_lockref.count--;
	}
}

661 662 663 664 665
/* Don't demote an iopen glock while delete work is still queued on it. */
static int iopen_go_demote_ok(const struct gfs2_glock *gl)
{
       return !gfs2_delete_work_queued(gl);
}

666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732
/**
 * inode_go_free - wake up anyone waiting for dlm's unlock ast to free it
 * @gl: glock being freed
 *
 * For now, this is only used for the journal inode glock. In withdraw
 * situations, we need to wait for the glock to be freed so that we know
 * other nodes may proceed with recovery / journal replay.
 */
static void inode_go_free(struct gfs2_glock *gl)
{
	/* Note that we cannot reference gl_object because it's already set
	 * to NULL by this point in its lifecycle. */
	if (!test_bit(GLF_FREEING, &gl->gl_flags))
		return;
	clear_bit_unlock(GLF_FREEING, &gl->gl_flags);
	wake_up_bit(&gl->gl_flags, GLF_FREEING);
}

/**
 * nondisk_go_callback - used to signal when a node did a withdraw
 * @gl: the nondisk glock
 * @remote: true if this came from a different cluster node
 *
 */
static void nondisk_go_callback(struct gfs2_glock *gl, bool remote)
{
	struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;

	/* Ignore the callback unless it's from another node, and it's the
	   live lock. */
	if (!remote || gl->gl_name.ln_number != GFS2_LIVE_LOCK)
		return;

	/* First order of business is to cancel the demote request. We don't
	 * really want to demote a nondisk glock. At best it's just to inform
	 * us of another node's withdraw. We'll keep it in SH mode. */
	clear_bit(GLF_DEMOTE, &gl->gl_flags);
	clear_bit(GLF_PENDING_DEMOTE, &gl->gl_flags);

	/* Ignore the unlock if we're withdrawn, unmounting, or in recovery. */
	if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) ||
	    test_bit(SDF_WITHDRAWN, &sdp->sd_flags) ||
	    test_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags))
		return;

	/* We only care when a node wants us to unlock, because that means
	 * they want a journal recovered. */
	if (gl->gl_demote_state != LM_ST_UNLOCKED)
		return;

	if (sdp->sd_args.ar_spectator) {
		fs_warn(sdp, "Spectator node cannot recover journals.\n");
		return;
	}

	fs_warn(sdp, "Some node has withdrawn; checking for recovery.\n");
	set_bit(SDF_REMOTE_WITHDRAW, &sdp->sd_flags);
	/*
	 * We can't call remote_withdraw directly here or gfs2_recover_journal
	 * because this is called from the glock unlock function and the
	 * remote_withdraw needs to enqueue and dequeue the same "live" glock
	 * we were called from. So we queue it to the control work queue in
	 * lock_dlm.
	 */
	queue_delayed_work(gfs2_control_wq, &sdp->sd_control_work, 0);
}

733
/* Per-glock-type operation tables.  Entries left unset fall back to the
 * glock core's default behavior. */

const struct gfs2_glock_operations gfs2_meta_glops = {
	.go_type = LM_TYPE_META,
	.go_flags = GLOF_NONDISK,
};

const struct gfs2_glock_operations gfs2_inode_glops = {
	.go_sync = inode_go_sync,
	.go_inval = inode_go_inval,
	.go_demote_ok = inode_go_demote_ok,
	.go_lock = inode_go_lock,
	.go_dump = inode_go_dump,
	.go_type = LM_TYPE_INODE,
	.go_flags = GLOF_ASPACE | GLOF_LRU | GLOF_LVB,
	.go_free = inode_go_free,
};

const struct gfs2_glock_operations gfs2_rgrp_glops = {
	.go_sync = rgrp_go_sync,
	.go_inval = rgrp_go_inval,
	.go_lock = gfs2_rgrp_go_lock,
	.go_dump = gfs2_rgrp_go_dump,
	.go_type = LM_TYPE_RGRP,
	.go_flags = GLOF_LVB,
};

const struct gfs2_glock_operations gfs2_freeze_glops = {
	.go_sync = freeze_go_sync,
	.go_xmote_bh = freeze_go_xmote_bh,
	.go_demote_ok = freeze_go_demote_ok,
	.go_type = LM_TYPE_NONDISK,
	.go_flags = GLOF_NONDISK,
};

const struct gfs2_glock_operations gfs2_iopen_glops = {
	.go_type = LM_TYPE_IOPEN,
	.go_callback = iopen_go_callback,
	.go_demote_ok = iopen_go_demote_ok,
	.go_flags = GLOF_LRU | GLOF_NONDISK,
	.go_subclass = 1,
};

const struct gfs2_glock_operations gfs2_flock_glops = {
	.go_type = LM_TYPE_FLOCK,
	.go_flags = GLOF_LRU | GLOF_NONDISK,
};

const struct gfs2_glock_operations gfs2_nondisk_glops = {
	.go_type = LM_TYPE_NONDISK,
	.go_flags = GLOF_NONDISK,
	.go_callback = nondisk_go_callback,
};

const struct gfs2_glock_operations gfs2_quota_glops = {
	.go_type = LM_TYPE_QUOTA,
	.go_flags = GLOF_LVB | GLOF_LRU | GLOF_NONDISK,
};

const struct gfs2_glock_operations gfs2_journal_glops = {
	.go_type = LM_TYPE_JOURNAL,
	.go_flags = GLOF_NONDISK,
};

/* Lookup table from LM_TYPE_* to its operations, used by the glock core. */
const struct gfs2_glock_operations *gfs2_glops_list[] = {
	[LM_TYPE_META] = &gfs2_meta_glops,
	[LM_TYPE_INODE] = &gfs2_inode_glops,
	[LM_TYPE_RGRP] = &gfs2_rgrp_glops,
	[LM_TYPE_IOPEN] = &gfs2_iopen_glops,
	[LM_TYPE_FLOCK] = &gfs2_flock_glops,
	[LM_TYPE_NONDISK] = &gfs2_nondisk_glops,
	[LM_TYPE_QUOTA] = &gfs2_quota_glops,
	[LM_TYPE_JOURNAL] = &gfs2_journal_glops,
};