lops.c 19.3 KB
Newer Older
D
David Teigland 已提交
1 2
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
15
#include <linux/gfs2_ondisk.h>
S
Steven Whitehouse 已提交
16 17
#include <linux/bio.h>
#include <linux/fs.h>
D
David Teigland 已提交
18 19

#include "gfs2.h"
20
#include "incore.h"
21
#include "inode.h"
D
David Teigland 已提交
22 23 24 25 26 27 28
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
#include "trans.h"
29
#include "util.h"
S
Steven Whitehouse 已提交
30
#include "trace_gfs2.h"
D
David Teigland 已提交
31

32 33 34 35 36 37 38 39 40 41 42
/**
 * gfs2_pin - Pin a buffer in memory
 * @sdp: The superblock
 * @bh: The buffer to be pinned
 *
 * The log lock must be held when calling this function
 */
static void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh)
{
	struct gfs2_bufdata *bd;

43
	BUG_ON(!current->journal_info);
44 45 46 47 48 49 50 51 52 53

	clear_buffer_dirty(bh);
	if (test_set_buffer_pinned(bh))
		gfs2_assert_withdraw(sdp, 0);
	if (!buffer_uptodate(bh))
		gfs2_io_error_bh(sdp, bh);
	bd = bh->b_private;
	/* If this buffer is in the AIL and it has already been written
	 * to in-place disk block, remove it from the AIL.
	 */
54
	spin_lock(&sdp->sd_ail_lock);
55 56
	if (bd->bd_ail)
		list_move(&bd->bd_ail_st_list, &bd->bd_ail->ai_ail2_list);
57
	spin_unlock(&sdp->sd_ail_lock);
58
	get_bh(bh);
59
	atomic_inc(&sdp->sd_log_pinned);
S
Steven Whitehouse 已提交
60
	trace_gfs2_pin(bd, 1);
61 62
}

63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85
static bool buffer_is_rgrp(const struct gfs2_bufdata *bd)
{
	return bd->bd_gl->gl_name.ln_type == LM_TYPE_RGRP;
}

static void maybe_release_space(struct gfs2_bufdata *bd)
{
	struct gfs2_glock *gl = bd->bd_gl;
	struct gfs2_sbd *sdp = gl->gl_sbd;
	struct gfs2_rgrpd *rgd = gl->gl_object;
	unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
	struct gfs2_bitmap *bi = rgd->rd_bits + index;

	if (bi->bi_clone == 0)
		return;
	if (sdp->sd_args.ar_discard)
		gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi);
	memcpy(bi->bi_clone + bi->bi_offset,
	       bd->bd_bh->b_data + bi->bi_offset, bi->bi_len);
	clear_bit(GBF_FULL, &bi->bi_flags);
	rgd->rd_free_clone = rgd->rd_free;
}

86 87 88 89 90
/**
 * gfs2_unpin - Unpin a buffer
 * @sdp: the filesystem the buffer belongs to
 * @bh: The buffer to unpin
 * @ai:
91
 * @flags: The inode dirty flags
92 93 94 95 96 97 98 99
 *
 */

static void gfs2_unpin(struct gfs2_sbd *sdp, struct buffer_head *bh,
		       struct gfs2_ail *ai)
{
	struct gfs2_bufdata *bd = bh->b_private;

100 101
	BUG_ON(!buffer_uptodate(bh));
	BUG_ON(!buffer_pinned(bh));
102 103 104 105 106

	lock_buffer(bh);
	mark_buffer_dirty(bh);
	clear_buffer_pinned(bh);

107 108 109
	if (buffer_is_rgrp(bd))
		maybe_release_space(bd);

D
Dave Chinner 已提交
110
	spin_lock(&sdp->sd_ail_lock);
111 112 113 114 115 116 117 118 119 120
	if (bd->bd_ail) {
		list_del(&bd->bd_ail_st_list);
		brelse(bh);
	} else {
		struct gfs2_glock *gl = bd->bd_gl;
		list_add(&bd->bd_ail_gl_list, &gl->gl_ail_list);
		atomic_inc(&gl->gl_ail_count);
	}
	bd->bd_ail = ai;
	list_add(&bd->bd_ail_st_list, &ai->ai_ail1_list);
D
Dave Chinner 已提交
121 122
	spin_unlock(&sdp->sd_ail_lock);

123
	clear_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
S
Steven Whitehouse 已提交
124
	trace_gfs2_pin(bd, 0);
125
	unlock_buffer(bh);
126
	atomic_dec(&sdp->sd_log_pinned);
127 128
}

129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161

/* View the start of the buffer's data as a log descriptor. */
static inline struct gfs2_log_descriptor *bh_log_desc(struct buffer_head *bh)
{
	void *data = bh->b_data;

	return data;
}

/* Return the address immediately following the log descriptor in @bh. */
static inline __be64 *bh_log_ptr(struct buffer_head *bh)
{
	return (__force __be64 *)(bh_log_desc(bh) + 1);
}

static inline __be64 *bh_ptr_end(struct buffer_head *bh)
{
	return (__force __be64 *)(bh->b_data + bh->b_size);
}


/*
 * Obtain a log buffer and initialise it as a log descriptor of type
 * @ld_type.  The length and both data fields start out as zero and the
 * reserved area is cleared; callers fill in the counts afterwards.
 */
static struct buffer_head *gfs2_get_log_desc(struct gfs2_sbd *sdp, u32 ld_type)
{
	struct buffer_head *bh = gfs2_log_get_buf(sdp);
	struct gfs2_log_descriptor *desc = bh_log_desc(bh);

	/* Metadata header identifying the block as a log descriptor */
	desc->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
	desc->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
	desc->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);

	/* Descriptor payload */
	desc->ld_type = cpu_to_be32(ld_type);
	desc->ld_length = 0;
	desc->ld_data1 = 0;
	desc->ld_data2 = 0;
	memset(desc->ld_reserved, 0, sizeof(desc->ld_reserved));

	return bh;
}

D
David Teigland 已提交
162 163 164
static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
165
	struct gfs2_meta_header *mh;
D
David Teigland 已提交
166 167
	struct gfs2_trans *tr;

168
	lock_buffer(bd->bd_bh);
169
	gfs2_log_lock(sdp);
170 171
	if (!list_empty(&bd->bd_list_tr))
		goto out;
172
	tr = current->journal_info;
D
David Teigland 已提交
173 174 175 176
	tr->tr_touched = 1;
	tr->tr_num_buf++;
	list_add(&bd->bd_list_tr, &tr->tr_list_buf);
	if (!list_empty(&le->le_list))
177
		goto out;
178 179
	set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
	set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
D
David Teigland 已提交
180
	gfs2_meta_check(sdp, bd->bd_bh);
181
	gfs2_pin(sdp, bd->bd_bh);
182 183 184
	mh = (struct gfs2_meta_header *)bd->bd_bh->b_data;
	mh->__pad0 = cpu_to_be64(0);
	mh->mh_jid = cpu_to_be32(sdp->sd_jdesc->jd_jid);
D
David Teigland 已提交
185 186 187
	sdp->sd_log_num_buf++;
	list_add(&le->le_list, &sdp->sd_log_le_buf);
	tr->tr_num_buf_new++;
188 189 190
out:
	gfs2_log_unlock(sdp);
	unlock_buffer(bd->bd_bh);
D
David Teigland 已提交
191 192 193 194 195 196 197
}

static void buf_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct buffer_head *bh;
	struct gfs2_log_descriptor *ld;
	struct gfs2_bufdata *bd1 = NULL, *bd2;
198
	unsigned int total;
D
David Teigland 已提交
199 200 201 202 203
	unsigned int limit;
	unsigned int num;
	unsigned n;
	__be64 *ptr;

204
	limit = buf_limit(sdp);
D
David Teigland 已提交
205 206
	/* for 4k blocks, limit = 503 */

207 208
	gfs2_log_lock(sdp);
	total = sdp->sd_log_num_buf;
D
David Teigland 已提交
209 210 211 212 213
	bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
	while(total) {
		num = total;
		if (total > limit)
			num = limit;
214
		gfs2_log_unlock(sdp);
215
		bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_METADATA);
216
		gfs2_log_lock(sdp);
217 218
		ld = bh_log_desc(bh);
		ptr = bh_log_ptr(bh);
D
David Teigland 已提交
219 220 221 222
		ld->ld_length = cpu_to_be32(num + 1);
		ld->ld_data1 = cpu_to_be32(num);

		n = 0;
223 224
		list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
					     bd_le.le_list) {
D
David Teigland 已提交
225 226 227 228 229
			*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
			if (++n >= num)
				break;
		}

230
		gfs2_log_unlock(sdp);
J
Jens Axboe 已提交
231
		submit_bh(WRITE_SYNC, bh);
232
		gfs2_log_lock(sdp);
D
David Teigland 已提交
233 234

		n = 0;
235 236
		list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
					     bd_le.le_list) {
237
			get_bh(bd2->bd_bh);
238
			gfs2_log_unlock(sdp);
239
			lock_buffer(bd2->bd_bh);
D
David Teigland 已提交
240
			bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
J
Jens Axboe 已提交
241
			submit_bh(WRITE_SYNC, bh);
242
			gfs2_log_lock(sdp);
D
David Teigland 已提交
243 244 245 246
			if (++n >= num)
				break;
		}

247
		BUG_ON(total < num);
D
David Teigland 已提交
248 249
		total -= num;
	}
250
	gfs2_log_unlock(sdp);
D
David Teigland 已提交
251 252 253 254 255 256 257 258 259 260 261 262
}

/*
 * Drain sd_log_le_buf after a commit: each entry is unlinked, the buffer
 * count is decremented and the buffer is unpinned onto @ai.  A warning
 * assertion fires if the count is not zero once the list is empty.
 */
static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &sdp->sd_log_le_buf;
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&bd->bd_le.le_list);
		sdp->sd_log_num_buf--;

		gfs2_unpin(sdp, bd->bd_bh, ai);
	}
	gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
}

static void buf_lo_before_scan(struct gfs2_jdesc *jd,
A
Al Viro 已提交
269
			       struct gfs2_log_header_host *head, int pass)
D
David Teigland 已提交
270
{
271
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
D
David Teigland 已提交
272 273 274 275 276 277 278 279 280 281 282 283

	if (pass != 0)
		return;

	sdp->sd_found_blocks = 0;
	sdp->sd_replayed_blocks = 0;
}

/*
 * Replay the blocks described by a GFS2_LOG_DESC_METADATA descriptor.
 *
 * Only acts in pass 1.  For each of the ld_data1 block numbers at @ptr,
 * the journal copy is read and copied over a new in-place buffer, unless
 * gfs2_revoke_check() reports the block as revoked.  The copy is marked
 * dirty when it passes gfs2_meta_check().
 *
 * Returns 0 on success, the error from gfs2_replay_read_block(), or -EIO
 * when a replayed block fails the metadata check.
 */
static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				struct gfs2_log_descriptor *ld, __be64 *ptr,
				int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
		return 0;

	/* Step past the descriptor block itself */
	gfs2_replay_incr_blk(sdp, &start);

	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);

		sdp->sd_found_blocks++;

		if (gfs2_revoke_check(sdp, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		if (gfs2_meta_check(sdp, bh_ip))
			error = -EIO;
		else
			mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);

		if (error)
			break;

		sdp->sd_replayed_blocks++;
	}

	return error;
}

static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
331 332
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
D
David Teigland 已提交
333 334

	if (error) {
S
Steven Whitehouse 已提交
335
		gfs2_meta_sync(ip->i_gl);
D
David Teigland 已提交
336 337 338 339 340
		return;
	}
	if (pass != 1)
		return;

S
Steven Whitehouse 已提交
341
	gfs2_meta_sync(ip->i_gl);
D
David Teigland 已提交
342 343 344 345 346 347 348

	fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
	        jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
}

static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
349 350
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
	struct gfs2_glock *gl = bd->bd_gl;
D
David Teigland 已提交
351 352
	struct gfs2_trans *tr;

353
	tr = current->journal_info;
D
David Teigland 已提交
354 355 356
	tr->tr_touched = 1;
	tr->tr_num_revoke++;
	sdp->sd_log_num_revoke++;
357 358
	atomic_inc(&gl->gl_revokes);
	set_bit(GLF_LFLUSH, &gl->gl_flags);
D
David Teigland 已提交
359 360 361 362 363 364 365 366 367 368
	list_add(&le->le_list, &sdp->sd_log_le_revoke);
}

static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct gfs2_log_descriptor *ld;
	struct gfs2_meta_header *mh;
	struct buffer_head *bh;
	unsigned int offset;
	struct list_head *head = &sdp->sd_log_le_revoke;
369
	struct gfs2_bufdata *bd;
D
David Teigland 已提交
370 371 372 373

	if (!sdp->sd_log_num_revoke)
		return;

374 375
	bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_REVOKE);
	ld = bh_log_desc(bh);
376
	ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
377
						    sizeof(u64)));
D
David Teigland 已提交
378 379 380
	ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
	offset = sizeof(struct gfs2_log_descriptor);

381
	list_for_each_entry(bd, head, bd_le.le_list) {
D
David Teigland 已提交
382 383
		sdp->sd_log_num_revoke--;

384
		if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
J
Jens Axboe 已提交
385
			submit_bh(WRITE_SYNC, bh);
D
David Teigland 已提交
386 387 388 389

			bh = gfs2_log_get_buf(sdp);
			mh = (struct gfs2_meta_header *)bh->b_data;
			mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
390 391
			mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
			mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
D
David Teigland 已提交
392 393 394
			offset = sizeof(struct gfs2_meta_header);
		}

395
		*(__be64 *)(bh->b_data + offset) = cpu_to_be64(bd->bd_blkno);
396
		offset += sizeof(u64);
D
David Teigland 已提交
397 398 399
	}
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);

J
Jens Axboe 已提交
400
	submit_bh(WRITE_SYNC, bh);
D
David Teigland 已提交
401 402
}

403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418
/*
 * Drain sd_log_le_revoke after a commit.  Each entry is unlinked, its
 * glock's revoke count is dropped and GLF_LFLUSH cleared, and the bufdata
 * is returned to gfs2_bufdata_cachep.  @ai is not used.
 */
static void revoke_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *revokes = &sdp->sd_log_le_revoke;

	while (!list_empty(revokes)) {
		struct gfs2_bufdata *bd = list_entry(revokes->next,
						     struct gfs2_bufdata,
						     bd_le.le_list);
		struct gfs2_glock *gl = bd->bd_gl;

		list_del_init(&bd->bd_le.le_list);
		atomic_dec(&gl->gl_revokes);
		clear_bit(GLF_LFLUSH, &gl->gl_flags);
		kmem_cache_free(gfs2_bufdata_cachep, bd);
	}
}

D
David Teigland 已提交
419
static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
A
Al Viro 已提交
420
				  struct gfs2_log_header_host *head, int pass)
D
David Teigland 已提交
421
{
422
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
D
David Teigland 已提交
423 424 425 426 427 428 429 430 431 432 433 434

	if (pass != 0)
		return;

	sdp->sd_found_revokes = 0;
	sdp->sd_replay_tail = head->lh_tail;
}

/*
 * Collect the revokes described by a GFS2_LOG_DESC_REVOKE descriptor.
 *
 * Only acts in pass 0.  ld_length journal blocks are read starting at
 * @start.  Entries begin after the log descriptor in the first block and
 * after the metadata header in later blocks, which are also checked to be
 * of type GFS2_METATYPE_LB.  Each block number is passed to
 * gfs2_revoke_add(); a positive result is counted in sd_found_revokes.
 *
 * Returns 0 on success, or the negative error from
 * gfs2_replay_read_block() or gfs2_revoke_add().
 */
static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				   struct gfs2_log_descriptor *ld, __be64 *ptr,
				   int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	unsigned int blks = be32_to_cpu(ld->ld_length);
	unsigned int revokes = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh;
	unsigned int offset;
	u64 blkno;
	int first = 1;
	int error;

	if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
		return 0;

	offset = sizeof(struct gfs2_log_descriptor);

	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		error = gfs2_replay_read_block(jd, start, &bh);
		if (error)
			return error;

		if (!first)
			gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);

		while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
			blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));

			error = gfs2_revoke_add(sdp, blkno, start);
			if (error < 0) {
				/* Drop the journal block before bailing out */
				brelse(bh);
				return error;
			}
			else if (error)
				sdp->sd_found_revokes++;

			if (!--revokes)
				break;
			offset += sizeof(u64);
		}

		brelse(bh);
		offset = sizeof(struct gfs2_meta_header);
		first = 0;
	}

	return 0;
}

static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
483
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
D
David Teigland 已提交
484 485 486 487 488 489 490 491 492 493 494 495 496 497

	if (error) {
		gfs2_revoke_clean(sdp);
		return;
	}
	if (pass != 1)
		return;

	fs_info(sdp, "jid=%u: Found %u revoke tags\n",
	        jd->jd_jid, sdp->sd_found_revokes);

	gfs2_revoke_clean(sdp);
}

498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513
/**
 * databuf_lo_add - Add a databuf to the transaction.
 *
 * This is used in two distinct cases:
 * i) In ordered write mode
 *    We put the data buffer on a list so that we can ensure that its
 *    synced to disk at the right time
 * ii) In journaled data mode
 *    We need to journal the data block in the same way as metadata in
 *    the functions above. The difference is that here we have a tag
 *    which is two __be64's being the block number (as per meta data)
 *    and a flag which says whether the data block needs escaping or
 *    not. This means we need a new log entry for each 251 or so data
 *    blocks, which isn't an enormous overhead but twice as much as
 *    for normal metadata blocks.
 */
D
David Teigland 已提交
514 515
static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
516
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
517
	struct gfs2_trans *tr = current->journal_info;
518
	struct address_space *mapping = bd->bd_bh->b_page->mapping;
519
	struct gfs2_inode *ip = GFS2_I(mapping->host);
D
David Teigland 已提交
520

521
	lock_buffer(bd->bd_bh);
522
	gfs2_log_lock(sdp);
523 524 525 526 527 528 529 530
	if (tr) {
		if (!list_empty(&bd->bd_list_tr))
			goto out;
		tr->tr_touched = 1;
		if (gfs2_is_jdata(ip)) {
			tr->tr_num_buf++;
			list_add(&bd->bd_list_tr, &tr->tr_list_buf);
		}
531
	}
532
	if (!list_empty(&le->le_list))
533
		goto out;
534

535 536
	set_bit(GLF_LFLUSH, &bd->bd_gl->gl_flags);
	set_bit(GLF_DIRTY, &bd->bd_gl->gl_flags);
537 538 539
	if (gfs2_is_jdata(ip)) {
		gfs2_pin(sdp, bd->bd_bh);
		tr->tr_num_databuf_new++;
540
		sdp->sd_log_num_databuf++;
541
		list_add_tail(&le->le_list, &sdp->sd_log_le_databuf);
542
	} else {
543
		list_add_tail(&le->le_list, &sdp->sd_log_le_ordered);
544 545
	}
out:
D
David Teigland 已提交
546
	gfs2_log_unlock(sdp);
547
	unlock_buffer(bd->bd_bh);
D
David Teigland 已提交
548 549
}

550
static void gfs2_check_magic(struct buffer_head *bh)
551 552 553 554
{
	void *kaddr;
	__be32 *ptr;

555 556
	clear_buffer_escaped(bh);
	kaddr = kmap_atomic(bh->b_page, KM_USER0);
557 558
	ptr = kaddr + bh_offset(bh);
	if (*ptr == cpu_to_be32(GFS2_MAGIC))
559
		set_buffer_escaped(bh);
560
	kunmap_atomic(kaddr, KM_USER0);
561 562
}

563 564 565
static void gfs2_write_blocks(struct gfs2_sbd *sdp, struct buffer_head *bh,
			      struct list_head *list, struct list_head *done,
			      unsigned int n)
D
David Teigland 已提交
566
{
567
	struct buffer_head *bh1;
568
	struct gfs2_log_descriptor *ld;
569 570
	struct gfs2_bufdata *bd;
	__be64 *ptr;
571

572 573
	if (!bh)
		return;
D
David Teigland 已提交
574

575 576 577
	ld = bh_log_desc(bh);
	ld->ld_length = cpu_to_be32(n + 1);
	ld->ld_data1 = cpu_to_be32(n);
D
David Teigland 已提交
578

579 580 581
	ptr = bh_log_ptr(bh);
	
	get_bh(bh);
J
Jens Axboe 已提交
582
	submit_bh(WRITE_SYNC, bh);
583
	gfs2_log_lock(sdp);
584 585 586 587 588 589 590
	while(!list_empty(list)) {
		bd = list_entry(list->next, struct gfs2_bufdata, bd_le.le_list);
		list_move_tail(&bd->bd_le.le_list, done);
		get_bh(bd->bd_bh);
		while (be64_to_cpu(*ptr) != bd->bd_bh->b_blocknr) {
			gfs2_log_incr_head(sdp);
			ptr += 2;
591
		}
592
		gfs2_log_unlock(sdp);
593 594 595 596 597 598 599 600 601 602 603 604 605 606
		lock_buffer(bd->bd_bh);
		if (buffer_escaped(bd->bd_bh)) {
			void *kaddr;
			bh1 = gfs2_log_get_buf(sdp);
			kaddr = kmap_atomic(bd->bd_bh->b_page, KM_USER0);
			memcpy(bh1->b_data, kaddr + bh_offset(bd->bd_bh),
			       bh1->b_size);
			kunmap_atomic(kaddr, KM_USER0);
			*(__be32 *)bh1->b_data = 0;
			clear_buffer_escaped(bd->bd_bh);
			unlock_buffer(bd->bd_bh);
			brelse(bd->bd_bh);
		} else {
			bh1 = gfs2_log_fake_buf(sdp, bd->bd_bh);
607
		}
J
Jens Axboe 已提交
608
		submit_bh(WRITE_SYNC, bh1);
609
		gfs2_log_lock(sdp);
610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632
		ptr += 2;
	}
	gfs2_log_unlock(sdp);
	brelse(bh);
}

/**
 * databuf_lo_before_commit - Scan the data buffers, writing as we go
 *
 */

static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct gfs2_bufdata *bd = NULL;
	struct buffer_head *bh = NULL;
	unsigned int n = 0;
	__be64 *ptr = NULL, *end = NULL;
	LIST_HEAD(processed);
	LIST_HEAD(in_progress);

	gfs2_log_lock(sdp);
	while (!list_empty(&sdp->sd_log_le_databuf)) {
		if (ptr == end) {
633
			gfs2_log_unlock(sdp);
634 635 636 637 638
			gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
			n = 0;
			bh = gfs2_get_log_desc(sdp, GFS2_LOG_DESC_JDATA);
			ptr = bh_log_ptr(bh);
			end = bh_ptr_end(bh) - 1;
639
			gfs2_log_lock(sdp);
640
			continue;
641
		}
642 643 644 645 646 647
		bd = list_entry(sdp->sd_log_le_databuf.next, struct gfs2_bufdata, bd_le.le_list);
		list_move_tail(&bd->bd_le.le_list, &in_progress);
		gfs2_check_magic(bd->bd_bh);
		*ptr++ = cpu_to_be64(bd->bd_bh->b_blocknr);
		*ptr++ = cpu_to_be64(buffer_escaped(bh) ? 1 : 0);
		n++;
D
David Teigland 已提交
648
	}
649
	gfs2_log_unlock(sdp);
650 651 652 653
	gfs2_write_blocks(sdp, bh, &in_progress, &processed, n);
	gfs2_log_lock(sdp);
	list_splice(&processed, &sdp->sd_log_le_databuf);
	gfs2_log_unlock(sdp);
654 655 656 657 658 659
}

/*
 * Replay the blocks described by a GFS2_LOG_DESC_JDATA descriptor.
 *
 * Only acts in pass 1.  Each of the ld_data1 entries at @ptr is a pair of
 * words: the block number and an escape flag.  The journal copy is read
 * and copied over a new in-place buffer, unless gfs2_revoke_check()
 * reports the block as revoked.  When the escape flag is set, the first
 * 32 bits are restored to GFS2_MAGIC before the buffer is marked dirty.
 *
 * Returns 0 on success or the error from gfs2_replay_read_block().
 */
static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				    struct gfs2_log_descriptor *ld,
				    __be64 *ptr, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int blks = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh_log, *bh_ip;
	u64 blkno;
	u64 esc;
	int error = 0;

	if (pass != 1 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
		return 0;

	/* Step past the descriptor block itself */
	gfs2_replay_incr_blk(sdp, &start);
	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		blkno = be64_to_cpu(*ptr++);
		esc = be64_to_cpu(*ptr++);

		sdp->sd_found_blocks++;

		if (gfs2_revoke_check(sdp, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &bh_log);
		if (error)
			return error;

		bh_ip = gfs2_meta_new(gl, blkno);
		memcpy(bh_ip->b_data, bh_log->b_data, bh_log->b_size);

		/* Unescape */
		if (esc) {
			__be32 *eptr = (__be32 *)bh_ip->b_data;
			*eptr = cpu_to_be32(GFS2_MAGIC);
		}
		mark_buffer_dirty(bh_ip);

		brelse(bh_log);
		brelse(bh_ip);

		sdp->sd_replayed_blocks++;
	}

	return error;
}

/* FIXME: sort out accounting for log blocks etc. */

static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
709 710
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
711 712

	if (error) {
S
Steven Whitehouse 已提交
713
		gfs2_meta_sync(ip->i_gl);
714 715 716 717 718 719
		return;
	}
	if (pass != 1)
		return;

	/* data sync? */
S
Steven Whitehouse 已提交
720
	gfs2_meta_sync(ip->i_gl);
721 722 723 724 725 726 727 728 729 730 731 732

	fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
		jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
}

/*
 * Drain sd_log_le_databuf after a commit: each entry is unlinked, the
 * databuf count is decremented and the buffer is unpinned onto @ai.  A
 * warning assertion fires if the count is not zero once the list is empty.
 */
static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *head = &sdp->sd_log_le_databuf;
	struct gfs2_bufdata *bd;

	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&bd->bd_le.le_list);
		sdp->sd_log_num_databuf--;
		gfs2_unpin(sdp, bd->bd_bh, ai);
	}
	gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
}

740

741
const struct gfs2_log_operations gfs2_buf_lops = {
D
David Teigland 已提交
742 743 744 745 746 747
	.lo_add = buf_lo_add,
	.lo_before_commit = buf_lo_before_commit,
	.lo_after_commit = buf_lo_after_commit,
	.lo_before_scan = buf_lo_before_scan,
	.lo_scan_elements = buf_lo_scan_elements,
	.lo_after_scan = buf_lo_after_scan,
748
	.lo_name = "buf",
D
David Teigland 已提交
749 750
};

751
const struct gfs2_log_operations gfs2_revoke_lops = {
D
David Teigland 已提交
752 753
	.lo_add = revoke_lo_add,
	.lo_before_commit = revoke_lo_before_commit,
754
	.lo_after_commit = revoke_lo_after_commit,
D
David Teigland 已提交
755 756 757
	.lo_before_scan = revoke_lo_before_scan,
	.lo_scan_elements = revoke_lo_scan_elements,
	.lo_after_scan = revoke_lo_after_scan,
758
	.lo_name = "revoke",
D
David Teigland 已提交
759 760
};

761
const struct gfs2_log_operations gfs2_rg_lops = {
762
	.lo_name = "rg",
D
David Teigland 已提交
763 764
};

765
const struct gfs2_log_operations gfs2_databuf_lops = {
D
David Teigland 已提交
766 767
	.lo_add = databuf_lo_add,
	.lo_before_commit = databuf_lo_before_commit,
768 769 770
	.lo_after_commit = databuf_lo_after_commit,
	.lo_scan_elements = databuf_lo_scan_elements,
	.lo_after_scan = databuf_lo_after_scan,
771
	.lo_name = "databuf",
D
David Teigland 已提交
772 773
};

774
const struct gfs2_log_operations *gfs2_log_ops[] = {
775
	&gfs2_databuf_lops,
D
David Teigland 已提交
776 777
	&gfs2_buf_lops,
	&gfs2_rg_lops,
778
	&gfs2_revoke_lops,
779
	NULL,
D
David Teigland 已提交
780 781
};