/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
 * of the GNU General Public License version 2.
 */

#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
15
#include <linux/gfs2_ondisk.h>
16
#include <linux/lm_interface.h>
D
David Teigland 已提交
17 18

#include "gfs2.h"
19
#include "incore.h"
20
#include "inode.h"
D
David Teigland 已提交
21 22 23 24 25 26 27
#include "glock.h"
#include "log.h"
#include "lops.h"
#include "meta_io.h"
#include "recovery.h"
#include "rgrp.h"
#include "trans.h"
28
#include "util.h"
D
David Teigland 已提交
29 30 31 32

static void glock_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_glock *gl;
33
	struct gfs2_trans *tr = current->journal_info;
D
David Teigland 已提交
34

35
	tr->tr_touched = 1;
D
David Teigland 已提交
36 37 38 39 40 41

	gl = container_of(le, struct gfs2_glock, gl_le);
	if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(gl)))
		return;

	gfs2_log_lock(sdp);
42 43 44 45 46 47
	if (!list_empty(&le->le_list)){
		gfs2_log_unlock(sdp);
		return;
	}
	gfs2_glock_hold(gl);
	set_bit(GLF_DIRTY, &gl->gl_flags);
D
David Teigland 已提交
48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
	sdp->sd_log_num_gl++;
	list_add(&le->le_list, &sdp->sd_log_le_gl);
	gfs2_log_unlock(sdp);
}

/**
 * glock_lo_after_commit - Release the glocks queued on the log
 * @sdp: the filesystem
 * @ai: unused by this operation
 *
 * Empties sd_log_le_gl, dropping the reference held on each glock, and
 * warns if the glock counter does not end up at zero.
 */
static void glock_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *queue = &sdp->sd_log_le_gl;
	struct list_head *pos;

	while ((pos = queue->next) != queue) {
		struct gfs2_glock *glock;

		glock = list_entry(pos, struct gfs2_glock, gl_le.le_list);
		list_del_init(&glock->gl_le.le_list);
		sdp->sd_log_num_gl--;

		gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(glock));
		gfs2_glock_put(glock);
	}

	gfs2_assert_warn(sdp, !sdp->sd_log_num_gl);
}

static void buf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
	struct gfs2_trans *tr;

74 75 76
	gfs2_log_lock(sdp);
	if (!list_empty(&bd->bd_list_tr)) {
		gfs2_log_unlock(sdp);
D
David Teigland 已提交
77
		return;
78
	}
79
	tr = current->journal_info;
D
David Teigland 已提交
80 81 82
	tr->tr_touched = 1;
	tr->tr_num_buf++;
	list_add(&bd->bd_list_tr, &tr->tr_list_buf);
83
	gfs2_log_unlock(sdp);
D
David Teigland 已提交
84 85 86 87 88 89 90

	if (!list_empty(&le->le_list))
		return;

	gfs2_trans_add_gl(bd->bd_gl);

	gfs2_meta_check(sdp, bd->bd_bh);
91
	gfs2_pin(sdp, bd->bd_bh);
D
David Teigland 已提交
92 93 94 95 96 97 98 99 100 101 102 103 104
	gfs2_log_lock(sdp);
	sdp->sd_log_num_buf++;
	list_add(&le->le_list, &sdp->sd_log_le_buf);
	gfs2_log_unlock(sdp);

	tr->tr_num_buf_new++;
}

static void buf_lo_incore_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
{
	struct list_head *head = &tr->tr_list_buf;
	struct gfs2_bufdata *bd;

105
	gfs2_log_lock(sdp);
D
David Teigland 已提交
106 107 108 109 110
	while (!list_empty(head)) {
		bd = list_entry(head->next, struct gfs2_bufdata, bd_list_tr);
		list_del_init(&bd->bd_list_tr);
		tr->tr_num_buf--;
	}
111
	gfs2_log_unlock(sdp);
D
David Teigland 已提交
112 113 114 115 116 117 118 119
	gfs2_assert_warn(sdp, !tr->tr_num_buf);
}

static void buf_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct buffer_head *bh;
	struct gfs2_log_descriptor *ld;
	struct gfs2_bufdata *bd1 = NULL, *bd2;
120
	unsigned int total;
121
	unsigned int offset = BUF_OFFSET;
D
David Teigland 已提交
122 123 124 125 126
	unsigned int limit;
	unsigned int num;
	unsigned n;
	__be64 *ptr;

127
	limit = buf_limit(sdp);
D
David Teigland 已提交
128 129
	/* for 4k blocks, limit = 503 */

130 131
	gfs2_log_lock(sdp);
	total = sdp->sd_log_num_buf;
D
David Teigland 已提交
132 133 134 135 136
	bd1 = bd2 = list_prepare_entry(bd1, &sdp->sd_log_le_buf, bd_le.le_list);
	while(total) {
		num = total;
		if (total > limit)
			num = limit;
137
		gfs2_log_unlock(sdp);
D
David Teigland 已提交
138
		bh = gfs2_log_get_buf(sdp);
139
		gfs2_log_lock(sdp);
D
David Teigland 已提交
140 141 142
		ld = (struct gfs2_log_descriptor *)bh->b_data;
		ptr = (__be64 *)(bh->b_data + offset);
		ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
143 144
		ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
		ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
D
David Teigland 已提交
145 146 147 148 149 150 151
		ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_METADATA);
		ld->ld_length = cpu_to_be32(num + 1);
		ld->ld_data1 = cpu_to_be32(num);
		ld->ld_data2 = cpu_to_be32(0);
		memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));

		n = 0;
152 153
		list_for_each_entry_continue(bd1, &sdp->sd_log_le_buf,
					     bd_le.le_list) {
D
David Teigland 已提交
154 155 156 157 158
			*ptr++ = cpu_to_be64(bd1->bd_bh->b_blocknr);
			if (++n >= num)
				break;
		}

159
		gfs2_log_unlock(sdp);
D
David Teigland 已提交
160 161
		set_buffer_dirty(bh);
		ll_rw_block(WRITE, 1, &bh);
162
		gfs2_log_lock(sdp);
D
David Teigland 已提交
163 164

		n = 0;
165 166
		list_for_each_entry_continue(bd2, &sdp->sd_log_le_buf,
					     bd_le.le_list) {
167
			gfs2_log_unlock(sdp);
D
David Teigland 已提交
168 169 170
			bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
			set_buffer_dirty(bh);
			ll_rw_block(WRITE, 1, &bh);
171
			gfs2_log_lock(sdp);
D
David Teigland 已提交
172 173 174 175
			if (++n >= num)
				break;
		}

176
		BUG_ON(total < num);
D
David Teigland 已提交
177 178
		total -= num;
	}
179
	gfs2_log_unlock(sdp);
D
David Teigland 已提交
180 181 182 183 184 185 186 187 188 189 190 191
}

/**
 * buf_lo_after_commit - Unpin the metadata buffers queued on the log
 * @sdp: the filesystem
 * @ai: passed through to gfs2_unpin() for every buffer
 *
 * Empties sd_log_le_buf, unpinning each buffer, and warns if the buffer
 * counter does not end up at zero.
 */
static void buf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *queue = &sdp->sd_log_le_buf;
	struct list_head *pos;

	while ((pos = queue->next) != queue) {
		struct gfs2_bufdata *entry;

		entry = list_entry(pos, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&entry->bd_le.le_list);
		sdp->sd_log_num_buf--;

		gfs2_unpin(sdp, entry->bd_bh, ai);
	}

	gfs2_assert_warn(sdp, !sdp->sd_log_num_buf);
}

static void buf_lo_before_scan(struct gfs2_jdesc *jd,
A
Al Viro 已提交
198
			       struct gfs2_log_header_host *head, int pass)
D
David Teigland 已提交
199
{
200
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
D
David Teigland 已提交
201 202 203 204 205 206 207 208 209 210 211 212

	if (pass != 0)
		return;

	sdp->sd_found_blocks = 0;
	sdp->sd_replayed_blocks = 0;
}

/**
 * buf_lo_scan_elements - Replay the metadata blocks of one log descriptor
 * @jd: the journal being scanned
 * @start: journal block of the descriptor; the data blocks follow it
 * @ld: the log descriptor
 * @ptr: array of big-endian block numbers, one per logged block
 * @pass: scan pass number; work is done only on pass 1
 *
 * Descriptors that are not of type GFS2_LOG_DESC_METADATA are ignored.
 * Each block that is not revoked is copied from the journal into a
 * buffer obtained with gfs2_meta_new() and marked dirty.
 *
 * Returns: 0 on success, the error from gfs2_replay_read_block(), or
 *          -EIO if a replayed block fails gfs2_meta_check()
 */
static int buf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				struct gfs2_log_descriptor *ld, __be64 *ptr,
				int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int left = be32_to_cpu(ld->ld_data1);
	struct buffer_head *src_bh, *dst_bh;
	u64 blkno;
	int error = 0;

	if (pass != 1)
		return 0;
	if (be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_METADATA)
		return 0;

	/* Step over the descriptor block itself */
	gfs2_replay_incr_blk(sdp, &start);

	for (; left; gfs2_replay_incr_blk(sdp, &start), left--) {
		blkno = be64_to_cpu(*ptr++);

		sdp->sd_found_blocks++;

		if (gfs2_revoke_check(sdp, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &src_bh);
		if (error)
			return error;

		dst_bh = gfs2_meta_new(gl, blkno);
		memcpy(dst_bh->b_data, src_bh->b_data, src_bh->b_size);

		if (!gfs2_meta_check(sdp, dst_bh))
			mark_buffer_dirty(dst_bh);
		else
			error = -EIO;

		brelse(src_bh);
		brelse(dst_bh);

		if (error)
			return error;

		sdp->sd_replayed_blocks++;
	}

	return error;
}

static void buf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
260 261
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
D
David Teigland 已提交
262 263

	if (error) {
S
Steven Whitehouse 已提交
264
		gfs2_meta_sync(ip->i_gl);
D
David Teigland 已提交
265 266 267 268 269
		return;
	}
	if (pass != 1)
		return;

S
Steven Whitehouse 已提交
270
	gfs2_meta_sync(ip->i_gl);
D
David Teigland 已提交
271 272 273 274 275 276 277 278 279

	fs_info(sdp, "jid=%u: Replayed %u of %u blocks\n",
	        jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
}

static void revoke_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_trans *tr;

280
	tr = current->journal_info;
D
David Teigland 已提交
281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304
	tr->tr_touched = 1;
	tr->tr_num_revoke++;

	gfs2_log_lock(sdp);
	sdp->sd_log_num_revoke++;
	list_add(&le->le_list, &sdp->sd_log_le_revoke);
	gfs2_log_unlock(sdp);
}

static void revoke_lo_before_commit(struct gfs2_sbd *sdp)
{
	struct gfs2_log_descriptor *ld;
	struct gfs2_meta_header *mh;
	struct buffer_head *bh;
	unsigned int offset;
	struct list_head *head = &sdp->sd_log_le_revoke;
	struct gfs2_revoke *rv;

	if (!sdp->sd_log_num_revoke)
		return;

	bh = gfs2_log_get_buf(sdp);
	ld = (struct gfs2_log_descriptor *)bh->b_data;
	ld->ld_header.mh_magic = cpu_to_be32(GFS2_MAGIC);
305 306
	ld->ld_header.mh_type = cpu_to_be32(GFS2_METATYPE_LD);
	ld->ld_header.mh_format = cpu_to_be32(GFS2_FORMAT_LD);
D
David Teigland 已提交
307
	ld->ld_type = cpu_to_be32(GFS2_LOG_DESC_REVOKE);
308
	ld->ld_length = cpu_to_be32(gfs2_struct2blk(sdp, sdp->sd_log_num_revoke,
309
						    sizeof(u64)));
D
David Teigland 已提交
310 311 312 313 314 315 316
	ld->ld_data1 = cpu_to_be32(sdp->sd_log_num_revoke);
	ld->ld_data2 = cpu_to_be32(0);
	memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
	offset = sizeof(struct gfs2_log_descriptor);

	while (!list_empty(head)) {
		rv = list_entry(head->next, struct gfs2_revoke, rv_le.le_list);
317
		list_del_init(&rv->rv_le.le_list);
D
David Teigland 已提交
318 319
		sdp->sd_log_num_revoke--;

320
		if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
D
David Teigland 已提交
321 322 323 324 325 326
			set_buffer_dirty(bh);
			ll_rw_block(WRITE, 1, &bh);

			bh = gfs2_log_get_buf(sdp);
			mh = (struct gfs2_meta_header *)bh->b_data;
			mh->mh_magic = cpu_to_be32(GFS2_MAGIC);
327 328
			mh->mh_type = cpu_to_be32(GFS2_METATYPE_LB);
			mh->mh_format = cpu_to_be32(GFS2_FORMAT_LB);
D
David Teigland 已提交
329 330 331 332 333 334
			offset = sizeof(struct gfs2_meta_header);
		}

		*(__be64 *)(bh->b_data + offset) = cpu_to_be64(rv->rv_blkno);
		kfree(rv);

335
		offset += sizeof(u64);
D
David Teigland 已提交
336 337 338 339 340 341 342 343
	}
	gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);

	set_buffer_dirty(bh);
	ll_rw_block(WRITE, 1, &bh);
}

static void revoke_lo_before_scan(struct gfs2_jdesc *jd,
A
Al Viro 已提交
344
				  struct gfs2_log_header_host *head, int pass)
D
David Teigland 已提交
345
{
346
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
D
David Teigland 已提交
347 348 349 350 351 352 353 354 355 356 357 358

	if (pass != 0)
		return;

	sdp->sd_found_revokes = 0;
	sdp->sd_replay_tail = head->lh_tail;
}

/**
 * revoke_lo_scan_elements - Collect the revoke tags of one log descriptor
 * @jd: the journal being scanned
 * @start: journal block at which the descriptor starts
 * @ld: the log descriptor
 * @ptr: unused by this operation; tags are read from the journal blocks
 * @pass: scan pass number; work is done only on pass 0
 *
 * Descriptors that are not of type GFS2_LOG_DESC_REVOKE are ignored.
 * The descriptor block and its continuation blocks are read in turn and
 * every big-endian 64-bit block number found is handed to
 * gfs2_revoke_add().  Tags start after the log descriptor in the first
 * block and after the meta header in continuation blocks.
 *
 * Returns: 0 on success, the error from gfs2_replay_read_block(), or the
 *          negative error from gfs2_revoke_add()
 */
static int revoke_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				   struct gfs2_log_descriptor *ld, __be64 *ptr,
				   int pass)
{
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	unsigned int blks = be32_to_cpu(ld->ld_length);
	unsigned int revokes = be32_to_cpu(ld->ld_data1);
	struct buffer_head *bh;
	unsigned int offset;
	u64 blkno;
	int first = 1;
	int error;

	if (pass != 0 || be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_REVOKE)
		return 0;

	offset = sizeof(struct gfs2_log_descriptor);

	for (; blks; gfs2_replay_incr_blk(sdp, &start), blks--) {
		error = gfs2_replay_read_block(jd, start, &bh);
		if (error)
			return error;

		/* Every block after the descriptor must be a continuation */
		if (!first)
			gfs2_metatype_check(sdp, bh, GFS2_METATYPE_LB);

		while (offset + sizeof(u64) <= sdp->sd_sb.sb_bsize) {
			blkno = be64_to_cpu(*(__be64 *)(bh->b_data + offset));

			error = gfs2_revoke_add(sdp, blkno, start);
			if (error < 0) {
				/* Do not leak the journal buffer on failure */
				brelse(bh);
				return error;
			}
			if (error)
				sdp->sd_found_revokes++;

			if (!--revokes)
				break;
			offset += sizeof(u64);
		}

		brelse(bh);
		offset = sizeof(struct gfs2_meta_header);
		first = 0;
	}

	return 0;
}

static void revoke_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
405
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
D
David Teigland 已提交
406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422

	if (error) {
		gfs2_revoke_clean(sdp);
		return;
	}
	if (pass != 1)
		return;

	fs_info(sdp, "jid=%u: Found %u revoke tags\n",
	        jd->jd_jid, sdp->sd_found_revokes);

	gfs2_revoke_clean(sdp);
}

static void rg_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
	struct gfs2_rgrpd *rgd;
423
	struct gfs2_trans *tr = current->journal_info;
D
David Teigland 已提交
424

425
	tr->tr_touched = 1;
D
David Teigland 已提交
426 427 428 429

	rgd = container_of(le, struct gfs2_rgrpd, rd_le);

	gfs2_log_lock(sdp);
430 431 432 433 434
	if (!list_empty(&le->le_list)){
		gfs2_log_unlock(sdp);
		return;
	}
	gfs2_rgrp_bh_hold(rgd);
D
David Teigland 已提交
435 436
	sdp->sd_log_num_rg++;
	list_add(&le->le_list, &sdp->sd_log_le_rg);
437
	gfs2_log_unlock(sdp);
D
David Teigland 已提交
438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
}

/**
 * rg_lo_after_commit - Release the resource groups queued on the log
 * @sdp: the filesystem
 * @ai: unused by this operation
 *
 * Empties sd_log_le_rg.  For each resource group
 * gfs2_rgrp_repolish_clones() is called and then gfs2_rgrp_bh_put().
 * Warns if the rgrp counter does not end up at zero.
 */
static void rg_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *queue = &sdp->sd_log_le_rg;
	struct list_head *pos;

	while ((pos = queue->next) != queue) {
		struct gfs2_rgrpd *rgrp;

		rgrp = list_entry(pos, struct gfs2_rgrpd, rd_le.le_list);
		list_del_init(&rgrp->rd_le.le_list);
		sdp->sd_log_num_rg--;

		gfs2_rgrp_repolish_clones(rgrp);
		gfs2_rgrp_bh_put(rgrp);
	}

	gfs2_assert_warn(sdp, !sdp->sd_log_num_rg);
}

456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471
/**
 * databuf_lo_add - Add a databuf to the transaction.
 *
 * This is used in two distinct cases:
 * i) In ordered write mode
 *    We put the data buffer on a list so that we can ensure that its
 *    synced to disk at the right time
 * ii) In journaled data mode
 *    We need to journal the data block in the same way as metadata in
 *    the functions above. The difference is that here we have a tag
 *    which is two __be64's being the block number (as per meta data)
 *    and a flag which says whether the data block needs escaping or
 *    not. This means we need a new log entry for each 251 or so data
 *    blocks, which isn't an enormous overhead but twice as much as
 *    for normal metadata blocks.
 */
D
David Teigland 已提交
472 473
static void databuf_lo_add(struct gfs2_sbd *sdp, struct gfs2_log_element *le)
{
474
	struct gfs2_bufdata *bd = container_of(le, struct gfs2_bufdata, bd_le);
475
	struct gfs2_trans *tr = current->journal_info;
476
	struct address_space *mapping = bd->bd_bh->b_page->mapping;
477
	struct gfs2_inode *ip = GFS2_I(mapping->host);
D
David Teigland 已提交
478

479
	gfs2_log_lock(sdp);
480
	if (!list_empty(&bd->bd_list_tr)) {
481
		gfs2_log_unlock(sdp);
482
		return;
483
	}
484
	tr->tr_touched = 1;
485 486 487 488
	if (gfs2_is_jdata(ip)) {
		tr->tr_num_buf++;
		list_add(&bd->bd_list_tr, &tr->tr_list_buf);
	}
489 490 491 492
	gfs2_log_unlock(sdp);
	if (!list_empty(&le->le_list))
		return;

493
	gfs2_trans_add_gl(bd->bd_gl);
494 495 496
	if (gfs2_is_jdata(ip)) {
		gfs2_pin(sdp, bd->bd_bh);
		tr->tr_num_databuf_new++;
497
	}
498
	gfs2_log_lock(sdp);
499 500
	if (gfs2_is_jdata(ip))
		sdp->sd_log_num_jdata++;
501
	sdp->sd_log_num_databuf++;
502
	list_add(&le->le_list, &sdp->sd_log_le_databuf);
D
David Teigland 已提交
503 504 505
	gfs2_log_unlock(sdp);
}

506 507 508 509 510 511 512 513 514 515 516
/**
 * gfs2_check_magic - Test whether a buffer starts with the GFS2 magic number
 * @bh: the buffer to inspect
 *
 * The buffer's page is mapped temporarily and the first 32-bit word of
 * the buffer is compared with the big-endian GFS2_MAGIC.
 *
 * Returns: 1 if the buffer starts with the magic number, 0 otherwise
 */
static int gfs2_check_magic(struct buffer_head *bh)
{
	void *kaddr = kmap_atomic(bh->b_page, KM_USER0);
	const __be32 *first_word = kaddr + bh_offset(bh);
	int is_magic = (*first_word == cpu_to_be32(GFS2_MAGIC)) ? 1 : 0;

	kunmap_atomic(kaddr, KM_USER0);

	return is_magic;
}

/**
 * databuf_lo_before_commit - Scan the data buffers, writing as we go
 *
 * Here we scan through the lists of buffers and make the assumption
 * that any buffer thats been pinned is being journaled, and that
 * any unpinned buffer is an ordered write data buffer and therefore
 * will be written back rather than journaled.
 */
D
David Teigland 已提交
530 531 532
static void databuf_lo_before_commit(struct gfs2_sbd *sdp)
{
	LIST_HEAD(started);
533
	struct gfs2_bufdata *bd1 = NULL, *bd2, *bdt;
R
Russell Cattelan 已提交
534
	struct buffer_head *bh = NULL,*bh1 = NULL;
535 536
	struct gfs2_log_descriptor *ld;
	unsigned int limit;
537
	unsigned int total_dbuf;
538
	unsigned int total_jdata;
539
	unsigned int num, n;
540
	__be64 *ptr = NULL;
D
David Teigland 已提交
541

542
	limit = databuf_limit(sdp);
D
David Teigland 已提交
543

544 545 546 547
	/*
	 * Start writing ordered buffers, write journaled buffers
	 * into the log along with a header
	 */
548
	gfs2_log_lock(sdp);
549
	total_dbuf = sdp->sd_log_num_databuf;
550
	total_jdata = sdp->sd_log_num_jdata;
551 552
	bd2 = bd1 = list_prepare_entry(bd1, &sdp->sd_log_le_databuf,
				       bd_le.le_list);
553 554 555 556 557
	while(total_dbuf) {
		num = total_jdata;
		if (num > limit)
			num = limit;
		n = 0;
558 559 560
		list_for_each_entry_safe_continue(bd1, bdt,
						  &sdp->sd_log_le_databuf,
						  bd_le.le_list) {
R
Russell Cattelan 已提交
561 562 563 564 565
			/* store off the buffer head in a local ptr since
			 * gfs2_bufdata might change when we drop the log lock
			 */
			bh1 = bd1->bd_bh;

566
			/* An ordered write buffer */
R
Russell Cattelan 已提交
567
			if (bh1 && !buffer_pinned(bh1)) {
568 569 570
				list_move(&bd1->bd_le.le_list, &started);
				if (bd1 == bd2) {
					bd2 = NULL;
571 572 573
					bd2 = list_prepare_entry(bd2,
							&sdp->sd_log_le_databuf,
							bd_le.le_list);
574 575
				}
				total_dbuf--;
R
Russell Cattelan 已提交
576 577 578 579
				if (bh1) {
					if (buffer_dirty(bh1)) {
						get_bh(bh1);

580
						gfs2_log_unlock(sdp);
R
Russell Cattelan 已提交
581 582 583 584

						ll_rw_block(SWRITE, 1, &bh1);
						brelse(bh1);

585
						gfs2_log_lock(sdp);
586 587 588 589
					}
					continue;
				}
				continue;
R
Russell Cattelan 已提交
590
			} else if (bh1) { /* A journaled buffer */
591 592 593 594
				int magic;
				gfs2_log_unlock(sdp);
				if (!bh) {
					bh = gfs2_log_get_buf(sdp);
595 596
					ld = (struct gfs2_log_descriptor *)
					     bh->b_data;
597 598
					ptr = (__be64 *)(bh->b_data +
							 DATABUF_OFFSET);
599 600 601
					ld->ld_header.mh_magic =
						cpu_to_be32(GFS2_MAGIC);
					ld->ld_header.mh_type =
602
						cpu_to_be32(GFS2_METATYPE_LD);
603
					ld->ld_header.mh_format =
604
						cpu_to_be32(GFS2_FORMAT_LD);
605 606
					ld->ld_type =
						cpu_to_be32(GFS2_LOG_DESC_JDATA);
607 608 609 610 611
					ld->ld_length = cpu_to_be32(num + 1);
					ld->ld_data1 = cpu_to_be32(num);
					ld->ld_data2 = cpu_to_be32(0);
					memset(ld->ld_reserved, 0, sizeof(ld->ld_reserved));
				}
R
Russell Cattelan 已提交
612 613
				magic = gfs2_check_magic(bh1);
				*ptr++ = cpu_to_be64(bh1->b_blocknr);
614
				*ptr++ = cpu_to_be64((__u64)magic);
R
Russell Cattelan 已提交
615
				clear_buffer_escaped(bh1);
616
				if (unlikely(magic != 0))
R
Russell Cattelan 已提交
617
					set_buffer_escaped(bh1);
618
				gfs2_log_lock(sdp);
619
				if (++n >= num)
620
					break;
R
Russell Cattelan 已提交
621
			} else if (!bh1) {
622 623 624 625 626 627 628 629 630 631
				total_dbuf--;
				sdp->sd_log_num_databuf--;
				list_del_init(&bd1->bd_le.le_list);
				if (bd1 == bd2) {
					bd2 = NULL;
					bd2 = list_prepare_entry(bd2,
						&sdp->sd_log_le_databuf,
						bd_le.le_list);
                                }
				kmem_cache_free(gfs2_bufdata_cachep, bd1);
632 633
			}
		}
634
		gfs2_log_unlock(sdp);
D
David Teigland 已提交
635
		if (bh) {
636 637 638
			set_buffer_dirty(bh);
			ll_rw_block(WRITE, 1, &bh);
			bh = NULL;
639
			ptr = NULL;
640 641
		}
		n = 0;
642
		gfs2_log_lock(sdp);
643 644
		list_for_each_entry_continue(bd2, &sdp->sd_log_le_databuf,
					     bd_le.le_list) {
645 646 647
			if (!bd2->bd_bh)
				continue;
			/* copy buffer if it needs escaping */
648
			gfs2_log_unlock(sdp);
649 650 651 652 653
			if (unlikely(buffer_escaped(bd2->bd_bh))) {
				void *kaddr;
				struct page *page = bd2->bd_bh->b_page;
				bh = gfs2_log_get_buf(sdp);
				kaddr = kmap_atomic(page, KM_USER0);
654 655 656
				memcpy(bh->b_data,
				       kaddr + bh_offset(bd2->bd_bh),
				       sdp->sd_sb.sb_bsize);
657
				kunmap_atomic(kaddr, KM_USER0);
658 659 660
				*(__be32 *)bh->b_data = 0;
			} else {
				bh = gfs2_log_fake_buf(sdp, bd2->bd_bh);
D
David Teigland 已提交
661
			}
662 663
			set_buffer_dirty(bh);
			ll_rw_block(WRITE, 1, &bh);
664
			gfs2_log_lock(sdp);
665 666 667 668
			if (++n >= num)
				break;
		}
		bh = NULL;
669
		BUG_ON(total_dbuf < num);
670 671
		total_dbuf -= num;
		total_jdata -= num;
D
David Teigland 已提交
672
	}
673 674
	gfs2_log_unlock(sdp);

675
	/* Wait on all ordered buffers */
D
David Teigland 已提交
676
	while (!list_empty(&started)) {
677
		gfs2_log_lock(sdp);
678 679
		bd1 = list_entry(started.next, struct gfs2_bufdata,
				 bd_le.le_list);
680
		list_del_init(&bd1->bd_le.le_list);
D
David Teigland 已提交
681
		sdp->sd_log_num_databuf--;
682
		bh = bd1->bd_bh;
D
David Teigland 已提交
683
		if (bh) {
684
			bh->b_private = NULL;
685
			get_bh(bh);
D
David Teigland 已提交
686 687 688 689 690 691
			gfs2_log_unlock(sdp);
			wait_on_buffer(bh);
			brelse(bh);
		} else
			gfs2_log_unlock(sdp);

692
		kmem_cache_free(gfs2_bufdata_cachep, bd1);
D
David Teigland 已提交
693 694
	}

695 696 697 698 699 700 701 702
	/* We've removed all the ordered write bufs here, so only jdata left */
	gfs2_assert_warn(sdp, sdp->sd_log_num_databuf == sdp->sd_log_num_jdata);
}

/**
 * databuf_lo_scan_elements - Replay the journaled data of one log descriptor
 * @jd: the journal being scanned
 * @start: journal block of the descriptor; the data blocks follow it
 * @ld: the log descriptor
 * @ptr: array of big-endian (block number, escape flag) pairs
 * @pass: scan pass number; work is done only on pass 1
 *
 * Descriptors that are not of type GFS2_LOG_DESC_JDATA are ignored.
 * Each block that is not revoked is copied from the journal into a
 * buffer obtained with gfs2_meta_new().  If its escape flag is set, the
 * first 32-bit word is restored to GFS2_MAGIC.  The buffer is then
 * marked dirty.
 *
 * Returns: 0 on success or the error from gfs2_replay_read_block()
 */
static int databuf_lo_scan_elements(struct gfs2_jdesc *jd, unsigned int start,
				    struct gfs2_log_descriptor *ld,
				    __be64 *ptr, int pass)
{
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
	struct gfs2_glock *gl = ip->i_gl;
	unsigned int left = be32_to_cpu(ld->ld_data1);
	struct buffer_head *src_bh, *dst_bh;
	u64 blkno;
	u64 escaped;
	int error = 0;

	if (pass != 1)
		return 0;
	if (be32_to_cpu(ld->ld_type) != GFS2_LOG_DESC_JDATA)
		return 0;

	/* Step over the descriptor block itself */
	gfs2_replay_incr_blk(sdp, &start);

	for (; left; gfs2_replay_incr_blk(sdp, &start), left--) {
		blkno = be64_to_cpu(*ptr++);
		escaped = be64_to_cpu(*ptr++);

		sdp->sd_found_blocks++;

		if (gfs2_revoke_check(sdp, blkno, start))
			continue;

		error = gfs2_replay_read_block(jd, start, &src_bh);
		if (error)
			return error;

		dst_bh = gfs2_meta_new(gl, blkno);
		memcpy(dst_bh->b_data, src_bh->b_data, src_bh->b_size);

		/* Unescape */
		if (escaped)
			*(__be32 *)dst_bh->b_data = cpu_to_be32(GFS2_MAGIC);
		mark_buffer_dirty(dst_bh);

		brelse(src_bh);
		brelse(dst_bh);

		sdp->sd_replayed_blocks++;
	}

	return error;
}

/* FIXME: sort out accounting for log blocks etc. */

static void databuf_lo_after_scan(struct gfs2_jdesc *jd, int error, int pass)
{
754 755
	struct gfs2_inode *ip = GFS2_I(jd->jd_inode);
	struct gfs2_sbd *sdp = GFS2_SB(jd->jd_inode);
756 757

	if (error) {
S
Steven Whitehouse 已提交
758
		gfs2_meta_sync(ip->i_gl);
759 760 761 762 763 764
		return;
	}
	if (pass != 1)
		return;

	/* data sync? */
S
Steven Whitehouse 已提交
765
	gfs2_meta_sync(ip->i_gl);
766 767 768 769 770 771 772 773 774 775 776 777

	fs_info(sdp, "jid=%u: Replayed %u of %u data blocks\n",
		jd->jd_jid, sdp->sd_replayed_blocks, sdp->sd_found_blocks);
}

/**
 * databuf_lo_after_commit - Unpin the data buffers left on the log list
 * @sdp: the filesystem
 * @ai: passed through to gfs2_unpin() for every buffer
 *
 * Empties sd_log_le_databuf, decrementing both the databuf and jdata
 * counters for each entry and unpinning its buffer, then warns if
 * either counter is non-zero.
 */
static void databuf_lo_after_commit(struct gfs2_sbd *sdp, struct gfs2_ail *ai)
{
	struct list_head *queue = &sdp->sd_log_le_databuf;
	struct list_head *pos;

	while ((pos = queue->next) != queue) {
		struct gfs2_bufdata *entry;

		entry = list_entry(pos, struct gfs2_bufdata, bd_le.le_list);
		list_del_init(&entry->bd_le.le_list);
		sdp->sd_log_num_databuf--;
		sdp->sd_log_num_jdata--;
		gfs2_unpin(sdp, entry->bd_bh, ai);
	}

	gfs2_assert_warn(sdp, !sdp->sd_log_num_databuf);
	gfs2_assert_warn(sdp, !sdp->sd_log_num_jdata);
}

787

788
const struct gfs2_log_operations gfs2_glock_lops = {
D
David Teigland 已提交
789 790
	.lo_add = glock_lo_add,
	.lo_after_commit = glock_lo_after_commit,
791
	.lo_name = "glock",
D
David Teigland 已提交
792 793
};

794
const struct gfs2_log_operations gfs2_buf_lops = {
D
David Teigland 已提交
795 796 797 798 799 800 801
	.lo_add = buf_lo_add,
	.lo_incore_commit = buf_lo_incore_commit,
	.lo_before_commit = buf_lo_before_commit,
	.lo_after_commit = buf_lo_after_commit,
	.lo_before_scan = buf_lo_before_scan,
	.lo_scan_elements = buf_lo_scan_elements,
	.lo_after_scan = buf_lo_after_scan,
802
	.lo_name = "buf",
D
David Teigland 已提交
803 804
};

805
const struct gfs2_log_operations gfs2_revoke_lops = {
D
David Teigland 已提交
806 807 808 809 810
	.lo_add = revoke_lo_add,
	.lo_before_commit = revoke_lo_before_commit,
	.lo_before_scan = revoke_lo_before_scan,
	.lo_scan_elements = revoke_lo_scan_elements,
	.lo_after_scan = revoke_lo_after_scan,
811
	.lo_name = "revoke",
D
David Teigland 已提交
812 813
};

814
const struct gfs2_log_operations gfs2_rg_lops = {
D
David Teigland 已提交
815 816
	.lo_add = rg_lo_add,
	.lo_after_commit = rg_lo_after_commit,
817
	.lo_name = "rg",
D
David Teigland 已提交
818 819
};

820
const struct gfs2_log_operations gfs2_databuf_lops = {
D
David Teigland 已提交
821
	.lo_add = databuf_lo_add,
822
	.lo_incore_commit = buf_lo_incore_commit,
D
David Teigland 已提交
823
	.lo_before_commit = databuf_lo_before_commit,
824 825 826
	.lo_after_commit = databuf_lo_after_commit,
	.lo_scan_elements = databuf_lo_scan_elements,
	.lo_after_scan = databuf_lo_after_scan,
827
	.lo_name = "databuf",
D
David Teigland 已提交
828 829
};

830
const struct gfs2_log_operations *gfs2_log_ops[] = {
D
David Teigland 已提交
831 832 833 834 835
	&gfs2_glock_lops,
	&gfs2_buf_lops,
	&gfs2_revoke_lops,
	&gfs2_rg_lops,
	&gfs2_databuf_lops,
836
	NULL,
D
David Teigland 已提交
837 838
};