bmap.c 28.3 KB
Newer Older
D
David Teigland 已提交
1 2
/*
 * Copyright (C) Sistina Software, Inc.  1997-2003 All rights reserved.
3
 * Copyright (C) 2004-2006 Red Hat, Inc.  All rights reserved.
D
David Teigland 已提交
4 5 6
 *
 * This copyrighted material is made available to anyone wishing to use,
 * modify, copy, or redistribute it subject to the terms and conditions
7
 * of the GNU General Public License version 2.
D
David Teigland 已提交
8 9 10 11 12 13
 */

#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/completion.h>
#include <linux/buffer_head.h>
14
#include <linux/gfs2_ondisk.h>
15
#include <linux/crc32.h>
16
#include <linux/lm_interface.h>
D
David Teigland 已提交
17 18

#include "gfs2.h"
19
#include "incore.h"
D
David Teigland 已提交
20 21 22 23 24 25 26
#include "bmap.h"
#include "glock.h"
#include "inode.h"
#include "meta_io.h"
#include "quota.h"
#include "rgrp.h"
#include "trans.h"
27
#include "dir.h"
28
#include "util.h"
S
Steven Whitehouse 已提交
29
#include "ops_address.h"
D
David Teigland 已提交
30 31 32 33 34 35 36 37 38 39

/* Path through an inode's metadata tree: mp_list[h] is the index of the
 * pointer to follow in the block at height h (0 = dinode).
 *
 * This doesn't need to be that large as max 64 bit pointers in a 4k
 * block is 512, so __u16 is fine for that. It saves stack space to
 * keep it small.
 */
struct metapath {
	__u16 mp_list[GFS2_MAX_META_HEIGHT];
};

/*
 * Callback invoked by recursive_scan() once for every pointer block it
 * visits.  @top and @bottom delimit the pointers to be considered inside
 * @bh, @height is the tree height of @bh (0 = dinode) and @data is passed
 * through untouched from the recursive_scan() caller.  A non-zero return
 * value stops the scan and is returned to that caller.
 */
typedef int (*block_call_t) (struct gfs2_inode *ip,
			     struct buffer_head *dibh,
			     struct buffer_head *bh,
			     __be64 *top,
			     __be64 *bottom,
			     unsigned int height,
			     void *data);

/* State handed to do_strip() through recursive_scan()'s data argument. */
struct strip_mine {
	/* non-zero until do_strip() has skipped the first pointer it keeps */
	int sm_first;
	/* the only tree height at which do_strip() does any work */
	unsigned int sm_height;
};

49 50 51 52 53 54 55 56 57 58 59
/**
 * gfs2_unstuffer_page - unstuff a stuffed inode into a block cached by a page
 * @ip: the inode
 * @dibh: the dinode buffer
 * @block: the block number that was allocated
 * @private: any locked page held by the caller process
 *
 * Returns: errno
 */

static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
60
			       u64 block, struct page *page)
61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
{
	struct inode *inode = &ip->i_inode;
	struct buffer_head *bh;
	int release = 0;

	if (!page || page->index) {
		page = grab_cache_page(inode->i_mapping, 0);
		if (!page)
			return -ENOMEM;
		release = 1;
	}

	if (!PageUptodate(page)) {
		void *kaddr = kmap(page);

		memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode),
		       ip->i_di.di_size);
		memset(kaddr + ip->i_di.di_size, 0,
		       PAGE_CACHE_SIZE - ip->i_di.di_size);
		kunmap(page);

		SetPageUptodate(page);
	}

	if (!page_has_buffers(page))
		create_empty_buffers(page, 1 << inode->i_blkbits,
				     (1 << BH_Uptodate));

	bh = page_buffers(page);

	if (!buffer_mapped(bh))
		map_bh(bh, inode->i_sb, block);

	set_buffer_uptodate(bh);
95 96
	if (!gfs2_is_jdata(ip))
		mark_buffer_dirty(bh);
97
	if (!gfs2_is_writeback(ip))
98
		gfs2_trans_add_bh(ip->i_gl, bh, 0);
99 100 101 102 103 104 105 106 107

	if (release) {
		unlock_page(page);
		page_cache_release(page);
	}

	return 0;
}

D
David Teigland 已提交
108 109 110 111 112 113 114 115 116 117 118 119
/**
 * gfs2_unstuff_dinode - Unstuff a dinode when the data has grown too big
 * @ip: The GFS2 inode to unstuff
 * @unstuffer: the routine that handles unstuffing a non-zero length file
 * @private: private data for the unstuffer
 *
 * This routine unstuffs a dinode and returns it to a "normal" state such
 * that the height can be grown in the traditional way.
 *
 * Returns: errno
 */

120
int gfs2_unstuff_dinode(struct gfs2_inode *ip, struct page *page)
D
David Teigland 已提交
121 122
{
	struct buffer_head *bh, *dibh;
123
	struct gfs2_dinode *di;
124
	u64 block = 0;
125
	int isdir = gfs2_is_dir(ip);
D
David Teigland 已提交
126 127 128 129 130 131 132
	int error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;
133

D
David Teigland 已提交
134 135 136 137
	if (ip->i_di.di_size) {
		/* Get a free block, fill it with the stuffed data,
		   and write it out to disk */

138
		if (isdir) {
D
David Teigland 已提交
139 140
			block = gfs2_alloc_meta(ip);

141
			error = gfs2_dir_get_new_buffer(ip, block, &bh);
D
David Teigland 已提交
142 143
			if (error)
				goto out_brelse;
144
			gfs2_buffer_copy_tail(bh, sizeof(struct gfs2_meta_header),
D
David Teigland 已提交
145 146 147 148 149
					      dibh, sizeof(struct gfs2_dinode));
			brelse(bh);
		} else {
			block = gfs2_alloc_data(ip);

150
			error = gfs2_unstuffer_page(ip, dibh, block, page);
D
David Teigland 已提交
151 152 153 154 155 156 157
			if (error)
				goto out_brelse;
		}
	}

	/*  Set up the pointer to the new block  */

158
	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
159
	di = (struct gfs2_dinode *)dibh->b_data;
D
David Teigland 已提交
160 161 162
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));

	if (ip->i_di.di_size) {
163
		*(__be64 *)(di + 1) = cpu_to_be64(block);
D
David Teigland 已提交
164
		ip->i_di.di_blocks++;
165
		gfs2_set_inode_blocks(&ip->i_inode);
166
		di->di_blocks = cpu_to_be64(ip->i_di.di_blocks);
D
David Teigland 已提交
167 168
	}

169
	ip->i_height = 1;
170
	di->di_height = cpu_to_be16(1);
D
David Teigland 已提交
171

172
out_brelse:
D
David Teigland 已提交
173
	brelse(dibh);
174
out:
D
David Teigland 已提交
175 176 177 178 179 180 181 182 183 184 185 186 187
	up_write(&ip->i_rw_mutex);
	return error;
}

/**
 * build_height - Build a metadata tree of the requested height
 * @ip: The GFS2 inode
 * @height: The height to build to
 *
 *
 * Returns: errno
 */

188
static int build_height(struct inode *inode, unsigned height)
D
David Teigland 已提交
189
{
190
	struct gfs2_inode *ip = GFS2_I(inode);
191
	unsigned new_height = height - ip->i_height;
192 193
	struct buffer_head *dibh;
	struct buffer_head *blocks[GFS2_MAX_META_HEIGHT];
194
	struct gfs2_dinode *di;
D
David Teigland 已提交
195
	int error;
A
Al Viro 已提交
196
	__be64 *bp;
197 198
	u64 bn;
	unsigned n;
D
David Teigland 已提交
199

200
	if (height <= ip->i_height)
201
		return 0;
D
David Teigland 已提交
202

203 204 205
	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		return error;
D
David Teigland 已提交
206

207 208 209 210 211
	for(n = 0; n < new_height; n++) {
		bn = gfs2_alloc_meta(ip);
		blocks[n] = gfs2_meta_new(ip->i_gl, bn);
		gfs2_trans_add_bh(ip->i_gl, blocks[n], 1);
	}
212

213 214 215 216 217
	n = 0;
	bn = blocks[0]->b_blocknr;
	if (new_height > 1) {
		for(; n < new_height-1; n++) {
			gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN,
D
David Teigland 已提交
218
					  GFS2_FORMAT_IN);
219 220
			gfs2_buffer_clear_tail(blocks[n],
					       sizeof(struct gfs2_meta_header));
A
Al Viro 已提交
221
			bp = (__be64 *)(blocks[n]->b_data +
222 223 224 225
				     sizeof(struct gfs2_meta_header));
			*bp = cpu_to_be64(blocks[n+1]->b_blocknr);
			brelse(blocks[n]);
			blocks[n] = NULL;
D
David Teigland 已提交
226 227
		}
	}
228 229 230 231 232
	gfs2_metatype_set(blocks[n], GFS2_METATYPE_IN, GFS2_FORMAT_IN);
	gfs2_buffer_copy_tail(blocks[n], sizeof(struct gfs2_meta_header),
			      dibh, sizeof(struct gfs2_dinode));
	brelse(blocks[n]);
	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
233
	di = (struct gfs2_dinode *)dibh->b_data;
234
	gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
235
	*(__be64 *)(di + 1) = cpu_to_be64(bn);
236
	ip->i_height += new_height;
237
	ip->i_di.di_blocks += new_height;
238
	gfs2_set_inode_blocks(&ip->i_inode);
239
	di->di_height = cpu_to_be16(ip->i_height);
240
	di->di_blocks = cpu_to_be64(ip->i_di.di_blocks);
241 242
	brelse(dibh);
	return error;
D
David Teigland 已提交
243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303
}

/**
 * find_metapath - Find path through the metadata tree
 * @ip: The inode pointer
 * @mp: The metapath to return the result in
 * @block: The disk block to look up
 *
 *   This routine returns a struct metapath structure that defines a path
 *   through the metadata of inode "ip" to get to block "block".
 *
 *   Example:
 *   Given:  "ip" is a height 3 file, "offset" is 101342453, and this is a
 *   filesystem with a blocksize of 4096.
 *
 *   find_metapath() would fill in the struct metapath so that
 *   mp_list[0] = 0, mp_list[1] = 48, and mp_list[2] = 165.
 *
 *   That means that in order to get to the block containing the byte at
 *   offset 101342453, we would load the indirect block pointed to by pointer
 *   0 in the dinode.  We would then load the indirect block pointed to by
 *   pointer 48 in that indirect block.  We would then load the data block
 *   pointed to by pointer 165 in that indirect block.
 *
 *             ----------------------------------------
 *             | Dinode |                             |
 *             |        |                            4|
 *             |        |0 1 2 3 4 5                 9|
 *             |        |                            6|
 *             ----------------------------------------
 *                       |
 *                       |
 *                       V
 *             ----------------------------------------
 *             | Indirect Block                       |
 *             |                                     5|
 *             |            4 4 4 4 4 5 5            1|
 *             |0           5 6 7 8 9 0 1            2|
 *             ----------------------------------------
 *                                |
 *                                |
 *                                V
 *             ----------------------------------------
 *             | Indirect Block                       |
 *             |                         1 1 1 1 1   5|
 *             |                         6 6 6 6 6   1|
 *             |0                        3 4 5 6 7   2|
 *             ----------------------------------------
 *                                           |
 *                                           |
 *                                           V
 *             ----------------------------------------
 *             | Data block containing offset         |
 *             |            101342453                 |
 *             |                                      |
 *             |                                      |
 *             ----------------------------------------
 *
 */

304
static void find_metapath(struct gfs2_inode *ip, u64 block,
			  struct metapath *mp)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	unsigned int level = ip->i_height;
	u64 remaining = block;

	/*
	 * Work from the deepest level up to the dinode: do_div() leaves the
	 * quotient in "remaining" and hands back the remainder, which is
	 * the pointer index to use at this level.
	 */
	while (level--)
		mp->mp_list[level] = do_div(remaining, sdp->sd_inptrs);
}

/**
 * metapointer - Return pointer to start of metadata in a buffer
 * @bh: The buffer
 * @height: The metadata height (0 = dinode)
 * @mp: The metapath
 *
 * Return a pointer to the block number of the next height of the metadata
 * tree given a buffer containing the pointer to the current height of the
 * metadata tree.
 */

A
Al Viro 已提交
327
static inline __be64 *metapointer(struct buffer_head *bh, int *boundary,
S
Steven Whitehouse 已提交
328
			       unsigned int height, const struct metapath *mp)
D
David Teigland 已提交
329 330 331
{
	unsigned int head_size = (height > 0) ?
		sizeof(struct gfs2_meta_header) : sizeof(struct gfs2_dinode);
A
Al Viro 已提交
332
	__be64 *ptr;
S
Steven Whitehouse 已提交
333
	*boundary = 0;
A
Al Viro 已提交
334 335
	ptr = ((__be64 *)(bh->b_data + head_size)) + mp->mp_list[height];
	if (ptr + 1 == (__be64 *)(bh->b_data + bh->b_size))
S
Steven Whitehouse 已提交
336 337
		*boundary = 1;
	return ptr;
D
David Teigland 已提交
338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
}

/**
 * lookup_block - Get the next metadata block in metadata tree
 * @ip: The GFS2 inode
 * @bh: Buffer containing the pointers to metadata blocks
 * @height: The height of the tree (0 = dinode)
 * @mp: The metapath
 * @create: Non-zero if we may create a new meatdata block
 * @new: Used to indicate if we did create a new metadata block
 * @block: the returned disk block number
 *
 * Given a metatree, complete to a particular height, checks to see if the next
 * height of the tree exists. If not the next height of the tree is created.
 * The block number of the next height of the metadata tree is returned.
 *
 */

S
Steven Whitehouse 已提交
356 357
static int lookup_block(struct gfs2_inode *ip, struct buffer_head *bh,
			unsigned int height, struct metapath *mp, int create,
358
			int *new, u64 *block)
D
David Teigland 已提交
359
{
S
Steven Whitehouse 已提交
360
	int boundary;
A
Al Viro 已提交
361
	__be64 *ptr = metapointer(bh, &boundary, height, mp);
D
David Teigland 已提交
362 363 364

	if (*ptr) {
		*block = be64_to_cpu(*ptr);
S
Steven Whitehouse 已提交
365
		return boundary;
D
David Teigland 已提交
366 367 368 369 370
	}

	*block = 0;

	if (!create)
S
Steven Whitehouse 已提交
371
		return 0;
D
David Teigland 已提交
372

373
	if (height == ip->i_height - 1 && !gfs2_is_dir(ip))
D
David Teigland 已提交
374 375 376 377
		*block = gfs2_alloc_data(ip);
	else
		*block = gfs2_alloc_meta(ip);

378
	gfs2_trans_add_bh(ip->i_gl, bh, 1);
D
David Teigland 已提交
379 380 381

	*ptr = cpu_to_be64(*block);
	ip->i_di.di_blocks++;
382
	gfs2_set_inode_blocks(&ip->i_inode);
D
David Teigland 已提交
383 384

	*new = 1;
S
Steven Whitehouse 已提交
385
	return 0;
D
David Teigland 已提交
386 387
}

388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405
/* Take i_rw_mutex for writing if @create is set, for reading otherwise. */
static inline void bmap_lock(struct inode *inode, int create)
{
	struct gfs2_inode *ip = GFS2_I(inode);

	if (!create) {
		down_read(&ip->i_rw_mutex);
		return;
	}
	down_write(&ip->i_rw_mutex);
}

/* Undo bmap_lock(); @create must match the value given when locking. */
static inline void bmap_unlock(struct inode *inode, int create)
{
	struct gfs2_inode *ip = GFS2_I(inode);

	if (!create) {
		up_read(&ip->i_rw_mutex);
		return;
	}
	up_write(&ip->i_rw_mutex);
}

D
David Teigland 已提交
406
/**
407
 * gfs2_block_map - Map a block from an inode to a disk block
S
Steven Whitehouse 已提交
408
 * @inode: The inode
D
David Teigland 已提交
409
 * @lblock: The logical block number
410
 * @bh_map: The bh to be mapped
D
David Teigland 已提交
411 412 413 414 415 416 417
 *
 * Find the block number on the current device which corresponds to an
 * inode's block. If the block had to be created, "new" will be set.
 *
 * Returns: errno
 */

418 419
int gfs2_block_map(struct inode *inode, sector_t lblock,
		   struct buffer_head *bh_map, int create)
D
David Teigland 已提交
420
{
421 422
	struct gfs2_inode *ip = GFS2_I(inode);
	struct gfs2_sbd *sdp = GFS2_SB(inode);
D
David Teigland 已提交
423
	struct buffer_head *bh;
424
	unsigned int bsize = sdp->sd_sb.sb_bsize;
D
David Teigland 已提交
425 426 427
	unsigned int end_of_metadata;
	unsigned int x;
	int error = 0;
428 429 430
	int new = 0;
	u64 dblock = 0;
	int boundary;
431
	unsigned int maxlen = bh_map->b_size >> inode->i_blkbits;
432 433
	struct metapath mp;
	u64 size;
434
	struct buffer_head *dibh = NULL;
435
	const u64 *arr = sdp->sd_heightsize;
S
Steven Whitehouse 已提交
436 437
	BUG_ON(maxlen == 0);

D
David Teigland 已提交
438
	if (gfs2_assert_warn(sdp, !gfs2_is_stuffed(ip)))
439
		return 0;
D
David Teigland 已提交
440

441 442 443 444
	bmap_lock(inode, create);
	clear_buffer_mapped(bh_map);
	clear_buffer_new(bh_map);
	clear_buffer_boundary(bh_map);
445 446 447 448
	if (gfs2_is_dir(ip)) {
		bsize = sdp->sd_jbsize;
		arr = sdp->sd_jheightsize;
	}
449 450
	size = (lblock + 1) * bsize;

451 452 453 454 455 456 457 458 459
	if (size > arr[ip->i_height]) {
		u8 height = ip->i_height;
		if (!create)
			goto out_ok;
		while (size > arr[height])
			height++;
		error = build_height(inode, height);
		if (error)
			goto out_fail;
D
David Teigland 已提交
460 461
	}

462
	find_metapath(ip, lblock, &mp);
463
	end_of_metadata = ip->i_height - 1;
D
David Teigland 已提交
464 465
	error = gfs2_meta_inode_buffer(ip, &bh);
	if (error)
466
		goto out_fail;
467 468
	dibh = bh;
	get_bh(dibh);
D
David Teigland 已提交
469 470

	for (x = 0; x < end_of_metadata; x++) {
471
		lookup_block(ip, bh, x, &mp, create, &new, &dblock);
D
David Teigland 已提交
472
		brelse(bh);
473
		if (!dblock)
474
			goto out_ok;
D
David Teigland 已提交
475

476
		error = gfs2_meta_indirect_buffer(ip, x+1, dblock, new, &bh);
D
David Teigland 已提交
477
		if (error)
478
			goto out_fail;
D
David Teigland 已提交
479 480
	}

481
	boundary = lookup_block(ip, bh, end_of_metadata, &mp, create, &new, &dblock);
482 483 484
	if (dblock) {
		map_bh(bh_map, inode->i_sb, dblock);
		if (boundary)
485
			set_buffer_boundary(bh_map);
486
		if (new) {
487 488
			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
			gfs2_dinode_out(ip, dibh->b_data);
489 490 491 492 493 494
			set_buffer_new(bh_map);
			goto out_brelse;
		}
		while(--maxlen && !buffer_boundary(bh_map)) {
			u64 eblock;

495 496
			mp.mp_list[end_of_metadata]++;
			boundary = lookup_block(ip, bh, end_of_metadata, &mp, 0, &new, &eblock);
497 498
			if (eblock != ++dblock)
				break;
499
			bh_map->b_size += (1 << inode->i_blkbits);
500 501
			if (boundary)
				set_buffer_boundary(bh_map);
D
David Teigland 已提交
502 503
		}
	}
504 505
out_brelse:
	brelse(bh);
506 507 508
out_ok:
	error = 0;
out_fail:
509 510
	if (dibh)
		brelse(dibh);
S
Steven Whitehouse 已提交
511
	bmap_unlock(inode, create);
512
	return error;
S
Steven Whitehouse 已提交
513 514
}

515 516 517
/*
 * Deprecated: do not use in new code
 */
S
Steven Whitehouse 已提交
518 519
int gfs2_extent_map(struct inode *inode, u64 lblock, int *new, u64 *dblock, unsigned *extlen)
{
520
	struct buffer_head bh = { .b_state = 0, .b_blocknr = 0 };
521
	int ret;
S
Steven Whitehouse 已提交
522 523 524 525 526 527
	int create = *new;

	BUG_ON(!extlen);
	BUG_ON(!dblock);
	BUG_ON(!new);

528
	bh.b_size = 1 << (inode->i_blkbits + 5);
529
	ret = gfs2_block_map(inode, lblock, &bh, create);
530 531 532 533 534 535 536
	*extlen = bh.b_size >> inode->i_blkbits;
	*dblock = bh.b_blocknr;
	if (buffer_new(&bh))
		*new = 1;
	else
		*new = 0;
	return ret;
D
David Teigland 已提交
537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557
}

/**
 * recursive_scan - recursively scan through the end of a file
 * @ip: the inode
 * @dibh: the dinode buffer
 * @mp: the path through the metadata to the point to start
 * @height: the height the recursion is at
 * @block: the indirect block to look at
 * @first: 1 if this is the first block
 * @bc: the call to make for each piece of metadata
 * @data: data opaque to this function to pass to @bc
 *
 * When this is first called @height and @block should be zero and
 * @first should be 1.
 *
 * Returns: errno
 */

static int recursive_scan(struct gfs2_inode *ip, struct buffer_head *dibh,
			  struct metapath *mp, unsigned int height,
558
			  u64 block, int first, block_call_t bc,
D
David Teigland 已提交
559 560
			  void *data)
{
561
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
D
David Teigland 已提交
562
	struct buffer_head *bh = NULL;
A
Al Viro 已提交
563
	__be64 *top, *bottom;
564
	u64 bn;
D
David Teigland 已提交
565 566 567 568 569 570 571 572 573
	int error;
	int mh_size = sizeof(struct gfs2_meta_header);

	if (!height) {
		error = gfs2_meta_inode_buffer(ip, &bh);
		if (error)
			return error;
		dibh = bh;

A
Al Viro 已提交
574 575
		top = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + mp->mp_list[0];
		bottom = (__be64 *)(bh->b_data + sizeof(struct gfs2_dinode)) + sdp->sd_diptrs;
D
David Teigland 已提交
576 577 578 579 580
	} else {
		error = gfs2_meta_indirect_buffer(ip, height, block, 0, &bh);
		if (error)
			return error;

A
Al Viro 已提交
581
		top = (__be64 *)(bh->b_data + mh_size) +
J
Jan Engelhardt 已提交
582
				  (first ? mp->mp_list[height] : 0);
D
David Teigland 已提交
583

A
Al Viro 已提交
584
		bottom = (__be64 *)(bh->b_data + mh_size) + sdp->sd_inptrs;
D
David Teigland 已提交
585 586 587 588 589 590
	}

	error = bc(ip, dibh, bh, top, bottom, height, data);
	if (error)
		goto out;

591
	if (height < ip->i_height - 1)
D
David Teigland 已提交
592 593 594 595 596 597 598 599 600 601 602 603
		for (; top < bottom; top++, first = 0) {
			if (!*top)
				continue;

			bn = be64_to_cpu(*top);

			error = recursive_scan(ip, dibh, mp, height + 1, bn,
					       first, bc, data);
			if (error)
				break;
		}

604
out:
D
David Teigland 已提交
605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622
	brelse(bh);
	return error;
}

/**
 * do_strip - Look for a layer a particular layer of the file and strip it off
 * @ip: the inode
 * @dibh: the dinode buffer
 * @bh: A buffer of pointers
 * @top: The first pointer in the buffer
 * @bottom: One more than the last pointer
 * @height: the height this buffer is at
 * @data: a pointer to a struct strip_mine
 *
 * Returns: errno
 */

static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh,
A
Al Viro 已提交
623
		    struct buffer_head *bh, __be64 *top, __be64 *bottom,
D
David Teigland 已提交
624 625
		    unsigned int height, void *data)
{
626 627
	struct strip_mine *sm = data;
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
D
David Teigland 已提交
628
	struct gfs2_rgrp_list rlist;
629 630
	u64 bn, bstart;
	u32 blen;
A
Al Viro 已提交
631
	__be64 *p;
D
David Teigland 已提交
632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648
	unsigned int rg_blocks = 0;
	int metadata;
	unsigned int revokes = 0;
	int x;
	int error;

	if (!*top)
		sm->sm_first = 0;

	if (height != sm->sm_height)
		return 0;

	if (sm->sm_first) {
		top++;
		sm->sm_first = 0;
	}

649
	metadata = (height != ip->i_height - 1);
D
David Teigland 已提交
650 651 652
	if (metadata)
		revokes = (height) ? sdp->sd_inptrs : sdp->sd_diptrs;

653
	error = gfs2_rindex_hold(sdp, &ip->i_alloc->al_ri_gh);
D
David Teigland 已提交
654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686
	if (error)
		return error;

	memset(&rlist, 0, sizeof(struct gfs2_rgrp_list));
	bstart = 0;
	blen = 0;

	for (p = top; p < bottom; p++) {
		if (!*p)
			continue;

		bn = be64_to_cpu(*p);

		if (bstart + blen == bn)
			blen++;
		else {
			if (bstart)
				gfs2_rlist_add(sdp, &rlist, bstart);

			bstart = bn;
			blen = 1;
		}
	}

	if (bstart)
		gfs2_rlist_add(sdp, &rlist, bstart);
	else
		goto out; /* Nothing to do */

	gfs2_rlist_alloc(&rlist, LM_ST_EXCLUSIVE, 0);

	for (x = 0; x < rlist.rl_rgrps; x++) {
		struct gfs2_rgrpd *rgd;
687
		rgd = rlist.rl_ghs[x].gh_gl->gl_object;
688
		rg_blocks += rgd->rd_length;
D
David Teigland 已提交
689 690 691 692 693 694 695 696 697 698 699 700 701 702
	}

	error = gfs2_glock_nq_m(rlist.rl_rgrps, rlist.rl_ghs);
	if (error)
		goto out_rlist;

	error = gfs2_trans_begin(sdp, rg_blocks + RES_DINODE +
				 RES_INDIRECT + RES_STATFS + RES_QUOTA,
				 revokes);
	if (error)
		goto out_rg_gunlock;

	down_write(&ip->i_rw_mutex);

703 704
	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
	gfs2_trans_add_bh(ip->i_gl, bh, 1);
D
David Teigland 已提交
705 706 707 708 709 710 711 712 713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732

	bstart = 0;
	blen = 0;

	for (p = top; p < bottom; p++) {
		if (!*p)
			continue;

		bn = be64_to_cpu(*p);

		if (bstart + blen == bn)
			blen++;
		else {
			if (bstart) {
				if (metadata)
					gfs2_free_meta(ip, bstart, blen);
				else
					gfs2_free_data(ip, bstart, blen);
			}

			bstart = bn;
			blen = 1;
		}

		*p = 0;
		if (!ip->i_di.di_blocks)
			gfs2_consist_inode(ip);
		ip->i_di.di_blocks--;
733
		gfs2_set_inode_blocks(&ip->i_inode);
D
David Teigland 已提交
734 735 736 737 738 739 740 741
	}
	if (bstart) {
		if (metadata)
			gfs2_free_meta(ip, bstart, blen);
		else
			gfs2_free_data(ip, bstart, blen);
	}

742
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
D
David Teigland 已提交
743

744
	gfs2_dinode_out(ip, dibh->b_data);
D
David Teigland 已提交
745 746 747 748 749

	up_write(&ip->i_rw_mutex);

	gfs2_trans_end(sdp);

750
out_rg_gunlock:
D
David Teigland 已提交
751
	gfs2_glock_dq_m(rlist.rl_rgrps, rlist.rl_ghs);
752
out_rlist:
D
David Teigland 已提交
753
	gfs2_rlist_free(&rlist);
754
out:
755
	gfs2_glock_dq_uninit(&ip->i_alloc->al_ri_gh);
D
David Teigland 已提交
756 757 758 759 760 761 762 763 764 765 766 767 768
	return error;
}

/**
 * do_grow - Make a file look bigger than it is
 * @ip: the inode
 * @size: the size to set the file to
 *
 * Called with an exclusive lock on @ip.
 *
 * Returns: errno
 */

769
static int do_grow(struct gfs2_inode *ip, u64 size)
D
David Teigland 已提交
770
{
771
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
D
David Teigland 已提交
772 773 774 775 776 777 778 779 780 781
	struct gfs2_alloc *al;
	struct buffer_head *dibh;
	int error;

	al = gfs2_alloc_get(ip);

	error = gfs2_quota_lock(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
		goto out;

782
	error = gfs2_quota_check(ip, ip->i_inode.i_uid, ip->i_inode.i_gid);
D
David Teigland 已提交
783 784 785 786 787 788 789 790 791 792
	if (error)
		goto out_gunlock_q;

	al->al_requested = sdp->sd_max_height + RES_DATA;

	error = gfs2_inplace_reserve(ip);
	if (error)
		goto out_gunlock_q;

	error = gfs2_trans_begin(sdp,
793
			sdp->sd_max_height + al->al_rgd->rd_length +
D
David Teigland 已提交
794 795 796 797 798
			RES_JDATA + RES_DINODE + RES_STATFS + RES_QUOTA, 0);
	if (error)
		goto out_ipres;

	if (size > sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode)) {
799
		const u64 *arr = sdp->sd_heightsize;
D
David Teigland 已提交
800
		if (gfs2_is_stuffed(ip)) {
801
			error = gfs2_unstuff_dinode(ip, NULL);
D
David Teigland 已提交
802 803 804 805
			if (error)
				goto out_end_trans;
		}

806 807 808 809 810 811
		down_write(&ip->i_rw_mutex);
		if (size > arr[ip->i_height]) {
			u8 height = ip->i_height;
			while(size > arr[height])
				height++;
			error = build_height(&ip->i_inode, height);
D
David Teigland 已提交
812
		}
813 814 815
		up_write(&ip->i_rw_mutex);
		if (error)
			goto out_end_trans;
D
David Teigland 已提交
816 817 818
	}

	ip->i_di.di_size = size;
819
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
D
David Teigland 已提交
820 821 822 823 824

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out_end_trans;

825
	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
826
	gfs2_dinode_out(ip, dibh->b_data);
D
David Teigland 已提交
827 828
	brelse(dibh);

829
out_end_trans:
D
David Teigland 已提交
830
	gfs2_trans_end(sdp);
831
out_ipres:
D
David Teigland 已提交
832
	gfs2_inplace_release(ip);
833
out_gunlock_q:
D
David Teigland 已提交
834
	gfs2_quota_unlock(ip);
835
out:
D
David Teigland 已提交
836 837 838 839
	gfs2_alloc_put(ip);
	return error;
}

S
Steven Whitehouse 已提交
840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880

/**
 * gfs2_block_truncate_page - Deal with zeroing out data for truncate
 *
 * This is partly borrowed from ext3.
 */
static int gfs2_block_truncate_page(struct address_space *mapping)
{
	struct inode *inode = mapping->host;
	struct gfs2_inode *ip = GFS2_I(inode);
	loff_t from = inode->i_size;
	unsigned long index = from >> PAGE_CACHE_SHIFT;
	unsigned offset = from & (PAGE_CACHE_SIZE-1);
	unsigned blocksize, iblock, length, pos;
	struct buffer_head *bh;
	struct page *page;
	int err;

	page = grab_cache_page(mapping, index);
	if (!page)
		return 0;

	blocksize = inode->i_sb->s_blocksize;
	length = blocksize - (offset & (blocksize - 1));
	iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);

	if (!page_has_buffers(page))
		create_empty_buffers(page, blocksize, 0);

	/* Find the buffer that contains "offset" */
	bh = page_buffers(page);
	pos = blocksize;
	while (offset >= pos) {
		bh = bh->b_this_page;
		iblock++;
		pos += blocksize;
	}

	err = 0;

	if (!buffer_mapped(bh)) {
881
		gfs2_block_map(inode, iblock, bh, 0);
S
Steven Whitehouse 已提交
882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897
		/* unmapped? It's a hole - nothing to do */
		if (!buffer_mapped(bh))
			goto unlock;
	}

	/* Ok, it's mapped. Make sure it's up-to-date */
	if (PageUptodate(page))
		set_buffer_uptodate(bh);

	if (!buffer_uptodate(bh)) {
		err = -EIO;
		ll_rw_block(READ, 1, &bh);
		wait_on_buffer(bh);
		/* Uhhuh. Read error. Complain and punt. */
		if (!buffer_uptodate(bh))
			goto unlock;
898
		err = 0;
S
Steven Whitehouse 已提交
899 900
	}

901
	if (!gfs2_is_writeback(ip))
S
Steven Whitehouse 已提交
902 903
		gfs2_trans_add_bh(ip->i_gl, bh, 0);

904
	zero_user(page, offset, length);
S
Steven Whitehouse 已提交
905 906 907 908 909 910 911

unlock:
	unlock_page(page);
	page_cache_release(page);
	return err;
}

912
/*
 * trunc_start - first phase of shrinking a file to @size
 *
 * For a stuffed inode the new size is written and the dinode tail beyond
 * it is cleared, which completes the truncate.  Otherwise the partial
 * last block is zeroed if @size is not block aligned, and the new size is
 * written out together with the GFS2_DIF_TRUNC_IN_PROG flag.
 *
 * Returns: 1 if the inode was stuffed and nothing more needs doing,
 *          0 if the caller must go on to deallocate blocks, or errno
 */
static int trunc_start(struct gfs2_inode *ip, u64 size)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	int journaled = gfs2_is_jdata(ip);
	unsigned int blocks = RES_DINODE;
	int error;

	if (journaled)
		blocks += RES_JDATA;

	error = gfs2_trans_begin(sdp, blocks, 0);
	if (error)
		return error;

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (!gfs2_is_stuffed(ip)) {
		/* Zero the tail of the last block if it is only partly kept */
		if (size & (u64)(sdp->sd_sb.sb_bsize - 1))
			error = gfs2_block_truncate_page(ip->i_inode.i_mapping);

		if (!error) {
			ip->i_di.di_size = size;
			ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
			ip->i_di.di_flags |= GFS2_DIF_TRUNC_IN_PROG;
			gfs2_trans_add_bh(ip->i_gl, dibh, 1);
			gfs2_dinode_out(ip, dibh->b_data);
		}
	} else {
		ip->i_di.di_size = size;
		ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(ip, dibh->b_data);
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode) + size);
		error = 1;
	}

	brelse(dibh);

out:
	gfs2_trans_end(sdp);
	return error;
}

956
/*
 * trunc_dealloc - strip the blocks beyond @size from the metadata tree
 *
 * Runs recursive_scan() with do_strip() once per tree height, from the
 * lowest level up to the dinode, starting each scan at the metapath of
 * the last block that is kept.
 *
 * Returns: errno
 */
static int trunc_dealloc(struct gfs2_inode *ip, u64 size)
{
	unsigned int level = ip->i_height;
	struct metapath mp;
	u64 last_lblock = 0;
	int error;

	if (size)
		last_lblock = (size - 1) >>
			      GFS2_SB(&ip->i_inode)->sd_sb.sb_bsize_shift;

	find_metapath(ip, last_lblock, &mp);
	gfs2_alloc_get(ip);

	error = gfs2_quota_hold(ip, NO_QUOTA_CHANGE, NO_QUOTA_CHANGE);
	if (error)
		goto out;

	while (level--) {
		struct strip_mine sm;

		/* With a non-zero size the first pointer must survive */
		sm.sm_height = level;
		sm.sm_first = !!size;

		error = recursive_scan(ip, NULL, &mp, 0, 0, 1, do_strip, &sm);
		if (error)
			break;
	}

	gfs2_quota_unhold(ip);

out:
	gfs2_alloc_put(ip);
	return error;
}

static int trunc_end(struct gfs2_inode *ip)
{
994
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
D
David Teigland 已提交
995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008
	struct buffer_head *dibh;
	int error;

	error = gfs2_trans_begin(sdp, RES_DINODE, 0);
	if (error)
		return error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (error)
		goto out;

	if (!ip->i_di.di_size) {
1009
		ip->i_height = 0;
D
David Teigland 已提交
1010 1011
		ip->i_di.di_goal_meta =
			ip->i_di.di_goal_data =
1012
			ip->i_no_addr;
D
David Teigland 已提交
1013 1014
		gfs2_buffer_clear_tail(dibh, sizeof(struct gfs2_dinode));
	}
1015
	ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
D
David Teigland 已提交
1016 1017
	ip->i_di.di_flags &= ~GFS2_DIF_TRUNC_IN_PROG;

1018
	gfs2_trans_add_bh(ip->i_gl, dibh, 1);
1019
	gfs2_dinode_out(ip, dibh->b_data);
D
David Teigland 已提交
1020 1021
	brelse(dibh);

1022
out:
D
David Teigland 已提交
1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038
	up_write(&ip->i_rw_mutex);
	gfs2_trans_end(sdp);
	return error;
}

/**
 * do_shrink - make a file smaller
 * @ip: the inode
 * @size: the size to make the file
 * @truncator: function to truncate the last partial block
 *
 * Called with an exclusive lock on @ip.
 *
 * Returns: errno
 */

1039
static int do_shrink(struct gfs2_inode *ip, u64 size)
D
David Teigland 已提交
1040 1041 1042
{
	int error;

1043
	error = trunc_start(ip, size);
D
David Teigland 已提交
1044 1045 1046 1047 1048 1049 1050 1051 1052 1053 1054 1055
	if (error < 0)
		return error;
	if (error > 0)
		return 0;

	error = trunc_dealloc(ip, size);
	if (!error)
		error = trunc_end(ip);

	return error;
}

W
Wendy Cheng 已提交
1056 1057 1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082
/**
 * do_touch - refresh an inode's mtime/ctime and write the dinode out
 * @ip: the inode
 * @size: not referenced by this function
 *
 * Opens a transaction reserving RES_DINODE and holds i_rw_mutex for
 * writing while the dinode buffer is updated.
 *
 * Returns: errno
 */

static int do_touch(struct gfs2_inode *ip, u64 size)
{
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
	struct buffer_head *dibh;
	int error = gfs2_trans_begin(sdp, RES_DINODE, 0);

	if (error)
		return error;

	down_write(&ip->i_rw_mutex);

	error = gfs2_meta_inode_buffer(ip, &dibh);
	if (!error) {
		/* Single evaluation of CURRENT_TIME so both stamps match */
		ip->i_inode.i_mtime = ip->i_inode.i_ctime = CURRENT_TIME;
		gfs2_trans_add_bh(ip->i_gl, dibh, 1);
		gfs2_dinode_out(ip, dibh->b_data);
		brelse(dibh);
	}

	/* Lock and transaction are released on success and failure alike */
	up_write(&ip->i_rw_mutex);
	gfs2_trans_end(sdp);
	return error;
}

D
David Teigland 已提交
1083
/**
1084
 * gfs2_truncatei - make a file a given size
D
David Teigland 已提交
1085 1086 1087 1088 1089 1090 1091 1092 1093
 * @ip: the inode
 * @size: the size to make the file
 * @truncator: function to truncate the last partial block
 *
 * The file size can grow, shrink, or stay the same size.
 *
 * Returns: errno
 */

1094
int gfs2_truncatei(struct gfs2_inode *ip, u64 size)
D
David Teigland 已提交
1095 1096 1097
{
	int error;

1098
	if (gfs2_assert_warn(GFS2_SB(&ip->i_inode), S_ISREG(ip->i_inode.i_mode)))
D
David Teigland 已提交
1099 1100 1101 1102
		return -EINVAL;

	if (size > ip->i_di.di_size)
		error = do_grow(ip, size);
W
Wendy Cheng 已提交
1103
	else if (size < ip->i_di.di_size)
1104
		error = do_shrink(ip, size);
W
Wendy Cheng 已提交
1105 1106 1107
	else
		/* update time stamps */
		error = do_touch(ip, size);
D
David Teigland 已提交
1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137

	return error;
}

int gfs2_truncatei_resume(struct gfs2_inode *ip)
{
	int error;
	error = trunc_dealloc(ip, ip->i_di.di_size);
	if (!error)
		error = trunc_end(ip);
	return error;
}

int gfs2_file_dealloc(struct gfs2_inode *ip)
{
	return trunc_dealloc(ip, 0);
}

/**
 * gfs2_write_calc_reserv - calculate number of blocks needed to write to a file
 * @ip: the file
 * @len: the number of bytes to be written to the file
 * @data_blocks: returns the number of data blocks required
 * @ind_blocks: returns the number of indirect blocks required
 *
 */

void gfs2_write_calc_reserv(struct gfs2_inode *ip, unsigned int len,
			    unsigned int *data_blocks, unsigned int *ind_blocks)
{
1138
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
D
David Teigland 已提交
1139 1140
	unsigned int tmp;

1141
	if (gfs2_is_dir(ip)) {
1142
		*data_blocks = DIV_ROUND_UP(len, sdp->sd_jbsize) + 2;
D
David Teigland 已提交
1143 1144 1145 1146 1147 1148 1149
		*ind_blocks = 3 * (sdp->sd_max_jheight - 1);
	} else {
		*data_blocks = (len >> sdp->sd_sb.sb_bsize_shift) + 3;
		*ind_blocks = 3 * (sdp->sd_max_height - 1);
	}

	for (tmp = *data_blocks; tmp > sdp->sd_diptrs;) {
1150
		tmp = DIV_ROUND_UP(tmp, sdp->sd_inptrs);
D
David Teigland 已提交
1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164
		*ind_blocks += tmp;
	}
}

/**
 * gfs2_write_alloc_required - figure out if a write will require an allocation
 * @ip: the file being written to
 * @offset: the offset to write to
 * @len: the number of bytes being written
 * @alloc_required: set to 1 if an alloc is required, 0 otherwise
 *
 * Returns: errno
 */

1165
int gfs2_write_alloc_required(struct gfs2_inode *ip, u64 offset,
D
David Teigland 已提交
1166 1167
			      unsigned int len, int *alloc_required)
{
1168
	struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
1169 1170 1171
	struct buffer_head bh;
	unsigned int shift;
	u64 lblock, lblock_stop, size;
D
David Teigland 已提交
1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184

	*alloc_required = 0;

	if (!len)
		return 0;

	if (gfs2_is_stuffed(ip)) {
		if (offset + len >
		    sdp->sd_sb.sb_bsize - sizeof(struct gfs2_dinode))
			*alloc_required = 1;
		return 0;
	}

1185 1186
	*alloc_required = 1;
	shift = sdp->sd_sb.sb_bsize_shift;
1187
	if (gfs2_is_dir(ip)) {
D
David Teigland 已提交
1188 1189 1190 1191 1192 1193
		unsigned int bsize = sdp->sd_jbsize;
		lblock = offset;
		do_div(lblock, bsize);
		lblock_stop = offset + len + bsize - 1;
		do_div(lblock_stop, bsize);
	} else {
1194
		u64 end_of_file = (ip->i_di.di_size + sdp->sd_sb.sb_bsize - 1) >> shift;
D
David Teigland 已提交
1195 1196
		lblock = offset >> shift;
		lblock_stop = (offset + len + sdp->sd_sb.sb_bsize - 1) >> shift;
1197
		if (lblock_stop > end_of_file)
1198
			return 0;
D
David Teigland 已提交
1199 1200
	}

1201 1202 1203 1204 1205 1206
	size = (lblock_stop - lblock) << shift;
	do {
		bh.b_state = 0;
		bh.b_size = size;
		gfs2_block_map(&ip->i_inode, lblock, &bh, 0);
		if (!buffer_mapped(&bh))
D
David Teigland 已提交
1207
			return 0;
1208 1209 1210
		size -= bh.b_size;
		lblock += (bh.b_size >> ip->i_inode.i_blkbits);
	} while(size > 0);
D
David Teigland 已提交
1211

1212
	*alloc_required = 0;
D
David Teigland 已提交
1213 1214 1215
	return 0;
}