bitmap.c 36.6 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
/*
 * Copyright 2000 by Hans Reiser, licensing governed by reiserfs/README
 */
/* Reiserfs block (de)allocator, bitmap-based. */

#include <linux/time.h>
#include <linux/reiserfs_fs.h>
#include <linux/errno.h>
#include <linux/buffer_head.h>
#include <linux/kernel.h>
#include <linux/pagemap.h>
#include <linux/reiserfs_fs_sb.h>
#include <linux/reiserfs_fs_i.h>
#include <linux/quotaops.h>

/* Fallback value for the "preallocsize" allocator option when the option
 * is given without an explicit number. */
#define PREALLOCATION_SIZE 9

/* different reiserfs block allocator options */

/* Storage for the allocator option flags of super block `s'; each option
 * below occupies one bit and is accessed with set_bit()/test_bit(). */
#define SB_ALLOC_OPTS(s) (REISERFS_SB(s)->s_alloc_options.bits)

/* Bit numbers of the individual allocator options inside SB_ALLOC_OPTS(). */
#define  _ALLOC_concentrating_formatted_nodes 0
#define  _ALLOC_displacing_large_files 1
#define  _ALLOC_displacing_new_packing_localities 2
#define  _ALLOC_old_hashed_relocation 3
#define  _ALLOC_new_hashed_relocation 4
#define  _ALLOC_skip_busy 5
#define  _ALLOC_displace_based_on_dirid 6
#define  _ALLOC_hashed_formatted_nodes 7
#define  _ALLOC_old_way 8
#define  _ALLOC_hundredth_slices 9
#define  _ALLOC_dirid_groups 10
#define  _ALLOC_oid_groups 11
#define  _ALLOC_packing_groups 12

/* Convenience predicates for the three options that have dedicated names;
 * every other option is queried through TEST_OPTION(). */
#define  concentrating_formatted_nodes(s)	test_bit(_ALLOC_concentrating_formatted_nodes, &SB_ALLOC_OPTS(s))
#define  displacing_large_files(s)		test_bit(_ALLOC_displacing_large_files, &SB_ALLOC_OPTS(s))
#define  displacing_new_packing_localities(s)	test_bit(_ALLOC_displacing_new_packing_localities, &SB_ALLOC_OPTS(s))

/* Log that option `optname' is set and turn its bit on.
 * NOTE: this macro does not take the super block as an argument; it expects
 * a variable named `s' to be in scope at the point of use. */
#define SET_OPTION(optname) \
   do { \
        reiserfs_warning(s, "reiserfs: option \"%s\" is set", #optname); \
        set_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)); \
    } while(0)
/* Non-zero when allocator option `optname' is enabled on super block `s'. */
#define TEST_OPTION(optname, s) \
    test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s))

48 49
/*
 * Translate a block number into its position in the on-disk bitmaps.
 *
 * @s:       super block of the filesystem
 * @block:   block number to locate
 * @bmap_nr: out - index of the bitmap block that tracks @block
 * @offset:  out - bit position of @block inside that bitmap block
 *
 * One bitmap block holds (block size in bytes * 8) bits, one per block.
 */
static inline void get_bit_address(struct super_block *s,
				   b_blocknr_t block, int *bmap_nr, int *offset)
{
	/* number of blocks described by a single bitmap block */
	const unsigned long bits_per_bitmap = s->s_blocksize << 3;

	/* dividing by bits_per_bitmap: shift by log2(blocksize) + log2(8) */
	*bmap_nr = block >> (s->s_blocksize_bits + 3);

	/* remainder of that division is the bit inside the bitmap block */
	*offset = block & (bits_per_bitmap - 1);
}

#ifdef CONFIG_REISERFS_CHECK
59
/*
 * Consistency check (CONFIG_REISERFS_CHECK builds only): verify that @block
 * is a block that may be allocated or freed and that its bitmap bit has the
 * value the caller expects.
 *
 * @s:         super block of the filesystem
 * @block:     block number to validate
 * @bit_value: expected state of the block's bitmap bit (0 or 1)
 *
 * Returns 1 when every check passes.  Returns 0, after logging a warning,
 * when the block number is out of range, refers to a bitmap block, lies in
 * a non-existent bitmap, has a bitmap bit that differs from @bit_value, or
 * is the root block while @bit_value is 0.
 */
int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value)
{
	int bmap_idx, bit;
	struct buffer_head *bitmap_bh;

	if (block == 0 || block >= SB_BLOCK_COUNT(s)) {
		reiserfs_warning(s,
				 "vs-4010: is_reusable: block number is out of range %lu (%u)",
				 block, SB_BLOCK_COUNT(s));
		return 0;
	}

	get_bit_address(s, block, &bmap_idx, &bit);

	if (unlikely(test_bit(REISERFS_OLD_FORMAT,
			      &(REISERFS_SB(s)->s_properties)))) {
		/*
		 * Old format filesystem: unlikely, but there the bitmaps all
		 * sit up front, right behind the super block.
		 */
		b_blocknr_t first_bitmap =
		    REISERFS_SB(s)->s_sbh->b_blocknr + 1;

		if (block >= first_bitmap
		    && block <= first_bitmap + SB_BMAP_NR(s)) {
			reiserfs_warning(s, "vs: 4019: is_reusable: "
					 "bitmap block %lu(%u) can't be freed or reused",
					 block, SB_BMAP_NR(s));
			return 0;
		}
	} else if (bit == 0) {
		/* bit 0 of a bitmap is rejected as being a bitmap block */
		reiserfs_warning(s, "vs: 4020: is_reusable: "
				 "bitmap block %lu(%u) can't be freed or reused",
				 block, SB_BMAP_NR(s));
		return 0;
	}

	if (bmap_idx >= SB_BMAP_NR(s)) {
		reiserfs_warning(s,
				 "vs-4030: is_reusable: there is no so many bitmap blocks: "
				 "block=%lu, bitmap_nr=%d", block, bmap_idx);
		return 0;
	}

	/* hold a reference on the bitmap buffer while its bit is inspected */
	bitmap_bh = SB_AP_BITMAP(s)[bmap_idx].bh;
	get_bh(bitmap_bh);

	if ((bit_value == 0 && reiserfs_test_le_bit(bit, bitmap_bh->b_data))
	    || (bit_value == 1
		&& reiserfs_test_le_bit(bit, bitmap_bh->b_data) == 0)) {
		reiserfs_warning(s,
				 "vs-4040: is_reusable: corresponding bit of block %lu does not "
				 "match required value (bmap==%d, offset==%d) test_bit==%d",
				 block, bmap_idx, bit,
				 reiserfs_test_le_bit(bit, bitmap_bh->b_data));

		brelse(bitmap_bh);
		return 0;
	}
	brelse(bitmap_bh);

	if (bit_value == 0 && block == SB_ROOT_BLOCK(s)) {
		reiserfs_warning(s,
				 "vs-4050: is_reusable: this is root block (%u), "
				 "it must be busy", SB_ROOT_BLOCK(s));
		return 0;
	}

	return 1;
}
125
#endif				/* CONFIG_REISERFS_CHECK */
L
Linus Torvalds 已提交
126 127 128

/* searches in journal structures for a given block number (bmap, off). If block
   is found in reiserfs journal it suggests next free block candidate to test. */
129 130
static inline int is_block_in_journal(struct super_block *s, int bmap, int
				      off, int *next)
L
Linus Torvalds 已提交
131
{
132 133 134 135 136 137 138 139 140 141 142 143
	b_blocknr_t tmp;

	if (reiserfs_in_journal(s, bmap, off, 1, &tmp)) {
		if (tmp) {	/* hint supplied */
			*next = tmp;
			PROC_INFO_INC(s, scan_bitmap.in_journal_hint);
		} else {
			(*next) = off + 1;	/* inc offset to avoid looping. */
			PROC_INFO_INC(s, scan_bitmap.in_journal_nohint);
		}
		PROC_INFO_INC(s, scan_bitmap.retry);
		return 1;
L
Linus Torvalds 已提交
144
	}
145
	return 0;
L
Linus Torvalds 已提交
146 147 148 149
}

/* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap
 * block; */
150 151 152
static int scan_bitmap_block(struct reiserfs_transaction_handle *th,
			     int bmap_n, int *beg, int boundary, int min,
			     int max, int unfm)
L
Linus Torvalds 已提交
153
{
154 155
	struct super_block *s = th->t_super;
	struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n];
156
	struct buffer_head *bh;
157 158
	int end, next;
	int org = *beg;
L
Linus Torvalds 已提交
159

160
	BUG_ON(!th->t_trans_id);
L
Linus Torvalds 已提交
161

162 163 164
	RFALSE(bmap_n >= SB_BMAP_NR(s), "Bitmap %d is out of range (0..%d)",
	       bmap_n, SB_BMAP_NR(s) - 1);
	PROC_INFO_INC(s, scan_bitmap.bmap);
L
Linus Torvalds 已提交
165 166 167 168 169
/* this is unclear and lacks comments, explain how journal bitmaps
   work here for the reader.  Convey a sense of the design here. What
   is a window? */
/* - I mean `a window of zero bits' as in description of this function - Zam. */

170 171 172 173 174
	if (!bi) {
		reiserfs_warning(s, "NULL bitmap info pointer for bitmap %d",
				 bmap_n);
		return 0;
	}
175 176 177 178
	bh = bi->bh;
	get_bh(bh);

	if (buffer_locked(bh)) {
179
		PROC_INFO_INC(s, scan_bitmap.wait);
180
		__wait_on_buffer(bh);
L
Linus Torvalds 已提交
181 182
	}

183 184
	while (1) {
	      cont:
185 186
		if (bi->free_count < min) {
			brelse(bh);
187
			return 0;	// No free blocks in this bitmap
188
		}
189 190 191

		/* search for a first zero bit -- beggining of a window */
		*beg = reiserfs_find_next_zero_le_bit
192
		    ((unsigned long *)(bh->b_data), boundary, *beg);
193 194 195

		if (*beg + min > boundary) {	/* search for a zero bit fails or the rest of bitmap block
						 * cannot contain a zero window of minimum size */
196
			brelse(bh);
197
			return 0;
L
Linus Torvalds 已提交
198 199
		}

200 201 202 203 204
		if (unfm && is_block_in_journal(s, bmap_n, *beg, beg))
			continue;
		/* first zero bit found; we check next bits */
		for (end = *beg + 1;; end++) {
			if (end >= *beg + max || end >= boundary
205
			    || reiserfs_test_le_bit(end, bh->b_data)) {
206 207 208 209 210 211 212 213
				next = end;
				break;
			}
			/* finding the other end of zero bit window requires looking into journal structures (in
			 * case of searching for free blocks for unformatted nodes) */
			if (unfm && is_block_in_journal(s, bmap_n, end, &next))
				break;
		}
L
Linus Torvalds 已提交
214

215 216 217 218
		/* now (*beg) points to beginning of zero bits window,
		 * (end) points to one bit after the window end */
		if (end - *beg >= min) {	/* it seems we have found window of proper size */
			int i;
219
			reiserfs_prepare_for_journal(s, bh, 1);
220 221 222 223
			/* try to set all blocks used checking are they still free */
			for (i = *beg; i < end; i++) {
				/* It seems that we should not check in journal again. */
				if (reiserfs_test_and_set_le_bit
224
				    (i, bh->b_data)) {
225 226 227 228 229 230 231 232 233 234 235
					/* bit was set by another process
					 * while we slept in prepare_for_journal() */
					PROC_INFO_INC(s, scan_bitmap.stolen);
					if (i >= *beg + min) {	/* we can continue with smaller set of allocated blocks,
								 * if length of this set is more or equal to `min' */
						end = i;
						break;
					}
					/* otherwise we clear all bit were set ... */
					while (--i >= *beg)
						reiserfs_test_and_clear_le_bit
236 237
						    (i, bh->b_data);
					reiserfs_restore_prepared_buffer(s, bh);
238 239 240 241 242 243
					*beg = org;
					/* ... and search again in current block from beginning */
					goto cont;
				}
			}
			bi->free_count -= (end - *beg);
244 245
			journal_mark_dirty(th, s, bh);
			brelse(bh);
246 247 248 249 250 251 252 253 254 255 256

			/* free block count calculation */
			reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s),
						     1);
			PUT_SB_FREE_BLOCKS(s, SB_FREE_BLOCKS(s) - (end - *beg));
			journal_mark_dirty(th, s, SB_BUFFER_WITH_SB(s));

			return end - (*beg);
		} else {
			*beg = next;
		}
L
Linus Torvalds 已提交
257 258 259
	}
}

260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278
/*
 * Map an id to the index of a bitmap block.
 *
 * Ids 0..2 always map to bitmap 1.  Any other id is hashed with
 * keyed_hash() and reduced modulo the number of bitmaps; a result of 0 is
 * bumped to 1.  If the chosen index does not exist (which can only happen
 * when there is a single bitmap), 0 is returned instead.
 */
static int bmap_hash_id(struct super_block *s, u32 id)
{
	unsigned group = 1;

	if (id > 2) {
		unsigned long hash = keyed_hash((char *)(&id), 4);

		group = hash % SB_BMAP_NR(s);
		if (group == 0)
			group = 1;
	}

	/* this can only be true when SB_BMAP_NR = 1 */
	return (group >= SB_BMAP_NR(s)) ? 0 : group;
}

/*
 * hashes the id and then returns > 0 if the block group for the
 * corresponding hash is full
 */
285 286 287 288 289 290 291 292
static inline int block_group_used(struct super_block *s, u32 id)
{
	int bm;
	bm = bmap_hash_id(s, id);
	if (SB_AP_BITMAP(s)[bm].free_count > ((s->s_blocksize << 3) * 60 / 100)) {
		return 0;
	}
	return 1;
L
Linus Torvalds 已提交
293 294 295 296 297
}

/*
 * the packing is returned in disk byte order
 */
298
__le32 reiserfs_choose_packing(struct inode * dir)
299
{
300 301 302 303 304 305 306 307 308 309 310 311 312 313
	__le32 packing;
	if (TEST_OPTION(packing_groups, dir->i_sb)) {
		u32 parent_dir = le32_to_cpu(INODE_PKEY(dir)->k_dir_id);
		/*
		 * some versions of reiserfsck expect packing locality 1 to be
		 * special
		 */
		if (parent_dir == 1 || block_group_used(dir->i_sb, parent_dir))
			packing = INODE_PKEY(dir)->k_objectid;
		else
			packing = INODE_PKEY(dir)->k_dir_id;
	} else
		packing = INODE_PKEY(dir)->k_objectid;
	return packing;
L
Linus Torvalds 已提交
314
}
315

L
Linus Torvalds 已提交
316 317
/* Tries to find contiguous zero bit window (given size) in given region of
 * bitmap and place new blocks there. Returns number of allocated blocks. */
318 319 320
static int scan_bitmap(struct reiserfs_transaction_handle *th,
		       b_blocknr_t * start, b_blocknr_t finish,
		       int min, int max, int unfm, unsigned long file_block)
L
Linus Torvalds 已提交
321
{
322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384
	int nr_allocated = 0;
	struct super_block *s = th->t_super;
	/* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr
	 * - Hans, it is not a block number - Zam. */

	int bm, off;
	int end_bm, end_off;
	int off_max = s->s_blocksize << 3;

	BUG_ON(!th->t_trans_id);

	PROC_INFO_INC(s, scan_bitmap.call);
	if (SB_FREE_BLOCKS(s) <= 0)
		return 0;	// No point in looking for more free blocks

	get_bit_address(s, *start, &bm, &off);
	get_bit_address(s, finish, &end_bm, &end_off);
	if (bm > SB_BMAP_NR(s))
		return 0;
	if (end_bm > SB_BMAP_NR(s))
		end_bm = SB_BMAP_NR(s);

	/* When the bitmap is more than 10% free, anyone can allocate.
	 * When it's less than 10% free, only files that already use the
	 * bitmap are allowed. Once we pass 80% full, this restriction
	 * is lifted.
	 *
	 * We do this so that files that grow later still have space close to
	 * their original allocation. This improves locality, and presumably
	 * performance as a result.
	 *
	 * This is only an allocation policy and does not make up for getting a
	 * bad hint. Decent hinting must be implemented for this to work well.
	 */
	if (TEST_OPTION(skip_busy, s)
	    && SB_FREE_BLOCKS(s) > SB_BLOCK_COUNT(s) / 20) {
		for (; bm < end_bm; bm++, off = 0) {
			if ((off && (!unfm || (file_block != 0)))
			    || SB_AP_BITMAP(s)[bm].free_count >
			    (s->s_blocksize << 3) / 10)
				nr_allocated =
				    scan_bitmap_block(th, bm, &off, off_max,
						      min, max, unfm);
			if (nr_allocated)
				goto ret;
		}
		/* we know from above that start is a reasonable number */
		get_bit_address(s, *start, &bm, &off);
	}

	for (; bm < end_bm; bm++, off = 0) {
		nr_allocated =
		    scan_bitmap_block(th, bm, &off, off_max, min, max, unfm);
		if (nr_allocated)
			goto ret;
	}

	nr_allocated =
	    scan_bitmap_block(th, bm, &off, end_off + 1, min, max, unfm);

      ret:
	*start = bm * off_max + off;
	return nr_allocated;
L
Linus Torvalds 已提交
385 386 387

}

388 389 390
/*
 * Common back end for freeing one block: clear its bitmap bit and bump the
 * free block counters, journaling both the bitmap and the super block.
 *
 * @th:              running transaction
 * @inode:           inode passed to the quota release call when
 *                   @for_unformatted is non-zero
 * @block:           block number to free
 * @for_unformatted: non-zero if one block must also be released from quota
 *
 * A block whose bitmap index is out of range is reported and ignored.  A
 * bit that is already clear is reported, but the counters are still
 * incremented.
 */
static void _reiserfs_free_block(struct reiserfs_transaction_handle *th,
				 struct inode *inode, b_blocknr_t block,
				 int for_unformatted)
{
	struct super_block *s = th->t_super;
	struct reiserfs_super_block *disk_sb;
	struct buffer_head *super_bh, *bitmap_bh;
	struct reiserfs_bitmap_info *info;
	int bmap_idx, bit;

	BUG_ON(!th->t_trans_id);

	PROC_INFO_INC(s, free_block);

	disk_sb = SB_DISK_SUPER_BLOCK(s);
	super_bh = SB_BUFFER_WITH_SB(s);

	get_bit_address(s, block, &bmap_idx, &bit);

	if (bmap_idx >= sb_bmap_nr(disk_sb)) {
		reiserfs_warning(s, "vs-4075: reiserfs_free_block: "
				 "block %lu is out of range on %s",
				 block, reiserfs_bdevname(s));
		return;
	}

	info = &SB_AP_BITMAP(s)[bmap_idx];
	bitmap_bh = info->bh;
	get_bh(bitmap_bh);

	reiserfs_prepare_for_journal(s, bitmap_bh, 1);

	/* clear bit for the given block in bit map */
	if (!reiserfs_test_and_clear_le_bit(bit, bitmap_bh->b_data))
		reiserfs_warning(s, "vs-4080: reiserfs_free_block: "
				 "free_block (%s:%lu)[dev:blocknr]: bit already cleared",
				 reiserfs_bdevname(s), block);

	info->free_count++;
	journal_mark_dirty(th, s, bitmap_bh);
	brelse(bitmap_bh);

	/* update super block */
	reiserfs_prepare_for_journal(s, super_bh, 1);
	set_sb_free_blocks(disk_sb, sb_free_blocks(disk_sb) + 1);
	journal_mark_dirty(th, s, super_bh);

	if (for_unformatted)
		DQUOT_FREE_BLOCK_NODIRTY(inode, 1);
}

439 440 441
/*
 * Free one block inside transaction @th.
 *
 * The block is first recorded with journal_mark_freed() and only then
 * released through _reiserfs_free_block().  @for_unformatted is passed on
 * unchanged.
 */
void reiserfs_free_block(struct reiserfs_transaction_handle *th,
			 struct inode *inode, b_blocknr_t block,
			 int for_unformatted)
{
	struct super_block *s;

	BUG_ON(!th->t_trans_id);
	s = th->t_super;

	RFALSE(!s, "vs-4061: trying to free block on nonexistent device");
	RFALSE(is_reusable(s, block, 1) == 0,
	       "vs-4071: can not free such block");

	/* mark it before we clear it, just in case */
	journal_mark_freed(th, s, block);

	_reiserfs_free_block(th, inode, block, for_unformatted);
}

/* preallocated blocks don't need to be run through journal_mark_freed */
456 457 458 459 460 461 462 463 464
static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th,
					 struct inode *inode, b_blocknr_t block)
{
	RFALSE(!th->t_super,
	       "vs-4060: trying to free block on nonexistent device");
	RFALSE(is_reusable(th->t_super, block, 1) == 0,
	       "vs-4070: can not free such block");
	BUG_ON(!th->t_trans_id);
	_reiserfs_free_block(th, inode, block, 1);
L
Linus Torvalds 已提交
465 466
}

467 468
static void __discard_prealloc(struct reiserfs_transaction_handle *th,
			       struct reiserfs_inode_info *ei)
L
Linus Torvalds 已提交
469
{
470 471 472 473
	unsigned long save = ei->i_prealloc_block;
	int dirty = 0;
	struct inode *inode = &ei->vfs_inode;
	BUG_ON(!th->t_trans_id);
L
Linus Torvalds 已提交
474
#ifdef CONFIG_REISERFS_CHECK
475 476 477 478
	if (ei->i_prealloc_count < 0)
		reiserfs_warning(th->t_super,
				 "zam-4001:%s: inode has negative prealloc blocks count.",
				 __FUNCTION__);
L
Linus Torvalds 已提交
479
#endif
480 481 482 483 484 485 486 487 488 489
	while (ei->i_prealloc_count > 0) {
		reiserfs_free_prealloc_block(th, inode, ei->i_prealloc_block);
		ei->i_prealloc_block++;
		ei->i_prealloc_count--;
		dirty = 1;
	}
	if (dirty)
		reiserfs_update_sd(th, inode);
	ei->i_prealloc_block = save;
	list_del_init(&(ei->i_prealloc_list));
L
Linus Torvalds 已提交
490 491 492
}

/* FIXME: It should be inline function */
493 494
void reiserfs_discard_prealloc(struct reiserfs_transaction_handle *th,
			       struct inode *inode)
L
Linus Torvalds 已提交
495
{
496 497 498 499
	struct reiserfs_inode_info *ei = REISERFS_I(inode);
	BUG_ON(!th->t_trans_id);
	if (ei->i_prealloc_count)
		__discard_prealloc(th, ei);
L
Linus Torvalds 已提交
500 501
}

502
void reiserfs_discard_all_prealloc(struct reiserfs_transaction_handle *th)
L
Linus Torvalds 已提交
503
{
504
	struct list_head *plist = &SB_JOURNAL(th->t_super)->j_prealloc_list;
L
Linus Torvalds 已提交
505

506
	BUG_ON(!th->t_trans_id);
L
Linus Torvalds 已提交
507

508 509 510 511
	while (!list_empty(plist)) {
		struct reiserfs_inode_info *ei;
		ei = list_entry(plist->next, struct reiserfs_inode_info,
				i_prealloc_list);
L
Linus Torvalds 已提交
512
#ifdef CONFIG_REISERFS_CHECK
513 514 515 516 517
		if (!ei->i_prealloc_count) {
			reiserfs_warning(th->t_super,
					 "zam-4001:%s: inode is in prealloc list but has no preallocated blocks.",
					 __FUNCTION__);
		}
L
Linus Torvalds 已提交
518
#endif
519 520
		__discard_prealloc(th, ei);
	}
L
Linus Torvalds 已提交
521 522
}

523
void reiserfs_init_alloc_options(struct super_block *s)
L
Linus Torvalds 已提交
524
{
525 526 527
	set_bit(_ALLOC_skip_busy, &SB_ALLOC_OPTS(s));
	set_bit(_ALLOC_dirid_groups, &SB_ALLOC_OPTS(s));
	set_bit(_ALLOC_packing_groups, &SB_ALLOC_OPTS(s));
L
Linus Torvalds 已提交
528 529 530
}

/* block allocator related options are parsed here */
531
int reiserfs_parse_alloc_options(struct super_block *s, char *options)
L
Linus Torvalds 已提交
532
{
533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570
	char *this_char, *value;

	REISERFS_SB(s)->s_alloc_options.bits = 0;	/* clear default settings */

	while ((this_char = strsep(&options, ":")) != NULL) {
		if ((value = strchr(this_char, '=')) != NULL)
			*value++ = 0;

		if (!strcmp(this_char, "concentrating_formatted_nodes")) {
			int temp;
			SET_OPTION(concentrating_formatted_nodes);
			temp = (value
				&& *value) ? simple_strtoul(value, &value,
							    0) : 10;
			if (temp <= 0 || temp > 100) {
				REISERFS_SB(s)->s_alloc_options.border = 10;
			} else {
				REISERFS_SB(s)->s_alloc_options.border =
				    100 / temp;
			}
			continue;
		}
		if (!strcmp(this_char, "displacing_large_files")) {
			SET_OPTION(displacing_large_files);
			REISERFS_SB(s)->s_alloc_options.large_file_size =
			    (value
			     && *value) ? simple_strtoul(value, &value, 0) : 16;
			continue;
		}
		if (!strcmp(this_char, "displacing_new_packing_localities")) {
			SET_OPTION(displacing_new_packing_localities);
			continue;
		};

		if (!strcmp(this_char, "old_hashed_relocation")) {
			SET_OPTION(old_hashed_relocation);
			continue;
		}
L
Linus Torvalds 已提交
571

572 573 574 575
		if (!strcmp(this_char, "new_hashed_relocation")) {
			SET_OPTION(new_hashed_relocation);
			continue;
		}
L
Linus Torvalds 已提交
576

577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592
		if (!strcmp(this_char, "dirid_groups")) {
			SET_OPTION(dirid_groups);
			continue;
		}
		if (!strcmp(this_char, "oid_groups")) {
			SET_OPTION(oid_groups);
			continue;
		}
		if (!strcmp(this_char, "packing_groups")) {
			SET_OPTION(packing_groups);
			continue;
		}
		if (!strcmp(this_char, "hashed_formatted_nodes")) {
			SET_OPTION(hashed_formatted_nodes);
			continue;
		}
L
Linus Torvalds 已提交
593

594 595 596 597
		if (!strcmp(this_char, "skip_busy")) {
			SET_OPTION(skip_busy);
			continue;
		}
L
Linus Torvalds 已提交
598

599 600 601 602
		if (!strcmp(this_char, "hundredth_slices")) {
			SET_OPTION(hundredth_slices);
			continue;
		}
L
Linus Torvalds 已提交
603

604 605 606 607
		if (!strcmp(this_char, "old_way")) {
			SET_OPTION(old_way);
			continue;
		}
L
Linus Torvalds 已提交
608

609 610 611 612
		if (!strcmp(this_char, "displace_based_on_dirid")) {
			SET_OPTION(displace_based_on_dirid);
			continue;
		}
L
Linus Torvalds 已提交
613

614 615 616 617 618 619 620 621 622 623 624 625 626 627 628
		if (!strcmp(this_char, "preallocmin")) {
			REISERFS_SB(s)->s_alloc_options.preallocmin =
			    (value
			     && *value) ? simple_strtoul(value, &value, 0) : 4;
			continue;
		}

		if (!strcmp(this_char, "preallocsize")) {
			REISERFS_SB(s)->s_alloc_options.preallocsize =
			    (value
			     && *value) ? simple_strtoul(value, &value,
							 0) :
			    PREALLOCATION_SIZE;
			continue;
		}
L
Linus Torvalds 已提交
629

630 631 632
		reiserfs_warning(s, "zam-4001: %s : unknown option - %s",
				 __FUNCTION__, this_char);
		return 1;
L
Linus Torvalds 已提交
633 634
	}

635 636
	reiserfs_warning(s, "allocator options = [%08x]\n", SB_ALLOC_OPTS(s));
	return 0;
L
Linus Torvalds 已提交
637
}
638 639

/*
 * Set hint->search_start to a hashed position inside [beg, end).
 *
 * The hashed value is the k_dir_id of the hint's key for formatted nodes
 * and for requests without an inode; otherwise it is the inode's k_dir_id
 * (displace_based_on_dirid option) or its k_objectid.
 */
static inline void new_hashed_relocation(reiserfs_blocknr_hint_t * hint)
{
	char *hash_in;

	if (hint->formatted_node || !hint->inode)
		hash_in = (char *)&hint->key.k_dir_id;
	else if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
	else
		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);

	hint->search_start =
	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
}

/*
 * Relocation based on dirid, hashing them into a given bitmap block
 * files. Formatted nodes are unaffected, a seperate policy covers them
 */
664
static void dirid_groups(reiserfs_blocknr_hint_t * hint)
L
Linus Torvalds 已提交
665
{
666 667 668 669
	unsigned long hash;
	__u32 dirid = 0;
	int bm = 0;
	struct super_block *sb = hint->th->t_super;
L
Linus Torvalds 已提交
670
	if (hint->inode)
671 672 673 674 675 676 677 678 679 680 681 682
		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);
	else if (hint->formatted_node)
		dirid = hint->key.k_dir_id;

	if (dirid) {
		bm = bmap_hash_id(sb, dirid);
		hash = bm * (sb->s_blocksize << 3);
		/* give a portion of the block group to metadata */
		if (hint->inode)
			hash += sb->s_blocksize / 2;
		hint->search_start = hash;
	}
L
Linus Torvalds 已提交
683 684 685 686 687 688
}

/*
 * Relocation based on oid, hashing them into a given bitmap block
 * files. Formatted nodes are unaffected, a seperate policy covers them
 */
689
static void oid_groups(reiserfs_blocknr_hint_t * hint)
L
Linus Torvalds 已提交
690
{
691 692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709
	if (hint->inode) {
		unsigned long hash;
		__u32 oid;
		__u32 dirid;
		int bm;

		dirid = le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id);

		/* keep the root dir and it's first set of subdirs close to
		 * the start of the disk
		 */
		if (dirid <= 2)
			hash = (hint->inode->i_sb->s_blocksize << 3);
		else {
			oid = le32_to_cpu(INODE_PKEY(hint->inode)->k_objectid);
			bm = bmap_hash_id(hint->inode->i_sb, oid);
			hash = bm * (hint->inode->i_sb->s_blocksize << 3);
		}
		hint->search_start = hash;
L
Linus Torvalds 已提交
710 711 712 713 714 715
	}
}

/* returns 1 if it finds an indirect item and gets valid hint info
 * from it, otherwise 0
 */
716
static int get_left_neighbor(reiserfs_blocknr_hint_t * hint)
L
Linus Torvalds 已提交
717
{
718 719 720 721 722 723 724 725
	struct path *path;
	struct buffer_head *bh;
	struct item_head *ih;
	int pos_in_item;
	__le32 *item;
	int ret = 0;

	if (!hint->path)	/* reiserfs code can call this function w/o pointer to path
L
Linus Torvalds 已提交
726
				 * structure supplied; then we rely on supplied search_start */
727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752
		return 0;

	path = hint->path;
	bh = get_last_bh(path);
	RFALSE(!bh, "green-4002: Illegal path specified to get_left_neighbor");
	ih = get_ih(path);
	pos_in_item = path->pos_in_item;
	item = get_item(path);

	hint->search_start = bh->b_blocknr;

	if (!hint->formatted_node && is_indirect_le_ih(ih)) {
		/* for indirect item: go to left and look for the first non-hole entry
		   in the indirect item */
		if (pos_in_item == I_UNFM_NUM(ih))
			pos_in_item--;
//          pos_in_item = I_UNFM_NUM (ih) - 1;
		while (pos_in_item >= 0) {
			int t = get_block_num(item, pos_in_item);
			if (t) {
				hint->search_start = t;
				ret = 1;
				break;
			}
			pos_in_item--;
		}
L
Linus Torvalds 已提交
753 754
	}

755 756
	/* does result value fit into specified region? */
	return ret;
L
Linus Torvalds 已提交
757 758 759 760 761 762
}

/* should be, if formatted node, then try to put on first part of the device
   specified as number of percent with mount option device, else try to put
   on last of device.  This is not to say it is good code to do so,
   but the effect should be measured.  */
763 764
static inline void set_border_in_hint(struct super_block *s,
				      reiserfs_blocknr_hint_t * hint)
L
Linus Torvalds 已提交
765
{
766 767
	b_blocknr_t border =
	    SB_BLOCK_COUNT(s) / REISERFS_SB(s)->s_alloc_options.border;
L
Linus Torvalds 已提交
768

769 770 771 772
	if (hint->formatted_node)
		hint->end = border - 1;
	else
		hint->beg = border;
L
Linus Torvalds 已提交
773 774
}

775
/*
 * Move the search start of a file that is becoming "large" to a hashed
 * position inside [beg, end).  The hash input is the inode's k_dir_id when
 * the displace_based_on_dirid option is set, its k_objectid otherwise.
 */
static inline void displace_large_file(reiserfs_blocknr_hint_t * hint)
{
	char *hash_in;

	if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
	else
		hash_in = (char *)(&INODE_PKEY(hint->inode)->k_objectid);

	hint->search_start =
	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
}

789
/*
 * Pick a hashed search start inside [beg, end) for a formatted node.
 *
 * With an inode the hash input is its k_dir_id (displace_based_on_dirid
 * option) or its k_objectid; without one it is the k_dir_id of the hint's
 * key.
 */
static inline void hash_formatted_node(reiserfs_blocknr_hint_t * hint)
{
	char *hash_in;

	if (hint->inode) {
		if (TEST_OPTION(displace_based_on_dirid, hint->th->t_super))
			hash_in =
			    (char *)(&INODE_PKEY(hint->inode)->k_dir_id);
		else
			hash_in =
			    (char *)(&INODE_PKEY(hint->inode)->k_objectid);
	} else {
		hash_in = (char *)&hint->key.k_dir_id;
	}

	hint->search_start =
	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
}

804 805 806
/*
 * Non-zero exactly when the block being allocated (hint->block) equals the
 * large_file_size allocator setting.
 */
static inline int
this_blocknr_allocation_would_make_it_a_large_file(reiserfs_blocknr_hint_t *
						   hint)
{
	const int at_threshold =
	    (hint->block ==
	     REISERFS_SB(hint->th->t_super)->s_alloc_options.large_file_size);

	return at_threshold;
}

#ifdef DISPLACE_NEW_PACKING_LOCALITIES
813
/*
 * Displace a new packing locality: clear the transaction's
 * displace_new_blocks flag and set the search start to a position inside
 * [beg, end) hashed from the k_objectid of the hint's key.
 */
static inline void displace_new_packing_locality(reiserfs_blocknr_hint_t * hint)
{
	char *hash_in = (char *)(&hint->key.k_objectid);

	hint->th->displace_new_blocks = 0;
	hint->search_start =
	    hint->beg + keyed_hash(hash_in, 4) % (hint->end - hint->beg);
}
822
#endif
L
Linus Torvalds 已提交
823

824
/*
 * Old-style hashed relocation for unformatted nodes.
 *
 * Hashes the inode's directory id (converted to CPU byte order) into
 * [beg, end - 1) and moves search_start forward to that position if it
 * currently lies before it; search_start is never moved backwards.
 *
 * Returns 0 without touching the hint for formatted nodes or requests
 * without an inode, 1 otherwise.
 */
static inline int old_hashed_relocation(reiserfs_blocknr_hint_t * hint)
{
	b_blocknr_t candidate;
	u32 hash_in;

	if (hint->formatted_node || !hint->inode)
		return 0;

	hash_in = le32_to_cpu((INODE_PKEY(hint->inode))->k_dir_id);
	candidate =
	    hint->beg + (u32) keyed_hash(((char *)(&hash_in)),
					 4) % (hint->end - hint->beg - 1);

	if (hint->search_start < candidate)
		hint->search_start = candidate;

	return 1;
}

843
/*
 * Old allocator behaviour for unformatted nodes: displace by directory id
 * without hashing.
 *
 * The candidate position is beg + (k_dir_id modulo (end - beg));
 * search_start is moved forward to it if it currently lies before it.
 *
 * Returns 0 without touching the hint for formatted nodes or requests
 * without an inode, 1 otherwise.
 */
static inline int old_way(reiserfs_blocknr_hint_t * hint)
{
	b_blocknr_t candidate;

	if (hint->formatted_node || !hint->inode)
		return 0;

	candidate =
	    hint->beg +
	    le32_to_cpu(INODE_PKEY(hint->inode)->k_dir_id) % (hint->end -
							      hint->beg);

	if (hint->search_start < candidate)
		hint->search_start = candidate;

	return 1;
}

/*
 * Divide [0, hint->end) into one hundred slices of (end / 100) blocks and
 * choose the slice selected by hashing the k_dir_id of the hint's key.
 * If search_start does not already lie inside that slice, it is reset to
 * the start of the slice.
 */
static inline void hundredth_slices(reiserfs_blocknr_hint_t * hint)
{
	b_blocknr_t slice_len = hint->end / 100;
	b_blocknr_t slice_start =
	    (keyed_hash((char *)(&hint->key.k_dir_id), 4) % 100) * slice_len;

	if (slice_start > hint->search_start
	    || slice_start + slice_len <= hint->search_start)
		hint->search_start = slice_start;
}
873 874 875

/*
 * Work out where the bitmap search for new blocks should begin.
 *
 * hint->beg and hint->end are reset to cover the whole device, then the
 * allocator options set on the super block are consulted in a fixed order.
 * Several of them settle the question and return at once; the remaining
 * ones may be combined.  The helpers called from here are expected to
 * adjust the hint (see old_way() and hundredth_slices() for two that set
 * hint->search_start).
 *
 * @amount_needed is accepted but not used by this function.
 */
static void determine_search_start(reiserfs_blocknr_hint_t * hint,
				   int amount_needed)
{
	struct super_block *s = hint->th->t_super;
	int left_hint;		/* result of get_left_neighbor() */

	hint->beg = 0;
	hint->end = SB_BLOCK_COUNT(s) - 1;

	/* This is former border algorithm. Now with tunable border offset */
	if (concentrating_formatted_nodes(s))
		set_border_in_hint(s, hint);

#ifdef DISPLACE_NEW_PACKING_LOCALITIES
	/*
	 * Whenever we create a new directory, we displace it.  At first we
	 * will hash for location, later we might look for a moderately
	 * empty place for it.
	 */
	if (displacing_new_packing_localities(s)
	    && hint->th->displace_new_blocks) {
		displace_new_packing_locality(hint);

		/* nothing else is considered once the new packing
		 * locality has been displaced */
		return;
	}
#endif

	/* all persons should feel encouraged to add more special cases here
	 * and test them */

	if (displacing_large_files(s) && !hint->formatted_node
	    && this_blocknr_allocation_would_make_it_a_large_file(hint)) {
		displace_large_file(hint);
		return;
	}

	if (hint->formatted_node && TEST_OPTION(hashed_formatted_nodes, s)) {
		hash_formatted_node(hint);
		return;
	}

	/*
	 * None of the special cases above applied: use the left neighbor,
	 * in tree order, of the new node we are allocating for.
	 */
	left_hint = get_left_neighbor(hint);

	/*
	 * Mimic the old block allocator: if VFS allowed for preallocation,
	 * new blocks are displaced based on directory ID.  Also, if the
	 * suggested search_start is less than the last preallocated block,
	 * start searching from that block, assuming that HDD dataflow is
	 * faster in forward direction.
	 */
	if (TEST_OPTION(old_way, s)) {
		if (hint->formatted_node)
			return;

		if (!reiserfs_hashed_relocation(s))
			old_way(hint);
		else if (!reiserfs_no_unhashed_relocation(s))
			old_hashed_relocation(hint);

		if (hint->inode
		    && REISERFS_I(hint->inode)->i_prealloc_block >
		    hint->search_start)
			hint->search_start =
			    REISERFS_I(hint->inode)->i_prealloc_block;
		return;
	}

	/* This is an approach proposed by Hans */
	if (TEST_OPTION(hundredth_slices, s)
	    && (!displacing_large_files(s) || hint->formatted_node)) {
		hundredth_slices(hint);
		return;
	}

	/* old_hashed_relocation only works on unformatted */
	if (!left_hint && !hint->formatted_node
	    && TEST_OPTION(old_hashed_relocation, s))
		old_hashed_relocation(hint);

	/* new_hashed_relocation works with both formatted/unformatted nodes */
	if ((!left_hint || hint->formatted_node)
	    && TEST_OPTION(new_hashed_relocation, s))
		new_hashed_relocation(hint);

	/* dirid grouping works only on unformatted nodes */
	if (!left_hint && !hint->formatted_node
	    && TEST_OPTION(dirid_groups, s))
		dirid_groups(hint);

#ifdef DISPLACE_NEW_PACKING_LOCALITIES
	if (hint->formatted_node && TEST_OPTION(dirid_groups, s))
		dirid_groups(hint);
#endif

	/* oid grouping works only on unformatted nodes */
	if (!left_hint && !hint->formatted_node
	    && TEST_OPTION(oid_groups, s))
		oid_groups(hint);
}

/*
 * Decide how many extra blocks to preallocate and store the answer in
 * hint->prealloc_size.
 *
 * The size stays 0 unless this is an unformatted-node allocation with
 * hint->preallocate set, the inode is a regular file, and the file is at
 * least preallocmin blocks long; in that case it becomes preallocsize - 1
 * (both values come from the super block's s_alloc_options).
 *
 * Ideas left by the original authors: make the minimum size a mount
 * option and benchmark both ways; benchmark preallocating always.
 *
 * Always returns CARRY_ON.
 */
static int determine_prealloc_size(reiserfs_blocknr_hint_t * hint)
{
	struct inode *inode;
	struct super_block *sb;

	hint->prealloc_size = 0;

	if (hint->formatted_node || !hint->preallocate)
		return CARRY_ON;

	/* we preallocate blocks only for regular files */
	inode = hint->inode;
	if (!S_ISREG(inode->i_mode))
		return CARRY_ON;

	/* ... and only once the file has reached the minimum size */
	sb = hint->th->t_super;
	if (inode->i_size <
	    REISERFS_SB(sb)->s_alloc_options.preallocmin *
	    inode->i_sb->s_blocksize)
		return CARRY_ON;

	hint->prealloc_size =
	    REISERFS_SB(sb)->s_alloc_options.preallocsize - 1;
	return CARRY_ON;
}

/* XXX I know it could be merged with upper-level function;
   but may be result function would be too complex. */
995 996 997 998 999 1000
static inline int allocate_without_wrapping_disk(reiserfs_blocknr_hint_t * hint,
						 b_blocknr_t * new_blocknrs,
						 b_blocknr_t start,
						 b_blocknr_t finish, int min,
						 int amount_needed,
						 int prealloc_size)
L
Linus Torvalds 已提交
1001
{
1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018
	int rest = amount_needed;
	int nr_allocated;

	while (rest > 0 && start <= finish) {
		nr_allocated = scan_bitmap(hint->th, &start, finish, min,
					   rest + prealloc_size,
					   !hint->formatted_node, hint->block);

		if (nr_allocated == 0)	/* no new blocks allocated, return */
			break;

		/* fill free_blocknrs array first */
		while (rest > 0 && nr_allocated > 0) {
			*new_blocknrs++ = start++;
			rest--;
			nr_allocated--;
		}
L
Linus Torvalds 已提交
1019

1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030
		/* do we have something to fill prealloc. array also ? */
		if (nr_allocated > 0) {
			/* it means prealloc_size was greater that 0 and we do preallocation */
			list_add(&REISERFS_I(hint->inode)->i_prealloc_list,
				 &SB_JOURNAL(hint->th->t_super)->
				 j_prealloc_list);
			REISERFS_I(hint->inode)->i_prealloc_block = start;
			REISERFS_I(hint->inode)->i_prealloc_count =
			    nr_allocated;
			break;
		}
L
Linus Torvalds 已提交
1031 1032
	}

1033
	return (amount_needed - rest);
L
Linus Torvalds 已提交
1034 1035 1036
}

static inline int blocknrs_and_prealloc_arrays_from_search_start
1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048
    (reiserfs_blocknr_hint_t * hint, b_blocknr_t * new_blocknrs,
     int amount_needed) {
	struct super_block *s = hint->th->t_super;
	b_blocknr_t start = hint->search_start;
	b_blocknr_t finish = SB_BLOCK_COUNT(s) - 1;
	int passno = 0;
	int nr_allocated = 0;
	int bigalloc = 0;

	determine_prealloc_size(hint);
	if (!hint->formatted_node) {
		int quota_ret;
L
Linus Torvalds 已提交
1049
#ifdef REISERQUOTA_DEBUG
1050 1051 1052
		reiserfs_debug(s, REISERFS_DEBUG_CODE,
			       "reiserquota: allocating %d blocks id=%u",
			       amount_needed, hint->inode->i_uid);
L
Linus Torvalds 已提交
1053
#endif
1054 1055 1056 1057 1058
		quota_ret =
		    DQUOT_ALLOC_BLOCK_NODIRTY(hint->inode, amount_needed);
		if (quota_ret)	/* Quota exceeded? */
			return QUOTA_EXCEEDED;
		if (hint->preallocate && hint->prealloc_size) {
L
Linus Torvalds 已提交
1059
#ifdef REISERQUOTA_DEBUG
1060 1061 1062
			reiserfs_debug(s, REISERFS_DEBUG_CODE,
				       "reiserquota: allocating (prealloc) %d blocks id=%u",
				       hint->prealloc_size, hint->inode->i_uid);
L
Linus Torvalds 已提交
1063
#endif
1064 1065 1066 1067 1068 1069 1070 1071
			quota_ret =
			    DQUOT_PREALLOC_BLOCK_NODIRTY(hint->inode,
							 hint->prealloc_size);
			if (quota_ret)
				hint->preallocate = hint->prealloc_size = 0;
		}
		/* for unformatted nodes, force large allocations */
		bigalloc = amount_needed;
L
Linus Torvalds 已提交
1072 1073
	}

1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108
	do {
		/* in bigalloc mode, nr_allocated should stay zero until
		 * the entire allocation is filled
		 */
		if (unlikely(bigalloc && nr_allocated)) {
			reiserfs_warning(s, "bigalloc is %d, nr_allocated %d\n",
					 bigalloc, nr_allocated);
			/* reset things to a sane value */
			bigalloc = amount_needed - nr_allocated;
		}
		/*
		 * try pass 0 and pass 1 looking for a nice big
		 * contiguous allocation.  Then reset and look
		 * for anything you can find.
		 */
		if (passno == 2 && bigalloc) {
			passno = 0;
			bigalloc = 0;
		}
		switch (passno++) {
		case 0:	/* Search from hint->search_start to end of disk */
			start = hint->search_start;
			finish = SB_BLOCK_COUNT(s) - 1;
			break;
		case 1:	/* Search from hint->beg to hint->search_start */
			start = hint->beg;
			finish = hint->search_start;
			break;
		case 2:	/* Last chance: Search from 0 to hint->beg */
			start = 0;
			finish = hint->beg;
			break;
		default:	/* We've tried searching everywhere, not enough space */
			/* Free the blocks */
			if (!hint->formatted_node) {
L
Linus Torvalds 已提交
1109
#ifdef REISERQUOTA_DEBUG
1110 1111 1112 1113 1114 1115
				reiserfs_debug(s, REISERFS_DEBUG_CODE,
					       "reiserquota: freeing (nospace) %d blocks id=%u",
					       amount_needed +
					       hint->prealloc_size -
					       nr_allocated,
					       hint->inode->i_uid);
L
Linus Torvalds 已提交
1116
#endif
1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140
				DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed + hint->prealloc_size - nr_allocated);	/* Free not allocated blocks */
			}
			while (nr_allocated--)
				reiserfs_free_block(hint->th, hint->inode,
						    new_blocknrs[nr_allocated],
						    !hint->formatted_node);

			return NO_DISK_SPACE;
		}
	} while ((nr_allocated += allocate_without_wrapping_disk(hint,
								 new_blocknrs +
								 nr_allocated,
								 start, finish,
								 bigalloc ?
								 bigalloc : 1,
								 amount_needed -
								 nr_allocated,
								 hint->
								 prealloc_size))
		 < amount_needed);
	if (!hint->formatted_node &&
	    amount_needed + hint->prealloc_size >
	    nr_allocated + REISERFS_I(hint->inode)->i_prealloc_count) {
		/* Some of preallocation blocks were not allocated */
L
Linus Torvalds 已提交
1141
#ifdef REISERQUOTA_DEBUG
1142 1143 1144 1145 1146 1147
		reiserfs_debug(s, REISERFS_DEBUG_CODE,
			       "reiserquota: freeing (failed prealloc) %d blocks id=%u",
			       amount_needed + hint->prealloc_size -
			       nr_allocated -
			       REISERFS_I(hint->inode)->i_prealloc_count,
			       hint->inode->i_uid);
L
Linus Torvalds 已提交
1148
#endif
1149 1150 1151 1152 1153
		DQUOT_FREE_BLOCK_NODIRTY(hint->inode, amount_needed +
					 hint->prealloc_size - nr_allocated -
					 REISERFS_I(hint->inode)->
					 i_prealloc_count);
	}
L
Linus Torvalds 已提交
1154

1155
	return CARRY_ON;
L
Linus Torvalds 已提交
1156 1157 1158 1159
}

/* grab new blocknrs from preallocated list */
/* return amount still needed after using them */
1160 1161 1162
static int use_preallocated_list_if_available(reiserfs_blocknr_hint_t * hint,
					      b_blocknr_t * new_blocknrs,
					      int amount_needed)
L
Linus Torvalds 已提交
1163
{
1164
	struct inode *inode = hint->inode;
L
Linus Torvalds 已提交
1165

1166 1167
	if (REISERFS_I(inode)->i_prealloc_count > 0) {
		while (amount_needed) {
L
Linus Torvalds 已提交
1168

1169 1170
			*new_blocknrs++ = REISERFS_I(inode)->i_prealloc_block++;
			REISERFS_I(inode)->i_prealloc_count--;
L
Linus Torvalds 已提交
1171

1172
			amount_needed--;
L
Linus Torvalds 已提交
1173

1174 1175 1176 1177 1178
			if (REISERFS_I(inode)->i_prealloc_count <= 0) {
				list_del(&REISERFS_I(inode)->i_prealloc_list);
				break;
			}
		}
L
Linus Torvalds 已提交
1179
	}
1180 1181
	/* return amount still needed after using preallocated blocks */
	return amount_needed;
L
Linus Torvalds 已提交
1182 1183
}

1184 1185
/*
 * Allocate @amount_needed block numbers into @new_blocknrs.
 *
 * @reserved_by_us is the amount of blocks the caller has already
 * reserved; it is subtracted from the request when checking the free
 * space that remains after the filesystem-wide reservation.
 *
 * For unformatted nodes with hint->preallocate set, blocks are taken from
 * the inode's preallocation first.  The rest is found by choosing a search
 * start and scanning the bitmaps from there.
 *
 * Returns CARRY_ON on success, NO_DISK_SPACE when the free-space check
 * fails, or whatever error the bitmap allocation step returns.
 */
int reiserfs_allocate_blocknrs(reiserfs_blocknr_hint_t * hint,
			       b_blocknr_t * new_blocknrs, int amount_needed,
			       int reserved_by_us)
{
	struct super_block *s = hint->th->t_super;
	int initial_amount_needed = amount_needed;
	int from_prealloc = 0;	/* blocks served by the prealloc. list */
	int ret;
	int i;

	/* Check if there is enough space, taking into account reserved space */
	if (SB_FREE_BLOCKS(s) - REISERFS_SB(s)->reserved_blocks <
	    amount_needed - reserved_by_us)
		return NO_DISK_SPACE;

	/*
	 * should this be if !hint->inode && hint->preallocate?
	 * do you mean hint->formatted_node can be removed ? - Zam
	 * hint->formatted_node cannot be removed because we try to access
	 * inode information here, and there is often no inode associated
	 * with metadata allocations - green
	 */
	if (!hint->formatted_node && hint->preallocate) {
		amount_needed = use_preallocated_list_if_available
		    (hint, new_blocknrs, amount_needed);

		/* all blocknrs we need we got from prealloc. list */
		if (amount_needed == 0)
			return CARRY_ON;

		from_prealloc = initial_amount_needed - amount_needed;
		new_blocknrs += from_prealloc;
	}

	/* find search start and save it in hint structure */
	determine_search_start(hint, amount_needed);
	if (hint->search_start >= SB_BLOCK_COUNT(s))
		hint->search_start = SB_BLOCK_COUNT(s) - 1;

	/* allocation itself; fill new_blocknrs and preallocation arrays */
	ret = blocknrs_and_prealloc_arrays_from_search_start
	    (hint, new_blocknrs, amount_needed);
	if (ret == CARRY_ON)
		return ret;

	/*
	 * We used the prealloc. list to fill (part of) the new_blocknrs
	 * array.  Since the final allocation failed, those blocks must
	 * either go back to the prealloc. list or be freed -- Zam (I chose
	 * the second variant).  They are released walking backwards from
	 * the current position in the array.
	 */
	for (i = 0; i < from_prealloc; i++)
		reiserfs_free_block(hint->th, hint->inode,
				    *(--new_blocknrs), 1);

	return ret;
}

/* These 2 functions are here to provide blocks reservation to the rest of kernel */
/* Reserve @blocks amount of blocks in fs pointed by @sb. Caller must make sure
   there are actually this much blocks on the FS available */
1235 1236 1237 1238 1239 1240
void reiserfs_claim_blocks_to_be_allocated(struct super_block *sb,	/* super block of
									   filesystem where
									   blocks should be
									   reserved */
					   int blocks	/* How much to reserve */
    )
L
Linus Torvalds 已提交
1241 1242
{

1243 1244 1245
	/* Fast case, if reservation is zero - exit immediately. */
	if (!blocks)
		return;
L
Linus Torvalds 已提交
1246

1247 1248 1249
	spin_lock(&REISERFS_SB(sb)->bitmap_lock);
	REISERFS_SB(sb)->reserved_blocks += blocks;
	spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
L
Linus Torvalds 已提交
1250 1251 1252
}

/* Unreserve @blocks amount of blocks in fs pointed by @sb */
1253 1254 1255 1256 1257 1258
void reiserfs_release_claimed_blocks(struct super_block *sb,	/* super block of
								   filesystem where
								   blocks should be
								   reserved */
				     int blocks	/* How much to unreserve */
    )
L
Linus Torvalds 已提交
1259 1260
{

1261 1262 1263
	/* Fast case, if unreservation is zero - exit immediately. */
	if (!blocks)
		return;
L
Linus Torvalds 已提交
1264

1265 1266 1267 1268 1269
	spin_lock(&REISERFS_SB(sb)->bitmap_lock);
	REISERFS_SB(sb)->reserved_blocks -= blocks;
	spin_unlock(&REISERFS_SB(sb)->bitmap_lock);
	RFALSE(REISERFS_SB(sb)->reserved_blocks < 0,
	       "amount of blocks reserved became zero?");
L
Linus Torvalds 已提交
1270 1271 1272 1273
}

/* This function estimates how much pages we will be able to write to FS
   used for reiserfs_file_write() purposes for now. */
1274 1275
int reiserfs_can_fit_pages(struct super_block *sb	/* superblock of filesystem
							   to estimate space */ )
L
Linus Torvalds 已提交
1276 1277 1278 1279
{
	int space;

	spin_lock(&REISERFS_SB(sb)->bitmap_lock);
1280 1281 1282 1283
	space =
	    (SB_FREE_BLOCKS(sb) -
	     REISERFS_SB(sb)->reserved_blocks) >> (PAGE_CACHE_SHIFT -
						   sb->s_blocksize_bits);
L
Linus Torvalds 已提交
1284 1285
	spin_unlock(&REISERFS_SB(sb)->bitmap_lock);

1286
	return space > 0 ? space : 0;
L
Linus Torvalds 已提交
1287
}