/*
 *  linux/fs/ext4/ialloc.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  BSD ufs-inspired inode and directory allocation by
 *  Stephen Tweedie (sct@redhat.com), 1993
 *  Big-endian to little-endian byte-swapping/bitmaps by
 *        David S. Miller (davem@caip.rutgers.edu), 1995
 */

#include <linux/time.h>
#include <linux/fs.h>
17
#include <linux/jbd2.h>
18 19 20 21 22 23
#include <linux/stat.h>
#include <linux/string.h>
#include <linux/quotaops.h>
#include <linux/buffer_head.h>
#include <linux/random.h>
#include <linux/bitops.h>
24
#include <linux/blkdev.h>
25
#include <asm/byteorder.h>
26

27 28
#include "ext4.h"
#include "ext4_jbd2.h"
29 30 31
#include "xattr.h"
#include "acl.h"

32 33
#include <trace/events/ext4.h>

/*
 * ialloc.c contains the inode allocation and deallocation routines
 */

/*
 * The free inodes are managed by bitmaps.  A file system contains several
 * block groups.  Each group contains 1 bitmap block for blocks, 1 bitmap
 * block for inodes, N blocks for the inode table and data blocks.
 *
 * The file system contains group descriptors which are located after the
 * super block.  Each descriptor contains the block numbers of the group's
 * bitmap blocks and the free blocks count in the group.
 */

/*
 * Mark the bits from start_bit up to end_bit of @bitmap as in use.
 *
 * To avoid calling the atomic setbit hundreds or thousands of times, it is
 * only used for the bits leading up to the next byte boundary (so that the
 * bit order within a byte comes out right).  The rest of the range is filled
 * a whole byte at a time with memset(), which is fine as there are no other
 * users of the bitmap.
 */
void ext4_mark_bitmap_end(int start_bit, int end_bit, char *bitmap)
{
	int bit;

	if (start_bit >= end_bit)
		return;

	ext4_debug("mark end bits +%d through +%d used\n", start_bit, end_bit);

	/* One bit at a time up to the byte boundary at or after start_bit. */
	bit = start_bit;
	while (bit < ((start_bit + 7) & ~7UL)) {
		ext4_set_bit(bit, bitmap);
		bit++;
	}

	/* Whole bytes from there on; a partial trailing byte is not filled. */
	if (bit < end_bit)
		memset(bitmap + (bit >> 3), 0xff, (end_bit - bit) >> 3);
}

/* Initializes an uninitialized inode bitmap */
68 69 70 71
static unsigned ext4_init_inode_bitmap(struct super_block *sb,
				       struct buffer_head *bh,
				       ext4_group_t block_group,
				       struct ext4_group_desc *gdp)
A
Andreas Dilger 已提交
72 73 74 75 76 77 78 79
{
	struct ext4_sb_info *sbi = EXT4_SB(sb);

	J_ASSERT_BH(bh, buffer_locked(bh));

	/* If checksum is bad mark all blocks and inodes use to prevent
	 * allocation, essentially implementing a per-group read-only flag. */
	if (!ext4_group_desc_csum_verify(sbi, block_group, gdp)) {
80
		ext4_error(sb, "Checksum bad for group %u", block_group);
81
		ext4_free_group_clusters_set(sb, gdp, 0);
82 83
		ext4_free_inodes_set(sb, gdp, 0);
		ext4_itable_unused_set(sb, gdp, 0);
A
Andreas Dilger 已提交
84 85 86 87 88
		memset(bh->b_data, 0xff, sb->s_blocksize);
		return 0;
	}

	memset(bh->b_data, 0, (EXT4_INODES_PER_GROUP(sb) + 7) / 8);
89
	ext4_mark_bitmap_end(EXT4_INODES_PER_GROUP(sb), sb->s_blocksize * 8,
A
Andreas Dilger 已提交
90 91 92 93
			bh->b_data);

	return EXT4_INODES_PER_GROUP(sb);
}
94 95 96 97 98 99 100 101

/*
 * Read the inode allocation bitmap for a given block_group, reading
 * into the specified slot in the superblock's bitmap cache.
 *
 * Return buffer_head of bitmap on success or NULL.
 */
static struct buffer_head *
102
ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
103
{
104
	struct ext4_group_desc *desc;
105
	struct buffer_head *bh = NULL;
106
	ext4_fsblk_t bitmap_blk;
107

108
	desc = ext4_get_group_desc(sb, block_group, NULL);
109
	if (!desc)
110
		return NULL;
111

112 113 114
	bitmap_blk = ext4_inode_bitmap(sb, desc);
	bh = sb_getblk(sb, bitmap_blk);
	if (unlikely(!bh)) {
115
		ext4_error(sb, "Cannot read inode bitmap - "
116
			    "block_group = %u, inode_bitmap = %llu",
117 118 119
			    block_group, bitmap_blk);
		return NULL;
	}
120
	if (bitmap_uptodate(bh))
121 122
		return bh;

123
	lock_buffer(bh);
124 125 126 127
	if (bitmap_uptodate(bh)) {
		unlock_buffer(bh);
		return bh;
	}
128

129
	ext4_lock_group(sb, block_group);
A
Andreas Dilger 已提交
130
	if (desc->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
131
		ext4_init_inode_bitmap(sb, bh, block_group, desc);
132
		set_bitmap_uptodate(bh);
133
		set_buffer_uptodate(bh);
134
		ext4_unlock_group(sb, block_group);
A
Aneesh Kumar K.V 已提交
135
		unlock_buffer(bh);
136
		return bh;
A
Andreas Dilger 已提交
137
	}
138
	ext4_unlock_group(sb, block_group);
139

140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
	if (buffer_uptodate(bh)) {
		/*
		 * if not uninit if bh is uptodate,
		 * bitmap is also uptodate
		 */
		set_bitmap_uptodate(bh);
		unlock_buffer(bh);
		return bh;
	}
	/*
	 * submit the buffer_head for read. We can
	 * safely mark the bitmap as uptodate now.
	 * We do it here so the bitmap uptodate bit
	 * get set with buffer lock held.
	 */
155
	trace_ext4_load_inode_bitmap(sb, block_group);
156
	set_bitmap_uptodate(bh);
157 158
	if (bh_submit_read(bh) < 0) {
		put_bh(bh);
159
		ext4_error(sb, "Cannot read inode bitmap - "
160
			    "block_group = %u, inode_bitmap = %llu",
161 162 163
			    block_group, bitmap_blk);
		return NULL;
	}
164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182
	return bh;
}

/*
 * NOTE! When we get the inode, we're the only people
 * that have access to it, and as such there are no
 * race conditions we have to worry about. The inode
 * is not on the hash-lists, and it cannot be reached
 * through the filesystem because the directory entry
 * has been deleted earlier.
 *
 * HOWEVER: we must make sure that we get no aliases,
 * which means that we have to call "clear_inode()"
 * _before_ we mark the inode not in use in the inode
 * bitmaps. Otherwise a newly created file might use
 * the same inode number (not actually the same pointer
 * though), and then we'd have two inodes sharing the
 * same inode number and space on the harddisk.
 */
183
void ext4_free_inode(handle_t *handle, struct inode *inode)
184
{
185
	struct super_block *sb = inode->i_sb;
186 187 188 189
	int is_directory;
	unsigned long ino;
	struct buffer_head *bitmap_bh = NULL;
	struct buffer_head *bh2;
190
	ext4_group_t block_group;
191
	unsigned long bit;
192 193
	struct ext4_group_desc *gdp;
	struct ext4_super_block *es;
194
	struct ext4_sb_info *sbi;
195
	int fatal = 0, err, count, cleared;
196 197

	if (atomic_read(&inode->i_count) > 1) {
198 199
		printk(KERN_ERR "ext4_free_inode: inode has count=%d\n",
		       atomic_read(&inode->i_count));
200 201 202
		return;
	}
	if (inode->i_nlink) {
203 204
		printk(KERN_ERR "ext4_free_inode: inode has nlink=%d\n",
		       inode->i_nlink);
205 206 207
		return;
	}
	if (!sb) {
208 209
		printk(KERN_ERR "ext4_free_inode: inode on "
		       "nonexistent device\n");
210 211
		return;
	}
212
	sbi = EXT4_SB(sb);
213 214

	ino = inode->i_ino;
215
	ext4_debug("freeing inode %lu\n", ino);
216
	trace_ext4_free_inode(inode);
217 218 219 220 221

	/*
	 * Note: we must free any quota before locking the superblock,
	 * as writing the quota to disk may need the lock as well.
	 */
222
	dquot_initialize(inode);
223
	ext4_xattr_delete_inode(handle, inode);
224
	dquot_free_inode(inode);
225
	dquot_drop(inode);
226 227 228 229

	is_directory = S_ISDIR(inode->i_mode);

	/* Do this BEFORE marking the inode not in use or returning an error */
A
Al Viro 已提交
230
	ext4_clear_inode(inode);
231

232 233
	es = EXT4_SB(sb)->s_es;
	if (ino < EXT4_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
234
		ext4_error(sb, "reserved or nonexistent inode %lu", ino);
235 236
		goto error_return;
	}
237 238
	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
239
	bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
240 241 242 243
	if (!bitmap_bh)
		goto error_return;

	BUFFER_TRACE(bitmap_bh, "get_write_access");
244
	fatal = ext4_journal_get_write_access(handle, bitmap_bh);
245 246 247
	if (fatal)
		goto error_return;

248 249 250
	fatal = -ESRCH;
	gdp = ext4_get_group_desc(sb, block_group, &bh2);
	if (gdp) {
251
		BUFFER_TRACE(bh2, "get_write_access");
252
		fatal = ext4_journal_get_write_access(handle, bh2);
253 254 255 256 257 258 259
	}
	ext4_lock_group(sb, block_group);
	cleared = ext4_clear_bit(bit, bitmap_bh->b_data);
	if (fatal || !cleared) {
		ext4_unlock_group(sb, block_group);
		goto out;
	}
260

261 262 263 264 265 266
	count = ext4_free_inodes_count(sb, gdp) + 1;
	ext4_free_inodes_set(sb, gdp, count);
	if (is_directory) {
		count = ext4_used_dirs_count(sb, gdp) - 1;
		ext4_used_dirs_set(sb, gdp, count);
		percpu_counter_dec(&sbi->s_dirs_counter);
267
	}
268 269
	gdp->bg_checksum = ext4_group_desc_csum(sbi, block_group, gdp);
	ext4_unlock_group(sb, block_group);
270

271 272 273
	percpu_counter_inc(&sbi->s_freeinodes_counter);
	if (sbi->s_log_groups_per_flex) {
		ext4_group_t f = ext4_flex_group(sbi, block_group);
274

275 276 277
		atomic_inc(&sbi->s_flex_groups[f].free_inodes);
		if (is_directory)
			atomic_dec(&sbi->s_flex_groups[f].used_dirs);
278
	}
279 280 281 282 283 284 285 286
	BUFFER_TRACE(bh2, "call ext4_handle_dirty_metadata");
	fatal = ext4_handle_dirty_metadata(handle, NULL, bh2);
out:
	if (cleared) {
		BUFFER_TRACE(bitmap_bh, "call ext4_handle_dirty_metadata");
		err = ext4_handle_dirty_metadata(handle, NULL, bitmap_bh);
		if (!fatal)
			fatal = err;
T
Theodore Ts'o 已提交
287
		ext4_mark_super_dirty(sb);
288 289 290
	} else
		ext4_error(sb, "bit already cleared for inode %lu", ino);

291 292
error_return:
	brelse(bitmap_bh);
293
	ext4_std_error(sb, fatal);
294 295
}

296 297
struct orlov_stats {
	__u32 free_inodes;
298
	__u32 free_clusters;
299 300 301 302 303 304 305 306
	__u32 used_dirs;
};

/*
 * Helper function for Orlov's allocator; returns critical information
 * for a particular block group or flex_bg.  If flex_size is 1, then g
 * is a block group number; otherwise it is flex_bg number.
 */
307 308
static void get_orlov_stats(struct super_block *sb, ext4_group_t g,
			    int flex_size, struct orlov_stats *stats)
309 310
{
	struct ext4_group_desc *desc;
311
	struct flex_groups *flex_group = EXT4_SB(sb)->s_flex_groups;
312

313 314
	if (flex_size > 1) {
		stats->free_inodes = atomic_read(&flex_group[g].free_inodes);
315
		stats->free_clusters = atomic_read(&flex_group[g].free_clusters);
316 317 318
		stats->used_dirs = atomic_read(&flex_group[g].used_dirs);
		return;
	}
319

320 321 322
	desc = ext4_get_group_desc(sb, g, NULL);
	if (desc) {
		stats->free_inodes = ext4_free_inodes_count(sb, desc);
323
		stats->free_clusters = ext4_free_group_clusters(sb, desc);
324 325 326
		stats->used_dirs = ext4_used_dirs_count(sb, desc);
	} else {
		stats->free_inodes = 0;
327
		stats->free_clusters = 0;
328
		stats->used_dirs = 0;
329 330 331
	}
}

332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
/*
 * Orlov's allocator for directories.
 *
 * We always try to spread first-level directories.
 *
 * If there are blockgroups with both free inodes and free blocks counts
 * not worse than average we return one with smallest directory count.
 * Otherwise we simply return a random group.
 *
 * For the rest rules look so:
 *
 * It's OK to put directory into a group unless
 * it has too many directories already (max_dirs) or
 * it has too few free inodes left (min_inodes) or
 * it has too few free blocks left (min_blocks) or
347
 * Parent's group is preferred, if it doesn't satisfy these
348 349 350 351 352
 * conditions we search cyclically through the rest. If none
 * of the groups look good we just look for a group with more
 * free inodes than average (starting at parent's group).
 */

353
static int find_group_orlov(struct super_block *sb, struct inode *parent,
354 355
			    ext4_group_t *group, int mode,
			    const struct qstr *qstr)
356
{
357
	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
358
	struct ext4_sb_info *sbi = EXT4_SB(sb);
359
	ext4_group_t real_ngroups = ext4_get_groups_count(sb);
360
	int inodes_per_group = EXT4_INODES_PER_GROUP(sb);
361
	unsigned int freei, avefreei;
362
	ext4_fsblk_t freeb, avefreec;
363
	unsigned int ndirs;
364
	int max_dirs, min_inodes;
365
	ext4_grpblk_t min_clusters;
366
	ext4_group_t i, grp, g, ngroups;
367
	struct ext4_group_desc *desc;
368 369
	struct orlov_stats stats;
	int flex_size = ext4_flex_bg_size(sbi);
370
	struct dx_hash_info hinfo;
371

372
	ngroups = real_ngroups;
373
	if (flex_size > 1) {
374
		ngroups = (real_ngroups + flex_size - 1) >>
375 376 377
			sbi->s_log_groups_per_flex;
		parent_group >>= sbi->s_log_groups_per_flex;
	}
378 379 380

	freei = percpu_counter_read_positive(&sbi->s_freeinodes_counter);
	avefreei = freei / ngroups;
381 382
	freeb = EXT4_C2B(sbi,
		percpu_counter_read_positive(&sbi->s_freeclusters_counter));
383 384
	avefreec = freeb;
	do_div(avefreec, ngroups);
385 386
	ndirs = percpu_counter_read_positive(&sbi->s_dirs_counter);

387 388
	if (S_ISDIR(mode) &&
	    ((parent == sb->s_root->d_inode) ||
389
	     (ext4_test_inode_flag(parent, EXT4_INODE_TOPDIR)))) {
390
		int best_ndir = inodes_per_group;
391
		int ret = -1;
392

393 394 395 396 397 398 399
		if (qstr) {
			hinfo.hash_version = DX_HASH_HALF_MD4;
			hinfo.seed = sbi->s_hash_seed;
			ext4fs_dirhash(qstr->name, qstr->len, &hinfo);
			grp = hinfo.hash;
		} else
			get_random_bytes(&grp, sizeof(grp));
400
		parent_group = (unsigned)grp % ngroups;
401
		for (i = 0; i < ngroups; i++) {
402 403 404
			g = (parent_group + i) % ngroups;
			get_orlov_stats(sb, g, flex_size, &stats);
			if (!stats.free_inodes)
405
				continue;
406
			if (stats.used_dirs >= best_ndir)
407
				continue;
408
			if (stats.free_inodes < avefreei)
409
				continue;
410
			if (stats.free_clusters < avefreec)
411
				continue;
412
			grp = g;
413
			ret = 0;
414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
			best_ndir = stats.used_dirs;
		}
		if (ret)
			goto fallback;
	found_flex_bg:
		if (flex_size == 1) {
			*group = grp;
			return 0;
		}

		/*
		 * We pack inodes at the beginning of the flexgroup's
		 * inode tables.  Block allocation decisions will do
		 * something similar, although regular files will
		 * start at 2nd block group of the flexgroup.  See
		 * ext4_ext_find_goal() and ext4_find_near().
		 */
		grp *= flex_size;
		for (i = 0; i < flex_size; i++) {
433
			if (grp+i >= real_ngroups)
434 435 436 437 438 439
				break;
			desc = ext4_get_group_desc(sb, grp+i, NULL);
			if (desc && ext4_free_inodes_count(sb, desc)) {
				*group = grp+i;
				return 0;
			}
440 441 442 443 444
		}
		goto fallback;
	}

	max_dirs = ndirs / ngroups + inodes_per_group / 16;
445 446 447
	min_inodes = avefreei - inodes_per_group*flex_size / 4;
	if (min_inodes < 1)
		min_inodes = 1;
448
	min_clusters = avefreec - EXT4_CLUSTERS_PER_GROUP(sb)*flex_size / 4;
449 450 451 452 453 454 455 456 457 458

	/*
	 * Start looking in the flex group where we last allocated an
	 * inode for this parent directory
	 */
	if (EXT4_I(parent)->i_last_alloc_group != ~0) {
		parent_group = EXT4_I(parent)->i_last_alloc_group;
		if (flex_size > 1)
			parent_group >>= sbi->s_log_groups_per_flex;
	}
459 460

	for (i = 0; i < ngroups; i++) {
461 462 463
		grp = (parent_group + i) % ngroups;
		get_orlov_stats(sb, grp, flex_size, &stats);
		if (stats.used_dirs >= max_dirs)
464
			continue;
465
		if (stats.free_inodes < min_inodes)
466
			continue;
467
		if (stats.free_clusters < min_clusters)
468
			continue;
469
		goto found_flex_bg;
470 471 472
	}

fallback:
473
	ngroups = real_ngroups;
474
	avefreei = freei / ngroups;
475
fallback_retry:
476
	parent_group = EXT4_I(parent)->i_block_group;
477
	for (i = 0; i < ngroups; i++) {
478 479
		grp = (parent_group + i) % ngroups;
		desc = ext4_get_group_desc(sb, grp, NULL);
480
		if (desc && ext4_free_inodes_count(sb, desc) &&
481 482
		    ext4_free_inodes_count(sb, desc) >= avefreei) {
			*group = grp;
483
			return 0;
484
		}
485 486 487 488 489 490 491 492
	}

	if (avefreei) {
		/*
		 * The free-inodes counter is approximate, and for really small
		 * filesystems the above test can fail to find any blockgroups
		 */
		avefreei = 0;
493
		goto fallback_retry;
494 495 496 497 498
	}

	return -1;
}

499
static int find_group_other(struct super_block *sb, struct inode *parent,
500
			    ext4_group_t *group, int mode)
501
{
502
	ext4_group_t parent_group = EXT4_I(parent)->i_block_group;
503
	ext4_group_t i, last, ngroups = ext4_get_groups_count(sb);
504
	struct ext4_group_desc *desc;
505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541
	int flex_size = ext4_flex_bg_size(EXT4_SB(sb));

	/*
	 * Try to place the inode is the same flex group as its
	 * parent.  If we can't find space, use the Orlov algorithm to
	 * find another flex group, and store that information in the
	 * parent directory's inode information so that use that flex
	 * group for future allocations.
	 */
	if (flex_size > 1) {
		int retry = 0;

	try_again:
		parent_group &= ~(flex_size-1);
		last = parent_group + flex_size;
		if (last > ngroups)
			last = ngroups;
		for  (i = parent_group; i < last; i++) {
			desc = ext4_get_group_desc(sb, i, NULL);
			if (desc && ext4_free_inodes_count(sb, desc)) {
				*group = i;
				return 0;
			}
		}
		if (!retry && EXT4_I(parent)->i_last_alloc_group != ~0) {
			retry = 1;
			parent_group = EXT4_I(parent)->i_last_alloc_group;
			goto try_again;
		}
		/*
		 * If this didn't work, use the Orlov search algorithm
		 * to find a new flex group; we pass in the mode to
		 * avoid the topdir algorithms.
		 */
		*group = parent_group + flex_size;
		if (*group > ngroups)
			*group = 0;
542
		return find_group_orlov(sb, parent, group, mode, NULL);
543
	}
544 545 546 547

	/*
	 * Try to place the inode in its parent directory
	 */
548 549
	*group = parent_group;
	desc = ext4_get_group_desc(sb, *group, NULL);
550
	if (desc && ext4_free_inodes_count(sb, desc) &&
551
	    ext4_free_group_clusters(sb, desc))
552
		return 0;
553 554 555 556 557 558 559 560 561 562

	/*
	 * We're going to place this inode in a different blockgroup from its
	 * parent.  We want to cause files in a common directory to all land in
	 * the same blockgroup.  But we want files which are in a different
	 * directory which shares a blockgroup with our parent to land in a
	 * different blockgroup.
	 *
	 * So add our directory's i_ino into the starting point for the hash.
	 */
563
	*group = (*group + parent->i_ino) % ngroups;
564 565 566 567 568 569

	/*
	 * Use a quadratic hash to find a group with a free inode and some free
	 * blocks.
	 */
	for (i = 1; i < ngroups; i <<= 1) {
570 571 572 573
		*group += i;
		if (*group >= ngroups)
			*group -= ngroups;
		desc = ext4_get_group_desc(sb, *group, NULL);
574
		if (desc && ext4_free_inodes_count(sb, desc) &&
575
		    ext4_free_group_clusters(sb, desc))
576
			return 0;
577 578 579 580 581 582
	}

	/*
	 * That failed: try linear search for a free inode, even if that group
	 * has no free blocks.
	 */
583
	*group = parent_group;
584
	for (i = 0; i < ngroups; i++) {
585 586 587
		if (++*group >= ngroups)
			*group = 0;
		desc = ext4_get_group_desc(sb, *group, NULL);
588
		if (desc && ext4_free_inodes_count(sb, desc))
589
			return 0;
590 591 592 593 594
	}

	return -1;
}

595 596
/*
 * claim the inode from the inode bitmap. If the group
597
 * is uninit we need to take the groups's ext4_group_lock
598 599
 * and clear the uninit flag. The inode bitmap update
 * and group desc uninit flag clear should be done
600
 * after holding ext4_group_lock so that ext4_read_inode_bitmap
601 602 603 604 605 606 607 608
 * doesn't race with the ext4_claim_inode
 */
static int ext4_claim_inode(struct super_block *sb,
			struct buffer_head *inode_bitmap_bh,
			unsigned long ino, ext4_group_t group, int mode)
{
	int free = 0, retval = 0, count;
	struct ext4_sb_info *sbi = EXT4_SB(sb);
609
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
610 611
	struct ext4_group_desc *gdp = ext4_get_group_desc(sb, group, NULL);

612 613 614 615 616 617 618 619
	/*
	 * We have to be sure that new inode allocation does not race with
	 * inode table initialization, because otherwise we may end up
	 * allocating and writing new inode right before sb_issue_zeroout
	 * takes place and overwriting our new inode with zeroes. So we
	 * take alloc_sem to prevent it.
	 */
	down_read(&grp->alloc_sem);
620
	ext4_lock_group(sb, group);
621 622 623 624 625 626 627 628
	if (ext4_set_bit(ino, inode_bitmap_bh->b_data)) {
		/* not a free inode */
		retval = 1;
		goto err_ret;
	}
	ino++;
	if ((group == 0 && ino < EXT4_FIRST_INO(sb)) ||
			ino > EXT4_INODES_PER_GROUP(sb)) {
629
		ext4_unlock_group(sb, group);
630
		up_read(&grp->alloc_sem);
631
		ext4_error(sb, "reserved inode or inode > inodes count - "
632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669
			   "block_group = %u, inode=%lu", group,
			   ino + group * EXT4_INODES_PER_GROUP(sb));
		return 1;
	}
	/* If we didn't allocate from within the initialized part of the inode
	 * table then we need to initialize up to this inode. */
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM)) {

		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)) {
			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_INODE_UNINIT);
			/* When marking the block group with
			 * ~EXT4_BG_INODE_UNINIT we don't want to depend
			 * on the value of bg_itable_unused even though
			 * mke2fs could have initialized the same for us.
			 * Instead we calculated the value below
			 */

			free = 0;
		} else {
			free = EXT4_INODES_PER_GROUP(sb) -
				ext4_itable_unused_count(sb, gdp);
		}

		/*
		 * Check the relative inode number against the last used
		 * relative inode number in this group. if it is greater
		 * we need to  update the bg_itable_unused count
		 *
		 */
		if (ino > free)
			ext4_itable_unused_set(sb, gdp,
					(EXT4_INODES_PER_GROUP(sb) - ino));
	}
	count = ext4_free_inodes_count(sb, gdp) - 1;
	ext4_free_inodes_set(sb, gdp, count);
	if (S_ISDIR(mode)) {
		count = ext4_used_dirs_count(sb, gdp) + 1;
		ext4_used_dirs_set(sb, gdp, count);
670 671 672
		if (sbi->s_log_groups_per_flex) {
			ext4_group_t f = ext4_flex_group(sbi, group);

673
			atomic_inc(&sbi->s_flex_groups[f].used_dirs);
674
		}
675 676 677
	}
	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
err_ret:
678
	ext4_unlock_group(sb, group);
679
	up_read(&grp->alloc_sem);
680 681 682
	return retval;
}

683 684 685 686 687 688 689 690 691 692
/*
 * There are two policies for allocating an inode.  If the new inode is
 * a directory, then a forward search is made for a block group with both
 * free space and a low directory-to-inode ratio; if that fails, then of
 * the groups with above-average free space, that group with the fewest
 * directories already is chosen.
 *
 * For other inodes, search forward from the parent directory's block
 * group to find a free inode.
 */
693
struct inode *ext4_new_inode(handle_t *handle, struct inode *dir, int mode,
694
			     const struct qstr *qstr, __u32 goal, uid_t *owner)
695 696
{
	struct super_block *sb;
A
Aneesh Kumar K.V 已提交
697 698
	struct buffer_head *inode_bitmap_bh = NULL;
	struct buffer_head *group_desc_bh;
699
	ext4_group_t ngroups, group = 0;
700
	unsigned long ino = 0;
701 702
	struct inode *inode;
	struct ext4_group_desc *gdp = NULL;
703 704
	struct ext4_inode_info *ei;
	struct ext4_sb_info *sbi;
705
	int ret2, err = 0;
706
	struct inode *ret;
707
	ext4_group_t i;
708
	ext4_group_t flex_group;
709 710 711 712 713 714

	/* Cannot create files in a deleted directory */
	if (!dir || !dir->i_nlink)
		return ERR_PTR(-EPERM);

	sb = dir->i_sb;
715
	ngroups = ext4_get_groups_count(sb);
716
	trace_ext4_request_inode(dir, mode);
717 718 719
	inode = new_inode(sb);
	if (!inode)
		return ERR_PTR(-ENOMEM);
720 721
	ei = EXT4_I(inode);
	sbi = EXT4_SB(sb);
722

723 724 725
	if (!goal)
		goal = sbi->s_inode_goal;

726
	if (goal && goal <= le32_to_cpu(sbi->s_es->s_inodes_count)) {
727 728 729 730 731 732
		group = (goal - 1) / EXT4_INODES_PER_GROUP(sb);
		ino = (goal - 1) % EXT4_INODES_PER_GROUP(sb);
		ret2 = 0;
		goto got_group;
	}

L
Lukas Czerner 已提交
733 734 735
	if (S_ISDIR(mode))
		ret2 = find_group_orlov(sb, dir, &group, mode, qstr);
	else
736
		ret2 = find_group_other(sb, dir, &group, mode);
737

738
got_group:
739
	EXT4_I(dir)->i_last_alloc_group = group;
740
	err = -ENOSPC;
741
	if (ret2 == -1)
742 743
		goto out;

744
	for (i = 0; i < ngroups; i++, ino = 0) {
745 746
		err = -EIO;

A
Aneesh Kumar K.V 已提交
747
		gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
748 749 750
		if (!gdp)
			goto fail;

A
Aneesh Kumar K.V 已提交
751 752 753
		brelse(inode_bitmap_bh);
		inode_bitmap_bh = ext4_read_inode_bitmap(sb, group);
		if (!inode_bitmap_bh)
754 755 756
			goto fail;

repeat_in_this_group:
757
		ino = ext4_find_next_zero_bit((unsigned long *)
A
Aneesh Kumar K.V 已提交
758 759 760
					      inode_bitmap_bh->b_data,
					      EXT4_INODES_PER_GROUP(sb), ino);

761
		if (ino < EXT4_INODES_PER_GROUP(sb)) {
762

A
Aneesh Kumar K.V 已提交
763 764 765
			BUFFER_TRACE(inode_bitmap_bh, "get_write_access");
			err = ext4_journal_get_write_access(handle,
							    inode_bitmap_bh);
766 767 768
			if (err)
				goto fail;

769 770 771 772 773 774 775
			BUFFER_TRACE(group_desc_bh, "get_write_access");
			err = ext4_journal_get_write_access(handle,
								group_desc_bh);
			if (err)
				goto fail;
			if (!ext4_claim_inode(sb, inode_bitmap_bh,
						ino, group, mode)) {
776
				/* we won it */
A
Aneesh Kumar K.V 已提交
777
				BUFFER_TRACE(inode_bitmap_bh,
778 779
					"call ext4_handle_dirty_metadata");
				err = ext4_handle_dirty_metadata(handle,
780
								 NULL,
A
Aneesh Kumar K.V 已提交
781
							inode_bitmap_bh);
782 783
				if (err)
					goto fail;
784 785
				/* zero bit is inode number 1*/
				ino++;
786 787 788
				goto got;
			}
			/* we lost it */
A
Aneesh Kumar K.V 已提交
789
			ext4_handle_release_buffer(handle, inode_bitmap_bh);
790
			ext4_handle_release_buffer(handle, group_desc_bh);
791

792
			if (++ino < EXT4_INODES_PER_GROUP(sb))
793 794 795 796 797 798 799 800 801 802
				goto repeat_in_this_group;
		}

		/*
		 * This case is possible in concurrent environment.  It is very
		 * rare.  We cannot repeat the find_group_xxx() call because
		 * that will simply return the same blockgroup, because the
		 * group descriptor metadata has not yet been updated.
		 * So we just go onto the next blockgroup.
		 */
803
		if (++group == ngroups)
804 805 806 807 808 809
			group = 0;
	}
	err = -ENOSPC;
	goto out;

got:
A
Andreas Dilger 已提交
810 811 812
	/* We may have to initialize the block bitmap if it isn't already */
	if (EXT4_HAS_RO_COMPAT_FEATURE(sb, EXT4_FEATURE_RO_COMPAT_GDT_CSUM) &&
	    gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
A
Aneesh Kumar K.V 已提交
813
		struct buffer_head *block_bitmap_bh;
A
Andreas Dilger 已提交
814

A
Aneesh Kumar K.V 已提交
815 816 817
		block_bitmap_bh = ext4_read_block_bitmap(sb, group);
		BUFFER_TRACE(block_bitmap_bh, "get block bitmap access");
		err = ext4_journal_get_write_access(handle, block_bitmap_bh);
A
Andreas Dilger 已提交
818
		if (err) {
A
Aneesh Kumar K.V 已提交
819
			brelse(block_bitmap_bh);
A
Andreas Dilger 已提交
820 821 822
			goto fail;
		}

823 824 825 826
		BUFFER_TRACE(block_bitmap_bh, "dirty block bitmap");
		err = ext4_handle_dirty_metadata(handle, NULL, block_bitmap_bh);
		brelse(block_bitmap_bh);

A
Andreas Dilger 已提交
827
		/* recheck and clear flag under lock if we still need to */
828
		ext4_lock_group(sb, group);
A
Andreas Dilger 已提交
829
		if (gdp->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
A
Aneesh Kumar K.V 已提交
830
			gdp->bg_flags &= cpu_to_le16(~EXT4_BG_BLOCK_UNINIT);
831
			ext4_free_group_clusters_set(sb, gdp,
832
				ext4_free_clusters_after_init(sb, group, gdp));
833 834
			gdp->bg_checksum = ext4_group_desc_csum(sbi, group,
								gdp);
A
Andreas Dilger 已提交
835
		}
836
		ext4_unlock_group(sb, group);
A
Andreas Dilger 已提交
837 838 839 840

		if (err)
			goto fail;
	}
A
Aneesh Kumar K.V 已提交
841 842
	BUFFER_TRACE(group_desc_bh, "call ext4_handle_dirty_metadata");
	err = ext4_handle_dirty_metadata(handle, NULL, group_desc_bh);
843 844
	if (err)
		goto fail;
845 846 847 848

	percpu_counter_dec(&sbi->s_freeinodes_counter);
	if (S_ISDIR(mode))
		percpu_counter_inc(&sbi->s_dirs_counter);
T
Theodore Ts'o 已提交
849
	ext4_mark_super_dirty(sb);
850

851 852
	if (sbi->s_log_groups_per_flex) {
		flex_group = ext4_flex_group(sbi, group);
853
		atomic_dec(&sbi->s_flex_groups[flex_group].free_inodes);
854
	}
855 856 857 858 859
	if (owner) {
		inode->i_mode = mode;
		inode->i_uid = owner[0];
		inode->i_gid = owner[1];
	} else if (test_opt(sb, GRPID)) {
860 861
		inode->i_mode = mode;
		inode->i_uid = current_fsuid();
862 863
		inode->i_gid = dir->i_gid;
	} else
864
		inode_init_owner(inode, dir, mode);
865

A
Andreas Dilger 已提交
866
	inode->i_ino = ino + group * EXT4_INODES_PER_GROUP(sb);
867 868
	/* This is the optimal IO size (for stat), not the fs block size */
	inode->i_blocks = 0;
K
Kalpak Shah 已提交
869 870
	inode->i_mtime = inode->i_atime = inode->i_ctime = ei->i_crtime =
						       ext4_current_time(inode);
871 872 873 874 875

	memset(ei->i_data, 0, sizeof(ei->i_data));
	ei->i_dir_start_lookup = 0;
	ei->i_disksize = 0;

876
	/* Don't inherit extent flag from directory, amongst others. */
877 878
	ei->i_flags =
		ext4_mask_flags(mode, EXT4_I(dir)->i_flags & EXT4_FL_INHERITED);
879 880 881
	ei->i_file_acl = 0;
	ei->i_dtime = 0;
	ei->i_block_group = group;
882
	ei->i_last_alloc_group = ~0;
883

884
	ext4_set_inode_flags(inode);
885
	if (IS_DIRSYNC(inode))
886
		ext4_handle_sync(handle);
A
Al Viro 已提交
887 888 889 890
	if (insert_inode_locked(inode) < 0) {
		err = -EINVAL;
		goto fail_drop;
	}
891 892 893 894
	spin_lock(&sbi->s_next_gen_lock);
	inode->i_generation = sbi->s_next_generation++;
	spin_unlock(&sbi->s_next_gen_lock);

895
	ext4_clear_state_flags(ei); /* Only relevant on 32-bit archs */
896
	ext4_set_inode_state(inode, EXT4_STATE_NEW);
K
Kalpak Shah 已提交
897 898

	ei->i_extra_isize = EXT4_SB(sb)->s_want_extra_isize;
899 900

	ret = inode;
901
	dquot_initialize(inode);
902 903
	err = dquot_alloc_inode(inode);
	if (err)
904 905
		goto fail_drop;

906
	err = ext4_init_acl(handle, inode, dir);
907 908 909
	if (err)
		goto fail_free_drop;

910
	err = ext4_init_security(handle, inode, dir, qstr);
911 912 913
	if (err)
		goto fail_free_drop;

914
	if (EXT4_HAS_INCOMPAT_FEATURE(sb, EXT4_FEATURE_INCOMPAT_EXTENTS)) {
915
		/* set extent flag only for directory, file and normal symlink*/
916
		if (S_ISDIR(mode) || S_ISREG(mode) || S_ISLNK(mode)) {
917
			ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
918 919
			ext4_ext_tree_init(handle, inode);
		}
A
Alex Tomas 已提交
920
	}
921

922 923 924 925 926
	if (ext4_handle_valid(handle)) {
		ei->i_sync_tid = handle->h_transaction->t_tid;
		ei->i_datasync_tid = handle->h_transaction->t_tid;
	}

927 928 929 930 931 932
	err = ext4_mark_inode_dirty(handle, inode);
	if (err) {
		ext4_std_error(sb, err);
		goto fail_free_drop;
	}

933
	ext4_debug("allocating inode %lu\n", inode->i_ino);
934
	trace_ext4_allocate_inode(inode, dir, mode);
935 936
	goto really_out;
fail:
937
	ext4_std_error(sb, err);
938 939 940 941
out:
	iput(inode);
	ret = ERR_PTR(err);
really_out:
A
Aneesh Kumar K.V 已提交
942
	brelse(inode_bitmap_bh);
943 944 945
	return ret;

fail_free_drop:
946
	dquot_free_inode(inode);
947 948

fail_drop:
949
	dquot_drop(inode);
950
	inode->i_flags |= S_NOQUOTA;
951
	clear_nlink(inode);
A
Al Viro 已提交
952
	unlock_new_inode(inode);
953
	iput(inode);
A
Aneesh Kumar K.V 已提交
954
	brelse(inode_bitmap_bh);
955 956 957 958
	return ERR_PTR(err);
}

/* Verify that we are loading a valid orphan from disk */
959
struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
960
{
961
	unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
962
	ext4_group_t block_group;
963
	int bit;
964
	struct buffer_head *bitmap_bh;
965
	struct inode *inode = NULL;
966
	long err = -EIO;
967 968 969

	/* Error cases - e2fsck has already cleaned up for us */
	if (ino > max_ino) {
970
		ext4_warning(sb, "bad orphan ino %lu!  e2fsck was run?", ino);
971
		goto error;
972 973
	}

974 975
	block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
	bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
976
	bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
977
	if (!bitmap_bh) {
978
		ext4_warning(sb, "inode bitmap error for orphan %lu", ino);
979
		goto error;
980 981 982 983 984 985
	}

	/* Having the inode bit set should be a 100% indicator that this
	 * is a valid orphan (no e2fsck run on fs).  Orphans also include
	 * inodes that were being truncated, so we can't check i_nlink==0.
	 */
986 987 988 989 990 991 992
	if (!ext4_test_bit(bit, bitmap_bh->b_data))
		goto bad_orphan;

	inode = ext4_iget(sb, ino);
	if (IS_ERR(inode))
		goto iget_failed;

993 994 995 996 997 998 999 1000
	/*
	 * If the orphans has i_nlinks > 0 then it should be able to be
	 * truncated, otherwise it won't be removed from the orphan list
	 * during processing and an infinite loop will result.
	 */
	if (inode->i_nlink && !ext4_can_truncate(inode))
		goto bad_orphan;

1001 1002 1003 1004 1005 1006 1007 1008 1009
	if (NEXT_ORPHAN(inode) > max_ino)
		goto bad_orphan;
	brelse(bitmap_bh);
	return inode;

iget_failed:
	err = PTR_ERR(inode);
	inode = NULL;
bad_orphan:
1010
	ext4_warning(sb, "bad orphan inode %lu!  e2fsck was run?", ino);
1011 1012 1013 1014 1015 1016 1017 1018 1019 1020
	printk(KERN_NOTICE "ext4_test_bit(bit=%d, block=%llu) = %d\n",
	       bit, (unsigned long long)bitmap_bh->b_blocknr,
	       ext4_test_bit(bit, bitmap_bh->b_data));
	printk(KERN_NOTICE "inode=%p\n", inode);
	if (inode) {
		printk(KERN_NOTICE "is_bad_inode(inode)=%d\n",
		       is_bad_inode(inode));
		printk(KERN_NOTICE "NEXT_ORPHAN(inode)=%u\n",
		       NEXT_ORPHAN(inode));
		printk(KERN_NOTICE "max_ino=%lu\n", max_ino);
1021
		printk(KERN_NOTICE "i_nlink=%u\n", inode->i_nlink);
1022
		/* Avoid freeing blocks if we got a bad deleted inode */
1023
		if (inode->i_nlink == 0)
1024 1025 1026 1027
			inode->i_blocks = 0;
		iput(inode);
	}
	brelse(bitmap_bh);
1028 1029
error:
	return ERR_PTR(err);
1030 1031
}

1032
unsigned long ext4_count_free_inodes(struct super_block *sb)
1033 1034
{
	unsigned long desc_count;
1035
	struct ext4_group_desc *gdp;
1036
	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
1037 1038
#ifdef EXT4FS_DEBUG
	struct ext4_super_block *es;
1039 1040 1041
	unsigned long bitmap_count, x;
	struct buffer_head *bitmap_bh = NULL;

1042
	es = EXT4_SB(sb)->s_es;
1043 1044 1045
	desc_count = 0;
	bitmap_count = 0;
	gdp = NULL;
1046
	for (i = 0; i < ngroups; i++) {
1047
		gdp = ext4_get_group_desc(sb, i, NULL);
1048 1049
		if (!gdp)
			continue;
1050
		desc_count += ext4_free_inodes_count(sb, gdp);
1051
		brelse(bitmap_bh);
1052
		bitmap_bh = ext4_read_inode_bitmap(sb, i);
1053 1054 1055
		if (!bitmap_bh)
			continue;

1056
		x = ext4_count_free(bitmap_bh, EXT4_INODES_PER_GROUP(sb) / 8);
E
Eric Sandeen 已提交
1057
		printk(KERN_DEBUG "group %lu: stored = %d, counted = %lu\n",
1058
			(unsigned long) i, ext4_free_inodes_count(sb, gdp), x);
1059 1060 1061
		bitmap_count += x;
	}
	brelse(bitmap_bh);
1062 1063 1064
	printk(KERN_DEBUG "ext4_count_free_inodes: "
	       "stored = %u, computed = %lu, %lu\n",
	       le32_to_cpu(es->s_free_inodes_count), desc_count, bitmap_count);
1065 1066 1067
	return desc_count;
#else
	desc_count = 0;
1068
	for (i = 0; i < ngroups; i++) {
1069
		gdp = ext4_get_group_desc(sb, i, NULL);
1070 1071
		if (!gdp)
			continue;
1072
		desc_count += ext4_free_inodes_count(sb, gdp);
1073 1074 1075 1076 1077 1078 1079
		cond_resched();
	}
	return desc_count;
#endif
}

/* Called at mount-time, super-block is locked */
1080
unsigned long ext4_count_dirs(struct super_block * sb)
1081 1082
{
	unsigned long count = 0;
1083
	ext4_group_t i, ngroups = ext4_get_groups_count(sb);
1084

1085
	for (i = 0; i < ngroups; i++) {
1086
		struct ext4_group_desc *gdp = ext4_get_group_desc(sb, i, NULL);
1087 1088
		if (!gdp)
			continue;
1089
		count += ext4_used_dirs_count(sb, gdp);
1090 1091 1092
	}
	return count;
}
1093 1094 1095 1096 1097 1098 1099 1100 1101

/*
 * Zeroes not yet zeroed inode table - just write zeroes through the whole
 * inode table. Must be called without any spinlock held. The only place
 * where it is called from on active part of filesystem is ext4lazyinit
 * thread, so we do not need any special locks, however we have to prevent
 * inode allocation from the current group, so we take alloc_sem lock, to
 * block ext4_claim_inode until we are finished.
 */
1102
int ext4_init_inode_table(struct super_block *sb, ext4_group_t group,
1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146
				 int barrier)
{
	struct ext4_group_info *grp = ext4_get_group_info(sb, group);
	struct ext4_sb_info *sbi = EXT4_SB(sb);
	struct ext4_group_desc *gdp = NULL;
	struct buffer_head *group_desc_bh;
	handle_t *handle;
	ext4_fsblk_t blk;
	int num, ret = 0, used_blks = 0;

	/* This should not happen, but just to be sure check this */
	if (sb->s_flags & MS_RDONLY) {
		ret = 1;
		goto out;
	}

	gdp = ext4_get_group_desc(sb, group, &group_desc_bh);
	if (!gdp)
		goto out;

	/*
	 * We do not need to lock this, because we are the only one
	 * handling this flag.
	 */
	if (gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_ZEROED))
		goto out;

	handle = ext4_journal_start_sb(sb, 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out;
	}

	down_write(&grp->alloc_sem);
	/*
	 * If inode bitmap was already initialized there may be some
	 * used inodes so we need to skip blocks with used inodes in
	 * inode table.
	 */
	if (!(gdp->bg_flags & cpu_to_le16(EXT4_BG_INODE_UNINIT)))
		used_blks = DIV_ROUND_UP((EXT4_INODES_PER_GROUP(sb) -
			    ext4_itable_unused_count(sb, gdp)),
			    sbi->s_inodes_per_block);

1147 1148 1149 1150 1151 1152 1153
	if ((used_blks < 0) || (used_blks > sbi->s_itb_per_group)) {
		ext4_error(sb, "Something is wrong with group %u\n"
			   "Used itable blocks: %d"
			   "itable unused count: %u\n",
			   group, used_blks,
			   ext4_itable_unused_count(sb, gdp));
		ret = 1;
1154
		goto err_out;
1155 1156
	}

1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175
	blk = ext4_inode_table(sb, gdp) + used_blks;
	num = sbi->s_itb_per_group - used_blks;

	BUFFER_TRACE(group_desc_bh, "get_write_access");
	ret = ext4_journal_get_write_access(handle,
					    group_desc_bh);
	if (ret)
		goto err_out;

	/*
	 * Skip zeroout if the inode table is full. But we set the ZEROED
	 * flag anyway, because obviously, when it is full it does not need
	 * further zeroing.
	 */
	if (unlikely(num == 0))
		goto skip_zeroout;

	ext4_debug("going to zero out inode table in group %d\n",
		   group);
1176
	ret = sb_issue_zeroout(sb, blk, num, GFP_NOFS);
1177 1178
	if (ret < 0)
		goto err_out;
1179 1180
	if (barrier)
		blkdev_issue_flush(sb->s_bdev, GFP_NOFS, NULL);
1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198

skip_zeroout:
	ext4_lock_group(sb, group);
	gdp->bg_flags |= cpu_to_le16(EXT4_BG_INODE_ZEROED);
	gdp->bg_checksum = ext4_group_desc_csum(sbi, group, gdp);
	ext4_unlock_group(sb, group);

	BUFFER_TRACE(group_desc_bh,
		     "call ext4_handle_dirty_metadata");
	ret = ext4_handle_dirty_metadata(handle, NULL,
					 group_desc_bh);

err_out:
	up_write(&grp->alloc_sem);
	ext4_journal_stop(handle);
out:
	return ret;
}