/*
 * linux/fs/ext4/xattr.c
 *
 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
 *
 * Fix by Harrison Xing <harrison@mountainviewdata.com>.
 * Ext4 code with a lot of help from Eric Jarman <ejarman@acm.org>.
 * Extended attributes for symlinks and special files added per
 *  suggestion of Luka Renko <luka.renko@hermes.si>.
 * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
 *  Red Hat Inc.
 * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz
 *  and Andreas Gruenbacher <agruen@suse.de>.
 */

/*
 * Extended attributes are stored directly in inodes (on file systems with
 * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl
 * field contains the block number if an inode uses an additional block. All
 * attributes must fit in the inode and one additional block. Blocks that
 * contain the identical set of attributes may be shared among several inodes.
 * Identical blocks are detected by keeping a cache of blocks that have
 * recently been accessed.
 *
 * The attributes in inodes and on blocks have a different header; the entries
 * are stored in the same format:
 *
 *   +------------------+
 *   | header           |
 *   | entry 1          | |
 *   | entry 2          | | growing downwards
 *   | entry 3          | v
 *   | four null bytes  |
 *   | . . .            |
 *   | value 1          | ^
 *   | value 3          | | growing upwards
 *   | value 2          | |
 *   +------------------+
 *
 * The header is followed by multiple entry descriptors. In disk blocks, the
 * entry descriptors are kept sorted. In inodes, they are unsorted. The
 * attribute values are aligned to the end of the block in no specific order.
 *
 * Locking strategy
 * ----------------
 * EXT4_I(inode)->i_file_acl is protected by EXT4_I(inode)->xattr_sem.
 * EA blocks are only changed if they are exclusive to an inode, so
 * holding xattr_sem also means that nothing but the EA block's reference
 * count can change. Multiple writers to the same block are synchronized
 * by the buffer lock.
 */
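
/*
 * Illustrative size math (a sketch based on the EXT4_XATTR_LEN() and
 * EXT4_XATTR_SIZE() rounding in xattr.h, which pad to 4-byte boundaries):
 * an attribute "user.foo" with a 13-byte value is stored as the name "foo"
 * under EXT4_XATTR_INDEX_USER and consumes EXT4_XATTR_LEN(3) = 20 bytes
 * for the entry descriptor plus EXT4_XATTR_SIZE(13) = 16 bytes for the
 * padded value at the end of the block.
 */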

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/mbcache.h>
#include <linux/quotaops.h>
#include "ext4_jbd2.h"
#include "ext4.h"
#include "xattr.h"
#include "acl.h"

#ifdef EXT4_XATTR_DEBUG
# define ea_idebug(inode, fmt, ...)					\
	printk(KERN_DEBUG "inode %s:%lu: " fmt "\n",			\
	       inode->i_sb->s_id, inode->i_ino, ##__VA_ARGS__)
# define ea_bdebug(bh, fmt, ...)					\
	printk(KERN_DEBUG "block %pg:%lu: " fmt "\n",			\
	       bh->b_bdev, (unsigned long)bh->b_blocknr, ##__VA_ARGS__)
#else
# define ea_idebug(inode, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
# define ea_bdebug(bh, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
#endif

static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
static struct buffer_head *ext4_xattr_cache_find(struct inode *,
						 struct ext4_xattr_header *,
						 struct mb_cache_entry **);
static void ext4_xattr_rehash(struct ext4_xattr_header *,
			      struct ext4_xattr_entry *);

static const struct xattr_handler * const ext4_xattr_handler_map[] = {
	[EXT4_XATTR_INDEX_USER]		     = &ext4_xattr_user_handler,
#ifdef CONFIG_EXT4_FS_POSIX_ACL
	[EXT4_XATTR_INDEX_POSIX_ACL_ACCESS]  = &posix_acl_access_xattr_handler,
	[EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler,
#endif
	[EXT4_XATTR_INDEX_TRUSTED]	     = &ext4_xattr_trusted_handler,
#ifdef CONFIG_EXT4_FS_SECURITY
	[EXT4_XATTR_INDEX_SECURITY]	     = &ext4_xattr_security_handler,
#endif
};

const struct xattr_handler *ext4_xattr_handlers[] = {
	&ext4_xattr_user_handler,
	&ext4_xattr_trusted_handler,
#ifdef CONFIG_EXT4_FS_POSIX_ACL
	&posix_acl_access_xattr_handler,
	&posix_acl_default_xattr_handler,
#endif
#ifdef CONFIG_EXT4_FS_SECURITY
	&ext4_xattr_security_handler,
#endif
	NULL
};

#define EXT4_GET_MB_CACHE(inode)	(((struct ext4_sb_info *) \
				inode->i_sb->s_fs_info)->s_mb_cache)

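/*
 * Checksum an xattr block: crc32c over the backing block number and the
 * block contents, with the h_checksum field itself treated as zero.
 */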
static __le32 ext4_xattr_block_csum(struct inode *inode,
				    sector_t block_nr,
				    struct ext4_xattr_header *hdr)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	__u32 csum;
	__le64 dsk_block_nr = cpu_to_le64(block_nr);
	__u32 dummy_csum = 0;
	int offset = offsetof(struct ext4_xattr_header, h_checksum);

	csum = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&dsk_block_nr,
			   sizeof(dsk_block_nr));
	csum = ext4_chksum(sbi, csum, (__u8 *)hdr, offset);
	csum = ext4_chksum(sbi, csum, (__u8 *)&dummy_csum, sizeof(dummy_csum));
	offset += sizeof(dummy_csum);
	csum = ext4_chksum(sbi, csum, (__u8 *)hdr + offset,
			   EXT4_BLOCK_SIZE(inode->i_sb) - offset);

	return cpu_to_le32(csum);
}

static int ext4_xattr_block_csum_verify(struct inode *inode,
					struct buffer_head *bh)
{
	struct ext4_xattr_header *hdr = BHDR(bh);
	int ret = 1;

	if (ext4_has_metadata_csum(inode->i_sb)) {
		lock_buffer(bh);
		ret = (hdr->h_checksum == ext4_xattr_block_csum(inode,
							bh->b_blocknr, hdr));
		unlock_buffer(bh);
	}
	return ret;
}

static void ext4_xattr_block_csum_set(struct inode *inode,
				      struct buffer_head *bh)
{
	if (ext4_has_metadata_csum(inode->i_sb))
		BHDR(bh)->h_checksum = ext4_xattr_block_csum(inode,
						bh->b_blocknr, BHDR(bh));
}

static inline const struct xattr_handler *
ext4_xattr_handler(int name_index)
{
	const struct xattr_handler *handler = NULL;

	if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map))
		handler = ext4_xattr_handler_map[name_index];
	return handler;
}

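/*
 * Validate a list of xattr entries: every entry descriptor must lie below
 * 'end', and every value must sit between the end of the names and 'end'
 * without overlapping them (both the padded and unpadded value sizes are
 * checked, since padding may overflow the size to 0).
 */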
static int
ext4_xattr_check_entries(struct ext4_xattr_entry *entry, void *end,
			 void *value_start)
{
	struct ext4_xattr_entry *e = entry;

	/* Find the end of the names list */
	while (!IS_LAST_ENTRY(e)) {
		struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
		if ((void *)next >= end)
			return -EFSCORRUPTED;
		e = next;
	}

	/* Check the values */
	while (!IS_LAST_ENTRY(entry)) {
		if (entry->e_value_block != 0)
			return -EFSCORRUPTED;
		if (entry->e_value_size != 0) {
			u16 offs = le16_to_cpu(entry->e_value_offs);
			u32 size = le32_to_cpu(entry->e_value_size);
			void *value;

			/*
			 * The value cannot overlap the names, and the value
			 * with padding cannot extend beyond 'end'.  Check both
			 * the padded and unpadded sizes, since the size may
			 * overflow to 0 when adding padding.
			 */
			if (offs > end - value_start)
				return -EFSCORRUPTED;
			value = value_start + offs;
			if (value < (void *)e + sizeof(u32) ||
			    size > end - value ||
			    EXT4_XATTR_SIZE(size) > end - value)
				return -EFSCORRUPTED;
		}
		entry = EXT4_XATTR_NEXT(entry);
	}

	return 0;
}

static inline int
ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
{
	int error;

	if (buffer_verified(bh))
		return 0;

	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
	    BHDR(bh)->h_blocks != cpu_to_le32(1))
		return -EFSCORRUPTED;
	if (!ext4_xattr_block_csum_verify(inode, bh))
		return -EFSBADCRC;
	error = ext4_xattr_check_entries(BFIRST(bh), bh->b_data + bh->b_size,
					 bh->b_data);
	if (!error)
		set_buffer_verified(bh);
	return error;
}

static int
__xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
			 void *end, const char *function, unsigned int line)
{
	int error = -EFSCORRUPTED;

	if (end - (void *)header < sizeof(*header) + sizeof(u32) ||
	    (header->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC)))
		goto errout;
	error = ext4_xattr_check_entries(IFIRST(header), end, IFIRST(header));
errout:
	if (error)
		__ext4_error_inode(inode, function, line, 0,
				   "corrupted in-inode xattr");
	return error;
}

#define xattr_check_inode(inode, header, end) \
	__xattr_check_inode((inode), (header), (end), __func__, __LINE__)

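/*
 * Find the entry matching (name_index, name), starting at *pentry.  With
 * 'sorted' set the scan stops early, as entries in xattr blocks are kept
 * sorted; on return *pentry points at the match (or the insertion point)
 * and 0 or -ENODATA is returned.
 */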
static int
ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
		      const char *name, int sorted)
{
	struct ext4_xattr_entry *entry;
	size_t name_len;
	int cmp = 1;

	if (name == NULL)
		return -EINVAL;
	name_len = strlen(name);
	entry = *pentry;
	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
		cmp = name_index - entry->e_name_index;
		if (!cmp)
			cmp = name_len - entry->e_name_len;
		if (!cmp)
			cmp = memcmp(name, entry->e_name, name_len);
		if (cmp <= 0 && (sorted || cmp == 0))
			break;
	}
	*pentry = entry;
	return cmp ? -ENODATA : 0;
}

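/*
 * Look up an attribute in the inode's external xattr block and copy its
 * value into 'buffer'.  Returns the value size (the required size when
 * 'buffer' is NULL), or a negative error number.
 */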
static int
ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
		     void *buffer, size_t buffer_size)
{
	struct buffer_head *bh = NULL;
	struct ext4_xattr_entry *entry;
	size_t size;
	int error;
	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);

	ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
		  name_index, name, buffer, (long)buffer_size);

	error = -ENODATA;
	if (!EXT4_I(inode)->i_file_acl)
		goto cleanup;
	ea_idebug(inode, "reading block %llu",
		  (unsigned long long)EXT4_I(inode)->i_file_acl);
	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
	if (!bh)
		goto cleanup;
	ea_bdebug(bh, "b_count=%d, refcount=%d",
		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
	if (ext4_xattr_check_block(inode, bh)) {
		EXT4_ERROR_INODE(inode, "bad block %llu",
				 EXT4_I(inode)->i_file_acl);
		error = -EFSCORRUPTED;
		goto cleanup;
	}
	ext4_xattr_cache_insert(ext4_mb_cache, bh);
	entry = BFIRST(bh);
	error = ext4_xattr_find_entry(&entry, name_index, name, 1);
	if (error)
		goto cleanup;
	size = le32_to_cpu(entry->e_value_size);
	if (buffer) {
		error = -ERANGE;
		if (size > buffer_size)
			goto cleanup;
		memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
		       size);
	}
	error = size;

cleanup:
	brelse(bh);
	return error;
}

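/*
 * Look up an attribute stored in the inode body, following the same
 * return conventions as ext4_xattr_block_get().
 */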
int
ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
		     void *buffer, size_t buffer_size)
{
	struct ext4_xattr_ibody_header *header;
	struct ext4_xattr_entry *entry;
	struct ext4_inode *raw_inode;
	struct ext4_iloc iloc;
	size_t size;
	void *end;
	int error;

	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
		return -ENODATA;
	error = ext4_get_inode_loc(inode, &iloc);
	if (error)
		return error;
	raw_inode = ext4_raw_inode(&iloc);
	header = IHDR(inode, raw_inode);
	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
	error = xattr_check_inode(inode, header, end);
	if (error)
		goto cleanup;
	entry = IFIRST(header);
	error = ext4_xattr_find_entry(&entry, name_index, name, 0);
	if (error)
		goto cleanup;
	size = le32_to_cpu(entry->e_value_size);
	if (buffer) {
		error = -ERANGE;
		if (size > buffer_size)
			goto cleanup;
		memcpy(buffer, (void *)IFIRST(header) +
		       le16_to_cpu(entry->e_value_offs), size);
	}
	error = size;

cleanup:
	brelse(iloc.bh);
	return error;
}

/*
 * ext4_xattr_get()
 *
 * Copy an extended attribute into the buffer
 * provided, or compute the buffer size required.
 * Buffer is NULL to compute the size of the buffer required.
 *
 * Returns a negative error number on failure, or the number of bytes
 * used / required on success.
 */
int
ext4_xattr_get(struct inode *inode, int name_index, const char *name,
	       void *buffer, size_t buffer_size)
{
	int error;

	if (unlikely(ext4_forced_shutdown(EXT4_SB(inode->i_sb))))
		return -EIO;

	if (strlen(name) > 255)
		return -ERANGE;

	down_read(&EXT4_I(inode)->xattr_sem);
	error = ext4_xattr_ibody_get(inode, name_index, name, buffer,
				     buffer_size);
	if (error == -ENODATA)
		error = ext4_xattr_block_get(inode, name_index, name, buffer,
					     buffer_size);
	up_read(&EXT4_I(inode)->xattr_sem);
	return error;
}

static int
ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
			char *buffer, size_t buffer_size)
{
	size_t rest = buffer_size;

	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
		const struct xattr_handler *handler =
			ext4_xattr_handler(entry->e_name_index);

		if (handler && (!handler->list || handler->list(dentry))) {
			const char *prefix = handler->prefix ?: handler->name;
			size_t prefix_len = strlen(prefix);
			size_t size = prefix_len + entry->e_name_len + 1;

			if (buffer) {
				if (size > rest)
					return -ERANGE;
				memcpy(buffer, prefix, prefix_len);
				buffer += prefix_len;
				memcpy(buffer, entry->e_name, entry->e_name_len);
				buffer += entry->e_name_len;
				*buffer++ = 0;
			}
			rest -= size;
		}
	}
	return buffer_size - rest;  /* total size */
}

static int
ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
	struct inode *inode = d_inode(dentry);
	struct buffer_head *bh = NULL;
	int error;
	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);

	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
		  buffer, (long)buffer_size);

	error = 0;
	if (!EXT4_I(inode)->i_file_acl)
		goto cleanup;
	ea_idebug(inode, "reading block %llu",
		  (unsigned long long)EXT4_I(inode)->i_file_acl);
	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
	error = -EIO;
	if (!bh)
		goto cleanup;
	ea_bdebug(bh, "b_count=%d, refcount=%d",
		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
	if (ext4_xattr_check_block(inode, bh)) {
		EXT4_ERROR_INODE(inode, "bad block %llu",
				 EXT4_I(inode)->i_file_acl);
		error = -EFSCORRUPTED;
		goto cleanup;
	}
	ext4_xattr_cache_insert(ext4_mb_cache, bh);
	error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);

cleanup:
	brelse(bh);

	return error;
}

static int
ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
{
	struct inode *inode = d_inode(dentry);
	struct ext4_xattr_ibody_header *header;
	struct ext4_inode *raw_inode;
	struct ext4_iloc iloc;
	void *end;
	int error;

	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
		return 0;
	error = ext4_get_inode_loc(inode, &iloc);
	if (error)
		return error;
	raw_inode = ext4_raw_inode(&iloc);
	header = IHDR(inode, raw_inode);
	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
	error = xattr_check_inode(inode, header, end);
	if (error)
		goto cleanup;
	error = ext4_xattr_list_entries(dentry, IFIRST(header),
					buffer, buffer_size);

cleanup:
	brelse(iloc.bh);
	return error;
}

/*
 * Inode operation listxattr()
 *
 * d_inode(dentry)->i_rwsem: don't care
 *
 * Copy a list of attribute names into the buffer
 * provided, or compute the buffer size required.
 * Buffer is NULL to compute the size of the buffer required.
 *
 * Returns a negative error number on failure, or the number of bytes
 * used / required on success.
 */
ssize_t
ext4_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size)
{
	int ret, ret2;

	down_read(&EXT4_I(d_inode(dentry))->xattr_sem);
	ret = ret2 = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
	if (ret < 0)
		goto errout;
	if (buffer) {
		buffer += ret;
		buffer_size -= ret;
	}
	ret = ext4_xattr_block_list(dentry, buffer, buffer_size);
	if (ret < 0)
		goto errout;
	ret += ret2;
errout:
	up_read(&EXT4_I(d_inode(dentry))->xattr_sem);
	return ret;
}

/*
 * If the EXT4_FEATURE_COMPAT_EXT_ATTR feature of this file system is
 * not set, set it.
 */
static void ext4_xattr_update_super_block(handle_t *handle,
					  struct super_block *sb)
{
	if (ext4_has_feature_xattr(sb))
		return;

	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
	if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
		ext4_set_feature_xattr(sb);
		ext4_handle_dirty_super(handle, sb);
	}
}

/*
 * Release the xattr block BH: If the reference count is > 1, decrement it;
 * otherwise free the block.
 */
static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
			 struct buffer_head *bh)
{
	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
	u32 hash, ref;
	int error = 0;

	BUFFER_TRACE(bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, bh);
	if (error)
		goto out;

	lock_buffer(bh);
	hash = le32_to_cpu(BHDR(bh)->h_hash);
	ref = le32_to_cpu(BHDR(bh)->h_refcount);
	if (ref == 1) {
		ea_bdebug(bh, "refcount now=0; freeing");
		/*
		 * This must happen under buffer lock for
		 * ext4_xattr_block_set() to reliably detect freed block
		 */
		mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr);
		get_bh(bh);
		unlock_buffer(bh);
		ext4_free_blocks(handle, inode, bh, 0, 1,
				 EXT4_FREE_BLOCKS_METADATA |
				 EXT4_FREE_BLOCKS_FORGET);
	} else {
		ref--;
		BHDR(bh)->h_refcount = cpu_to_le32(ref);
		if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
			struct mb_cache_entry *ce;

			ce = mb_cache_entry_get(ext4_mb_cache, hash,
						bh->b_blocknr);
			if (ce) {
				ce->e_reusable = 1;
				mb_cache_entry_put(ext4_mb_cache, ce);
			}
		}

		ext4_xattr_block_csum_set(inode, bh);
		/*
		 * Beware of this ugliness: Releasing of xattr block references
		 * from different inodes can race and so we have to protect
		 * from a race where someone else frees the block (and releases
		 * its journal_head) before we are done dirtying the buffer. In
		 * nojournal mode this race is harmless and we actually cannot
		 * call ext4_handle_dirty_metadata() with locked buffer as
		 * that function can call sync_dirty_buffer() so for that case
		 * we handle the dirtying after unlocking the buffer.
		 */
		if (ext4_handle_valid(handle))
			error = ext4_handle_dirty_metadata(handle, inode, bh);
		unlock_buffer(bh);
		if (!ext4_handle_valid(handle))
			error = ext4_handle_dirty_metadata(handle, inode, bh);
		if (IS_SYNC(inode))
			ext4_handle_sync(handle);
		dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
		ea_bdebug(bh, "refcount now=%d; releasing",
			  le32_to_cpu(BHDR(bh)->h_refcount));
	}
out:
	ext4_std_error(inode->i_sb, error);
	return;
613 614
}

/*
 * Find the available free space for EAs. This also returns the total number of
 * bytes used by EA entries.
 */
static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
				    size_t *min_offs, void *base, int *total)
{
	for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
		if (last->e_value_size) {
			size_t offs = le16_to_cpu(last->e_value_offs);
			if (offs < *min_offs)
				*min_offs = offs;
		}
		if (total)
			*total += EXT4_XATTR_LEN(last->e_name_len);
	}
	return (*min_offs - ((void *)last - base) - sizeof(__u32));
}

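/*
 * Add, replace or remove the attribute described by 'i' inside the search
 * area 's' (the inode body or an xattr block buffer).  Entry descriptors
 * grow down from s->first while values grow up from s->end; returns
 * -ENOSPC when the requested change does not fit.
 */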
static int
ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
{
	struct ext4_xattr_entry *last;
	size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);

	/* Compute min_offs and last. */
	last = s->first;
	for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
		if (last->e_value_size) {
			size_t offs = le16_to_cpu(last->e_value_offs);
			if (offs < min_offs)
				min_offs = offs;
		}
	}
	free = min_offs - ((void *)last - s->base) - sizeof(__u32);
	if (!s->not_found) {
		if (s->here->e_value_size) {
			size_t size = le32_to_cpu(s->here->e_value_size);
			free += EXT4_XATTR_SIZE(size);
		}
		free += EXT4_XATTR_LEN(name_len);
	}
	if (i->value) {
		if (free < EXT4_XATTR_LEN(name_len) +
			   EXT4_XATTR_SIZE(i->value_len))
			return -ENOSPC;
	}

	if (i->value && s->not_found) {
		/* Insert the new name. */
		size_t size = EXT4_XATTR_LEN(name_len);
		size_t rest = (void *)last - (void *)s->here + sizeof(__u32);
		memmove((void *)s->here + size, s->here, rest);
		memset(s->here, 0, size);
		s->here->e_name_index = i->name_index;
		s->here->e_name_len = name_len;
		memcpy(s->here->e_name, i->name, name_len);
	} else {
		if (s->here->e_value_size) {
			void *first_val = s->base + min_offs;
			size_t offs = le16_to_cpu(s->here->e_value_offs);
			void *val = s->base + offs;
			size_t size = EXT4_XATTR_SIZE(
				le32_to_cpu(s->here->e_value_size));

			if (i->value && size == EXT4_XATTR_SIZE(i->value_len)) {
				/* The old and the new value have the same
				   size. Just replace. */
				s->here->e_value_size =
					cpu_to_le32(i->value_len);
				if (i->value == EXT4_ZERO_XATTR_VALUE) {
					memset(val, 0, size);
				} else {
					/* Clear pad bytes first. */
					memset(val + size - EXT4_XATTR_PAD, 0,
					       EXT4_XATTR_PAD);
					memcpy(val, i->value, i->value_len);
				}
				return 0;
			}

			/* Remove the old value. */
			memmove(first_val + size, first_val, val - first_val);
			memset(first_val, 0, size);
			s->here->e_value_size = 0;
			s->here->e_value_offs = 0;
			min_offs += size;

			/* Adjust all value offsets. */
			last = s->first;
			while (!IS_LAST_ENTRY(last)) {
				size_t o = le16_to_cpu(last->e_value_offs);
				if (last->e_value_size && o < offs)
					last->e_value_offs =
						cpu_to_le16(o + size);
				last = EXT4_XATTR_NEXT(last);
			}
		}
		if (!i->value) {
			/* Remove the old name. */
			size_t size = EXT4_XATTR_LEN(name_len);
			last = ENTRY((void *)last - size);
			memmove(s->here, (void *)s->here + size,
				(void *)last - (void *)s->here + sizeof(__u32));
			memset(last, 0, size);
		}
	}

	if (i->value) {
		/* Insert the new value. */
		s->here->e_value_size = cpu_to_le32(i->value_len);
		if (i->value_len) {
			size_t size = EXT4_XATTR_SIZE(i->value_len);
			void *val = s->base + min_offs - size;
			s->here->e_value_offs = cpu_to_le16(min_offs - size);
			if (i->value == EXT4_ZERO_XATTR_VALUE) {
				memset(val, 0, size);
			} else {
				/* Clear the pad bytes first. */
				memset(val + size - EXT4_XATTR_PAD, 0,
				       EXT4_XATTR_PAD);
				memcpy(val, i->value, i->value_len);
			}
		}
	}
	return 0;
}

struct ext4_xattr_block_find {
	struct ext4_xattr_search s;
	struct buffer_head *bh;
};

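/*
 * Read the inode's external xattr block, if any, and locate the named
 * attribute in it, leaving the result in bs->s for a following
 * ext4_xattr_block_set().
 */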
static int
ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
		      struct ext4_xattr_block_find *bs)
{
	struct super_block *sb = inode->i_sb;
	int error;

	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
		  i->name_index, i->name, i->value, (long)i->value_len);

	if (EXT4_I(inode)->i_file_acl) {
		/* The inode already has an extended attribute block. */
		bs->bh = sb_bread(sb, EXT4_I(inode)->i_file_acl);
		error = -EIO;
		if (!bs->bh)
			goto cleanup;
		ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
			atomic_read(&(bs->bh->b_count)),
			le32_to_cpu(BHDR(bs->bh)->h_refcount));
		if (ext4_xattr_check_block(inode, bs->bh)) {
			EXT4_ERROR_INODE(inode, "bad block %llu",
					 EXT4_I(inode)->i_file_acl);
			error = -EFSCORRUPTED;
			goto cleanup;
		}
		/* Find the named attribute. */
		bs->s.base = BHDR(bs->bh);
		bs->s.first = BFIRST(bs->bh);
		bs->s.end = bs->bh->b_data + bs->bh->b_size;
		bs->s.here = bs->s.first;
		error = ext4_xattr_find_entry(&bs->s.here, i->name_index,
					      i->name, 1);
		if (error && error != -ENODATA)
			goto cleanup;
		bs->s.not_found = error;
	}
	error = 0;

cleanup:
	return error;
}

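/*
 * Apply the change described by 'i' to the external xattr block.  A block
 * shared with other inodes is never modified in place: it is cloned and
 * the clone updated, and the mbcache is consulted so that a block holding
 * an identical set of attributes can be shared again.
 */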
static int
ext4_xattr_block_set(handle_t *handle, struct inode *inode,
		     struct ext4_xattr_info *i,
		     struct ext4_xattr_block_find *bs)
{
	struct super_block *sb = inode->i_sb;
	struct buffer_head *new_bh = NULL;
	struct ext4_xattr_search *s = &bs->s;
	struct mb_cache_entry *ce = NULL;
	int error = 0;
	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);

#define header(x) ((struct ext4_xattr_header *)(x))

	if (i->value && i->value_len > sb->s_blocksize)
		return -ENOSPC;
	if (s->base) {
		BUFFER_TRACE(bs->bh, "get_write_access");
		error = ext4_journal_get_write_access(handle, bs->bh);
		if (error)
			goto cleanup;
		lock_buffer(bs->bh);

		if (header(s->base)->h_refcount == cpu_to_le32(1)) {
			__u32 hash = le32_to_cpu(BHDR(bs->bh)->h_hash);

			/*
			 * This must happen under buffer lock for
			 * ext4_xattr_block_set() to reliably detect modified
			 * block
			 */
			mb_cache_entry_delete_block(ext4_mb_cache, hash,
						    bs->bh->b_blocknr);
			ea_bdebug(bs->bh, "modifying in-place");
			error = ext4_xattr_set_entry(i, s);
			if (!error) {
				if (!IS_LAST_ENTRY(s->first))
					ext4_xattr_rehash(header(s->base),
							  s->here);
				ext4_xattr_cache_insert(ext4_mb_cache,
					bs->bh);
			}
			ext4_xattr_block_csum_set(inode, bs->bh);
			unlock_buffer(bs->bh);
			if (error == -EFSCORRUPTED)
				goto bad_block;
			if (!error)
				error = ext4_handle_dirty_metadata(handle,
								   inode,
								   bs->bh);
			if (error)
				goto cleanup;
			goto inserted;
		} else {
			int offset = (char *)s->here - bs->bh->b_data;

			unlock_buffer(bs->bh);
			ea_bdebug(bs->bh, "cloning");
			s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
			error = -ENOMEM;
			if (s->base == NULL)
				goto cleanup;
			memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
			s->first = ENTRY(header(s->base)+1);
			header(s->base)->h_refcount = cpu_to_le32(1);
			s->here = ENTRY(s->base + offset);
			s->end = s->base + bs->bh->b_size;
		}
	} else {
		/* Allocate a buffer where we construct the new block. */
		s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
		/* assert(header == s->base) */
		error = -ENOMEM;
		if (s->base == NULL)
			goto cleanup;
		header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
		header(s->base)->h_blocks = cpu_to_le32(1);
		header(s->base)->h_refcount = cpu_to_le32(1);
		s->first = ENTRY(header(s->base)+1);
		s->here = ENTRY(header(s->base)+1);
		s->end = s->base + sb->s_blocksize;
	}

	error = ext4_xattr_set_entry(i, s);
	if (error == -EFSCORRUPTED)
		goto bad_block;
	if (error)
		goto cleanup;
	if (!IS_LAST_ENTRY(s->first))
		ext4_xattr_rehash(header(s->base), s->here);

inserted:
	if (!IS_LAST_ENTRY(s->first)) {
		new_bh = ext4_xattr_cache_find(inode, header(s->base), &ce);
		if (new_bh) {
			/* We found an identical block in the cache. */
			if (new_bh == bs->bh)
				ea_bdebug(new_bh, "keeping");
			else {
				u32 ref;

				WARN_ON_ONCE(dquot_initialize_needed(inode));

				/* The old block is released after updating
				   the inode. */
				error = dquot_alloc_block(inode,
						EXT4_C2B(EXT4_SB(sb), 1));
				if (error)
					goto cleanup;
				BUFFER_TRACE(new_bh, "get_write_access");
				error = ext4_journal_get_write_access(handle,
								      new_bh);
				if (error)
					goto cleanup_dquot;
				lock_buffer(new_bh);
				/*
				 * We have to be careful about races with
				 * freeing, rehashing or adding references to
				 * xattr block. Once we hold buffer lock xattr
				 * block's state is stable so we can check
				 * whether the block got freed / rehashed or
				 * not.  Since we unhash mbcache entry under
				 * buffer lock when freeing / rehashing xattr
				 * block, checking whether entry is still
				 * hashed is reliable. Same rules hold for
				 * e_reusable handling.
				 */
				if (hlist_bl_unhashed(&ce->e_hash_list) ||
				    !ce->e_reusable) {
					/*
					 * Undo everything and check mbcache
					 * again.
					 */
					unlock_buffer(new_bh);
					dquot_free_block(inode,
							 EXT4_C2B(EXT4_SB(sb),
								  1));
					brelse(new_bh);
					mb_cache_entry_put(ext4_mb_cache, ce);
					ce = NULL;
					new_bh = NULL;
					goto inserted;
				}
				ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
				BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
				if (ref >= EXT4_XATTR_REFCOUNT_MAX)
					ce->e_reusable = 0;
				ea_bdebug(new_bh, "reusing; refcount now=%d",
					  ref);
				ext4_xattr_block_csum_set(inode, new_bh);
				unlock_buffer(new_bh);
				error = ext4_handle_dirty_metadata(handle,
								   inode,
								   new_bh);
				if (error)
					goto cleanup_dquot;
			}
			mb_cache_entry_touch(ext4_mb_cache, ce);
			mb_cache_entry_put(ext4_mb_cache, ce);
			ce = NULL;
		} else if (bs->bh && s->base == bs->bh->b_data) {
			/* We were modifying this block in-place. */
			ea_bdebug(bs->bh, "keeping this block");
			new_bh = bs->bh;
			get_bh(new_bh);
		} else {
			/* We need to allocate a new block */
			ext4_fsblk_t goal, block;

			WARN_ON_ONCE(dquot_initialize_needed(inode));

			goal = ext4_group_first_block_no(sb,
						EXT4_I(inode)->i_block_group);

			/* non-extent files can't have physical blocks past 2^32 */
			if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
				goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;

			block = ext4_new_meta_blocks(handle, inode, goal, 0,
						     NULL, &error);
			if (error)
				goto cleanup;

			if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
				BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);

			ea_idebug(inode, "creating block %llu",
				  (unsigned long long)block);

			new_bh = sb_getblk(sb, block);
			if (unlikely(!new_bh)) {
				error = -ENOMEM;
getblk_failed:
				ext4_free_blocks(handle, inode, NULL, block, 1,
						 EXT4_FREE_BLOCKS_METADATA);
				goto cleanup;
			}
			lock_buffer(new_bh);
			error = ext4_journal_get_create_access(handle, new_bh);
			if (error) {
				unlock_buffer(new_bh);
				error = -EIO;
				goto getblk_failed;
			}
			memcpy(new_bh->b_data, s->base, new_bh->b_size);
			ext4_xattr_block_csum_set(inode, new_bh);
			set_buffer_uptodate(new_bh);
			unlock_buffer(new_bh);
			ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
			error = ext4_handle_dirty_metadata(handle, inode,
							   new_bh);
			if (error)
				goto cleanup;
		}
	}

	/* Update the inode. */
	EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;

	/* Drop the previous xattr block. */
	if (bs->bh && bs->bh != new_bh)
		ext4_xattr_release_block(handle, inode, bs->bh);
	error = 0;

cleanup:
	if (ce)
		mb_cache_entry_put(ext4_mb_cache, ce);
	brelse(new_bh);
	if (!(bs->bh && s->base == bs->bh->b_data))
		kfree(s->base);

	return error;

cleanup_dquot:
	dquot_free_block(inode, EXT4_C2B(EXT4_SB(sb), 1));
	goto cleanup;

bad_block:
	EXT4_ERROR_INODE(inode, "bad block %llu",
			 EXT4_I(inode)->i_file_acl);
	goto cleanup;

#undef header
}

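/*
 * Locate the named attribute in the inode body and set up is->s for a
 * following ibody set operation.  Returns 0 even when the attribute is
 * absent, recording -ENODATA in is->s.not_found instead.
 */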
int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
			  struct ext4_xattr_ibody_find *is)
{
	struct ext4_xattr_ibody_header *header;
	struct ext4_inode *raw_inode;
	int error;

	if (EXT4_I(inode)->i_extra_isize == 0)
		return 0;
	raw_inode = ext4_raw_inode(&is->iloc);
	header = IHDR(inode, raw_inode);
	is->s.base = is->s.first = IFIRST(header);
	is->s.here = is->s.first;
	is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
		error = xattr_check_inode(inode, header, is->s.end);
		if (error)
			return error;
		/* Find the named attribute. */
		error = ext4_xattr_find_entry(&is->s.here, i->name_index,
					      i->name, 0);
		if (error && error != -ENODATA)
			return error;
		is->s.not_found = error;
	}
	return 0;
}

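/*
 * Set an attribute in the inode body.  This variant is used by the inline
 * data code: on -ENOSPC it tries to evict inline data to make room and
 * then retries the insertion.
 */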
int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
				struct ext4_xattr_info *i,
				struct ext4_xattr_ibody_find *is)
{
	struct ext4_xattr_ibody_header *header;
	struct ext4_xattr_search *s = &is->s;
	int error;

	if (EXT4_I(inode)->i_extra_isize == 0)
		return -ENOSPC;
	error = ext4_xattr_set_entry(i, s);
	if (error) {
		if (error == -ENOSPC &&
		    ext4_has_inline_data(inode)) {
			error = ext4_try_to_evict_inline_data(handle, inode,
					EXT4_XATTR_LEN(strlen(i->name) +
					EXT4_XATTR_SIZE(i->value_len)));
			if (error)
				return error;
			error = ext4_xattr_ibody_find(inode, i, is);
			if (error)
				return error;
			error = ext4_xattr_set_entry(i, s);
		}
		if (error)
			return error;
	}
	header = IHDR(inode, ext4_raw_inode(&is->iloc));
	if (!IS_LAST_ENTRY(s->first)) {
		header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
	} else {
		header->h_magic = cpu_to_le32(0);
		ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
	}
	return 0;
}

static int ext4_xattr_ibody_set(struct inode *inode,
				struct ext4_xattr_info *i,
				struct ext4_xattr_ibody_find *is)
{
	struct ext4_xattr_ibody_header *header;
	struct ext4_xattr_search *s = &is->s;
	int error;

	if (EXT4_I(inode)->i_extra_isize == 0)
		return -ENOSPC;
	error = ext4_xattr_set_entry(i, s);
	if (error)
		return error;
	header = IHDR(inode, ext4_raw_inode(&is->iloc));
	if (!IS_LAST_ENTRY(s->first)) {
		header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
	} else {
		header->h_magic = cpu_to_le32(0);
		ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
	}
	return 0;
}
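/*
 * Return true if the attribute found by 's' already holds exactly the
 * value described by 'i', in which case the set operation can be skipped.
 */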
static int ext4_xattr_value_same(struct ext4_xattr_search *s,
				 struct ext4_xattr_info *i)
{
	void *value;

	if (le32_to_cpu(s->here->e_value_size) != i->value_len)
		return 0;
	value = ((void *)s->base) + le16_to_cpu(s->here->e_value_offs);
	return !memcmp(value, i->value, i->value_len);
}

/*
 * ext4_xattr_set_handle()
 *
 * Create, replace or remove an extended attribute for this inode.  Value
 * is NULL to remove an existing extended attribute, and non-NULL to
 * either replace an existing extended attribute, or create a new extended
 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
 * specify that an extended attribute must exist and must not exist
 * previous to the call, respectively.
 *
 * Returns 0, or a negative error number on failure.
 */
int
ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
		      const char *name, const void *value, size_t value_len,
		      int flags)
{
	struct ext4_xattr_info i = {
		.name_index = name_index,
		.name = name,
		.value = value,
		.value_len = value_len,

	};
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_block_find bs = {
		.s = { .not_found = -ENODATA, },
	};
	int no_expand;
	int error;

	if (!name)
		return -EINVAL;
	if (strlen(name) > 255)
		return -ERANGE;

	ext4_write_lock_xattr(inode, &no_expand);

	error = ext4_reserve_inode_write(handle, inode, &is.iloc);
	if (error)
		goto cleanup;

	if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) {
		struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
		memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
		ext4_clear_inode_state(inode, EXT4_STATE_NEW);
	}

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto cleanup;
	if (is.s.not_found)
		error = ext4_xattr_block_find(inode, &i, &bs);
	if (error)
		goto cleanup;
	if (is.s.not_found && bs.s.not_found) {
		error = -ENODATA;
		if (flags & XATTR_REPLACE)
			goto cleanup;
		error = 0;
		if (!value)
			goto cleanup;
	} else {
		error = -EEXIST;
		if (flags & XATTR_CREATE)
			goto cleanup;
	}
	if (!value) {
		if (!is.s.not_found)
			error = ext4_xattr_ibody_set(inode, &i, &is);
		else if (!bs.s.not_found)
			error = ext4_xattr_block_set(handle, inode, &i, &bs);
	} else {
		error = 0;
		/* Xattr value did not change? Save us some work and bail out */
		if (!is.s.not_found && ext4_xattr_value_same(&is.s, &i))
			goto cleanup;
		if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i))
			goto cleanup;

		error = ext4_xattr_ibody_set(inode, &i, &is);
		if (!error && !bs.s.not_found) {
			i.value = NULL;
			error = ext4_xattr_block_set(handle, inode, &i, &bs);
		} else if (error == -ENOSPC) {
			if (EXT4_I(inode)->i_file_acl && !bs.s.base) {
				error = ext4_xattr_block_find(inode, &i, &bs);
				if (error)
					goto cleanup;
			}
			error = ext4_xattr_block_set(handle, inode, &i, &bs);
			if (error)
				goto cleanup;
			if (!is.s.not_found) {
				i.value = NULL;
				error = ext4_xattr_ibody_set(inode, &i, &is);
			}
		}
	}
	if (!error) {
		ext4_xattr_update_super_block(handle, inode->i_sb);
		inode->i_ctime = current_time(inode);
		if (!value)
			no_expand = 0;
		error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
		/*
		 * The bh is consumed by ext4_mark_iloc_dirty, even with
		 * error != 0.
		 */
		is.iloc.bh = NULL;
		if (IS_SYNC(inode))
			ext4_handle_sync(handle);
	}

cleanup:
	brelse(is.iloc.bh);
	brelse(bs.bh);
	ext4_write_unlock_xattr(inode, &no_expand);
	return error;
}

/*
 * ext4_xattr_set()
 *
 * Like ext4_xattr_set_handle, but start from an inode. This extended
 * attribute modification is a filesystem transaction by itself.
 *
 * Returns 0, or a negative error number on failure.
 */
int
ext4_xattr_set(struct inode *inode, int name_index, const char *name,
	       const void *value, size_t value_len, int flags)
{
	handle_t *handle;
	int error, retries = 0;
	int credits = ext4_jbd2_credits_xattr(inode);
	error = dquot_initialize(inode);
	if (error)
		return error;
retry:
	handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
	if (IS_ERR(handle)) {
		error = PTR_ERR(handle);
	} else {
		int error2;

		error = ext4_xattr_set_handle(handle, inode, name_index, name,
					      value, value_len, flags);
		error2 = ext4_journal_stop(handle);
		if (error == -ENOSPC &&
		    ext4_should_retry_alloc(inode->i_sb, &retries))
			goto retry;
		if (error == 0)
			error = error2;
	}

	return error;
}

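/*
 * Illustrative call (not taken from this file): removing an attribute
 * passes a NULL value,
 *
 *	error = ext4_xattr_set(inode, EXT4_XATTR_INDEX_USER, "foo",
 *			       NULL, 0, XATTR_REPLACE);
 *
 * while a non-NULL value with flags 0 creates or replaces it.
 */
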
/*
 * Shift the EA entries in the inode to create space for the increased
 * i_extra_isize.
 */
static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
				     int value_offs_shift, void *to,
				     void *from, size_t n)
{
	struct ext4_xattr_entry *last = entry;
	int new_offs;

	/* We always shift xattr headers further thus offsets get lower */
	BUG_ON(value_offs_shift > 0);

	/* Adjust the value offsets of the entries */
	for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
		if (last->e_value_size) {
			new_offs = le16_to_cpu(last->e_value_offs) +
							value_offs_shift;
			last->e_value_offs = cpu_to_le16(new_offs);
		}
	}
	/* Shift the entries by n bytes */
	memmove(to, from, n);
}

/*
 * Move xattr pointed to by 'entry' from inode into external xattr block
 */
static int ext4_xattr_move_to_block(handle_t *handle, struct inode *inode,
				    struct ext4_inode *raw_inode,
				    struct ext4_xattr_entry *entry)
{
	struct ext4_xattr_ibody_find *is = NULL;
	struct ext4_xattr_block_find *bs = NULL;
	char *buffer = NULL, *b_entry_name = NULL;
	size_t value_offs, value_size;
	struct ext4_xattr_info i = {
		.value = NULL,
		.value_len = 0,
		.name_index = entry->e_name_index,
	};
	struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
	int error;

	value_offs = le16_to_cpu(entry->e_value_offs);
	value_size = le32_to_cpu(entry->e_value_size);

	is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
	bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
	buffer = kmalloc(value_size, GFP_NOFS);
	b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
	if (!is || !bs || !buffer || !b_entry_name) {
		error = -ENOMEM;
		goto out;
	}

	is->s.not_found = -ENODATA;
	bs->s.not_found = -ENODATA;
	is->iloc.bh = NULL;
	bs->bh = NULL;

	/* Save the entry name and the entry value */
	memcpy(buffer, (void *)IFIRST(header) + value_offs, value_size);
	memcpy(b_entry_name, entry->e_name, entry->e_name_len);
	b_entry_name[entry->e_name_len] = '\0';
	i.name = b_entry_name;

	error = ext4_get_inode_loc(inode, &is->iloc);
	if (error)
		goto out;

	error = ext4_xattr_ibody_find(inode, &i, is);
	if (error)
		goto out;

	/* Remove the chosen entry from the inode */
	error = ext4_xattr_ibody_set(inode, &i, is);
	if (error)
		goto out;

	i.name = b_entry_name;
	i.value = buffer;
	i.value_len = value_size;
	error = ext4_xattr_block_find(inode, &i, bs);
	if (error)
		goto out;

	/* Add entry which was removed from the inode into the block */
	error = ext4_xattr_block_set(handle, inode, &i, bs);
	if (error)
		goto out;
	error = 0;
out:
	kfree(b_entry_name);
	kfree(buffer);
	if (is)
		brelse(is->iloc.bh);
	kfree(is);
	kfree(bs);

	return error;
}

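/*
 * Free up at least 'isize_diff' bytes in the inode body by moving
 * attributes out to the external xattr block: each pass evicts the
 * smallest entry that both fits in the block and frees enough space on
 * its own, falling back to smaller entries otherwise.
 */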
static int ext4_xattr_make_inode_space(handle_t *handle, struct inode *inode,
				       struct ext4_inode *raw_inode,
				       int isize_diff, size_t ifree,
				       size_t bfree, int *total_ino)
{
	struct ext4_xattr_ibody_header *header = IHDR(inode, raw_inode);
	struct ext4_xattr_entry *small_entry;
	struct ext4_xattr_entry *entry;
	struct ext4_xattr_entry *last;
	unsigned int entry_size;	/* EA entry size */
	unsigned int total_size;	/* EA entry size + value size */
	unsigned int min_total_size;
	int error;

	while (isize_diff > ifree) {
		entry = NULL;
		small_entry = NULL;
		min_total_size = ~0U;
		last = IFIRST(header);
		/* Find the entry best suited to be pushed into EA block */
		for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
			total_size =
			EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
					EXT4_XATTR_LEN(last->e_name_len);
			if (total_size <= bfree &&
			    total_size < min_total_size) {
				if (total_size + ifree < isize_diff) {
					small_entry = last;
				} else {
					entry = last;
					min_total_size = total_size;
				}
			}
		}

		if (entry == NULL) {
			if (small_entry == NULL)
				return -ENOSPC;
			entry = small_entry;
		}

		entry_size = EXT4_XATTR_LEN(entry->e_name_len);
		total_size = entry_size +
			EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size));
		error = ext4_xattr_move_to_block(handle, inode, raw_inode,
						 entry);
		if (error)
			return error;

		*total_ino -= entry_size;
		ifree += total_size;
		bfree -= total_size;
	}

	return 0;
}

/*
 * Expand an inode by new_extra_isize bytes when EAs are present.
 * Returns 0 on success or negative error number on failure.
 */
int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
			       struct ext4_inode *raw_inode, handle_t *handle)
{
	struct ext4_xattr_ibody_header *header;
	struct buffer_head *bh = NULL;
	size_t min_offs;
	size_t ifree, bfree;
	int total_ino;
	void *base, *end;
	int error = 0, tried_min_extra_isize = 0;
	int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);
	int isize_diff;	/* How much do we need to grow i_extra_isize */
	int no_expand;

	if (ext4_write_trylock_xattr(inode, &no_expand) == 0)
		return 0;

retry:
	isize_diff = new_extra_isize - EXT4_I(inode)->i_extra_isize;
	if (EXT4_I(inode)->i_extra_isize >= new_extra_isize)
		goto out;

	header = IHDR(inode, raw_inode);

	/*
	 * Check if enough free space is available in the inode to shift the
	 * entries ahead by new_extra_isize.
	 */

	base = IFIRST(header);
	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
	min_offs = end - base;
	total_ino = sizeof(struct ext4_xattr_ibody_header);

	error = xattr_check_inode(inode, header, end);
	if (error)
		goto cleanup;

	ifree = ext4_xattr_free_space(base, &min_offs, base, &total_ino);
	if (ifree >= isize_diff)
		goto shift;

	/*
	 * Enough free space isn't available in the inode, check if
	 * EA block can hold new_extra_isize bytes.
	 */
	if (EXT4_I(inode)->i_file_acl) {
		bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
		error = -EIO;
		if (!bh)
			goto cleanup;
		if (ext4_xattr_check_block(inode, bh)) {
			EXT4_ERROR_INODE(inode, "bad block %llu",
					 EXT4_I(inode)->i_file_acl);
			error = -EFSCORRUPTED;
			goto cleanup;
		}
		base = BHDR(bh);
		end = bh->b_data + bh->b_size;
		min_offs = end - base;
		bfree = ext4_xattr_free_space(BFIRST(bh), &min_offs, base,
					      NULL);
		if (bfree + ifree < isize_diff) {
			if (!tried_min_extra_isize && s_min_extra_isize) {
				tried_min_extra_isize++;
				new_extra_isize = s_min_extra_isize;
				brelse(bh);
				goto retry;
			}
			error = -ENOSPC;
			goto cleanup;
		}
	} else {
		bfree = inode->i_sb->s_blocksize;
	}

	error = ext4_xattr_make_inode_space(handle, inode, raw_inode,
					    isize_diff, ifree, bfree,
					    &total_ino);
	if (error) {
		if (error == -ENOSPC && !tried_min_extra_isize &&
		    s_min_extra_isize) {
			tried_min_extra_isize++;
			new_extra_isize = s_min_extra_isize;
			brelse(bh);
			goto retry;
		}
		goto cleanup;
	}
shift:
	/* Adjust the offsets and shift the remaining entries ahead */
	ext4_xattr_shift_entries(IFIRST(header), EXT4_I(inode)->i_extra_isize
			- new_extra_isize, (void *)raw_inode +
			EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
			(void *)header, total_ino);
	EXT4_I(inode)->i_extra_isize = new_extra_isize;
	brelse(bh);
out:
	ext4_write_unlock_xattr(inode, &no_expand);
	return 0;

cleanup:
	brelse(bh);
	/*
	 * Inode size expansion failed; don't try again
	 */
	no_expand = 1;
	ext4_write_unlock_xattr(inode, &no_expand);
	return error;
}

/*
 * ext4_xattr_delete_inode()
 *
 * Free extended attribute resources associated with this inode. This
 * is called immediately before an inode is freed. We have exclusive
 * access to the inode.
 */
void
ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
{
	struct buffer_head *bh = NULL;

	if (!EXT4_I(inode)->i_file_acl)
		goto cleanup;
	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
	if (!bh) {
		EXT4_ERROR_INODE(inode, "block %llu read error",
				 EXT4_I(inode)->i_file_acl);
		goto cleanup;
	}
	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
	    BHDR(bh)->h_blocks != cpu_to_le32(1)) {
		EXT4_ERROR_INODE(inode, "bad block %llu",
				 EXT4_I(inode)->i_file_acl);
		goto cleanup;
	}
	ext4_xattr_release_block(handle, inode, bh);
	EXT4_I(inode)->i_file_acl = 0;

cleanup:
	brelse(bh);
}

/*
 * ext4_xattr_cache_insert()
 *
 * Create a new entry in the extended attribute cache, and insert
 * it unless such an entry is already in the cache.
 *
 * Returns 0, or a negative error number on failure.
 */
static void
ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
{
	struct ext4_xattr_header *header = BHDR(bh);
	__u32 hash = le32_to_cpu(header->h_hash);
	int reusable = le32_to_cpu(header->h_refcount) <
		       EXT4_XATTR_REFCOUNT_MAX;
	int error;

	error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
				      bh->b_blocknr, reusable);
	if (error) {
		if (error == -EBUSY)
			ea_bdebug(bh, "already in cache");
	} else
		ea_bdebug(bh, "inserting [%x]", (int)hash);
}

/*
 * ext4_xattr_cmp()
 *
 * Compare two extended attribute blocks for equality.
 *
 * Returns 0 if the blocks are equal, 1 if they differ, and
 * a negative error number on errors.
 */
static int
ext4_xattr_cmp(struct ext4_xattr_header *header1,
	       struct ext4_xattr_header *header2)
{
	struct ext4_xattr_entry *entry1, *entry2;

	entry1 = ENTRY(header1+1);
	entry2 = ENTRY(header2+1);
	while (!IS_LAST_ENTRY(entry1)) {
		if (IS_LAST_ENTRY(entry2))
			return 1;
		if (entry1->e_hash != entry2->e_hash ||
		    entry1->e_name_index != entry2->e_name_index ||
		    entry1->e_name_len != entry2->e_name_len ||
		    entry1->e_value_size != entry2->e_value_size ||
		    memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
			return 1;
		if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
			return -EFSCORRUPTED;
		if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
			   (char *)header2 + le16_to_cpu(entry2->e_value_offs),
			   le32_to_cpu(entry1->e_value_size)))
			return 1;

		entry1 = EXT4_XATTR_NEXT(entry1);
		entry2 = EXT4_XATTR_NEXT(entry2);
	}
	if (!IS_LAST_ENTRY(entry2))
		return 1;
	return 0;
}

/*
 * ext4_xattr_cache_find()
 *
 * Find an identical extended attribute block.
 *
 * Returns a pointer to the block found, or NULL if such a block was
 * not found or an error occurred.
 */
static struct buffer_head *
ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
		      struct mb_cache_entry **pce)
{
	__u32 hash = le32_to_cpu(header->h_hash);
	struct mb_cache_entry *ce;
	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);

	if (!header->h_hash)
		return NULL;  /* never share */
	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
	ce = mb_cache_entry_find_first(ext4_mb_cache, hash);
	while (ce) {
		struct buffer_head *bh;

		bh = sb_bread(inode->i_sb, ce->e_block);
		if (!bh) {
			EXT4_ERROR_INODE(inode, "block %lu read error",
					 (unsigned long) ce->e_block);
		} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
			*pce = ce;
			return bh;
		}
		brelse(bh);
		ce = mb_cache_entry_find_next(ext4_mb_cache, ce);
	}
	return NULL;
}

#define NAME_HASH_SHIFT 5
#define VALUE_HASH_SHIFT 16

/*
 * ext4_xattr_hash_entry()
 *
 * Compute the hash of an extended attribute.
 */
static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
					 struct ext4_xattr_entry *entry)
{
	__u32 hash = 0;
	char *name = entry->e_name;
	int n;

	for (n = 0; n < entry->e_name_len; n++) {
		hash = (hash << NAME_HASH_SHIFT) ^
		       (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
		       *name++;
	}

	if (entry->e_value_size != 0) {
		__le32 *value = (__le32 *)((char *)header +
			le16_to_cpu(entry->e_value_offs));
		for (n = (le32_to_cpu(entry->e_value_size) +
		     EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) {
			hash = (hash << VALUE_HASH_SHIFT) ^
			       (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
			       le32_to_cpu(*value++);
		}
	}
	entry->e_hash = cpu_to_le32(hash);
}

#undef NAME_HASH_SHIFT
#undef VALUE_HASH_SHIFT

#define BLOCK_HASH_SHIFT 16

/*
 * ext4_xattr_rehash()
 *
 * Re-compute the extended attribute hash value after an entry has changed.
 */
static void ext4_xattr_rehash(struct ext4_xattr_header *header,
			      struct ext4_xattr_entry *entry)
{
	struct ext4_xattr_entry *here;
	__u32 hash = 0;

	ext4_xattr_hash_entry(header, entry);
	here = ENTRY(header+1);
	while (!IS_LAST_ENTRY(here)) {
		if (!here->e_hash) {
			/* Block is not shared if an entry's hash value == 0 */
			hash = 0;
			break;
		}
		hash = (hash << BLOCK_HASH_SHIFT) ^
		       (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
		       le32_to_cpu(here->e_hash);
		here = EXT4_XATTR_NEXT(here);
	}
	header->h_hash = cpu_to_le32(hash);
}

#undef BLOCK_HASH_SHIFT

#define	HASH_BUCKET_BITS	10

struct mb_cache *
ext4_xattr_create_cache(void)
{
	return mb_cache_create(HASH_BUCKET_BITS);
}

void ext4_xattr_destroy_cache(struct mb_cache *cache)
{
	if (cache)
		mb_cache_destroy(cache);
}