/*
 * linux/fs/ext4/xattr.c
 *
 * Copyright (C) 2001-2003 Andreas Gruenbacher, <agruen@suse.de>
 *
 * Fix by Harrison Xing <harrison@mountainviewdata.com>.
 * Ext4 code with a lot of help from Eric Jarman <ejarman@acm.org>.
 * Extended attributes for symlinks and special files added per
 *  suggestion of Luka Renko <luka.renko@hermes.si>.
 * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
 *  Red Hat Inc.
 * ea-in-inode support by Alex Tomas <alex@clusterfs.com> aka bzzz
 *  and Andreas Gruenbacher <agruen@suse.de>.
 */

/*
 * Extended attributes are stored directly in inodes (on file systems with
 * inodes bigger than 128 bytes) and on additional disk blocks. The i_file_acl
 * field contains the block number if an inode uses an additional block. All
 * attributes must fit in the inode and one additional block. Blocks that
 * contain the identical set of attributes may be shared among several inodes.
 * Identical blocks are detected by keeping a cache of blocks that have
 * recently been accessed.
 *
 * The attributes in inodes and on blocks have a different header; the entries
 * are stored in the same format:
 *
 *   +------------------+
 *   | header           |
 *   | entry 1          | |
 *   | entry 2          | | growing downwards
 *   | entry 3          | v
 *   | four null bytes  |
 *   | . . .            |
 *   | value 1          | ^
 *   | value 3          | | growing upwards
 *   | value 2          | |
 *   +------------------+
 *
 * The header is followed by multiple entry descriptors. In disk blocks, the
 * entry descriptors are kept sorted. In inodes, they are unsorted. The
 * attribute values are aligned to the end of the block in no specific order.
 *
 * Locking strategy
 * ----------------
 * EXT4_I(inode)->i_file_acl is protected by EXT4_I(inode)->xattr_sem.
 * EA blocks are only changed if they are exclusive to an inode, so
 * holding xattr_sem also means that nothing but the EA block's reference
 * count can change. Multiple writers to the same block are synchronized
 * by the buffer lock.
 */

#include <linux/init.h>
#include <linux/fs.h>
#include <linux/slab.h>
J
Jan Kara 已提交
56
#include <linux/mbcache.h>
57
#include <linux/quotaops.h>
58 59
#include "ext4_jbd2.h"
#include "ext4.h"
60 61 62
#include "xattr.h"
#include "acl.h"

63
#ifdef EXT4_XATTR_DEBUG
/*
 * Debug helpers: print a KERN_DEBUG prefix identifying the inode (or the
 * buffer's block), then the caller's formatted message, then a newline.
 * The object argument is parenthesized so that any pointer expression can
 * be passed without precedence surprises.
 */
# define ea_idebug(inode, f...) do { \
		printk(KERN_DEBUG "inode %s:%lu: ", \
			(inode)->i_sb->s_id, (inode)->i_ino); \
		printk(f); \
		printk("\n"); \
	} while (0)
# define ea_bdebug(bh, f...) do { \
		printk(KERN_DEBUG "block %pg:%lu: ",		       \
		       (bh)->b_bdev, (unsigned long) (bh)->b_blocknr); \
		printk(f); \
		printk("\n"); \
	} while (0)
#else
/* Debugging disabled: both helpers expand to no_printk(). */
# define ea_idebug(inode, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
# define ea_bdebug(bh, fmt, ...)	no_printk(fmt, ##__VA_ARGS__)
#endif

J
Jan Kara 已提交
81
static void ext4_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
82 83
static struct buffer_head *ext4_xattr_cache_find(struct inode *,
						 struct ext4_xattr_header *,
J
Jan Kara 已提交
84
						 struct mb_cache_entry **);
85 86
static void ext4_xattr_rehash(struct ext4_xattr_header *,
			      struct ext4_xattr_entry *);
87
static int ext4_xattr_list(struct dentry *dentry, char *buffer,
88
			   size_t buffer_size);
89

S
Stephen Hemminger 已提交
90
static const struct xattr_handler *ext4_xattr_handler_map[] = {
91
	[EXT4_XATTR_INDEX_USER]		     = &ext4_xattr_user_handler,
T
Theodore Ts'o 已提交
92
#ifdef CONFIG_EXT4_FS_POSIX_ACL
93 94
	[EXT4_XATTR_INDEX_POSIX_ACL_ACCESS]  = &posix_acl_access_xattr_handler,
	[EXT4_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler,
95
#endif
96
	[EXT4_XATTR_INDEX_TRUSTED]	     = &ext4_xattr_trusted_handler,
T
Theodore Ts'o 已提交
97
#ifdef CONFIG_EXT4_FS_SECURITY
98
	[EXT4_XATTR_INDEX_SECURITY]	     = &ext4_xattr_security_handler,
99 100 101
#endif
};

S
Stephen Hemminger 已提交
102
const struct xattr_handler *ext4_xattr_handlers[] = {
103 104
	&ext4_xattr_user_handler,
	&ext4_xattr_trusted_handler,
T
Theodore Ts'o 已提交
105
#ifdef CONFIG_EXT4_FS_POSIX_ACL
106 107
	&posix_acl_access_xattr_handler,
	&posix_acl_default_xattr_handler,
108
#endif
T
Theodore Ts'o 已提交
109
#ifdef CONFIG_EXT4_FS_SECURITY
110
	&ext4_xattr_security_handler,
111 112 113 114
#endif
	NULL
};

115 116 117
/*
 * Fetch the mbcache instance (s_mb_cache) of the filesystem that owns
 * @inode. The argument is parenthesized so any pointer expression works.
 */
#define EXT4_GET_MB_CACHE(inode)	(((struct ext4_sb_info *) \
				(inode)->i_sb->s_fs_info)->s_mb_cache)

118 119 120 121 122
/*
 * Compute the checksum of an xattr block.
 *
 * The checksum is seeded with the filesystem's s_csum_seed, covers the
 * little-endian 64-bit block number, and then the whole block with the
 * h_checksum field temporarily zeroed. The stored h_checksum is restored
 * before returning. The result is returned in little-endian form.
 */
static __le32 ext4_xattr_block_csum(struct inode *inode,
				    sector_t block_nr,
				    struct ext4_xattr_header *hdr)
{
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	__le64 le_block_nr = cpu_to_le64(block_nr);
	__le32 stored = hdr->h_checksum;
	__u32 crc;

	/* The checksum field itself must read as zero while summing. */
	hdr->h_checksum = 0;

	crc = ext4_chksum(sbi, sbi->s_csum_seed, (__u8 *)&le_block_nr,
			  sizeof(le_block_nr));
	crc = ext4_chksum(sbi, crc, (__u8 *)hdr,
			  EXT4_BLOCK_SIZE(inode->i_sb));

	hdr->h_checksum = stored;

	return cpu_to_le32(crc);
}

/*
 * Returns 1 if the block's stored checksum is acceptable (metadata checksums
 * are disabled, or the stored value matches the computed one), 0 otherwise.
 */
static int ext4_xattr_block_csum_verify(struct inode *inode,
					sector_t block_nr,
					struct ext4_xattr_header *hdr)
{
	if (!ext4_has_metadata_csum(inode->i_sb))
		return 1;

	if (hdr->h_checksum != ext4_xattr_block_csum(inode, block_nr, hdr))
		return 0;

	return 1;
}

/*
 * Store a freshly computed checksum in the block header. Does nothing when
 * the filesystem does not use metadata checksums.
 */
static void ext4_xattr_block_csum_set(struct inode *inode,
				      sector_t block_nr,
				      struct ext4_xattr_header *hdr)
{
	if (ext4_has_metadata_csum(inode->i_sb))
		hdr->h_checksum = ext4_xattr_block_csum(inode, block_nr, hdr);
}

/*
 * Refresh the checksum of the xattr block held in @bh, then mark the buffer
 * dirty through ext4_handle_dirty_metadata(). Returns that call's result.
 */
static inline int ext4_handle_dirty_xattr_block(handle_t *handle,
						struct inode *inode,
						struct buffer_head *bh)
{
	struct ext4_xattr_header *hdr = BHDR(bh);

	ext4_xattr_block_csum_set(inode, bh->b_blocknr, hdr);

	return ext4_handle_dirty_metadata(handle, inode, bh);
}

S
Stephen Hemminger 已提交
166
static inline const struct xattr_handler *
167
ext4_xattr_handler(int name_index)
168
{
S
Stephen Hemminger 已提交
169
	const struct xattr_handler *handler = NULL;
170

171 172
	if (name_index > 0 && name_index < ARRAY_SIZE(ext4_xattr_handler_map))
		handler = ext4_xattr_handler_map[name_index];
173 174 175 176 177 178
	return handler;
}

/*
 * Inode operation listxattr()
 *
179
 * d_inode(dentry)->i_mutex: don't care
180 181
 */
ssize_t
182
ext4_listxattr(struct dentry *dentry, char *buffer, size_t size)
183
{
184
	return ext4_xattr_list(dentry, buffer, size);
185 186 187
}

static int
188 189
ext4_xattr_check_names(struct ext4_xattr_entry *entry, void *end,
		       void *value_start)
190
{
191 192 193 194
	struct ext4_xattr_entry *e = entry;

	while (!IS_LAST_ENTRY(e)) {
		struct ext4_xattr_entry *next = EXT4_XATTR_NEXT(e);
195
		if ((void *)next >= end)
196
			return -EFSCORRUPTED;
197
		e = next;
198
	}
199 200 201 202 203 204 205

	while (!IS_LAST_ENTRY(entry)) {
		if (entry->e_value_size != 0 &&
		    (value_start + le16_to_cpu(entry->e_value_offs) <
		     (void *)e + sizeof(__u32) ||
		     value_start + le16_to_cpu(entry->e_value_offs) +
		    le32_to_cpu(entry->e_value_size) > end))
206
			return -EFSCORRUPTED;
207 208 209
		entry = EXT4_XATTR_NEXT(entry);
	}

210 211 212 213
	return 0;
}

static inline int
214
ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
215
{
216 217 218 219 220
	int error;

	if (buffer_verified(bh))
		return 0;

221
	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
222
	    BHDR(bh)->h_blocks != cpu_to_le32(1))
223
		return -EFSCORRUPTED;
224
	if (!ext4_xattr_block_csum_verify(inode, bh->b_blocknr, BHDR(bh)))
225
		return -EFSBADCRC;
226 227
	error = ext4_xattr_check_names(BFIRST(bh), bh->b_data + bh->b_size,
				       bh->b_data);
228 229 230
	if (!error)
		set_buffer_verified(bh);
	return error;
231 232 233
}

static inline int
234
ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size)
235 236 237 238 239
{
	size_t value_size = le32_to_cpu(entry->e_value_size);

	if (entry->e_value_block != 0 || value_size > size ||
	    le16_to_cpu(entry->e_value_offs) + value_size > size)
240
		return -EFSCORRUPTED;
241 242 243 244
	return 0;
}

static int
245
ext4_xattr_find_entry(struct ext4_xattr_entry **pentry, int name_index,
246 247
		      const char *name, size_t size, int sorted)
{
248
	struct ext4_xattr_entry *entry;
249 250 251 252 253 254 255
	size_t name_len;
	int cmp = 1;

	if (name == NULL)
		return -EINVAL;
	name_len = strlen(name);
	entry = *pentry;
256
	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
257 258 259 260 261 262 263 264 265
		cmp = name_index - entry->e_name_index;
		if (!cmp)
			cmp = name_len - entry->e_name_len;
		if (!cmp)
			cmp = memcmp(name, entry->e_name, name_len);
		if (cmp <= 0 && (sorted || cmp == 0))
			break;
	}
	*pentry = entry;
266
	if (!cmp && ext4_xattr_check_entry(entry, size))
267
		return -EFSCORRUPTED;
268 269 270 271
	return cmp ? -ENODATA : 0;
}

static int
272
ext4_xattr_block_get(struct inode *inode, int name_index, const char *name,
273 274 275
		     void *buffer, size_t buffer_size)
{
	struct buffer_head *bh = NULL;
276
	struct ext4_xattr_entry *entry;
277 278
	size_t size;
	int error;
J
Jan Kara 已提交
279
	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
280 281 282 283 284

	ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
		  name_index, name, buffer, (long)buffer_size);

	error = -ENODATA;
285
	if (!EXT4_I(inode)->i_file_acl)
286
		goto cleanup;
287 288
	ea_idebug(inode, "reading block %llu",
		  (unsigned long long)EXT4_I(inode)->i_file_acl);
289
	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
290 291 292 293
	if (!bh)
		goto cleanup;
	ea_bdebug(bh, "b_count=%d, refcount=%d",
		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
294
	if (ext4_xattr_check_block(inode, bh)) {
295
bad_block:
296 297
		EXT4_ERROR_INODE(inode, "bad block %llu",
				 EXT4_I(inode)->i_file_acl);
298
		error = -EFSCORRUPTED;
299 300
		goto cleanup;
	}
301
	ext4_xattr_cache_insert(ext4_mb_cache, bh);
302
	entry = BFIRST(bh);
303
	error = ext4_xattr_find_entry(&entry, name_index, name, bh->b_size, 1);
304
	if (error == -EFSCORRUPTED)
305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322
		goto bad_block;
	if (error)
		goto cleanup;
	size = le32_to_cpu(entry->e_value_size);
	if (buffer) {
		error = -ERANGE;
		if (size > buffer_size)
			goto cleanup;
		memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
		       size);
	}
	error = size;

cleanup:
	brelse(bh);
	return error;
}

T
Tao Ma 已提交
323
int
324
ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
325 326
		     void *buffer, size_t buffer_size)
{
327 328 329 330
	struct ext4_xattr_ibody_header *header;
	struct ext4_xattr_entry *entry;
	struct ext4_inode *raw_inode;
	struct ext4_iloc iloc;
331 332 333 334
	size_t size;
	void *end;
	int error;

335
	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
336
		return -ENODATA;
337
	error = ext4_get_inode_loc(inode, &iloc);
338 339
	if (error)
		return error;
340
	raw_inode = ext4_raw_inode(&iloc);
341 342
	header = IHDR(inode, raw_inode);
	entry = IFIRST(header);
343
	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
344
	error = ext4_xattr_check_names(entry, end, entry);
345 346
	if (error)
		goto cleanup;
347
	error = ext4_xattr_find_entry(&entry, name_index, name,
348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366
				      end - (void *)entry, 0);
	if (error)
		goto cleanup;
	size = le32_to_cpu(entry->e_value_size);
	if (buffer) {
		error = -ERANGE;
		if (size > buffer_size)
			goto cleanup;
		memcpy(buffer, (void *)IFIRST(header) +
		       le16_to_cpu(entry->e_value_offs), size);
	}
	error = size;

cleanup:
	brelse(iloc.bh);
	return error;
}

/*
367
 * ext4_xattr_get()
368 369 370 371 372 373 374 375 376
 *
 * Copy an extended attribute into the buffer
 * provided, or compute the buffer size required.
 * Buffer is NULL to compute the size of the buffer required.
 *
 * Returns a negative error number on failure, or the number of bytes
 * used / required on success.
 */
int
377
ext4_xattr_get(struct inode *inode, int name_index, const char *name,
378 379 380 381
	       void *buffer, size_t buffer_size)
{
	int error;

382 383 384
	if (strlen(name) > 255)
		return -ERANGE;

385 386
	down_read(&EXT4_I(inode)->xattr_sem);
	error = ext4_xattr_ibody_get(inode, name_index, name, buffer,
387 388
				     buffer_size);
	if (error == -ENODATA)
389
		error = ext4_xattr_block_get(inode, name_index, name, buffer,
390
					     buffer_size);
391
	up_read(&EXT4_I(inode)->xattr_sem);
392 393 394 395
	return error;
}

static int
396
ext4_xattr_list_entries(struct dentry *dentry, struct ext4_xattr_entry *entry,
397 398 399 400
			char *buffer, size_t buffer_size)
{
	size_t rest = buffer_size;

401
	for (; !IS_LAST_ENTRY(entry); entry = EXT4_XATTR_NEXT(entry)) {
S
Stephen Hemminger 已提交
402
		const struct xattr_handler *handler =
403
			ext4_xattr_handler(entry->e_name_index);
404

405 406 407 408 409
		if (handler && (!handler->list || handler->list(dentry))) {
			const char *prefix = handler->prefix ?: handler->name;
			size_t prefix_len = strlen(prefix);
			size_t size = prefix_len + entry->e_name_len + 1;

410 411 412
			if (buffer) {
				if (size > rest)
					return -ERANGE;
413 414 415 416 417
				memcpy(buffer, prefix, prefix_len);
				buffer += prefix_len;
				memcpy(buffer, entry->e_name, entry->e_name_len);
				buffer += entry->e_name_len;
				*buffer++ = 0;
418 419 420 421
			}
			rest -= size;
		}
	}
422
	return buffer_size - rest;  /* total size */
423 424 425
}

static int
426
ext4_xattr_block_list(struct dentry *dentry, char *buffer, size_t buffer_size)
427
{
428
	struct inode *inode = d_inode(dentry);
429 430
	struct buffer_head *bh = NULL;
	int error;
J
Jan Kara 已提交
431
	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
432 433 434 435 436

	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
		  buffer, (long)buffer_size);

	error = 0;
437
	if (!EXT4_I(inode)->i_file_acl)
438
		goto cleanup;
439 440
	ea_idebug(inode, "reading block %llu",
		  (unsigned long long)EXT4_I(inode)->i_file_acl);
441
	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
442 443 444 445 446
	error = -EIO;
	if (!bh)
		goto cleanup;
	ea_bdebug(bh, "b_count=%d, refcount=%d",
		atomic_read(&(bh->b_count)), le32_to_cpu(BHDR(bh)->h_refcount));
447
	if (ext4_xattr_check_block(inode, bh)) {
448 449
		EXT4_ERROR_INODE(inode, "bad block %llu",
				 EXT4_I(inode)->i_file_acl);
450
		error = -EFSCORRUPTED;
451 452
		goto cleanup;
	}
453
	ext4_xattr_cache_insert(ext4_mb_cache, bh);
454
	error = ext4_xattr_list_entries(dentry, BFIRST(bh), buffer, buffer_size);
455 456 457 458 459 460 461 462

cleanup:
	brelse(bh);

	return error;
}

static int
463
ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
464
{
465
	struct inode *inode = d_inode(dentry);
466 467 468
	struct ext4_xattr_ibody_header *header;
	struct ext4_inode *raw_inode;
	struct ext4_iloc iloc;
469 470 471
	void *end;
	int error;

472
	if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR))
473
		return 0;
474
	error = ext4_get_inode_loc(inode, &iloc);
475 476
	if (error)
		return error;
477
	raw_inode = ext4_raw_inode(&iloc);
478
	header = IHDR(inode, raw_inode);
479
	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
480
	error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header));
481 482
	if (error)
		goto cleanup;
483
	error = ext4_xattr_list_entries(dentry, IFIRST(header),
484 485 486 487 488 489 490 491
					buffer, buffer_size);

cleanup:
	brelse(iloc.bh);
	return error;
}

/*
492
 * ext4_xattr_list()
493 494 495 496 497 498 499 500
 *
 * Copy a list of attribute names into the buffer
 * provided, or compute the buffer size required.
 * Buffer is NULL to compute the size of the buffer required.
 *
 * Returns a negative error number on failure, or the number of bytes
 * used / required on success.
 */
501
static int
502
ext4_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
503
{
504
	int ret, ret2;
505

506
	down_read(&EXT4_I(d_inode(dentry))->xattr_sem);
507 508 509 510 511 512
	ret = ret2 = ext4_xattr_ibody_list(dentry, buffer, buffer_size);
	if (ret < 0)
		goto errout;
	if (buffer) {
		buffer += ret;
		buffer_size -= ret;
513
	}
514 515 516 517 518
	ret = ext4_xattr_block_list(dentry, buffer, buffer_size);
	if (ret < 0)
		goto errout;
	ret += ret2;
errout:
519
	up_read(&EXT4_I(d_inode(dentry))->xattr_sem);
520
	return ret;
521 522 523
}

/*
524
 * If the EXT4_FEATURE_COMPAT_EXT_ATTR feature of this file system is
525 526
 * not set, set it.
 */
527
static void ext4_xattr_update_super_block(handle_t *handle,
528 529
					  struct super_block *sb)
{
530
	if (ext4_has_feature_xattr(sb))
531 532
		return;

533
	BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get_write_access");
534
	if (ext4_journal_get_write_access(handle, EXT4_SB(sb)->s_sbh) == 0) {
535
		ext4_set_feature_xattr(sb);
T
Theodore Ts'o 已提交
536
		ext4_handle_dirty_super(handle, sb);
537 538 539 540
	}
}

/*
541 542
 * Release the xattr block BH: If the reference count is > 1, decrement it;
 * otherwise free the block.
543 544
 */
static void
545
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
546 547
			 struct buffer_head *bh)
{
548 549
	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
	u32 hash, ref;
550
	int error = 0;
551

552
	BUFFER_TRACE(bh, "get_write_access");
553 554 555 556 557
	error = ext4_journal_get_write_access(handle, bh);
	if (error)
		goto out;

	lock_buffer(bh);
558 559 560
	hash = le32_to_cpu(BHDR(bh)->h_hash);
	ref = le32_to_cpu(BHDR(bh)->h_refcount);
	if (ref == 1) {
561
		ea_bdebug(bh, "refcount now=0; freeing");
J
Jan Kara 已提交
562 563 564 565
		/*
		 * This must happen under buffer lock for
		 * ext4_xattr_block_set() to reliably detect freed block
		 */
566
		mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr);
567
		get_bh(bh);
568
		unlock_buffer(bh);
569 570 571
		ext4_free_blocks(handle, inode, bh, 0, 1,
				 EXT4_FREE_BLOCKS_METADATA |
				 EXT4_FREE_BLOCKS_FORGET);
572
	} else {
573 574 575 576 577 578 579 580 581 582 583 584 585
		ref--;
		BHDR(bh)->h_refcount = cpu_to_le32(ref);
		if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
			struct mb_cache_entry *ce;

			ce = mb_cache_entry_get(ext4_mb_cache, hash,
						bh->b_blocknr);
			if (ce) {
				ce->e_reusable = 1;
				mb_cache_entry_put(ext4_mb_cache, ce);
			}
		}

586 587 588 589 590 591 592 593 594 595 596 597 598
		/*
		 * Beware of this ugliness: Releasing of xattr block references
		 * from different inodes can race and so we have to protect
		 * from a race where someone else frees the block (and releases
		 * its journal_head) before we are done dirtying the buffer. In
		 * nojournal mode this race is harmless and we actually cannot
		 * call ext4_handle_dirty_xattr_block() with locked buffer as
		 * that function can call sync_dirty_buffer() so for that case
		 * we handle the dirtying after unlocking the buffer.
		 */
		if (ext4_handle_valid(handle))
			error = ext4_handle_dirty_xattr_block(handle, inode,
							      bh);
599
		unlock_buffer(bh);
600 601 602
		if (!ext4_handle_valid(handle))
			error = ext4_handle_dirty_xattr_block(handle, inode,
							      bh);
603
		if (IS_SYNC(inode))
604
			ext4_handle_sync(handle);
605
		dquot_free_block(inode, EXT4_C2B(EXT4_SB(inode->i_sb), 1));
606 607
		ea_bdebug(bh, "refcount now=%d; releasing",
			  le32_to_cpu(BHDR(bh)->h_refcount));
608
	}
609 610 611
out:
	ext4_std_error(inode->i_sb, error);
	return;
612 613
}

614 615 616 617 618 619 620 621 622 623 624 625 626
/*
 * Find the available free space for EAs. This also returns the total number
 * of bytes used by EA entries.
 *
 * Walks the list from @last to its terminator, lowering *min_offs to the
 * smallest value offset seen and, when @total is non-NULL, adding each
 * entry's descriptor length to *total. The result is the gap between the
 * end of the list (including its 4-byte terminator) and *min_offs, with
 * the list position taken relative to @base.
 */
static size_t ext4_xattr_free_space(struct ext4_xattr_entry *last,
				    size_t *min_offs, void *base, int *total)
{
	while (!IS_LAST_ENTRY(last)) {
		if (last->e_value_size && !last->e_value_block) {
			size_t value_offs = le16_to_cpu(last->e_value_offs);

			if (value_offs < *min_offs)
				*min_offs = value_offs;
		}
		if (total)
			*total += EXT4_XATTR_LEN(last->e_name_len);

		last = EXT4_XATTR_NEXT(last);
	}

	return (*min_offs - ((void *)last - base) - sizeof(__u32));
}

633
static int
634
ext4_xattr_set_entry(struct ext4_xattr_info *i, struct ext4_xattr_search *s)
635
{
636
	struct ext4_xattr_entry *last;
637 638 639 640
	size_t free, min_offs = s->end - s->base, name_len = strlen(i->name);

	/* Compute min_offs and last. */
	last = s->first;
641
	for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
642 643 644 645 646 647 648 649 650 651
		if (!last->e_value_block && last->e_value_size) {
			size_t offs = le16_to_cpu(last->e_value_offs);
			if (offs < min_offs)
				min_offs = offs;
		}
	}
	free = min_offs - ((void *)last - s->base) - sizeof(__u32);
	if (!s->not_found) {
		if (!s->here->e_value_block && s->here->e_value_size) {
			size_t size = le32_to_cpu(s->here->e_value_size);
652
			free += EXT4_XATTR_SIZE(size);
653
		}
654
		free += EXT4_XATTR_LEN(name_len);
655 656
	}
	if (i->value) {
657
		if (free < EXT4_XATTR_LEN(name_len) +
658
			   EXT4_XATTR_SIZE(i->value_len))
659 660 661 662 663
			return -ENOSPC;
	}

	if (i->value && s->not_found) {
		/* Insert the new name. */
664
		size_t size = EXT4_XATTR_LEN(name_len);
665 666 667 668 669 670 671 672 673 674 675
		size_t rest = (void *)last - (void *)s->here + sizeof(__u32);
		memmove((void *)s->here + size, s->here, rest);
		memset(s->here, 0, size);
		s->here->e_name_index = i->name_index;
		s->here->e_name_len = name_len;
		memcpy(s->here->e_name, i->name, name_len);
	} else {
		if (!s->here->e_value_block && s->here->e_value_size) {
			void *first_val = s->base + min_offs;
			size_t offs = le16_to_cpu(s->here->e_value_offs);
			void *val = s->base + offs;
676
			size_t size = EXT4_XATTR_SIZE(
677 678
				le32_to_cpu(s->here->e_value_size));

679
			if (i->value && size == EXT4_XATTR_SIZE(i->value_len)) {
680 681 682 683
				/* The old and the new value have the same
				   size. Just replace. */
				s->here->e_value_size =
					cpu_to_le32(i->value_len);
684 685 686 687 688 689 690 691
				if (i->value == EXT4_ZERO_XATTR_VALUE) {
					memset(val, 0, size);
				} else {
					/* Clear pad bytes first. */
					memset(val + size - EXT4_XATTR_PAD, 0,
					       EXT4_XATTR_PAD);
					memcpy(val, i->value, i->value_len);
				}
692 693 694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709
				return 0;
			}

			/* Remove the old value. */
			memmove(first_val + size, first_val, val - first_val);
			memset(first_val, 0, size);
			s->here->e_value_size = 0;
			s->here->e_value_offs = 0;
			min_offs += size;

			/* Adjust all value offsets. */
			last = s->first;
			while (!IS_LAST_ENTRY(last)) {
				size_t o = le16_to_cpu(last->e_value_offs);
				if (!last->e_value_block &&
				    last->e_value_size && o < offs)
					last->e_value_offs =
						cpu_to_le16(o + size);
710
				last = EXT4_XATTR_NEXT(last);
711 712 713 714
			}
		}
		if (!i->value) {
			/* Remove the old name. */
715
			size_t size = EXT4_XATTR_LEN(name_len);
716 717 718 719 720 721 722 723 724 725 726
			last = ENTRY((void *)last - size);
			memmove(s->here, (void *)s->here + size,
				(void *)last - (void *)s->here + sizeof(__u32));
			memset(last, 0, size);
		}
	}

	if (i->value) {
		/* Insert the new value. */
		s->here->e_value_size = cpu_to_le32(i->value_len);
		if (i->value_len) {
727
			size_t size = EXT4_XATTR_SIZE(i->value_len);
728 729
			void *val = s->base + min_offs - size;
			s->here->e_value_offs = cpu_to_le16(min_offs - size);
730 731 732 733 734 735 736 737
			if (i->value == EXT4_ZERO_XATTR_VALUE) {
				memset(val, 0, size);
			} else {
				/* Clear the pad bytes first. */
				memset(val + size - EXT4_XATTR_PAD, 0,
				       EXT4_XATTR_PAD);
				memcpy(val, i->value, i->value_len);
			}
738 739 740 741 742
		}
	}
	return 0;
}

743 744
/* Search state for an attribute lookup in the external xattr block. */
struct ext4_xattr_block_find {
	struct ext4_xattr_search s;	/* region bounds and lookup result */
	struct buffer_head *bh;		/* the xattr block's buffer, if read */
};

static int
749 750
ext4_xattr_block_find(struct inode *inode, struct ext4_xattr_info *i,
		      struct ext4_xattr_block_find *bs)
751 752 753 754 755 756 757
{
	struct super_block *sb = inode->i_sb;
	int error;

	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
		  i->name_index, i->name, i->value, (long)i->value_len);

758
	if (EXT4_I(inode)->i_file_acl) {
759
		/* The inode already has an extended attribute block. */
760
		bs->bh = sb_bread(sb, EXT4_I(inode)->i_file_acl);
761 762 763 764 765 766
		error = -EIO;
		if (!bs->bh)
			goto cleanup;
		ea_bdebug(bs->bh, "b_count=%d, refcount=%d",
			atomic_read(&(bs->bh->b_count)),
			le32_to_cpu(BHDR(bs->bh)->h_refcount));
767
		if (ext4_xattr_check_block(inode, bs->bh)) {
768 769
			EXT4_ERROR_INODE(inode, "bad block %llu",
					 EXT4_I(inode)->i_file_acl);
770
			error = -EFSCORRUPTED;
771 772 773 774 775 776 777
			goto cleanup;
		}
		/* Find the named attribute. */
		bs->s.base = BHDR(bs->bh);
		bs->s.first = BFIRST(bs->bh);
		bs->s.end = bs->bh->b_data + bs->bh->b_size;
		bs->s.here = bs->s.first;
778
		error = ext4_xattr_find_entry(&bs->s.here, i->name_index,
779 780 781 782 783 784 785 786 787 788 789 790
					      i->name, bs->bh->b_size, 1);
		if (error && error != -ENODATA)
			goto cleanup;
		bs->s.not_found = error;
	}
	error = 0;

cleanup:
	return error;
}

static int
791 792 793
ext4_xattr_block_set(handle_t *handle, struct inode *inode,
		     struct ext4_xattr_info *i,
		     struct ext4_xattr_block_find *bs)
794 795 796
{
	struct super_block *sb = inode->i_sb;
	struct buffer_head *new_bh = NULL;
797
	struct ext4_xattr_search *s = &bs->s;
J
Jan Kara 已提交
798
	struct mb_cache_entry *ce = NULL;
799
	int error = 0;
J
Jan Kara 已提交
800
	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
801

802
#define header(x) ((struct ext4_xattr_header *)(x))
803 804 805 806

	if (i->value && i->value_len > sb->s_blocksize)
		return -ENOSPC;
	if (s->base) {
807
		BUFFER_TRACE(bs->bh, "get_write_access");
808 809 810 811 812
		error = ext4_journal_get_write_access(handle, bs->bh);
		if (error)
			goto cleanup;
		lock_buffer(bs->bh);

813
		if (header(s->base)->h_refcount == cpu_to_le32(1)) {
J
Jan Kara 已提交
814 815 816 817 818 819 820
			__u32 hash = le32_to_cpu(BHDR(bs->bh)->h_hash);

			/*
			 * This must happen under buffer lock for
			 * ext4_xattr_block_set() to reliably detect modified
			 * block
			 */
J
Jan Kara 已提交
821 822
			mb_cache_entry_delete_block(ext4_mb_cache, hash,
						    bs->bh->b_blocknr);
823
			ea_bdebug(bs->bh, "modifying in-place");
824
			error = ext4_xattr_set_entry(i, s);
825 826
			if (!error) {
				if (!IS_LAST_ENTRY(s->first))
827
					ext4_xattr_rehash(header(s->base),
828
							  s->here);
829 830
				ext4_xattr_cache_insert(ext4_mb_cache,
					bs->bh);
831 832
			}
			unlock_buffer(bs->bh);
833
			if (error == -EFSCORRUPTED)
834 835
				goto bad_block;
			if (!error)
836 837 838
				error = ext4_handle_dirty_xattr_block(handle,
								      inode,
								      bs->bh);
839 840 841 842 843 844
			if (error)
				goto cleanup;
			goto inserted;
		} else {
			int offset = (char *)s->here - bs->bh->b_data;

845
			unlock_buffer(bs->bh);
846
			ea_bdebug(bs->bh, "cloning");
847
			s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
848 849 850 851 852 853 854 855 856 857 858
			error = -ENOMEM;
			if (s->base == NULL)
				goto cleanup;
			memcpy(s->base, BHDR(bs->bh), bs->bh->b_size);
			s->first = ENTRY(header(s->base)+1);
			header(s->base)->h_refcount = cpu_to_le32(1);
			s->here = ENTRY(s->base + offset);
			s->end = s->base + bs->bh->b_size;
		}
	} else {
		/* Allocate a buffer where we construct the new block. */
859
		s->base = kzalloc(sb->s_blocksize, GFP_NOFS);
860 861 862 863
		/* assert(header == s->base) */
		error = -ENOMEM;
		if (s->base == NULL)
			goto cleanup;
864
		header(s->base)->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
865 866 867 868 869 870 871
		header(s->base)->h_blocks = cpu_to_le32(1);
		header(s->base)->h_refcount = cpu_to_le32(1);
		s->first = ENTRY(header(s->base)+1);
		s->here = ENTRY(header(s->base)+1);
		s->end = s->base + sb->s_blocksize;
	}

872
	error = ext4_xattr_set_entry(i, s);
873
	if (error == -EFSCORRUPTED)
874 875 876 877
		goto bad_block;
	if (error)
		goto cleanup;
	if (!IS_LAST_ENTRY(s->first))
878
		ext4_xattr_rehash(header(s->base), s->here);
879 880 881

inserted:
	if (!IS_LAST_ENTRY(s->first)) {
882
		new_bh = ext4_xattr_cache_find(inode, header(s->base), &ce);
883 884 885 886 887
		if (new_bh) {
			/* We found an identical block in the cache. */
			if (new_bh == bs->bh)
				ea_bdebug(new_bh, "keeping");
			else {
888 889
				u32 ref;

890 891
				/* The old block is released after updating
				   the inode. */
892 893
				error = dquot_alloc_block(inode,
						EXT4_C2B(EXT4_SB(sb), 1));
894
				if (error)
895
					goto cleanup;
896
				BUFFER_TRACE(new_bh, "get_write_access");
897
				error = ext4_journal_get_write_access(handle,
898 899 900 901
								      new_bh);
				if (error)
					goto cleanup_dquot;
				lock_buffer(new_bh);
J
Jan Kara 已提交
902 903
				/*
				 * We have to be careful about races with
904 905 906 907 908 909 910 911 912
				 * freeing, rehashing or adding references to
				 * xattr block. Once we hold buffer lock xattr
				 * block's state is stable so we can check
				 * whether the block got freed / rehashed or
				 * not.  Since we unhash mbcache entry under
				 * buffer lock when freeing / rehashing xattr
				 * block, checking whether entry is still
				 * hashed is reliable. Same rules hold for
				 * e_reusable handling.
J
Jan Kara 已提交
913
				 */
914 915
				if (hlist_bl_unhashed(&ce->e_hash_list) ||
				    !ce->e_reusable) {
J
Jan Kara 已提交
916 917 918 919 920 921 922 923 924
					/*
					 * Undo everything and check mbcache
					 * again.
					 */
					unlock_buffer(new_bh);
					dquot_free_block(inode,
							 EXT4_C2B(EXT4_SB(sb),
								  1));
					brelse(new_bh);
J
Jan Kara 已提交
925
					mb_cache_entry_put(ext4_mb_cache, ce);
J
Jan Kara 已提交
926 927 928 929
					ce = NULL;
					new_bh = NULL;
					goto inserted;
				}
930 931 932 933
				ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
				BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
				if (ref >= EXT4_XATTR_REFCOUNT_MAX)
					ce->e_reusable = 0;
934
				ea_bdebug(new_bh, "reusing; refcount now=%d",
935
					  ref);
936
				unlock_buffer(new_bh);
937 938 939
				error = ext4_handle_dirty_xattr_block(handle,
								      inode,
								      new_bh);
940 941 942
				if (error)
					goto cleanup_dquot;
			}
J
Jan Kara 已提交
943 944
			mb_cache_entry_touch(ext4_mb_cache, ce);
			mb_cache_entry_put(ext4_mb_cache, ce);
945 946 947 948 949 950 951 952
			ce = NULL;
		} else if (bs->bh && s->base == bs->bh->b_data) {
			/* We were modifying this block in-place. */
			ea_bdebug(bs->bh, "keeping this block");
			new_bh = bs->bh;
			get_bh(new_bh);
		} else {
			/* We need to allocate a new block */
953 954 955
			ext4_fsblk_t goal, block;

			goal = ext4_group_first_block_no(sb,
956
						EXT4_I(inode)->i_block_group);
957 958

			/* non-extent files can't have physical blocks past 2^32 */
959
			if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
960 961
				goal = goal & EXT4_MAX_BLOCK_FILE_PHYS;

962 963
			block = ext4_new_meta_blocks(handle, inode, goal, 0,
						     NULL, &error);
964 965
			if (error)
				goto cleanup;
966

967
			if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
968 969
				BUG_ON(block > EXT4_MAX_BLOCK_FILE_PHYS);

970 971
			ea_idebug(inode, "creating block %llu",
				  (unsigned long long)block);
972 973

			new_bh = sb_getblk(sb, block);
974
			if (unlikely(!new_bh)) {
975
				error = -ENOMEM;
976
getblk_failed:
977
				ext4_free_blocks(handle, inode, NULL, block, 1,
978
						 EXT4_FREE_BLOCKS_METADATA);
979 980 981
				goto cleanup;
			}
			lock_buffer(new_bh);
982
			error = ext4_journal_get_create_access(handle, new_bh);
983 984
			if (error) {
				unlock_buffer(new_bh);
985
				error = -EIO;
986 987 988 989 990
				goto getblk_failed;
			}
			memcpy(new_bh->b_data, s->base, new_bh->b_size);
			set_buffer_uptodate(new_bh);
			unlock_buffer(new_bh);
991
			ext4_xattr_cache_insert(ext4_mb_cache, new_bh);
992 993
			error = ext4_handle_dirty_xattr_block(handle,
							      inode, new_bh);
994 995 996 997 998 999
			if (error)
				goto cleanup;
		}
	}

	/* Update the inode. */
1000
	EXT4_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
1001 1002 1003

	/* Drop the previous xattr block. */
	if (bs->bh && bs->bh != new_bh)
1004
		ext4_xattr_release_block(handle, inode, bs->bh);
1005 1006 1007 1008
	error = 0;

cleanup:
	if (ce)
J
Jan Kara 已提交
1009
		mb_cache_entry_put(ext4_mb_cache, ce);
1010 1011 1012 1013 1014 1015 1016
	brelse(new_bh);
	if (!(bs->bh && s->base == bs->bh->b_data))
		kfree(s->base);

	return error;

cleanup_dquot:
1017
	dquot_free_block(inode, EXT4_C2B(EXT4_SB(sb), 1));
1018 1019 1020
	goto cleanup;

bad_block:
1021 1022
	EXT4_ERROR_INODE(inode, "bad block %llu",
			 EXT4_I(inode)->i_file_acl);
1023 1024 1025 1026 1027
	goto cleanup;

#undef header
}

T
Tao Ma 已提交
1028 1029
int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
			  struct ext4_xattr_ibody_find *is)
1030
{
1031 1032
	struct ext4_xattr_ibody_header *header;
	struct ext4_inode *raw_inode;
1033 1034
	int error;

1035
	if (EXT4_I(inode)->i_extra_isize == 0)
1036
		return 0;
1037
	raw_inode = ext4_raw_inode(&is->iloc);
1038 1039 1040
	header = IHDR(inode, raw_inode);
	is->s.base = is->s.first = IFIRST(header);
	is->s.here = is->s.first;
1041
	is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
1042
	if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
1043 1044
		error = ext4_xattr_check_names(IFIRST(header), is->s.end,
					       IFIRST(header));
1045 1046 1047
		if (error)
			return error;
		/* Find the named attribute. */
1048
		error = ext4_xattr_find_entry(&is->s.here, i->name_index,
1049 1050 1051 1052 1053 1054 1055 1056 1057
					      i->name, is->s.end -
					      (void *)is->s.base, 0);
		if (error && error != -ENODATA)
			return error;
		is->s.not_found = error;
	}
	return 0;
}

1058 1059 1060 1061 1062 1063 1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098
/*
 * ext4_xattr_ibody_inline_set()
 *
 * Apply the change described by @i to the in-inode attribute area tracked
 * by @is.  If the first attempt fails with -ENOSPC and the inode carries
 * inline data, try to evict inline data, redo the in-inode lookup and
 * attempt the change once more.
 *
 * On success the in-inode header magic and EXT4_STATE_XATTR are brought in
 * line with whether any entry remains.
 *
 * Returns 0, -ENOSPC when the inode has no extra space, or another
 * negative error number on failure.
 */
int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
				struct ext4_xattr_info *i,
				struct ext4_xattr_ibody_find *is)
{
	struct ext4_xattr_search *s = &is->s;
	struct ext4_xattr_ibody_header *header;
	int error;

	if (EXT4_I(inode)->i_extra_isize == 0)
		return -ENOSPC;

	error = ext4_xattr_set_entry(i, s);
	if (error == -ENOSPC && ext4_has_inline_data(inode)) {
		/* Out of room: make space by evicting inline data. */
		error = ext4_try_to_evict_inline_data(handle, inode,
				EXT4_XATTR_LEN(strlen(i->name) +
				EXT4_XATTR_SIZE(i->value_len)));
		if (error)
			return error;

		/* The layout may have changed; search again before retrying. */
		error = ext4_xattr_ibody_find(inode, i, is);
		if (error)
			return error;

		error = ext4_xattr_set_entry(i, s);
	}
	if (error)
		return error;

	header = IHDR(inode, ext4_raw_inode(&is->iloc));
	if (IS_LAST_ENTRY(s->first)) {
		/* No entries left in the inode body. */
		header->h_magic = cpu_to_le32(0);
		ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
	} else {
		header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
	}
	return 0;
}

/*
 * ext4_xattr_ibody_set()
 *
 * Apply the change described by @i to the in-inode attribute area tracked
 * by @is, then update the in-inode header magic and EXT4_STATE_XATTR to
 * reflect whether any entry remains.
 *
 * Returns 0, -ENOSPC when the inode has no extra space, or the error
 * reported by ext4_xattr_set_entry().
 */
static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
				struct ext4_xattr_info *i,
				struct ext4_xattr_ibody_find *is)
{
	struct ext4_xattr_ibody_header *header;
	struct ext4_xattr_search *s = &is->s;
	int error;

	if (EXT4_I(inode)->i_extra_isize == 0)
		return -ENOSPC;
	error = ext4_xattr_set_entry(i, s);
	if (error)
		return error;
	header = IHDR(inode, ext4_raw_inode(&is->iloc));
	if (!IS_LAST_ENTRY(s->first)) {
		/* At least one entry is present. */
		header->h_magic = cpu_to_le32(EXT4_XATTR_MAGIC);
		ext4_set_inode_state(inode, EXT4_STATE_XATTR);
	} else {
		/* The attribute area is now empty. */
		header->h_magic = cpu_to_le32(0);
		ext4_clear_inode_state(inode, EXT4_STATE_XATTR);
	}
	return 0;
}

1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130
/*
 * Tell whether the value stored for the entry at s->here is byte-for-byte
 * identical to the value requested in @i.
 *
 * Returns 1 when size and contents match, 0 otherwise.
 */
static int ext4_xattr_value_same(struct ext4_xattr_search *s,
				 struct ext4_xattr_info *i)
{
	const void *stored;

	/* Different lengths can never be the same value. */
	if (i->value_len != le32_to_cpu(s->here->e_value_size))
		return 0;

	/* Value offsets are relative to the search base. */
	stored = (void *)s->base + le16_to_cpu(s->here->e_value_offs);
	return memcmp(stored, i->value, i->value_len) == 0;
}

1131
/*
1132
 * ext4_xattr_set_handle()
1133
 *
1134
 * Create, replace or remove an extended attribute for this inode.  Value
1135 1136 1137 1138 1139 1140 1141 1142 1143
 * is NULL to remove an existing extended attribute, and non-NULL to
 * either replace an existing extended attribute, or create a new extended
 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
 * specify that an extended attribute must exist and must not exist
 * previous to the call, respectively.
 *
 * Returns 0, or a negative error number on failure.
 */
int
1144
ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
1145 1146 1147
		      const char *name, const void *value, size_t value_len,
		      int flags)
{
1148
	struct ext4_xattr_info i = {
1149 1150 1151 1152 1153 1154
		.name_index = name_index,
		.name = name,
		.value = value,
		.value_len = value_len,

	};
1155
	struct ext4_xattr_ibody_find is = {
1156 1157
		.s = { .not_found = -ENODATA, },
	};
1158
	struct ext4_xattr_block_find bs = {
1159 1160
		.s = { .not_found = -ENODATA, },
	};
K
Kalpak Shah 已提交
1161
	unsigned long no_expand;
1162 1163 1164 1165 1166 1167
	int error;

	if (!name)
		return -EINVAL;
	if (strlen(name) > 255)
		return -ERANGE;
1168
	down_write(&EXT4_I(inode)->xattr_sem);
1169 1170
	no_expand = ext4_test_inode_state(inode, EXT4_STATE_NO_EXPAND);
	ext4_set_inode_state(inode, EXT4_STATE_NO_EXPAND);
K
Kalpak Shah 已提交
1171

1172
	error = ext4_reserve_inode_write(handle, inode, &is.iloc);
1173 1174 1175
	if (error)
		goto cleanup;

1176
	if (ext4_test_inode_state(inode, EXT4_STATE_NEW)) {
1177 1178
		struct ext4_inode *raw_inode = ext4_raw_inode(&is.iloc);
		memset(raw_inode, 0, EXT4_SB(inode->i_sb)->s_inode_size);
1179
		ext4_clear_inode_state(inode, EXT4_STATE_NEW);
1180 1181
	}

1182
	error = ext4_xattr_ibody_find(inode, &i, &is);
1183 1184 1185
	if (error)
		goto cleanup;
	if (is.s.not_found)
1186
		error = ext4_xattr_block_find(inode, &i, &bs);
1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202
	if (error)
		goto cleanup;
	if (is.s.not_found && bs.s.not_found) {
		error = -ENODATA;
		if (flags & XATTR_REPLACE)
			goto cleanup;
		error = 0;
		if (!value)
			goto cleanup;
	} else {
		error = -EEXIST;
		if (flags & XATTR_CREATE)
			goto cleanup;
	}
	if (!value) {
		if (!is.s.not_found)
1203
			error = ext4_xattr_ibody_set(handle, inode, &i, &is);
1204
		else if (!bs.s.not_found)
1205
			error = ext4_xattr_block_set(handle, inode, &i, &bs);
1206
	} else {
1207 1208 1209 1210 1211 1212 1213
		error = 0;
		/* Xattr value did not change? Save us some work and bail out */
		if (!is.s.not_found && ext4_xattr_value_same(&is.s, &i))
			goto cleanup;
		if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i))
			goto cleanup;

1214
		error = ext4_xattr_ibody_set(handle, inode, &i, &is);
1215 1216
		if (!error && !bs.s.not_found) {
			i.value = NULL;
1217
			error = ext4_xattr_block_set(handle, inode, &i, &bs);
1218
		} else if (error == -ENOSPC) {
1219 1220 1221 1222 1223
			if (EXT4_I(inode)->i_file_acl && !bs.s.base) {
				error = ext4_xattr_block_find(inode, &i, &bs);
				if (error)
					goto cleanup;
			}
1224
			error = ext4_xattr_block_set(handle, inode, &i, &bs);
1225 1226 1227 1228
			if (error)
				goto cleanup;
			if (!is.s.not_found) {
				i.value = NULL;
1229
				error = ext4_xattr_ibody_set(handle, inode, &i,
1230 1231 1232 1233 1234
							     &is);
			}
		}
	}
	if (!error) {
1235
		ext4_xattr_update_super_block(handle, inode->i_sb);
K
Kalpak Shah 已提交
1236
		inode->i_ctime = ext4_current_time(inode);
1237
		if (!value)
1238
			ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
1239
		error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);
1240
		/*
1241
		 * The bh is consumed by ext4_mark_iloc_dirty, even with
1242 1243 1244 1245
		 * error != 0.
		 */
		is.iloc.bh = NULL;
		if (IS_SYNC(inode))
1246
			ext4_handle_sync(handle);
1247 1248 1249 1250 1251
	}

cleanup:
	brelse(is.iloc.bh);
	brelse(bs.bh);
K
Kalpak Shah 已提交
1252
	if (no_expand == 0)
1253
		ext4_clear_inode_state(inode, EXT4_STATE_NO_EXPAND);
1254
	up_write(&EXT4_I(inode)->xattr_sem);
1255 1256 1257 1258
	return error;
}

/*
1259
 * ext4_xattr_set()
1260
 *
1261
 * Like ext4_xattr_set_handle, but start from an inode. This extended
1262 1263 1264 1265 1266
 * attribute modification is a filesystem transaction by itself.
 *
 * Returns 0, or a negative error number on failure.
 */
int
1267
ext4_xattr_set(struct inode *inode, int name_index, const char *name,
1268 1269 1270 1271
	       const void *value, size_t value_len, int flags)
{
	handle_t *handle;
	int error, retries = 0;
1272
	int credits = ext4_jbd2_credits_xattr(inode);
1273 1274

retry:
1275
	handle = ext4_journal_start(inode, EXT4_HT_XATTR, credits);
1276 1277 1278 1279 1280
	if (IS_ERR(handle)) {
		error = PTR_ERR(handle);
	} else {
		int error2;

1281
		error = ext4_xattr_set_handle(handle, inode, name_index, name,
1282
					      value, value_len, flags);
1283
		error2 = ext4_journal_stop(handle);
1284
		if (error == -ENOSPC &&
1285
		    ext4_should_retry_alloc(inode->i_sb, &retries))
1286 1287 1288 1289 1290 1291 1292 1293
			goto retry;
		if (error == 0)
			error = error2;
	}

	return error;
}

1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324 1325 1326 1327 1328 1329 1330 1331 1332
/*
 * Relocate the in-inode EA entries so that a larger i_extra_isize fits.
 *
 * Every entry starting at @entry that stores a value locally (no value
 * block, non-zero size) has its value offset adjusted by
 * @value_offs_shift; afterwards @n bytes are moved from @from to @to.
 * An adjusted value that would end beyond @blocksize triggers BUG_ON().
 */
static void ext4_xattr_shift_entries(struct ext4_xattr_entry *entry,
				     int value_offs_shift, void *to,
				     void *from, size_t n, int blocksize)
{
	struct ext4_xattr_entry *cur;

	/* Pass 1: fix up the value offsets. */
	for (cur = entry; !IS_LAST_ENTRY(cur); cur = EXT4_XATTR_NEXT(cur)) {
		int shifted;

		if (cur->e_value_block || !cur->e_value_size)
			continue;

		shifted = le16_to_cpu(cur->e_value_offs) + value_offs_shift;
		BUG_ON(shifted + le32_to_cpu(cur->e_value_size)
			 > blocksize);
		cur->e_value_offs = cpu_to_le16(shifted);
	}

	/* Pass 2: move the entry table itself; the regions may overlap. */
	memmove(to, from, n);
}

/*
 * Expand an inode by new_extra_isize bytes when EAs are present.
 *
 * First tries to shift the in-inode entries within the free space of the
 * inode body.  If that is not enough, in-inode entries are moved one at a
 * time into the external EA block until enough room has been freed.  When
 * the requested size cannot be reached, one further attempt is made with
 * the superblock's s_min_extra_isize (if non-zero).
 *
 * Takes EXT4_I(inode)->xattr_sem for writing for the whole operation.
 *
 * Returns 0 on success or negative error number on failure (-ENOSPC when
 * no arrangement frees enough space).
 */
int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
			       struct ext4_inode *raw_inode, handle_t *handle)
{
	struct ext4_xattr_ibody_header *header;
	struct ext4_xattr_entry *entry, *last, *first;
	struct buffer_head *bh = NULL;
	struct ext4_xattr_ibody_find *is = NULL;
	struct ext4_xattr_block_find *bs = NULL;
	char *buffer = NULL, *b_entry_name = NULL;
	size_t min_offs, free;
	int total_ino;
	void *base, *start, *end;
	int extra_isize = 0, error = 0, tried_min_extra_isize = 0;
	int s_min_extra_isize = le16_to_cpu(EXT4_SB(inode->i_sb)->s_es->s_min_extra_isize);

	down_write(&EXT4_I(inode)->xattr_sem);
retry:
	if (EXT4_I(inode)->i_extra_isize >= new_extra_isize) {
		up_write(&EXT4_I(inode)->xattr_sem);
		return 0;
	}

	header = IHDR(inode, raw_inode);
	entry = IFIRST(header);

	/*
	 * Check if enough free space is available in the inode to shift the
	 * entries ahead by new_extra_isize.
	 */

	base = start = entry;
	end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
	min_offs = end - base;
	last = entry;
	total_ino = sizeof(struct ext4_xattr_ibody_header);

	free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
	if (free >= new_extra_isize) {
		entry = IFIRST(header);
		ext4_xattr_shift_entries(entry,	EXT4_I(inode)->i_extra_isize
				- new_extra_isize, (void *)raw_inode +
				EXT4_GOOD_OLD_INODE_SIZE + new_extra_isize,
				(void *)header, total_ino,
				inode->i_sb->s_blocksize);
		EXT4_I(inode)->i_extra_isize = new_extra_isize;
		error = 0;
		goto cleanup;
	}

	/*
	 * Enough free space isn't available in the inode, check if
	 * EA block can hold new_extra_isize bytes.
	 */
	if (EXT4_I(inode)->i_file_acl) {
		bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
		error = -EIO;
		if (!bh)
			goto cleanup;
		if (ext4_xattr_check_block(inode, bh)) {
			EXT4_ERROR_INODE(inode, "bad block %llu",
					 EXT4_I(inode)->i_file_acl);
			error = -EFSCORRUPTED;
			goto cleanup;
		}
		base = BHDR(bh);
		first = BFIRST(bh);
		end = bh->b_data + bh->b_size;
		min_offs = end - base;
		free = ext4_xattr_free_space(first, &min_offs, base, NULL);
		if (free < new_extra_isize) {
			if (!tried_min_extra_isize && s_min_extra_isize) {
				tried_min_extra_isize++;
				new_extra_isize = s_min_extra_isize;
				brelse(bh);
				/*
				 * The retry may reach cleanup without reading
				 * the block again; do not release it twice.
				 */
				bh = NULL;
				goto retry;
			}
			error = -ENOSPC;
			goto cleanup;
		}
	} else {
		free = inode->i_sb->s_blocksize;
	}

	while (new_extra_isize > 0) {
		size_t offs, size, entry_size;
		struct ext4_xattr_entry *small_entry = NULL;
		struct ext4_xattr_info i = {
			.value = NULL,
			.value_len = 0,
		};
		unsigned int total_size;  /* EA entry size + value size */
		unsigned int shift_bytes; /* No. of bytes to shift EAs by? */
		unsigned int min_total_size = ~0U;

		is = kzalloc(sizeof(struct ext4_xattr_ibody_find), GFP_NOFS);
		bs = kzalloc(sizeof(struct ext4_xattr_block_find), GFP_NOFS);
		if (!is || !bs) {
			error = -ENOMEM;
			goto cleanup;
		}

		is->s.not_found = -ENODATA;
		bs->s.not_found = -ENODATA;
		is->iloc.bh = NULL;
		bs->bh = NULL;

		last = IFIRST(header);
		/* Find the entry best suited to be pushed into EA block */
		entry = NULL;
		for (; !IS_LAST_ENTRY(last); last = EXT4_XATTR_NEXT(last)) {
			total_size =
			EXT4_XATTR_SIZE(le32_to_cpu(last->e_value_size)) +
					EXT4_XATTR_LEN(last->e_name_len);
			if (total_size <= free && total_size < min_total_size) {
				if (total_size < new_extra_isize) {
					small_entry = last;
				} else {
					entry = last;
					min_total_size = total_size;
				}
			}
		}

		if (entry == NULL) {
			if (small_entry) {
				entry = small_entry;
			} else {
				if (!tried_min_extra_isize &&
				    s_min_extra_isize) {
					tried_min_extra_isize++;
					new_extra_isize = s_min_extra_isize;
					kfree(is); is = NULL;
					kfree(bs); bs = NULL;
					brelse(bh);
					/* See above: avoid a second brelse(). */
					bh = NULL;
					goto retry;
				}
				error = -ENOSPC;
				goto cleanup;
			}
		}
		offs = le16_to_cpu(entry->e_value_offs);
		size = le32_to_cpu(entry->e_value_size);
		entry_size = EXT4_XATTR_LEN(entry->e_name_len);
		i.name_index = entry->e_name_index;
		buffer = kmalloc(EXT4_XATTR_SIZE(size), GFP_NOFS);
		b_entry_name = kmalloc(entry->e_name_len + 1, GFP_NOFS);
		if (!buffer || !b_entry_name) {
			error = -ENOMEM;
			goto cleanup;
		}
		/* Save the entry name and the entry value */
		memcpy(buffer, (void *)IFIRST(header) + offs,
		       EXT4_XATTR_SIZE(size));
		memcpy(b_entry_name, entry->e_name, entry->e_name_len);
		b_entry_name[entry->e_name_len] = '\0';
		i.name = b_entry_name;

		error = ext4_get_inode_loc(inode, &is->iloc);
		if (error)
			goto cleanup;

		error = ext4_xattr_ibody_find(inode, &i, is);
		if (error)
			goto cleanup;

		/* Remove the chosen entry from the inode */
		error = ext4_xattr_ibody_set(handle, inode, &i, is);
		if (error)
			goto cleanup;

		entry = IFIRST(header);
		if (entry_size + EXT4_XATTR_SIZE(size) >= new_extra_isize)
			shift_bytes = new_extra_isize;
		else
			shift_bytes = entry_size + size;
		/* Adjust the offsets and shift the remaining entries ahead */
		ext4_xattr_shift_entries(entry, EXT4_I(inode)->i_extra_isize -
			shift_bytes, (void *)raw_inode +
			EXT4_GOOD_OLD_INODE_SIZE + extra_isize + shift_bytes,
			(void *)header, total_ino - entry_size,
			inode->i_sb->s_blocksize);

		extra_isize += shift_bytes;
		new_extra_isize -= shift_bytes;
		EXT4_I(inode)->i_extra_isize = extra_isize;

		i.name = b_entry_name;
		i.value = buffer;
		i.value_len = size;
		error = ext4_xattr_block_find(inode, &i, bs);
		if (error)
			goto cleanup;

		/* Add entry which was removed from the inode into the block */
		error = ext4_xattr_block_set(handle, inode, &i, bs);
		if (error)
			goto cleanup;
		kfree(b_entry_name);
		kfree(buffer);
		b_entry_name = NULL;
		buffer = NULL;
		brelse(is->iloc.bh);
		/* Release the block buffer held in bs, as set_handle does. */
		brelse(bs->bh);
		kfree(is);
		kfree(bs);
		is = NULL;
		bs = NULL;
	}
	brelse(bh);
	up_write(&EXT4_I(inode)->xattr_sem);
	return 0;

cleanup:
	kfree(b_entry_name);
	kfree(buffer);
	if (is)
		brelse(is->iloc.bh);
	if (bs)
		brelse(bs->bh);
	kfree(is);
	kfree(bs);
	brelse(bh);
	up_write(&EXT4_I(inode)->xattr_sem);
	return error;
}



1546
/*
1547
 * ext4_xattr_delete_inode()
1548 1549 1550 1551 1552 1553
 *
 * Free extended attribute resources associated with this inode. This
 * is called immediately before an inode is freed. We have exclusive
 * access to the inode.
 */
void
1554
ext4_xattr_delete_inode(handle_t *handle, struct inode *inode)
1555 1556 1557
{
	struct buffer_head *bh = NULL;

1558
	if (!EXT4_I(inode)->i_file_acl)
1559
		goto cleanup;
1560
	bh = sb_bread(inode->i_sb, EXT4_I(inode)->i_file_acl);
1561
	if (!bh) {
1562 1563
		EXT4_ERROR_INODE(inode, "block %llu read error",
				 EXT4_I(inode)->i_file_acl);
1564 1565
		goto cleanup;
	}
1566
	if (BHDR(bh)->h_magic != cpu_to_le32(EXT4_XATTR_MAGIC) ||
1567
	    BHDR(bh)->h_blocks != cpu_to_le32(1)) {
1568 1569
		EXT4_ERROR_INODE(inode, "bad block %llu",
				 EXT4_I(inode)->i_file_acl);
1570 1571
		goto cleanup;
	}
1572 1573
	ext4_xattr_release_block(handle, inode, bh);
	EXT4_I(inode)->i_file_acl = 0;
1574 1575 1576 1577 1578 1579

cleanup:
	brelse(bh);
}

/*
1580
 * ext4_xattr_cache_insert()
1581 1582 1583 1584 1585 1586 1587
 *
 * Create a new entry in the extended attribute cache, and insert
 * it unless such an entry is already in the cache.
 *
 * Returns 0, or a negative error number on failure.
 */
static void
J
Jan Kara 已提交
1588
ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
1589
{
1590 1591 1592 1593
	struct ext4_xattr_header *header = BHDR(bh);
	__u32 hash = le32_to_cpu(header->h_hash);
	int reusable = le32_to_cpu(header->h_refcount) <
		       EXT4_XATTR_REFCOUNT_MAX;
1594 1595
	int error;

J
Jan Kara 已提交
1596
	error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
1597
				      bh->b_blocknr, reusable);
1598
	if (error) {
J
Jan Kara 已提交
1599
		if (error == -EBUSY)
1600
			ea_bdebug(bh, "already in cache");
J
Jan Kara 已提交
1601
	} else
1602 1603 1604 1605
		ea_bdebug(bh, "inserting [%x]", (int)hash);
}

/*
1606
 * ext4_xattr_cmp()
1607 1608 1609 1610 1611 1612 1613
 *
 * Compare two extended attribute blocks for equality.
 *
 * Returns 0 if the blocks are equal, 1 if they differ, and
 * a negative error number on errors.
 */
static int
1614 1615
ext4_xattr_cmp(struct ext4_xattr_header *header1,
	       struct ext4_xattr_header *header2)
1616
{
1617
	struct ext4_xattr_entry *entry1, *entry2;
1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630

	entry1 = ENTRY(header1+1);
	entry2 = ENTRY(header2+1);
	while (!IS_LAST_ENTRY(entry1)) {
		if (IS_LAST_ENTRY(entry2))
			return 1;
		if (entry1->e_hash != entry2->e_hash ||
		    entry1->e_name_index != entry2->e_name_index ||
		    entry1->e_name_len != entry2->e_name_len ||
		    entry1->e_value_size != entry2->e_value_size ||
		    memcmp(entry1->e_name, entry2->e_name, entry1->e_name_len))
			return 1;
		if (entry1->e_value_block != 0 || entry2->e_value_block != 0)
1631
			return -EFSCORRUPTED;
1632 1633 1634 1635 1636
		if (memcmp((char *)header1 + le16_to_cpu(entry1->e_value_offs),
			   (char *)header2 + le16_to_cpu(entry2->e_value_offs),
			   le32_to_cpu(entry1->e_value_size)))
			return 1;

1637 1638
		entry1 = EXT4_XATTR_NEXT(entry1);
		entry2 = EXT4_XATTR_NEXT(entry2);
1639 1640 1641 1642 1643 1644 1645
	}
	if (!IS_LAST_ENTRY(entry2))
		return 1;
	return 0;
}

/*
1646
 * ext4_xattr_cache_find()
1647 1648 1649 1650 1651 1652 1653
 *
 * Find an identical extended attribute block.
 *
 * Returns a pointer to the block found, or NULL if such a block was
 * not found or an error occurred.
 */
static struct buffer_head *
1654
ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
J
Jan Kara 已提交
1655
		      struct mb_cache_entry **pce)
1656 1657
{
	__u32 hash = le32_to_cpu(header->h_hash);
J
Jan Kara 已提交
1658 1659
	struct mb_cache_entry *ce;
	struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
1660 1661 1662 1663

	if (!header->h_hash)
		return NULL;  /* never share */
	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
J
Jan Kara 已提交
1664
	ce = mb_cache_entry_find_first(ext4_mb_cache, hash);
1665 1666 1667 1668 1669
	while (ce) {
		struct buffer_head *bh;

		bh = sb_bread(inode->i_sb, ce->e_block);
		if (!bh) {
1670 1671
			EXT4_ERROR_INODE(inode, "block %lu read error",
					 (unsigned long) ce->e_block);
1672
		} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
1673 1674 1675 1676
			*pce = ce;
			return bh;
		}
		brelse(bh);
J
Jan Kara 已提交
1677
		ce = mb_cache_entry_find_next(ext4_mb_cache, ce);
1678 1679 1680 1681 1682 1683 1684 1685
	}
	return NULL;
}

#define NAME_HASH_SHIFT 5
#define VALUE_HASH_SHIFT 16

/*
1686
 * ext4_xattr_hash_entry()
1687 1688 1689
 *
 * Compute the hash of an extended attribute.
 */
1690 1691
static inline void ext4_xattr_hash_entry(struct ext4_xattr_header *header,
					 struct ext4_xattr_entry *entry)
1692 1693 1694 1695 1696
{
	__u32 hash = 0;
	char *name = entry->e_name;
	int n;

1697
	for (n = 0; n < entry->e_name_len; n++) {
1698 1699 1700 1701 1702 1703 1704 1705 1706
		hash = (hash << NAME_HASH_SHIFT) ^
		       (hash >> (8*sizeof(hash) - NAME_HASH_SHIFT)) ^
		       *name++;
	}

	if (entry->e_value_block == 0 && entry->e_value_size != 0) {
		__le32 *value = (__le32 *)((char *)header +
			le16_to_cpu(entry->e_value_offs));
		for (n = (le32_to_cpu(entry->e_value_size) +
1707
		     EXT4_XATTR_ROUND) >> EXT4_XATTR_PAD_BITS; n; n--) {
1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721
			hash = (hash << VALUE_HASH_SHIFT) ^
			       (hash >> (8*sizeof(hash) - VALUE_HASH_SHIFT)) ^
			       le32_to_cpu(*value++);
		}
	}
	entry->e_hash = cpu_to_le32(hash);
}

#undef NAME_HASH_SHIFT
#undef VALUE_HASH_SHIFT

#define BLOCK_HASH_SHIFT 16

/*
1722
 * ext4_xattr_rehash()
1723 1724 1725
 *
 * Re-compute the extended attribute hash value after an entry has changed.
 */
1726 1727
static void ext4_xattr_rehash(struct ext4_xattr_header *header,
			      struct ext4_xattr_entry *entry)
1728
{
1729
	struct ext4_xattr_entry *here;
1730 1731
	__u32 hash = 0;

1732
	ext4_xattr_hash_entry(header, entry);
1733 1734 1735 1736 1737 1738 1739 1740 1741 1742
	here = ENTRY(header+1);
	while (!IS_LAST_ENTRY(here)) {
		if (!here->e_hash) {
			/* Block is not shared if an entry's hash value == 0 */
			hash = 0;
			break;
		}
		hash = (hash << BLOCK_HASH_SHIFT) ^
		       (hash >> (8*sizeof(hash) - BLOCK_HASH_SHIFT)) ^
		       le32_to_cpu(here->e_hash);
1743
		here = EXT4_XATTR_NEXT(here);
1744 1745 1746 1747 1748 1749
	}
	header->h_hash = cpu_to_le32(hash);
}

#undef BLOCK_HASH_SHIFT

1750 1751
#define	HASH_BUCKET_BITS	10

/*
 * ext4_xattr_create_cache()
 *
 * Create the cache used to find shareable xattr blocks, passing
 * HASH_BUCKET_BITS to mb_cache_create().
 *
 * Returns whatever mb_cache_create() returns.
 */
struct mb_cache *
ext4_xattr_create_cache(void)
{
	return mb_cache_create(HASH_BUCKET_BITS);
}

J
Jan Kara 已提交
1758
/*
 * ext4_xattr_destroy_cache()
 *
 * Destroy a cache obtained from ext4_xattr_create_cache().  A NULL @cache
 * is accepted and ignored.
 */
void ext4_xattr_destroy_cache(struct mb_cache *cache)
{
	if (cache)
		mb_cache_destroy(cache);
}
1763