// SPDX-License-Identifier: GPL-2.0
/*
 * linux/fs/ext2/xattr.c
 *
 * Copyright (C) 2001-2003 Andreas Gruenbacher <agruen@suse.de>
 *
 * Fix by Harrison Xing <harrison@mountainviewdata.com>.
 * Extended attributes for symlinks and special files added per
 *  suggestion of Luka Renko <luka.renko@hermes.si>.
 * xattr consolidation Copyright (c) 2004 James Morris <jmorris@redhat.com>,
 *  Red Hat Inc.
 *
 */

/*
 * Extended attributes are stored on disk blocks allocated outside of
 * any inode. The i_file_acl field is then made to point to this allocated
 * block. If all extended attributes of an inode are identical, these
 * inodes may share the same extended attribute block. Such situations
 * are automatically detected by keeping a cache of recent attribute block
 * numbers and hashes over the block's contents in memory.
 *
 *
 * Extended attribute block layout:
 *
 *   +------------------+
 *   | header           |
 *   | entry 1          | |
 *   | entry 2          | | growing downwards
 *   | entry 3          | v
 *   | four null bytes  |
 *   | . . .            |
 *   | value 1          | ^
 *   | value 3          | | growing upwards
 *   | value 2          | |
 *   +------------------+
 *
 * The block header is followed by multiple entry descriptors. These entry
 * descriptors are variable in size, and aligned to EXT2_XATTR_PAD
 * byte boundaries. The entry descriptors are sorted by attribute name,
 * so that two extended attribute blocks can be compared efficiently.
 *
 * Attribute values are aligned to the end of the block, stored in
 * no specific order. They are also padded to EXT2_XATTR_PAD byte
 * boundaries. No additional gaps are left between them.
 *
 * Locking strategy
 * ----------------
 * EXT2_I(inode)->i_file_acl is protected by EXT2_I(inode)->xattr_sem.
 * EA blocks are only changed if they are exclusive to an inode, so
 * holding xattr_sem also means that nothing but the EA block's reference
 * count will change. Multiple writers to an EA block are synchronized
 * by the bh lock. No more than a single bh lock is held at any time
 * to avoid deadlocks.
 */

#include <linux/buffer_head.h>
#include <linux/init.h>
#include <linux/slab.h>
#include <linux/mbcache.h>
#include <linux/quotaops.h>
#include <linux/rwsem.h>
#include <linux/security.h>
#include "ext2.h"
#include "xattr.h"
#include "acl.h"

/* View the start of a buffer's data as the xattr block header. */
#define HDR(bh) ((struct ext2_xattr_header *)((bh)->b_data))
/* Reinterpret an arbitrary pointer as an xattr entry descriptor. */
#define ENTRY(ptr) ((struct ext2_xattr_entry *)(ptr))
/* First entry descriptor: it starts immediately after the block header. */
#define FIRST_ENTRY(bh) ENTRY(HDR(bh)+1)
/* True when the 32-bit word at entry is zero, i.e. the list terminator. */
#define IS_LAST_ENTRY(entry) (*(__u32 *)(entry) == 0)

/*
 * Debug tracing helpers.
 *
 * With EXT2_XATTR_DEBUG defined, ea_idebug() prints a KERN_DEBUG line
 * prefixed with the inode's device id and inode number, and ea_bdebug()
 * prints one prefixed with the buffer's block device and block number.
 * Without it both expand to nothing, so their arguments are not evaluated.
 */
#ifdef EXT2_XATTR_DEBUG
# define ea_idebug(inode, f...) do { \
		printk(KERN_DEBUG "inode %s:%ld: ", \
			(inode)->i_sb->s_id, (inode)->i_ino); \
		printk(f); \
		printk("\n"); \
	} while (0)
# define ea_bdebug(bh, f...) do { \
		printk(KERN_DEBUG "block %pg:%lu: ", \
			(bh)->b_bdev, (unsigned long) (bh)->b_blocknr); \
		printk(f); \
		printk("\n"); \
	} while (0)
#else
# define ea_idebug(f...)
# define ea_bdebug(f...)
#endif

/* Prototypes for static helpers that are used before they are defined. */
static int ext2_xattr_set2(struct inode *, struct buffer_head *,
			   struct ext2_xattr_header *);

static int ext2_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
static struct buffer_head *ext2_xattr_cache_find(struct inode *,
						 struct ext2_xattr_header *);
static void ext2_xattr_rehash(struct ext2_xattr_header *,
			      struct ext2_xattr_entry *);

static const struct xattr_handler *ext2_xattr_handler_map[] = {
L
Linus Torvalds 已提交
101 102
	[EXT2_XATTR_INDEX_USER]		     = &ext2_xattr_user_handler,
#ifdef CONFIG_EXT2_FS_POSIX_ACL
103 104
	[EXT2_XATTR_INDEX_POSIX_ACL_ACCESS]  = &posix_acl_access_xattr_handler,
	[EXT2_XATTR_INDEX_POSIX_ACL_DEFAULT] = &posix_acl_default_xattr_handler,
L
Linus Torvalds 已提交
105 106 107 108 109 110 111
#endif
	[EXT2_XATTR_INDEX_TRUSTED]	     = &ext2_xattr_trusted_handler,
#ifdef CONFIG_EXT2_FS_SECURITY
	[EXT2_XATTR_INDEX_SECURITY]	     = &ext2_xattr_security_handler,
#endif
};

S
Stephen Hemminger 已提交
112
const struct xattr_handler *ext2_xattr_handlers[] = {
L
Linus Torvalds 已提交
113 114 115
	&ext2_xattr_user_handler,
	&ext2_xattr_trusted_handler,
#ifdef CONFIG_EXT2_FS_POSIX_ACL
116 117
	&posix_acl_access_xattr_handler,
	&posix_acl_default_xattr_handler,
L
Linus Torvalds 已提交
118 119 120 121 122 123 124
#endif
#ifdef CONFIG_EXT2_FS_SECURITY
	&ext2_xattr_security_handler,
#endif
	NULL
};

125 126
/* The xattr block cache of the super block that owns inode. */
#define EA_BLOCK_CACHE(inode)	(EXT2_SB((inode)->i_sb)->s_ea_block_cache)

static inline const struct xattr_handler *
L
Linus Torvalds 已提交
128 129
ext2_xattr_handler(int name_index)
{
S
Stephen Hemminger 已提交
130
	const struct xattr_handler *handler = NULL;
L
Linus Torvalds 已提交
131 132 133 134 135 136

	if (name_index > 0 && name_index < ARRAY_SIZE(ext2_xattr_handler_map))
		handler = ext2_xattr_handler_map[name_index];
	return handler;
}

137 138 139 140 141 142 143 144 145 146
/*
 * Sanity-check an xattr block header: the magic must equal
 * EXT2_XATTR_MAGIC and h_blocks must be exactly 1.
 */
static bool
ext2_xattr_header_valid(struct ext2_xattr_header *header)
{
	return header->h_magic == cpu_to_le32(EXT2_XATTR_MAGIC) &&
	       header->h_blocks == cpu_to_le32(1);
}

147
static bool
148 149
ext2_xattr_entry_valid(struct ext2_xattr_entry *entry,
		       char *end, size_t end_offs)
150
{
151
	struct ext2_xattr_entry *next;
152 153
	size_t size;

154 155 156 157
	next = EXT2_XATTR_NEXT(entry);
	if ((char *)next >= end)
		return false;

158 159 160 161 162 163 164 165 166 167 168
	if (entry->e_value_block != 0)
		return false;

	size = le32_to_cpu(entry->e_value_size);
	if (size > end_offs ||
	    le16_to_cpu(entry->e_value_offs) + size > end_offs)
		return false;

	return true;
}

169 170 171 172 173 174 175 176 177 178 179 180 181 182 183
/*
 * Order a lookup key against an entry: by name index first, then by name
 * length, then by the name bytes.  Returns zero on an exact match and a
 * negative or positive value otherwise.
 */
static int
ext2_xattr_cmp_entry(int name_index, size_t name_len, const char *name,
		     struct ext2_xattr_entry *entry)
{
	int diff = name_index - entry->e_name_index;

	if (diff)
		return diff;

	diff = name_len - entry->e_name_len;
	if (diff)
		return diff;

	return memcmp(name, entry->e_name, name_len);
}

L
Linus Torvalds 已提交
184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
/*
 * ext2_xattr_get()
 *
 * Copy an extended attribute into the buffer
 * provided, or compute the buffer size required.
 * Buffer is NULL to compute the size of the buffer required.
 *
 * Returns a negative error number on failure, or the number of bytes
 * used / required on success.
 */
int
ext2_xattr_get(struct inode *inode, int name_index, const char *name,
	       void *buffer, size_t buffer_size)
{
	struct buffer_head *bh = NULL;
	struct ext2_xattr_entry *entry;
	size_t name_len, size;
	char *end;
C
Chengguang Xu 已提交
202
	int error, not_found;
203
	struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
L
Linus Torvalds 已提交
204 205 206 207 208 209

	ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
		  name_index, name, buffer, (long)buffer_size);

	if (name == NULL)
		return -EINVAL;
210 211 212 213
	name_len = strlen(name);
	if (name_len > 255)
		return -ERANGE;

L
Linus Torvalds 已提交
214 215 216 217 218 219 220 221 222 223 224 225
	down_read(&EXT2_I(inode)->xattr_sem);
	error = -ENODATA;
	if (!EXT2_I(inode)->i_file_acl)
		goto cleanup;
	ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
	bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
	error = -EIO;
	if (!bh)
		goto cleanup;
	ea_bdebug(bh, "b_count=%d, refcount=%d",
		atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
	end = bh->b_data + bh->b_size;
226 227 228
	if (!ext2_xattr_header_valid(HDR(bh))) {
bad_block:
		ext2_error(inode->i_sb, "ext2_xattr_get",
L
Linus Torvalds 已提交
229 230 231 232 233 234
			"inode %ld: bad block %d", inode->i_ino,
			EXT2_I(inode)->i_file_acl);
		error = -EIO;
		goto cleanup;
	}

235
	/* find named attribute */
L
Linus Torvalds 已提交
236 237
	entry = FIRST_ENTRY(bh);
	while (!IS_LAST_ENTRY(entry)) {
238 239
		if (!ext2_xattr_entry_valid(entry, end,
		    inode->i_sb->s_blocksize))
J
Jan Kara 已提交
240
			goto bad_block;
C
Chengguang Xu 已提交
241 242 243 244

		not_found = ext2_xattr_cmp_entry(name_index, name_len, name,
						 entry);
		if (!not_found)
L
Linus Torvalds 已提交
245
			goto found;
C
Chengguang Xu 已提交
246 247 248
		if (not_found < 0)
			break;

249
		entry = EXT2_XATTR_NEXT(entry);
L
Linus Torvalds 已提交
250
	}
251
	if (ext2_xattr_cache_insert(ea_block_cache, bh))
L
Linus Torvalds 已提交
252 253 254 255
		ea_idebug(inode, "cache insert failed");
	error = -ENODATA;
	goto cleanup;
found:
256
	size = le32_to_cpu(entry->e_value_size);
257
	if (ext2_xattr_cache_insert(ea_block_cache, bh))
L
Linus Torvalds 已提交
258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286
		ea_idebug(inode, "cache insert failed");
	if (buffer) {
		error = -ERANGE;
		if (size > buffer_size)
			goto cleanup;
		/* return value of attribute */
		memcpy(buffer, bh->b_data + le16_to_cpu(entry->e_value_offs),
			size);
	}
	error = size;

cleanup:
	brelse(bh);
	up_read(&EXT2_I(inode)->xattr_sem);

	return error;
}

/*
 * ext2_xattr_list()
 *
 * Copy a list of attribute names into the buffer
 * provided, or compute the buffer size required.
 * Buffer is NULL to compute the size of the buffer required.
 *
 * Returns a negative error number on failure, or the number of bytes
 * used / required on success.
 */
static int
287
ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
L
Linus Torvalds 已提交
288
{
289
	struct inode *inode = d_inode(dentry);
L
Linus Torvalds 已提交
290 291 292 293 294
	struct buffer_head *bh = NULL;
	struct ext2_xattr_entry *entry;
	char *end;
	size_t rest = buffer_size;
	int error;
295
	struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
L
Linus Torvalds 已提交
296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311

	ea_idebug(inode, "buffer=%p, buffer_size=%ld",
		  buffer, (long)buffer_size);

	down_read(&EXT2_I(inode)->xattr_sem);
	error = 0;
	if (!EXT2_I(inode)->i_file_acl)
		goto cleanup;
	ea_idebug(inode, "reading block %d", EXT2_I(inode)->i_file_acl);
	bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
	error = -EIO;
	if (!bh)
		goto cleanup;
	ea_bdebug(bh, "b_count=%d, refcount=%d",
		atomic_read(&(bh->b_count)), le32_to_cpu(HDR(bh)->h_refcount));
	end = bh->b_data + bh->b_size;
312 313 314
	if (!ext2_xattr_header_valid(HDR(bh))) {
bad_block:
		ext2_error(inode->i_sb, "ext2_xattr_list",
L
Linus Torvalds 已提交
315 316 317 318 319 320 321 322 323
			"inode %ld: bad block %d", inode->i_ino,
			EXT2_I(inode)->i_file_acl);
		error = -EIO;
		goto cleanup;
	}

	/* check the on-disk data structure */
	entry = FIRST_ENTRY(bh);
	while (!IS_LAST_ENTRY(entry)) {
324 325
		if (!ext2_xattr_entry_valid(entry, end,
		    inode->i_sb->s_blocksize))
J
Jan Kara 已提交
326
			goto bad_block;
327
		entry = EXT2_XATTR_NEXT(entry);
L
Linus Torvalds 已提交
328
	}
329
	if (ext2_xattr_cache_insert(ea_block_cache, bh))
L
Linus Torvalds 已提交
330 331 332 333 334
		ea_idebug(inode, "cache insert failed");

	/* list the attribute names */
	for (entry = FIRST_ENTRY(bh); !IS_LAST_ENTRY(entry);
	     entry = EXT2_XATTR_NEXT(entry)) {
S
Stephen Hemminger 已提交
335
		const struct xattr_handler *handler =
L
Linus Torvalds 已提交
336 337
			ext2_xattr_handler(entry->e_name_index);

338 339 340 341 342
		if (handler && (!handler->list || handler->list(dentry))) {
			const char *prefix = handler->prefix ?: handler->name;
			size_t prefix_len = strlen(prefix);
			size_t size = prefix_len + entry->e_name_len + 1;

L
Linus Torvalds 已提交
343 344 345 346 347
			if (buffer) {
				if (size > rest) {
					error = -ERANGE;
					goto cleanup;
				}
348 349 350 351 352
				memcpy(buffer, prefix, prefix_len);
				buffer += prefix_len;
				memcpy(buffer, entry->e_name, entry->e_name_len);
				buffer += entry->e_name_len;
				*buffer++ = 0;
L
Linus Torvalds 已提交
353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368
			}
			rest -= size;
		}
	}
	error = buffer_size - rest;  /* total size */

cleanup:
	brelse(bh);
	up_read(&EXT2_I(inode)->xattr_sem);

	return error;
}

/*
 * Inode operation listxattr()
 *
 * Thin wrapper that forwards to ext2_xattr_list() and returns its result.
 *
 * d_inode(dentry)->i_mutex: don't care
 */
ssize_t
ext2_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
	return ext2_xattr_list(dentry, buffer, size);
}

/*
 * If the EXT2_FEATURE_COMPAT_EXT_ATTR feature of this file system is
 * not set, set it.
 */
static void ext2_xattr_update_super_block(struct super_block *sb)
{
	if (EXT2_HAS_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR))
		return;

386
	spin_lock(&EXT2_SB(sb)->s_lock);
387
	ext2_update_dynamic_rev(sb);
388
	EXT2_SET_COMPAT_FEATURE(sb, EXT2_FEATURE_COMPAT_EXT_ATTR);
389
	spin_unlock(&EXT2_SB(sb)->s_lock);
L
Linus Torvalds 已提交
390 391 392 393 394 395
	mark_buffer_dirty(EXT2_SB(sb)->s_sbh);
}

/*
 * ext2_xattr_set()
 *
396
 * Create, replace or remove an extended attribute for this inode.  Value
L
Linus Torvalds 已提交
397 398 399 400 401 402 403 404 405 406 407 408 409 410 411
 * is NULL to remove an existing extended attribute, and non-NULL to
 * either replace an existing extended attribute, or create a new extended
 * attribute. The flags XATTR_REPLACE and XATTR_CREATE
 * specify that an extended attribute must exist and must not exist
 * previous to the call, respectively.
 *
 * Returns 0, or a negative error number on failure.
 */
int
ext2_xattr_set(struct inode *inode, int name_index, const char *name,
	       const void *value, size_t value_len, int flags)
{
	struct super_block *sb = inode->i_sb;
	struct buffer_head *bh = NULL;
	struct ext2_xattr_header *header = NULL;
412
	struct ext2_xattr_entry *here = NULL, *last = NULL;
L
Linus Torvalds 已提交
413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
	size_t name_len, free, min_offs = sb->s_blocksize;
	int not_found = 1, error;
	char *end;
	
	/*
	 * header -- Points either into bh, or to a temporarily
	 *           allocated buffer.
	 * here -- The named entry found, or the place for inserting, within
	 *         the block pointed to by header.
	 * last -- Points right after the last named entry within the block
	 *         pointed to by header.
	 * min_offs -- The offset of the first value (values are aligned
	 *             towards the end of the block).
	 * end -- Points right after the block pointed to by header.
	 */
	
	ea_idebug(inode, "name=%d.%s, value=%p, value_len=%ld",
		  name_index, name, value, (long)value_len);

	if (value == NULL)
		value_len = 0;
	if (name == NULL)
		return -EINVAL;
	name_len = strlen(name);
	if (name_len > 255 || value_len > sb->s_blocksize)
		return -ERANGE;
	down_write(&EXT2_I(inode)->xattr_sem);
	if (EXT2_I(inode)->i_file_acl) {
		/* The inode already has an extended attribute block. */
		bh = sb_bread(sb, EXT2_I(inode)->i_file_acl);
		error = -EIO;
		if (!bh)
			goto cleanup;
		ea_bdebug(bh, "b_count=%d, refcount=%d",
			atomic_read(&(bh->b_count)),
			le32_to_cpu(HDR(bh)->h_refcount));
		header = HDR(bh);
		end = bh->b_data + bh->b_size;
451 452 453
		if (!ext2_xattr_header_valid(header)) {
bad_block:
			ext2_error(sb, "ext2_xattr_set",
L
Linus Torvalds 已提交
454 455 456 457 458
				"inode %ld: bad block %d", inode->i_ino, 
				   EXT2_I(inode)->i_file_acl);
			error = -EIO;
			goto cleanup;
		}
J
Jan Kara 已提交
459 460 461 462 463 464
		/*
		 * Find the named attribute. If not found, 'here' will point
		 * to entry where the new attribute should be inserted to
		 * maintain sorting.
		 */
		last = FIRST_ENTRY(bh);
L
Linus Torvalds 已提交
465
		while (!IS_LAST_ENTRY(last)) {
466
			if (!ext2_xattr_entry_valid(last, end, sb->s_blocksize))
J
Jan Kara 已提交
467 468
				goto bad_block;
			if (last->e_value_size) {
L
Linus Torvalds 已提交
469 470 471 472
				size_t offs = le16_to_cpu(last->e_value_offs);
				if (offs < min_offs)
					min_offs = offs;
			}
J
Jan Kara 已提交
473
			if (not_found > 0) {
474 475 476
				not_found = ext2_xattr_cmp_entry(name_index,
								 name_len,
								 name, last);
J
Jan Kara 已提交
477 478 479
				if (not_found <= 0)
					here = last;
			}
480
			last = EXT2_XATTR_NEXT(last);
L
Linus Torvalds 已提交
481
		}
J
Jan Kara 已提交
482 483
		if (not_found > 0)
			here = last;
L
Linus Torvalds 已提交
484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505

		/* Check whether we have enough space left. */
		free = min_offs - ((char*)last - (char*)header) - sizeof(__u32);
	} else {
		/* We will use a new extended attribute block. */
		free = sb->s_blocksize -
			sizeof(struct ext2_xattr_header) - sizeof(__u32);
	}

	if (not_found) {
		/* Request to remove a nonexistent attribute? */
		error = -ENODATA;
		if (flags & XATTR_REPLACE)
			goto cleanup;
		error = 0;
		if (value == NULL)
			goto cleanup;
	} else {
		/* Request to create an existing attribute? */
		error = -EEXIST;
		if (flags & XATTR_CREATE)
			goto cleanup;
J
Jan Kara 已提交
506
		free += EXT2_XATTR_SIZE(le32_to_cpu(here->e_value_size));
L
Linus Torvalds 已提交
507 508 509 510 511 512 513 514 515 516 517 518
		free += EXT2_XATTR_LEN(name_len);
	}
	error = -ENOSPC;
	if (free < EXT2_XATTR_LEN(name_len) + EXT2_XATTR_SIZE(value_len))
		goto cleanup;

	/* Here we know that we can set the new attribute. */

	if (header) {
		/* assert(header == HDR(bh)); */
		lock_buffer(bh);
		if (header->h_refcount == cpu_to_le32(1)) {
J
Jan Kara 已提交
519 520
			__u32 hash = le32_to_cpu(header->h_hash);

L
Linus Torvalds 已提交
521
			ea_bdebug(bh, "modifying in-place");
J
Jan Kara 已提交
522 523 524 525
			/*
			 * This must happen under buffer lock for
			 * ext2_xattr_set2() to reliably detect modified block
			 */
526
			mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
527
					      bh->b_blocknr);
J
Jan Kara 已提交
528

L
Linus Torvalds 已提交
529 530 531 532 533 534
			/* keep the buffer locked while modifying it. */
		} else {
			int offset;

			unlock_buffer(bh);
			ea_bdebug(bh, "cloning");
535
			header = kmemdup(HDR(bh), bh->b_size, GFP_KERNEL);
L
Linus Torvalds 已提交
536 537 538 539 540 541 542 543 544 545 546 547
			error = -ENOMEM;
			if (header == NULL)
				goto cleanup;
			header->h_refcount = cpu_to_le32(1);

			offset = (char *)here - bh->b_data;
			here = ENTRY((char *)header + offset);
			offset = (char *)last - bh->b_data;
			last = ENTRY((char *)header + offset);
		}
	} else {
		/* Allocate a buffer where we construct the new block. */
548
		header = kzalloc(sb->s_blocksize, GFP_KERNEL);
L
Linus Torvalds 已提交
549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569
		error = -ENOMEM;
		if (header == NULL)
			goto cleanup;
		end = (char *)header + sb->s_blocksize;
		header->h_magic = cpu_to_le32(EXT2_XATTR_MAGIC);
		header->h_blocks = header->h_refcount = cpu_to_le32(1);
		last = here = ENTRY(header+1);
	}

	/* Iff we are modifying the block in-place, bh is locked here. */

	if (not_found) {
		/* Insert the new name. */
		size_t size = EXT2_XATTR_LEN(name_len);
		size_t rest = (char *)last - (char *)here;
		memmove((char *)here + size, here, rest);
		memset(here, 0, size);
		here->e_name_index = name_index;
		here->e_name_len = name_len;
		memcpy(here->e_name, name, name_len);
	} else {
J
Jan Kara 已提交
570
		if (here->e_value_size) {
L
Linus Torvalds 已提交
571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596
			char *first_val = (char *)header + min_offs;
			size_t offs = le16_to_cpu(here->e_value_offs);
			char *val = (char *)header + offs;
			size_t size = EXT2_XATTR_SIZE(
				le32_to_cpu(here->e_value_size));

			if (size == EXT2_XATTR_SIZE(value_len)) {
				/* The old and the new value have the same
				   size. Just replace. */
				here->e_value_size = cpu_to_le32(value_len);
				memset(val + size - EXT2_XATTR_PAD, 0,
				       EXT2_XATTR_PAD); /* Clear pad bytes. */
				memcpy(val, value, value_len);
				goto skip_replace;
			}

			/* Remove the old value. */
			memmove(first_val + size, first_val, val - first_val);
			memset(first_val, 0, size);
			here->e_value_offs = 0;
			min_offs += size;

			/* Adjust all value offsets. */
			last = ENTRY(header+1);
			while (!IS_LAST_ENTRY(last)) {
				size_t o = le16_to_cpu(last->e_value_offs);
J
Jan Kara 已提交
597
				if (o < offs)
L
Linus Torvalds 已提交
598 599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642
					last->e_value_offs =
						cpu_to_le16(o + size);
				last = EXT2_XATTR_NEXT(last);
			}
		}
		if (value == NULL) {
			/* Remove the old name. */
			size_t size = EXT2_XATTR_LEN(name_len);
			last = ENTRY((char *)last - size);
			memmove(here, (char*)here + size,
				(char*)last - (char*)here);
			memset(last, 0, size);
		}
	}

	if (value != NULL) {
		/* Insert the new value. */
		here->e_value_size = cpu_to_le32(value_len);
		if (value_len) {
			size_t size = EXT2_XATTR_SIZE(value_len);
			char *val = (char *)header + min_offs - size;
			here->e_value_offs =
				cpu_to_le16((char *)val - (char *)header);
			memset(val + size - EXT2_XATTR_PAD, 0,
			       EXT2_XATTR_PAD); /* Clear the pad bytes. */
			memcpy(val, value, value_len);
		}
	}

skip_replace:
	if (IS_LAST_ENTRY(ENTRY(header+1))) {
		/* This block is now empty. */
		if (bh && header == HDR(bh))
			unlock_buffer(bh);  /* we were modifying in-place. */
		error = ext2_xattr_set2(inode, bh, NULL);
	} else {
		ext2_xattr_rehash(header, here);
		if (bh && header == HDR(bh))
			unlock_buffer(bh);  /* we were modifying in-place. */
		error = ext2_xattr_set2(inode, bh, header);
	}

cleanup:
	if (!(bh && header == HDR(bh)))
		kfree(header);
P
Pan Bian 已提交
643
	brelse(bh);
L
Linus Torvalds 已提交
644 645 646 647 648 649 650 651 652 653 654 655 656 657 658
	up_write(&EXT2_I(inode)->xattr_sem);

	return error;
}

/*
 * Second half of ext2_xattr_set(): Update the file system.
 */
static int
ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
		struct ext2_xattr_header *header)
{
	struct super_block *sb = inode->i_sb;
	struct buffer_head *new_bh = NULL;
	int error;
659
	struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
L
Linus Torvalds 已提交
660 661 662 663 664 665 666 667 668 669 670 671

	if (header) {
		new_bh = ext2_xattr_cache_find(inode, header);
		if (new_bh) {
			/* We found an identical block in the cache. */
			if (new_bh == old_bh) {
				ea_bdebug(new_bh, "keeping this block");
			} else {
				/* The old block is released after updating
				   the inode.  */
				ea_bdebug(new_bh, "reusing block");

672 673
				error = dquot_alloc_block(inode, 1);
				if (error) {
L
Linus Torvalds 已提交
674 675 676
					unlock_buffer(new_bh);
					goto cleanup;
				}
M
Marcin Slusarz 已提交
677
				le32_add_cpu(&HDR(new_bh)->h_refcount, 1);
L
Linus Torvalds 已提交
678 679 680 681 682 683 684 685 686
				ea_bdebug(new_bh, "refcount now=%d",
					le32_to_cpu(HDR(new_bh)->h_refcount));
			}
			unlock_buffer(new_bh);
		} else if (old_bh && header == HDR(old_bh)) {
			/* Keep this block. No need to lock the block as we
			   don't need to change the reference count. */
			new_bh = old_bh;
			get_bh(new_bh);
687
			ext2_xattr_cache_insert(ea_block_cache, new_bh);
L
Linus Torvalds 已提交
688 689
		} else {
			/* We need to allocate a new block */
690 691
			ext2_fsblk_t goal = ext2_group_first_block_no(sb,
						EXT2_I(inode)->i_block_group);
M
Martin J. Bligh 已提交
692
			int block = ext2_new_block(inode, goal, &error);
L
Linus Torvalds 已提交
693 694 695 696 697
			if (error)
				goto cleanup;
			ea_idebug(inode, "creating block %d", block);

			new_bh = sb_getblk(sb, block);
698
			if (unlikely(!new_bh)) {
L
Linus Torvalds 已提交
699
				ext2_free_blocks(inode, block, 1);
700
				mark_inode_dirty(inode);
701
				error = -ENOMEM;
L
Linus Torvalds 已提交
702 703 704 705 706 707
				goto cleanup;
			}
			lock_buffer(new_bh);
			memcpy(new_bh->b_data, header, new_bh->b_size);
			set_buffer_uptodate(new_bh);
			unlock_buffer(new_bh);
708
			ext2_xattr_cache_insert(ea_block_cache, new_bh);
L
Linus Torvalds 已提交
709 710 711 712 713 714 715 716 717 718 719 720 721 722
			
			ext2_xattr_update_super_block(sb);
		}
		mark_buffer_dirty(new_bh);
		if (IS_SYNC(inode)) {
			sync_dirty_buffer(new_bh);
			error = -EIO;
			if (buffer_req(new_bh) && !buffer_uptodate(new_bh))
				goto cleanup;
		}
	}

	/* Update the inode. */
	EXT2_I(inode)->i_file_acl = new_bh ? new_bh->b_blocknr : 0;
723
	inode->i_ctime = current_time(inode);
L
Linus Torvalds 已提交
724
	if (IS_SYNC(inode)) {
C
Christoph Hellwig 已提交
725
		error = sync_inode_metadata(inode, 1);
L
Linus Torvalds 已提交
726 727 728 729
		/* In case sync failed due to ENOSPC the inode was actually
		 * written (only some dirty data were not) so we just proceed
		 * as if nothing happened and cleanup the unused block */
		if (error && error != -ENOSPC) {
730 731 732 733
			if (new_bh && new_bh != old_bh) {
				dquot_free_block_nodirty(inode, 1);
				mark_inode_dirty(inode);
			}
L
Linus Torvalds 已提交
734 735 736 737 738 739 740 741 742 743 744 745 746
			goto cleanup;
		}
	} else
		mark_inode_dirty(inode);

	error = 0;
	if (old_bh && old_bh != new_bh) {
		/*
		 * If there was an old block and we are no longer using it,
		 * release the old block.
		 */
		lock_buffer(old_bh);
		if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
J
Jan Kara 已提交
747 748 749 750 751 752
			__u32 hash = le32_to_cpu(HDR(old_bh)->h_hash);

			/*
			 * This must happen under buffer lock for
			 * ext2_xattr_set2() to reliably detect freed block
			 */
753
			mb_cache_entry_delete(ea_block_cache, hash,
754
					      old_bh->b_blocknr);
L
Linus Torvalds 已提交
755 756 757
			/* Free the old block. */
			ea_bdebug(old_bh, "freeing");
			ext2_free_blocks(inode, old_bh->b_blocknr, 1);
758
			mark_inode_dirty(inode);
L
Linus Torvalds 已提交
759 760 761 762 763 764
			/* We let our caller release old_bh, so we
			 * need to duplicate the buffer before. */
			get_bh(old_bh);
			bforget(old_bh);
		} else {
			/* Decrement the refcount only. */
M
Marcin Slusarz 已提交
765
			le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
766 767
			dquot_free_block_nodirty(inode, 1);
			mark_inode_dirty(inode);
L
Linus Torvalds 已提交
768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790
			mark_buffer_dirty(old_bh);
			ea_bdebug(old_bh, "refcount now=%d",
				le32_to_cpu(HDR(old_bh)->h_refcount));
		}
		unlock_buffer(old_bh);
	}

cleanup:
	brelse(new_bh);

	return error;
}

/*
 * ext2_xattr_delete_inode()
 *
 * Free extended attribute resources associated with this inode. This
 * is called immediately before an inode is freed.
 */
void
ext2_xattr_delete_inode(struct inode *inode)
{
	struct buffer_head *bh = NULL;
791
	struct ext2_sb_info *sbi = EXT2_SB(inode->i_sb);
L
Linus Torvalds 已提交
792 793 794 795

	down_write(&EXT2_I(inode)->xattr_sem);
	if (!EXT2_I(inode)->i_file_acl)
		goto cleanup;
796

797
	if (!ext2_data_block_valid(sbi, EXT2_I(inode)->i_file_acl, 1)) {
798 799 800 801 802 803
		ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
			"inode %ld: xattr block %d is out of data blocks range",
			inode->i_ino, EXT2_I(inode)->i_file_acl);
		goto cleanup;
	}

L
Linus Torvalds 已提交
804 805 806 807 808 809 810 811
	bh = sb_bread(inode->i_sb, EXT2_I(inode)->i_file_acl);
	if (!bh) {
		ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
			"inode %ld: block %d read error", inode->i_ino,
			EXT2_I(inode)->i_file_acl);
		goto cleanup;
	}
	ea_bdebug(bh, "b_count=%d", atomic_read(&(bh->b_count)));
812
	if (!ext2_xattr_header_valid(HDR(bh))) {
L
Linus Torvalds 已提交
813 814 815 816 817 818 819
		ext2_error(inode->i_sb, "ext2_xattr_delete_inode",
			"inode %ld: bad block %d", inode->i_ino,
			EXT2_I(inode)->i_file_acl);
		goto cleanup;
	}
	lock_buffer(bh);
	if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
J
Jan Kara 已提交
820 821 822 823 824 825
		__u32 hash = le32_to_cpu(HDR(bh)->h_hash);

		/*
		 * This must happen under buffer lock for ext2_xattr_set2() to
		 * reliably detect freed block
		 */
826
		mb_cache_entry_delete(EA_BLOCK_CACHE(inode), hash,
827
				      bh->b_blocknr);
L
Linus Torvalds 已提交
828 829 830
		ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
		get_bh(bh);
		bforget(bh);
P
Peter Staubach 已提交
831
		unlock_buffer(bh);
L
Linus Torvalds 已提交
832
	} else {
M
Marcin Slusarz 已提交
833
		le32_add_cpu(&HDR(bh)->h_refcount, -1);
P
Peter Staubach 已提交
834 835 836
		ea_bdebug(bh, "refcount now=%d",
			le32_to_cpu(HDR(bh)->h_refcount));
		unlock_buffer(bh);
L
Linus Torvalds 已提交
837 838 839
		mark_buffer_dirty(bh);
		if (IS_SYNC(inode))
			sync_dirty_buffer(bh);
840
		dquot_free_block_nodirty(inode, 1);
L
Linus Torvalds 已提交
841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857
	}
	EXT2_I(inode)->i_file_acl = 0;

cleanup:
	brelse(bh);
	up_write(&EXT2_I(inode)->xattr_sem);
}

/*
 * ext2_xattr_cache_insert()
 *
 * Create a new entry in the extended attribute cache, and insert
 * it unless such an entry is already in the cache.
 *
 * Returns 0, or a negative error number on failure.
 */
static int
J
Jan Kara 已提交
858
ext2_xattr_cache_insert(struct mb_cache *cache, struct buffer_head *bh)
L
Linus Torvalds 已提交
859 860 861 862
{
	__u32 hash = le32_to_cpu(HDR(bh)->h_hash);
	int error;

863 864
	error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr,
				      true);
L
Linus Torvalds 已提交
865 866 867 868 869 870
	if (error) {
		if (error == -EBUSY) {
			ea_bdebug(bh, "already in cache (%d cache entries)",
				atomic_read(&ext2_xattr_cache->c_entry_count));
			error = 0;
		}
J
Jan Kara 已提交
871 872
	} else
		ea_bdebug(bh, "inserting [%x]", (int)hash);
L
Linus Torvalds 已提交
873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927
	return error;
}

/*
 * ext2_xattr_cmp()
 *
 * Walk the entry lists of two extended attribute blocks in lock step and
 * check that they describe the same attributes with the same values.
 *
 * Returns 0 if the blocks are equal, 1 if they differ, and
 * a negative error number on errors (-EIO when an otherwise matching
 * entry refers to a separate value block).
 */
static int
ext2_xattr_cmp(struct ext2_xattr_header *header1,
	       struct ext2_xattr_header *header2)
{
	struct ext2_xattr_entry *a = ENTRY(header1+1);
	struct ext2_xattr_entry *b = ENTRY(header2+1);

	for (; !IS_LAST_ENTRY(a);
	     a = EXT2_XATTR_NEXT(a), b = EXT2_XATTR_NEXT(b)) {
		/* The second list is shorter than the first. */
		if (IS_LAST_ENTRY(b))
			return 1;

		/* Cheap descriptor fields first, then the name bytes. */
		if (a->e_hash != b->e_hash)
			return 1;
		if (a->e_name_index != b->e_name_index)
			return 1;
		if (a->e_name_len != b->e_name_len)
			return 1;
		if (a->e_value_size != b->e_value_size)
			return 1;
		if (memcmp(a->e_name, b->e_name, a->e_name_len))
			return 1;

		if (a->e_value_block != 0 || b->e_value_block != 0)
			return -EIO;

		if (memcmp((char *)header1 + le16_to_cpu(a->e_value_offs),
			   (char *)header2 + le16_to_cpu(b->e_value_offs),
			   le32_to_cpu(a->e_value_size)))
			return 1;
	}

	/* Equal only if the second list ends here as well. */
	return IS_LAST_ENTRY(b) ? 0 : 1;
}

/*
 * ext2_xattr_cache_find()
 *
 * Find an identical extended attribute block.
 *
 * Returns a locked buffer head to the block found, or NULL if such
 * a block was not found or an error occurred.
 */
static struct buffer_head *
ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
{
	__u32 hash = le32_to_cpu(header->h_hash);
J
Jan Kara 已提交
928
	struct mb_cache_entry *ce;
929
	struct mb_cache *ea_block_cache = EA_BLOCK_CACHE(inode);
L
Linus Torvalds 已提交
930 931 932 933 934

	if (!header->h_hash)
		return NULL;  /* never share */
	ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
again:
935
	ce = mb_cache_entry_find_first(ea_block_cache, hash);
L
Linus Torvalds 已提交
936 937 938
	while (ce) {
		struct buffer_head *bh;

939
		bh = sb_bread(inode->i_sb, ce->e_value);
L
Linus Torvalds 已提交
940 941 942
		if (!bh) {
			ext2_error(inode->i_sb, "ext2_xattr_cache_find",
				"inode %ld: block %ld read error",
943
				inode->i_ino, (unsigned long) ce->e_value);
L
Linus Torvalds 已提交
944 945
		} else {
			lock_buffer(bh);
J
Jan Kara 已提交
946 947 948 949 950 951 952 953 954 955
			/*
			 * We have to be careful about races with freeing or
			 * rehashing of xattr block. Once we hold buffer lock
			 * xattr block's state is stable so we can check
			 * whether the block got freed / rehashed or not.
			 * Since we unhash mbcache entry under buffer lock when
			 * freeing / rehashing xattr block, checking whether
			 * entry is still hashed is reliable.
			 */
			if (hlist_bl_unhashed(&ce->e_hash_list)) {
956
				mb_cache_entry_put(ea_block_cache, ce);
J
Jan Kara 已提交
957 958 959 960
				unlock_buffer(bh);
				brelse(bh);
				goto again;
			} else if (le32_to_cpu(HDR(bh)->h_refcount) >
L
Linus Torvalds 已提交
961 962
				   EXT2_XATTR_REFCOUNT_MAX) {
				ea_idebug(inode, "block %ld refcount %d>%d",
963
					  (unsigned long) ce->e_value,
L
Linus Torvalds 已提交
964 965 966 967 968
					  le32_to_cpu(HDR(bh)->h_refcount),
					  EXT2_XATTR_REFCOUNT_MAX);
			} else if (!ext2_xattr_cmp(header, HDR(bh))) {
				ea_bdebug(bh, "b_count=%d",
					  atomic_read(&(bh->b_count)));
969 970
				mb_cache_entry_touch(ea_block_cache, ce);
				mb_cache_entry_put(ea_block_cache, ce);
L
Linus Torvalds 已提交
971 972 973 974 975
				return bh;
			}
			unlock_buffer(bh);
			brelse(bh);
		}
976
		ce = mb_cache_entry_find_next(ea_block_cache, ce);
L
Linus Torvalds 已提交
977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048
	}
	return NULL;
}

#define NAME_HASH_SHIFT 5
#define VALUE_HASH_SHIFT 16

/*
 * ext2_xattr_hash_entry()
 *
 * Compute the hash of one extended attribute and store it in the entry's
 * e_hash field. The name bytes are folded in first; the value is folded
 * in afterwards, one 32-bit word at a time, but only when the entry has
 * a non-empty value and e_value_block is zero.
 */
static inline void ext2_xattr_hash_entry(struct ext2_xattr_header *header,
					 struct ext2_xattr_entry *entry)
{
	__u32 acc = 0;
	int i;

	/* Rotate left by NAME_HASH_SHIFT and mix in each name byte. */
	for (i = 0; i < entry->e_name_len; i++) {
		acc = (acc << NAME_HASH_SHIFT) ^
		      (acc >> (8*sizeof(acc) - NAME_HASH_SHIFT)) ^
		      entry->e_name[i];
	}

	if (entry->e_value_block == 0 && entry->e_value_size != 0) {
		/* The value offset is relative to the start of the block. */
		__le32 *word = (__le32 *)((char *)header +
			le16_to_cpu(entry->e_value_offs));
		/* Number of 32-bit words covering the padded value. */
		int words = (le32_to_cpu(entry->e_value_size) +
			     EXT2_XATTR_ROUND) >> EXT2_XATTR_PAD_BITS;

		while (words != 0) {
			acc = (acc << VALUE_HASH_SHIFT) ^
			      (acc >> (8*sizeof(acc) - VALUE_HASH_SHIFT)) ^
			      le32_to_cpu(*word);
			word++;
			words--;
		}
	}

	entry->e_hash = cpu_to_le32(acc);
}

#undef NAME_HASH_SHIFT
#undef VALUE_HASH_SHIFT

#define BLOCK_HASH_SHIFT 16

/*
 * ext2_xattr_rehash()
 *
 * Refresh the hash of @entry, then rebuild the block hash in @header from
 * the hashes of all entries in the block. If any entry has a zero hash the
 * block hash is set to zero as well.
 */
static void ext2_xattr_rehash(struct ext2_xattr_header *header,
			      struct ext2_xattr_entry *entry)
{
	__u32 block_hash = 0;
	struct ext2_xattr_entry *cur;

	ext2_xattr_hash_entry(header, entry);

	for (cur = ENTRY(header+1); !IS_LAST_ENTRY(cur);
	     cur = EXT2_XATTR_NEXT(cur)) {
		if (!cur->e_hash) {
			/* Block is not shared if an entry's hash value == 0 */
			block_hash = 0;
			break;
		}
		/* Rotate by BLOCK_HASH_SHIFT and mix in this entry's hash. */
		block_hash = (block_hash << BLOCK_HASH_SHIFT) ^
			     (block_hash >> (8*sizeof(block_hash) - BLOCK_HASH_SHIFT)) ^
			     le32_to_cpu(cur->e_hash);
	}

	header->h_hash = cpu_to_le32(block_hash);
}

#undef BLOCK_HASH_SHIFT

J
Jan Kara 已提交
1049 1050
/* Bucket-bits argument handed to mb_cache_create() for the xattr cache. */
#define HASH_BUCKET_BITS 10

/*
 * ext2_xattr_create_cache()
 *
 * Create the cache used to look up extended attribute blocks.
 *
 * Returns whatever mb_cache_create() returns; callers are expected to
 * check the result (NOTE(review): presumably NULL on failure - confirm
 * against mbcache).
 */
struct mb_cache *ext2_xattr_create_cache(void)
{
	return mb_cache_create(HASH_BUCKET_BITS);
}

J
Jan Kara 已提交
1056
/*
 * ext2_xattr_destroy_cache()
 *
 * Destroy a cache obtained from ext2_xattr_create_cache(). A NULL @cache
 * is accepted and ignored.
 */
void ext2_xattr_destroy_cache(struct mb_cache *cache)
{
	if (cache)
		mb_cache_destroy(cache);
}