xattr.c 27.1 KB
Newer Older
1 2
#include <linux/ceph/ceph_debug.h>

S
Sage Weil 已提交
3
#include "super.h"
4 5 6
#include "mds_client.h"

#include <linux/ceph/decode.h>
S
Sage Weil 已提交
7 8

#include <linux/xattr.h>
9
#include <linux/slab.h>
S
Sage Weil 已提交
10

11 12 13
#define XATTR_CEPH_PREFIX "ceph."
#define XATTR_CEPH_PREFIX_LEN (sizeof (XATTR_CEPH_PREFIX) - 1)

G
Guangliang Zhao 已提交
14 15 16 17 18 19 20 21 22 23 24 25
/*
 * List of handlers for synthetic system.* attributes. Other
 * attributes are handled directly.
 *
 * The array is NULL-terminated.  The two ACL handlers are only compiled
 * in when CONFIG_CEPH_FS_POSIX_ACL is defined; otherwise the list holds
 * just the terminator.
 */
const struct xattr_handler *ceph_xattr_handlers[] = {
#ifdef CONFIG_CEPH_FS_POSIX_ACL
	&ceph_xattr_acl_access_handler,
	&ceph_xattr_acl_default_handler,
#endif
	NULL,
};

S
Sage Weil 已提交
26 27
static bool ceph_is_valid_xattr(const char *name)
{
28
	return !strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN) ||
29
	       !strncmp(name, XATTR_SECURITY_PREFIX,
S
Sage Weil 已提交
30
			XATTR_SECURITY_PREFIX_LEN) ||
G
Guangliang Zhao 已提交
31
	       !strncmp(name, XATTR_SYSTEM_PREFIX, XATTR_SYSTEM_PREFIX_LEN) ||
S
Sage Weil 已提交
32 33 34 35 36 37 38 39
	       !strncmp(name, XATTR_TRUSTED_PREFIX, XATTR_TRUSTED_PREFIX_LEN) ||
	       !strncmp(name, XATTR_USER_PREFIX, XATTR_USER_PREFIX_LEN);
}

/*
 * These define virtual xattrs exposing the recursive directory
 * statistics and layout metadata.
 */
struct ceph_vxattr {
	/* full attribute name; a NULL name marks the end of a table */
	char *name;
	/* strlen(name) + 1 (for '\0') */
	size_t name_size;
	/* formats the attribute value into val, given size bytes of room */
	size_t (*getxattr_cb)(struct ceph_inode_info *ci, char *val,
			      size_t size);
	/* set/remove requests on a readonly entry fail with -EOPNOTSUPP */
	bool readonly;
	/* hidden entries are left out of listxattr output and name sizes */
	bool hidden;
	/* optional; when set and it returns false the entry is skipped */
	bool (*exists_cb)(struct ceph_inode_info *ci);
};

49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92
/* layouts */

/*
 * exists_cb for the layout vxattrs: a layout is considered present as
 * soon as any byte of ci->i_layout is non-zero.
 */
static bool ceph_vxattrcb_layout_exists(struct ceph_inode_info *ci)
{
	const char *bytes = (const char *)&ci->i_layout;
	size_t i = 0;

	while (i < sizeof(ci->i_layout)) {
		if (bytes[i])
			return true;
		i++;
	}

	return false;
}

/*
 * Format the whole layout as
 * "stripe_unit=.. stripe_count=.. object_size=.. pool=..".
 *
 * The pool is printed by name when ceph_pg_pool_name_by_id() finds one
 * in the current osdmap and by numeric id otherwise.  The lookup and the
 * formatting both happen with osdc->map_sem held for reading.
 *
 * Returns the snprintf() result.
 */
static size_t ceph_vxattrcb_layout(struct ceph_inode_info *ci, char *val,
					size_t size)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
	struct ceph_osd_client *osdc = &fsc->client->osdc;
	s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
	unsigned long long su, sc, os;
	const char *pool_name;
	int ret;

	dout("ceph_vxattrcb_layout %p\n", &ci->vfs_inode);

	down_read(&osdc->map_sem);
	pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);

	su = (unsigned long long)ceph_file_layout_su(ci->i_layout);
	sc = (unsigned long long)ceph_file_layout_stripe_count(ci->i_layout);
	os = (unsigned long long)ceph_file_layout_object_size(ci->i_layout);

	if (!pool_name)
		ret = snprintf(val, size,
		"stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%lld",
			       su, sc, os, (unsigned long long)pool);
	else
		ret = snprintf(val, size,
		"stripe_unit=%lld stripe_count=%lld object_size=%lld pool=%s",
			       su, sc, os, pool_name);
	up_read(&osdc->map_sem);

	return ret;
}

93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132
/* Format the layout's stripe unit as a decimal string into val. */
static size_t ceph_vxattrcb_layout_stripe_unit(struct ceph_inode_info *ci,
					       char *val, size_t size)
{
	unsigned long long su = ceph_file_layout_su(ci->i_layout);

	return snprintf(val, size, "%lld", su);
}

/* Format the layout's stripe count as a decimal string into val. */
static size_t ceph_vxattrcb_layout_stripe_count(struct ceph_inode_info *ci,
						char *val, size_t size)
{
	unsigned long long count = ceph_file_layout_stripe_count(ci->i_layout);

	return snprintf(val, size, "%lld", count);
}

/* Format the layout's object size as a decimal string into val. */
static size_t ceph_vxattrcb_layout_object_size(struct ceph_inode_info *ci,
					       char *val, size_t size)
{
	unsigned long long objsz = ceph_file_layout_object_size(ci->i_layout);

	return snprintf(val, size, "%lld", objsz);
}

/*
 * Format the layout's pool into val: the pool name when the osdmap
 * lookup yields one, the numeric pool id otherwise.  The lookup and the
 * formatting are done with osdc->map_sem held for reading.
 */
static size_t ceph_vxattrcb_layout_pool(struct ceph_inode_info *ci,
					char *val, size_t size)
{
	struct ceph_fs_client *fsc = ceph_sb_to_client(ci->vfs_inode.i_sb);
	struct ceph_osd_client *osdc = &fsc->client->osdc;
	s64 pool = ceph_file_layout_pg_pool(ci->i_layout);
	const char *pool_name;
	int ret;

	down_read(&osdc->map_sem);
	pool_name = ceph_pg_pool_name_by_id(osdc->osdmap, pool);
	if (!pool_name)
		ret = snprintf(val, size, "%lld", (unsigned long long)pool);
	else
		ret = snprintf(val, size, "%s", pool_name);
	up_read(&osdc->map_sem);

	return ret;
}

S
Sage Weil 已提交
133 134
/* directories */

135
static size_t ceph_vxattrcb_dir_entries(struct ceph_inode_info *ci, char *val,
S
Sage Weil 已提交
136 137 138 139 140
					size_t size)
{
	return snprintf(val, size, "%lld", ci->i_files + ci->i_subdirs);
}

141
static size_t ceph_vxattrcb_dir_files(struct ceph_inode_info *ci, char *val,
S
Sage Weil 已提交
142 143 144 145 146
				      size_t size)
{
	return snprintf(val, size, "%lld", ci->i_files);
}

147
static size_t ceph_vxattrcb_dir_subdirs(struct ceph_inode_info *ci, char *val,
S
Sage Weil 已提交
148 149 150 151 152
					size_t size)
{
	return snprintf(val, size, "%lld", ci->i_subdirs);
}

153
static size_t ceph_vxattrcb_dir_rentries(struct ceph_inode_info *ci, char *val,
S
Sage Weil 已提交
154 155 156 157 158
					 size_t size)
{
	return snprintf(val, size, "%lld", ci->i_rfiles + ci->i_rsubdirs);
}

159
static size_t ceph_vxattrcb_dir_rfiles(struct ceph_inode_info *ci, char *val,
S
Sage Weil 已提交
160 161 162 163 164
				       size_t size)
{
	return snprintf(val, size, "%lld", ci->i_rfiles);
}

165
static size_t ceph_vxattrcb_dir_rsubdirs(struct ceph_inode_info *ci, char *val,
S
Sage Weil 已提交
166 167 168 169 170
					 size_t size)
{
	return snprintf(val, size, "%lld", ci->i_rsubdirs);
}

171
static size_t ceph_vxattrcb_dir_rbytes(struct ceph_inode_info *ci, char *val,
S
Sage Weil 已提交
172 173 174 175 176
				       size_t size)
{
	return snprintf(val, size, "%lld", ci->i_rbytes);
}

177
static size_t ceph_vxattrcb_dir_rctime(struct ceph_inode_info *ci, char *val,
S
Sage Weil 已提交
178 179
				       size_t size)
{
180
	return snprintf(val, size, "%ld.09%ld", (long)ci->i_rctime.tv_sec,
S
Sage Weil 已提交
181 182 183
			(long)ci->i_rctime.tv_nsec);
}

184

185
/* "ceph.<_type>.<_name>", built by string-literal concatenation */
#define CEPH_XATTR_NAME(_type, _name)	XATTR_CEPH_PREFIX #_type "." #_name

/* "ceph.<_type>.<_name>.<_name2>" */
#define CEPH_XATTR_NAME2(_type, _name, _name2)				\
	XATTR_CEPH_PREFIX #_type "." #_name "." #_name2

/*
 * Table entry for a read-only, listed vxattr named ceph.<_type>.<_name>
 * whose value comes from ceph_vxattrcb_<_type>_<_name>().
 */
#define XATTR_NAME_CEPH(_type, _name)					\
	{								\
		.name = CEPH_XATTR_NAME(_type, _name),			\
		.name_size = sizeof (CEPH_XATTR_NAME(_type, _name)),	\
		.getxattr_cb = ceph_vxattrcb_ ## _type ## _ ## _name,	\
		.readonly = true,					\
		.hidden = false,					\
		.exists_cb = NULL,					\
	}

/*
 * Table entry for a writable, hidden vxattr named
 * ceph.<_type>.<_name>.<_field>.  The callback is
 * ceph_vxattrcb_<_name>_<_field>(); _type only contributes to the
 * attribute name.  The entry is reported only when
 * ceph_vxattrcb_layout_exists() says a layout is present.
 */
#define XATTR_LAYOUT_FIELD(_type, _name, _field)			\
	{								\
		.name = CEPH_XATTR_NAME2(_type, _name, _field),		\
		.name_size = sizeof (CEPH_XATTR_NAME2(_type, _name, _field)), \
		.getxattr_cb = ceph_vxattrcb_ ## _name ## _ ## _field,	\
		.readonly = false,					\
		.hidden = true,						\
		.exists_cb = ceph_vxattrcb_layout_exists,		\
	}
207

208
static struct ceph_vxattr ceph_dir_vxattrs[] = {
S
Sage Weil 已提交
209 210 211 212 213 214 215 216
	{
		.name = "ceph.dir.layout",
		.name_size = sizeof("ceph.dir.layout"),
		.getxattr_cb = ceph_vxattrcb_layout,
		.readonly = false,
		.hidden = false,
		.exists_cb = ceph_vxattrcb_layout_exists,
	},
217 218 219 220
	XATTR_LAYOUT_FIELD(dir, layout, stripe_unit),
	XATTR_LAYOUT_FIELD(dir, layout, stripe_count),
	XATTR_LAYOUT_FIELD(dir, layout, object_size),
	XATTR_LAYOUT_FIELD(dir, layout, pool),
221 222 223 224 225 226 227 228
	XATTR_NAME_CEPH(dir, entries),
	XATTR_NAME_CEPH(dir, files),
	XATTR_NAME_CEPH(dir, subdirs),
	XATTR_NAME_CEPH(dir, rentries),
	XATTR_NAME_CEPH(dir, rfiles),
	XATTR_NAME_CEPH(dir, rsubdirs),
	XATTR_NAME_CEPH(dir, rbytes),
	XATTR_NAME_CEPH(dir, rctime),
229
	{ .name = NULL, 0 }	/* Required table terminator */
S
Sage Weil 已提交
230
};
231
static size_t ceph_dir_vxattrs_name_size;	/* total size of all names */
S
Sage Weil 已提交
232 233 234

/* files */

235
static struct ceph_vxattr ceph_file_vxattrs[] = {
236 237 238 239 240 241 242 243
	{
		.name = "ceph.file.layout",
		.name_size = sizeof("ceph.file.layout"),
		.getxattr_cb = ceph_vxattrcb_layout,
		.readonly = false,
		.hidden = false,
		.exists_cb = ceph_vxattrcb_layout_exists,
	},
244 245 246 247
	XATTR_LAYOUT_FIELD(file, layout, stripe_unit),
	XATTR_LAYOUT_FIELD(file, layout, stripe_count),
	XATTR_LAYOUT_FIELD(file, layout, object_size),
	XATTR_LAYOUT_FIELD(file, layout, pool),
248
	{ .name = NULL, 0 }	/* Required table terminator */
S
Sage Weil 已提交
249
};
250
static size_t ceph_file_vxattrs_name_size;	/* total size of all names */
S
Sage Weil 已提交
251

252
static struct ceph_vxattr *ceph_inode_vxattrs(struct inode *inode)
S
Sage Weil 已提交
253 254 255 256 257 258 259 260
{
	if (S_ISDIR(inode->i_mode))
		return ceph_dir_vxattrs;
	else if (S_ISREG(inode->i_mode))
		return ceph_file_vxattrs;
	return NULL;
}

261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281
/*
 * Return the precomputed total name size for one of the two vxattr
 * tables.
 *
 * @vxattrs may be NULL: ceph_inode_vxattrs() returns NULL for inodes
 * that are neither directories nor regular files, and ceph_listxattr()
 * passes that value straight through.  Such inodes have no virtual
 * xattrs, so the size is 0.  Only a non-NULL pointer that matches
 * neither table is treated as a bug.
 */
static size_t ceph_vxattrs_name_size(struct ceph_vxattr *vxattrs)
{
	if (vxattrs == ceph_dir_vxattrs)
		return ceph_dir_vxattrs_name_size;
	if (vxattrs == ceph_file_vxattrs)
		return ceph_file_vxattrs_name_size;
	BUG_ON(vxattrs);

	return 0;
}

/*
 * Walk a NULL-name-terminated vxattr table and add up name_size (which
 * includes the terminating '\0') for every entry that is not hidden.
 */
static size_t __init vxattrs_name_size(struct ceph_vxattr *vxattrs)
{
	struct ceph_vxattr *entry = vxattrs;
	size_t total = 0;

	while (entry->name) {
		if (!entry->hidden)
			total += entry->name_size;
		entry++;
	}

	return total;
}

/* Routines called at initialization and exit time */

/* Precompute the listable-name totals for both vxattr tables. */
void __init ceph_xattr_init(void)
{
	ceph_file_vxattrs_name_size = vxattrs_name_size(ceph_file_vxattrs);
	ceph_dir_vxattrs_name_size = vxattrs_name_size(ceph_dir_vxattrs);
}

/* Reset the cached name-size totals to zero. */
void ceph_xattr_exit(void)
{
	ceph_file_vxattrs_name_size = 0;
	ceph_dir_vxattrs_name_size = 0;
}

302
static struct ceph_vxattr *ceph_match_vxattr(struct inode *inode,
S
Sage Weil 已提交
303 304
						const char *name)
{
305
	struct ceph_vxattr *vxattr = ceph_inode_vxattrs(inode);
306 307 308 309 310 311 312 313 314

	if (vxattr) {
		while (vxattr->name) {
			if (!strcmp(vxattr->name, name))
				return vxattr;
			vxattr++;
		}
	}

S
Sage Weil 已提交
315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401
	return NULL;
}

/*
 * Insert or update one entry in the inode's xattr rb-tree.
 *
 * @name/@name_len:   attribute name; it need not be NUL-terminated
 *                    (__build_xattrs passes pointers into the blob)
 * @val/@val_len:     attribute value; a NULL @val is stored as ""
 * @dirty:            stored in the entry's dirty field
 * @should_free_name: the entry owns @name and must kfree() it
 * @should_free_val:  the entry owns @val and must kfree() it
 * @newxattr:         preallocated node supplied by the caller
 *
 * The tree is ordered by the name bytes first and by name length second.
 *
 * If no entry with this name exists, *newxattr is linked into the tree
 * (the pointer is left in place; the tree now owns the node).  If one
 * exists, *newxattr is freed and set to NULL, the old value is freed if
 * the entry owned it, and @name is freed if @should_free_name is set,
 * because the entry keeps its existing name pointer.
 *
 * names_size, vals_size and count in ci->i_xattrs are kept up to date.
 * Always returns 0.
 */
static int __set_xattr(struct ceph_inode_info *ci,
			   const char *name, int name_len,
			   const char *val, int val_len,
			   int dirty,
			   int should_free_name, int should_free_val,
			   struct ceph_inode_xattr **newxattr)
{
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct ceph_inode_xattr *xattr = NULL;
	int c;
	int new = 0;

	p = &ci->i_xattrs.index.rb_node;
	while (*p) {
		parent = *p;
		xattr = rb_entry(parent, struct ceph_inode_xattr, node);
		c = strncmp(name, xattr->name, min(name_len, xattr->name_len));
		if (c < 0)
			p = &(*p)->rb_left;
		else if (c > 0)
			p = &(*p)->rb_right;
		else {
			/* common prefix is equal: the shorter name sorts first */
			if (name_len == xattr->name_len)
				break;
			else if (name_len < xattr->name_len)
				p = &(*p)->rb_left;
			else
				p = &(*p)->rb_right;
		}
		xattr = NULL;
	}

	if (!xattr) {
		new = 1;
		xattr = *newxattr;
		xattr->name = name;
		xattr->name_len = name_len;
		xattr->should_free_name = should_free_name;

		ci->i_xattrs.count++;
		dout("__set_xattr count=%d\n", ci->i_xattrs.count);
	} else {
		kfree(*newxattr);
		*newxattr = NULL;
		if (xattr->should_free_val)
			kfree((void *)xattr->val);

		if (should_free_name) {
			kfree((void *)name);
			name = xattr->name;
		}
		/* drop the old sizes; the new ones are added back below */
		ci->i_xattrs.names_size -= xattr->name_len;
		ci->i_xattrs.vals_size -= xattr->val_len;
	}
	ci->i_xattrs.names_size += name_len;
	ci->i_xattrs.vals_size += val_len;
	if (val)
		xattr->val = val;
	else
		xattr->val = "";

	xattr->val_len = val_len;
	xattr->dirty = dirty;
	xattr->should_free_val = (val && should_free_val);

	if (new) {
		rb_link_node(&xattr->node, parent, p);
		rb_insert_color(&xattr->node, &ci->i_xattrs.index);
		dout("__set_xattr_val p=%p\n", p);
	}

	/*
	 * The name may point into the xattr blob and then has no
	 * terminating NUL, so it is printed with an explicit length.
	 */
	dout("__set_xattr_val added %llx.%llx xattr %p %.*s=%.*s\n",
	     ceph_vinop(&ci->vfs_inode), xattr, name_len, name, val_len, val);

	return 0;
}

/*
 * Find the rb-tree entry whose name equals the NUL-terminated @name.
 * Returns the entry, or NULL if the tree holds no such name.
 */
static struct ceph_inode_xattr *__get_xattr(struct ceph_inode_info *ci,
			   const char *name)
{
	struct rb_node *cur = ci->i_xattrs.index.rb_node;
	int name_len = strlen(name);

	while (cur) {
		struct ceph_inode_xattr *xattr =
			rb_entry(cur, struct ceph_inode_xattr, node);
		int c = strncmp(name, xattr->name, xattr->name_len);

		/* equal over the stored length but @name is longer */
		if (c == 0 && name_len > xattr->name_len)
			c = 1;

		if (c == 0) {
			dout("__get_xattr %s: found %.*s\n", name,
			     xattr->val_len, xattr->val);
			return xattr;
		}

		cur = (c < 0) ? cur->rb_left : cur->rb_right;
	}

	dout("__get_xattr %s: not found\n", name);

	return NULL;
}

/*
 * Free an xattr entry together with whichever of its name and value
 * buffers it owns.  A NULL @xattr is a bug.
 */
static void __free_xattr(struct ceph_inode_xattr *xattr)
{
	BUG_ON(!xattr);

	if (xattr->should_free_val)
		kfree((void *)xattr->val);
	if (xattr->should_free_name)
		kfree((void *)xattr->name);

	kfree(xattr);
}

/*
 * Unlink @xattr from the inode's rb-tree, adjust the names_size,
 * vals_size and count bookkeeping, and free the entry.
 *
 * Returns 0 on success, or -ENODATA when @xattr is NULL, i.e. the caller
 * looked the attribute up and it does not exist.  (This used to return
 * -EOPNOTSUPP, which tells the user the operation is unsupported rather
 * than that the attribute is absent.)
 */
static int __remove_xattr(struct ceph_inode_info *ci,
			  struct ceph_inode_xattr *xattr)
{
	if (!xattr)
		return -ENODATA;

	rb_erase(&xattr->node, &ci->i_xattrs.index);

	ci->i_xattrs.names_size -= xattr->name_len;
	ci->i_xattrs.vals_size -= xattr->val_len;
	ci->i_xattrs.count--;

	/* frees the owned name/value buffers and the entry itself */
	__free_xattr(xattr);

	return 0;
}

/*
 * Look @name up in the inode's xattr tree and remove the entry.
 * Returns whatever __remove_xattr() returns; a failed lookup is passed
 * to it as NULL.
 */
static int __remove_xattr_by_name(struct ceph_inode_info *ci,
			   const char *name)
{
	return __remove_xattr(ci, __get_xattr(ci, name));
}

/*
 * Copy every name in the inode's xattr tree into @dest, in tree order,
 * each followed by a '\0'.  The caller must provide room for
 * names_size + count bytes.
 *
 * Returns the position just past the last byte written.
 */
static char *__copy_xattr_names(struct ceph_inode_info *ci,
				char *dest)
{
	struct rb_node *p;
	struct ceph_inode_xattr *xattr = NULL;

	dout("__copy_xattr_names count=%d\n", ci->i_xattrs.count);

	for (p = rb_first(&ci->i_xattrs.index); p; p = rb_next(p)) {
		xattr = rb_entry(p, struct ceph_inode_xattr, node);
		memcpy(dest, xattr->name, xattr->name_len);
		dest[xattr->name_len] = '\0';

		/*
		 * xattr->name is stored with an explicit length and may
		 * lack a terminating NUL, so print it with %.*s.
		 */
		dout("dest=%s %p (%.*s) (%d/%d)\n", dest, xattr,
		     xattr->name_len, xattr->name,
		     xattr->name_len, ci->i_xattrs.names_size);

		dest += xattr->name_len + 1;
	}

	return dest;
}

/*
 * Tear down the inode's xattr rb-tree: erase and free every entry, then
 * reset the size counters, the entry count, index_version and the tree
 * root.
 */
void __ceph_destroy_xattrs(struct ceph_inode_info *ci)
{
	struct rb_node *cur = rb_first(&ci->i_xattrs.index);

	dout("__ceph_destroy_xattrs p=%p\n", cur);

	while (cur) {
		struct rb_node *victim = cur;
		struct ceph_inode_xattr *xattr =
			rb_entry(victim, struct ceph_inode_xattr, node);

		/* step forward before the current node goes away */
		cur = rb_next(victim);
		dout("__ceph_destroy_xattrs next p=%p (%.*s)\n", cur,
		     xattr->name_len, xattr->name);
		rb_erase(victim, &ci->i_xattrs.index);

		__free_xattr(xattr);
	}

	ci->i_xattrs.index = RB_ROOT;
	ci->i_xattrs.count = 0;
	ci->i_xattrs.index_version = 0;
	ci->i_xattrs.vals_size = 0;
	ci->i_xattrs.names_size = 0;
}

static int __build_xattrs(struct inode *inode)
526 527
	__releases(ci->i_ceph_lock)
	__acquires(ci->i_ceph_lock)
S
Sage Weil 已提交
528 529 530 531 532 533 534 535 536
{
	u32 namelen;
	u32 numattr = 0;
	void *p, *end;
	u32 len;
	const char *name, *val;
	struct ceph_inode_info *ci = ceph_inode(inode);
	int xattr_version;
	struct ceph_inode_xattr **xattrs = NULL;
S
Sage Weil 已提交
537
	int err = 0;
S
Sage Weil 已提交
538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554
	int i;

	dout("__build_xattrs() len=%d\n",
	     ci->i_xattrs.blob ? (int)ci->i_xattrs.blob->vec.iov_len : 0);

	if (ci->i_xattrs.index_version >= ci->i_xattrs.version)
		return 0; /* already built */

	__ceph_destroy_xattrs(ci);

start:
	/* updated internal xattr rb tree */
	if (ci->i_xattrs.blob && ci->i_xattrs.blob->vec.iov_len > 4) {
		p = ci->i_xattrs.blob->vec.iov_base;
		end = p + ci->i_xattrs.blob->vec.iov_len;
		ceph_decode_32_safe(&p, end, numattr, bad);
		xattr_version = ci->i_xattrs.version;
555
		spin_unlock(&ci->i_ceph_lock);
S
Sage Weil 已提交
556 557 558 559 560 561 562 563 564 565 566 567 568 569

		xattrs = kcalloc(numattr, sizeof(struct ceph_xattr *),
				 GFP_NOFS);
		err = -ENOMEM;
		if (!xattrs)
			goto bad_lock;
		memset(xattrs, 0, numattr*sizeof(struct ceph_xattr *));
		for (i = 0; i < numattr; i++) {
			xattrs[i] = kmalloc(sizeof(struct ceph_inode_xattr),
					    GFP_NOFS);
			if (!xattrs[i])
				goto bad_lock;
		}

570
		spin_lock(&ci->i_ceph_lock);
S
Sage Weil 已提交
571 572 573 574 575
		if (ci->i_xattrs.version != xattr_version) {
			/* lost a race, retry */
			for (i = 0; i < numattr; i++)
				kfree(xattrs[i]);
			kfree(xattrs);
A
Alan Cox 已提交
576
			xattrs = NULL;
S
Sage Weil 已提交
577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601
			goto start;
		}
		err = -EIO;
		while (numattr--) {
			ceph_decode_32_safe(&p, end, len, bad);
			namelen = len;
			name = p;
			p += len;
			ceph_decode_32_safe(&p, end, len, bad);
			val = p;
			p += len;

			err = __set_xattr(ci, name, namelen, val, len,
					  0, 0, 0, &xattrs[numattr]);

			if (err < 0)
				goto bad;
		}
		kfree(xattrs);
	}
	ci->i_xattrs.index_version = ci->i_xattrs.version;
	ci->i_xattrs.dirty = false;

	return err;
bad_lock:
602
	spin_lock(&ci->i_ceph_lock);
S
Sage Weil 已提交
603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669
bad:
	if (xattrs) {
		for (i = 0; i < numattr; i++)
			kfree(xattrs[i]);
		kfree(xattrs);
	}
	ci->i_xattrs.names_size = 0;
	return err;
}

/*
 * Size of the encoded blob for the inode's current xattrs, optionally
 * with room for one more attribute.
 *
 * Layout: a 4-byte count, then per attribute a 4-byte name length, the
 * name, a 4-byte value length and the value.  When @name_size is
 * non-zero, space for one extra attribute with that name size and
 * @val_size is added.
 */
static int __get_required_blob_size(struct ceph_inode_info *ci, int name_size,
				    int val_size)
{
	int extra = name_size ? 4 + 4 + name_size + val_size : 0;
	int size = 4 + ci->i_xattrs.count*(4 + 4) +
			     ci->i_xattrs.names_size +
			     ci->i_xattrs.vals_size;

	dout("__get_required_blob_size c=%d names.size=%d vals.size=%d\n",
	     ci->i_xattrs.count, ci->i_xattrs.names_size,
	     ci->i_xattrs.vals_size);

	return size + extra;
}

/*
 * If the xattrs are dirty, re-encode the rb-tree into prealloc_blob and
 * make that the inode's current blob.
 *
 * The preallocated buffer must already be large enough (checked with
 * BUG_ON).  After the swap prealloc_blob is NULL, the dirty flag is
 * cleared and i_xattrs.version is bumped.  Does nothing when the xattrs
 * are not dirty.
 */
void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
{
	struct rb_node *cur;
	void *dest;
	int need;

	dout("__build_xattrs_blob %p\n", &ci->vfs_inode);
	if (!ci->i_xattrs.dirty)
		return;

	need = __get_required_blob_size(ci, 0, 0);
	BUG_ON(need > ci->i_xattrs.prealloc_blob->alloc_len);

	dest = ci->i_xattrs.prealloc_blob->vec.iov_base;
	ceph_encode_32(&dest, ci->i_xattrs.count);

	for (cur = rb_first(&ci->i_xattrs.index); cur; cur = rb_next(cur)) {
		struct ceph_inode_xattr *xattr =
			rb_entry(cur, struct ceph_inode_xattr, node);

		ceph_encode_32(&dest, xattr->name_len);
		memcpy(dest, xattr->name, xattr->name_len);
		dest += xattr->name_len;

		ceph_encode_32(&dest, xattr->val_len);
		memcpy(dest, xattr->val, xattr->val_len);
		dest += xattr->val_len;
	}

	/* adjust buffer len; it may be larger than we need */
	ci->i_xattrs.prealloc_blob->vec.iov_len =
		dest - ci->i_xattrs.prealloc_blob->vec.iov_base;

	/* release the old blob, if any, and install the new one */
	if (ci->i_xattrs.blob)
		ceph_buffer_put(ci->i_xattrs.blob);
	ci->i_xattrs.blob = ci->i_xattrs.prealloc_blob;
	ci->i_xattrs.prealloc_blob = NULL;
	ci->i_xattrs.dirty = false;
	ci->i_xattrs.version++;
}

G
Guangliang Zhao 已提交
679
ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
S
Sage Weil 已提交
680 681 682 683 684
		      size_t size)
{
	struct ceph_inode_info *ci = ceph_inode(inode);
	int err;
	struct ceph_inode_xattr *xattr;
685
	struct ceph_vxattr *vxattr = NULL;
S
Sage Weil 已提交
686 687 688 689

	if (!ceph_is_valid_xattr(name))
		return -ENODATA;

S
Sage Weil 已提交
690 691 692 693
	/* let's see if a virtual xattr was requested */
	vxattr = ceph_match_vxattr(inode, name);
	if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
		err = vxattr->getxattr_cb(ci, value, size);
694
		return err;
S
Sage Weil 已提交
695 696
	}

697 698 699 700
	spin_lock(&ci->i_ceph_lock);
	dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
	     ci->i_xattrs.version, ci->i_xattrs.index_version);

S
Sage Weil 已提交
701 702 703 704
	if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
	    (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
		goto get_xattr;
	} else {
705
		spin_unlock(&ci->i_ceph_lock);
S
Sage Weil 已提交
706 707 708 709 710 711
		/* get xattrs from mds (if we don't already have them) */
		err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
		if (err)
			return err;
	}

712
	spin_lock(&ci->i_ceph_lock);
S
Sage Weil 已提交
713 714 715 716 717 718 719 720

	err = __build_xattrs(inode);
	if (err < 0)
		goto out;

get_xattr:
	err = -ENODATA;  /* == ENOATTR */
	xattr = __get_xattr(ci, name);
S
Sage Weil 已提交
721
	if (!xattr)
S
Sage Weil 已提交
722 723 724 725 726 727 728 729 730 731 732 733 734
		goto out;

	err = -ERANGE;
	if (size && size < xattr->val_len)
		goto out;

	err = xattr->val_len;
	if (size == 0)
		goto out;

	memcpy(value, xattr->val, xattr->val_len);

out:
735
	spin_unlock(&ci->i_ceph_lock);
S
Sage Weil 已提交
736 737 738
	return err;
}

G
Guangliang Zhao 已提交
739 740 741 742 743 744 745 746 747
/*
 * getxattr entry point: names carrying the system prefix are routed
 * through generic_getxattr(), everything else through
 * __ceph_getxattr() on the dentry's inode.
 */
ssize_t ceph_getxattr(struct dentry *dentry, const char *name, void *value,
		      size_t size)
{
	bool is_system = !strncmp(name, XATTR_SYSTEM_PREFIX,
				  XATTR_SYSTEM_PREFIX_LEN);

	if (!is_system)
		return __ceph_getxattr(dentry->d_inode, name, value, size);

	return generic_getxattr(dentry, name, value, size);
}

S
Sage Weil 已提交
748 749 750 751
ssize_t ceph_listxattr(struct dentry *dentry, char *names, size_t size)
{
	struct inode *inode = dentry->d_inode;
	struct ceph_inode_info *ci = ceph_inode(inode);
752
	struct ceph_vxattr *vxattrs = ceph_inode_vxattrs(inode);
S
Sage Weil 已提交
753 754 755 756 757 758
	u32 vir_namelen = 0;
	u32 namelen;
	int err;
	u32 len;
	int i;

759
	spin_lock(&ci->i_ceph_lock);
S
Sage Weil 已提交
760 761 762 763
	dout("listxattr %p ver=%lld index_ver=%lld\n", inode,
	     ci->i_xattrs.version, ci->i_xattrs.index_version);

	if (__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1) &&
764
	    (ci->i_xattrs.index_version >= ci->i_xattrs.version)) {
S
Sage Weil 已提交
765 766
		goto list_xattr;
	} else {
767
		spin_unlock(&ci->i_ceph_lock);
S
Sage Weil 已提交
768 769 770 771 772
		err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR);
		if (err)
			return err;
	}

773
	spin_lock(&ci->i_ceph_lock);
S
Sage Weil 已提交
774 775 776 777 778 779

	err = __build_xattrs(inode);
	if (err < 0)
		goto out;

list_xattr:
780 781 782 783 784 785
	/*
	 * Start with virtual dir xattr names (if any) (including
	 * terminating '\0' characters for each).
	 */
	vir_namelen = ceph_vxattrs_name_size(vxattrs);

S
Sage Weil 已提交
786
	/* adding 1 byte per each variable due to the null termination */
787
	namelen = ci->i_xattrs.names_size + ci->i_xattrs.count;
S
Sage Weil 已提交
788
	err = -ERANGE;
789
	if (size && vir_namelen + namelen > size)
S
Sage Weil 已提交
790 791
		goto out;

792
	err = namelen + vir_namelen;
S
Sage Weil 已提交
793 794 795 796 797 798
	if (size == 0)
		goto out;

	names = __copy_xattr_names(ci, names);

	/* virtual xattr names, too */
799 800
	err = namelen;
	if (vxattrs) {
S
Sage Weil 已提交
801
		for (i = 0; vxattrs[i].name; i++) {
802 803 804 805 806 807 808
			if (!vxattrs[i].hidden &&
			    !(vxattrs[i].exists_cb &&
			      !vxattrs[i].exists_cb(ci))) {
				len = sprintf(names, "%s", vxattrs[i].name);
				names += len + 1;
				err += len + 1;
			}
S
Sage Weil 已提交
809
		}
810
	}
S
Sage Weil 已提交
811 812

out:
813
	spin_unlock(&ci->i_ceph_lock);
S
Sage Weil 已提交
814 815 816 817 818 819
	return err;
}

static int ceph_sync_setxattr(struct dentry *dentry, const char *name,
			      const char *value, size_t size, int flags)
{
820
	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
S
Sage Weil 已提交
821 822
	struct inode *inode = dentry->d_inode;
	struct ceph_inode_info *ci = ceph_inode(inode);
S
Sage Weil 已提交
823
	struct inode *parent_inode;
S
Sage Weil 已提交
824
	struct ceph_mds_request *req;
825
	struct ceph_mds_client *mdsc = fsc->mdsc;
S
Sage Weil 已提交
826 827 828 829 830 831 832 833 834 835 836 837 838
	int err;
	int i, nr_pages;
	struct page **pages = NULL;
	void *kaddr;

	/* copy value into some pages */
	nr_pages = calc_pages_for(0, size);
	if (nr_pages) {
		pages = kmalloc(sizeof(pages[0])*nr_pages, GFP_NOFS);
		if (!pages)
			return -ENOMEM;
		err = -ENOMEM;
		for (i = 0; i < nr_pages; i++) {
839
			pages[i] = __page_cache_alloc(GFP_NOFS);
S
Sage Weil 已提交
840 841 842 843 844 845 846 847 848 849 850 851 852 853 854
			if (!pages[i]) {
				nr_pages = i;
				goto out;
			}
			kaddr = kmap(pages[i]);
			memcpy(kaddr, value + i*PAGE_CACHE_SIZE,
			       min(PAGE_CACHE_SIZE, size-i*PAGE_CACHE_SIZE));
		}
	}

	dout("setxattr value=%.*s\n", (int)size, value);

	/* do request */
	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_SETXATTR,
				       USE_AUTH_MDS);
J
Julia Lawall 已提交
855 856 857 858
	if (IS_ERR(req)) {
		err = PTR_ERR(req);
		goto out;
	}
859 860
	req->r_inode = inode;
	ihold(inode);
S
Sage Weil 已提交
861 862 863 864 865 866 867 868 869 870
	req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
	req->r_num_caps = 1;
	req->r_args.setxattr.flags = cpu_to_le32(flags);
	req->r_path2 = kstrdup(name, GFP_NOFS);

	req->r_pages = pages;
	req->r_num_pages = nr_pages;
	req->r_data_len = size;

	dout("xattr.ver (before): %lld\n", ci->i_xattrs.version);
S
Sage Weil 已提交
871
	parent_inode = ceph_get_dentry_parent_inode(dentry);
S
Sage Weil 已提交
872
	err = ceph_mdsc_do_request(mdsc, parent_inode, req);
S
Sage Weil 已提交
873
	iput(parent_inode);
S
Sage Weil 已提交
874 875 876 877 878 879 880 881 882 883 884 885
	ceph_mdsc_put_request(req);
	dout("xattr.ver (after): %lld\n", ci->i_xattrs.version);

out:
	if (pages) {
		for (i = 0; i < nr_pages; i++)
			__free_page(pages[i]);
		kfree(pages);
	}
	return err;
}

G
Guangliang Zhao 已提交
886 887
int __ceph_setxattr(struct dentry *dentry, const char *name,
			const void *value, size_t size, int flags)
S
Sage Weil 已提交
888 889
{
	struct inode *inode = dentry->d_inode;
890
	struct ceph_vxattr *vxattr;
S
Sage Weil 已提交
891
	struct ceph_inode_info *ci = ceph_inode(inode);
892
	int issued;
S
Sage Weil 已提交
893
	int err;
894
	int dirty;
S
Sage Weil 已提交
895 896 897 898 899 900 901 902 903 904
	int name_len = strlen(name);
	int val_len = size;
	char *newname = NULL;
	char *newval = NULL;
	struct ceph_inode_xattr *xattr = NULL;
	int required_blob_size;

	if (!ceph_is_valid_xattr(name))
		return -EOPNOTSUPP;

905 906 907
	vxattr = ceph_match_vxattr(inode, name);
	if (vxattr && vxattr->readonly)
		return -EOPNOTSUPP;
S
Sage Weil 已提交
908

909 910 911 912
	/* pass any unhandled ceph.* xattrs through to the MDS */
	if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
		goto do_sync_unlocked;

S
Sage Weil 已提交
913 914
	/* preallocate memory for xattr name, value, index node */
	err = -ENOMEM;
J
Julia Lawall 已提交
915
	newname = kmemdup(name, name_len + 1, GFP_NOFS);
S
Sage Weil 已提交
916 917 918 919
	if (!newname)
		goto out;

	if (val_len) {
920
		newval = kmemdup(value, val_len, GFP_NOFS);
S
Sage Weil 已提交
921 922 923 924 925 926 927 928
		if (!newval)
			goto out;
	}

	xattr = kmalloc(sizeof(struct ceph_inode_xattr), GFP_NOFS);
	if (!xattr)
		goto out;

929
	spin_lock(&ci->i_ceph_lock);
S
Sage Weil 已提交
930 931
retry:
	issued = __ceph_caps_issued(ci, NULL);
932
	dout("setxattr %p issued %s\n", inode, ceph_cap_string(issued));
S
Sage Weil 已提交
933 934 935 936 937 938 939 940
	if (!(issued & CEPH_CAP_XATTR_EXCL))
		goto do_sync;
	__build_xattrs(inode);

	required_blob_size = __get_required_blob_size(ci, name_len, val_len);

	if (!ci->i_xattrs.prealloc_blob ||
	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
941
		struct ceph_buffer *blob;
S
Sage Weil 已提交
942

943
		spin_unlock(&ci->i_ceph_lock);
S
Sage Weil 已提交
944
		dout(" preaallocating new blob size=%d\n", required_blob_size);
S
Sage Weil 已提交
945
		blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
S
Sage Weil 已提交
946 947
		if (!blob)
			goto out;
948
		spin_lock(&ci->i_ceph_lock);
S
Sage Weil 已提交
949 950
		if (ci->i_xattrs.prealloc_blob)
			ceph_buffer_put(ci->i_xattrs.prealloc_blob);
S
Sage Weil 已提交
951 952 953 954 955 956
		ci->i_xattrs.prealloc_blob = blob;
		goto retry;
	}

	err = __set_xattr(ci, newname, name_len, newval,
			  val_len, 1, 1, 1, &xattr);
957

958
	dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
S
Sage Weil 已提交
959 960
	ci->i_xattrs.dirty = true;
	inode->i_ctime = CURRENT_TIME;
961

962
	spin_unlock(&ci->i_ceph_lock);
963 964
	if (dirty)
		__mark_inode_dirty(inode, dirty);
S
Sage Weil 已提交
965 966 967
	return err;

do_sync:
968
	spin_unlock(&ci->i_ceph_lock);
969
do_sync_unlocked:
S
Sage Weil 已提交
970 971 972 973 974 975 976 977
	err = ceph_sync_setxattr(dentry, name, value, size, flags);
out:
	kfree(newname);
	kfree(newval);
	kfree(xattr);
	return err;
}

G
Guangliang Zhao 已提交
978 979 980 981 982 983 984 985 986 987 988 989
/*
 * setxattr entry point.  Inodes whose snap id is not CEPH_NOSNAP are
 * refused with -EROFS.  Names carrying the system prefix go through
 * generic_setxattr(); everything else goes to __ceph_setxattr().
 */
int ceph_setxattr(struct dentry *dentry, const char *name,
		  const void *value, size_t size, int flags)
{
	bool is_system;

	if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
		return -EROFS;

	is_system = !strncmp(name, XATTR_SYSTEM_PREFIX,
			     XATTR_SYSTEM_PREFIX_LEN);
	if (!is_system)
		return __ceph_setxattr(dentry, name, value, size, flags);

	return generic_setxattr(dentry, name, value, size, flags);
}

S
Sage Weil 已提交
990 991
static int ceph_send_removexattr(struct dentry *dentry, const char *name)
{
992 993
	struct ceph_fs_client *fsc = ceph_sb_to_client(dentry->d_sb);
	struct ceph_mds_client *mdsc = fsc->mdsc;
S
Sage Weil 已提交
994
	struct inode *inode = dentry->d_inode;
S
Sage Weil 已提交
995
	struct inode *parent_inode;
S
Sage Weil 已提交
996 997 998 999 1000 1001 1002
	struct ceph_mds_request *req;
	int err;

	req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_RMXATTR,
				       USE_AUTH_MDS);
	if (IS_ERR(req))
		return PTR_ERR(req);
1003 1004
	req->r_inode = inode;
	ihold(inode);
S
Sage Weil 已提交
1005 1006 1007 1008
	req->r_inode_drop = CEPH_CAP_XATTR_SHARED;
	req->r_num_caps = 1;
	req->r_path2 = kstrdup(name, GFP_NOFS);

S
Sage Weil 已提交
1009
	parent_inode = ceph_get_dentry_parent_inode(dentry);
S
Sage Weil 已提交
1010
	err = ceph_mdsc_do_request(mdsc, parent_inode, req);
S
Sage Weil 已提交
1011
	iput(parent_inode);
S
Sage Weil 已提交
1012 1013 1014 1015
	ceph_mdsc_put_request(req);
	return err;
}

G
Guangliang Zhao 已提交
1016
int __ceph_removexattr(struct dentry *dentry, const char *name)
S
Sage Weil 已提交
1017 1018
{
	struct inode *inode = dentry->d_inode;
1019
	struct ceph_vxattr *vxattr;
S
Sage Weil 已提交
1020 1021 1022
	struct ceph_inode_info *ci = ceph_inode(inode);
	int issued;
	int err;
1023
	int required_blob_size;
1024
	int dirty;
S
Sage Weil 已提交
1025 1026 1027 1028

	if (!ceph_is_valid_xattr(name))
		return -EOPNOTSUPP;

1029 1030 1031
	vxattr = ceph_match_vxattr(inode, name);
	if (vxattr && vxattr->readonly)
		return -EOPNOTSUPP;
S
Sage Weil 已提交
1032

1033 1034 1035 1036
	/* pass any unhandled ceph.* xattrs through to the MDS */
	if (!strncmp(name, XATTR_CEPH_PREFIX, XATTR_CEPH_PREFIX_LEN))
		goto do_sync_unlocked;

1037
	err = -ENOMEM;
1038
	spin_lock(&ci->i_ceph_lock);
1039
retry:
S
Sage Weil 已提交
1040 1041 1042 1043 1044
	issued = __ceph_caps_issued(ci, NULL);
	dout("removexattr %p issued %s\n", inode, ceph_cap_string(issued));

	if (!(issued & CEPH_CAP_XATTR_EXCL))
		goto do_sync;
1045
	__build_xattrs(inode);
S
Sage Weil 已提交
1046

1047 1048 1049 1050 1051 1052 1053 1054 1055 1056 1057 1058 1059 1060 1061 1062 1063 1064
	required_blob_size = __get_required_blob_size(ci, 0, 0);

	if (!ci->i_xattrs.prealloc_blob ||
	    required_blob_size > ci->i_xattrs.prealloc_blob->alloc_len) {
		struct ceph_buffer *blob;

		spin_unlock(&ci->i_ceph_lock);
		dout(" preaallocating new blob size=%d\n", required_blob_size);
		blob = ceph_buffer_new(required_blob_size, GFP_NOFS);
		if (!blob)
			goto out;
		spin_lock(&ci->i_ceph_lock);
		if (ci->i_xattrs.prealloc_blob)
			ceph_buffer_put(ci->i_xattrs.prealloc_blob);
		ci->i_xattrs.prealloc_blob = blob;
		goto retry;
	}

S
Sage Weil 已提交
1065
	err = __remove_xattr_by_name(ceph_inode(inode), name);
1066

1067
	dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL);
S
Sage Weil 已提交
1068 1069
	ci->i_xattrs.dirty = true;
	inode->i_ctime = CURRENT_TIME;
1070
	spin_unlock(&ci->i_ceph_lock);
1071 1072
	if (dirty)
		__mark_inode_dirty(inode, dirty);
S
Sage Weil 已提交
1073 1074
	return err;
do_sync:
1075
	spin_unlock(&ci->i_ceph_lock);
1076
do_sync_unlocked:
S
Sage Weil 已提交
1077
	err = ceph_send_removexattr(dentry, name);
1078
out:
S
Sage Weil 已提交
1079 1080 1081
	return err;
}

G
Guangliang Zhao 已提交
1082 1083 1084 1085 1086 1087 1088 1089 1090 1091
/*
 * removexattr entry point.  Inodes whose snap id is not CEPH_NOSNAP are
 * refused with -EROFS.  Names carrying the system prefix go through
 * generic_removexattr(); everything else goes to __ceph_removexattr().
 */
int ceph_removexattr(struct dentry *dentry, const char *name)
{
	bool is_system;

	if (ceph_snap(dentry->d_inode) != CEPH_NOSNAP)
		return -EROFS;

	is_system = !strncmp(name, XATTR_SYSTEM_PREFIX,
			     XATTR_SYSTEM_PREFIX_LEN);
	if (!is_system)
		return __ceph_removexattr(dentry, name);

	return generic_removexattr(dentry, name);
}