/*
 * device_cgroup.c - device cgroup subsystem
 *
 * Copyright 2007 IBM Corp
 */

#include <linux/device_cgroup.h>
#include <linux/cgroup.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/uaccess.h>
12
#include <linux/seq_file.h>
13
#include <linux/slab.h>
L
Lai Jiangshan 已提交
14
#include <linux/rcupdate.h>
L
Li Zefan 已提交
15
#include <linux/mutex.h>
16 17 18 19 20 21 22 23 24 25

#define ACC_MKNOD 1
#define ACC_READ  2
#define ACC_WRITE 4
#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE)

#define DEV_BLOCK 1
#define DEV_CHAR  2
#define DEV_ALL   4  /* this represents all devices */

L
Li Zefan 已提交
26 27
static DEFINE_MUTEX(devcgroup_mutex);

28 29 30 31 32 33
/*
 * Default policy of a device cgroup; the exception list is interpreted
 * relative to this (see may_access() and devcgroup_seq_read()).
 */
enum devcg_behavior {
	DEVCG_DEFAULT_NONE,	/* not initialized yet / cgroup offline */
	DEVCG_DEFAULT_ALLOW,	/* allow everything; exceptions deny */
	DEVCG_DEFAULT_DENY,	/* deny everything; exceptions allow */
};

34
/*
 * exception list locking rules:
 * hold devcgroup_mutex for update/read.
 * hold rcu_read_lock() for read.
 */

40
/*
 * One entry of a device cgroup's exception list: a single
 * (type, major, minor, access) tuple.  A major/minor of ~0 acts as a
 * wildcard (see may_access() and set_majmin()).
 */
struct dev_exception_item {
	u32 major, minor;	/* device numbers; ~0 == any */
	short type;		/* DEV_BLOCK or DEV_CHAR */
	short access;		/* mask of ACC_READ | ACC_WRITE | ACC_MKNOD */
	struct list_head list;	/* linkage in dev_cgroup->exceptions */
	struct rcu_head rcu;	/* deferred free after RCU grace period */
};

/* per-cgroup state of the devices controller */
struct dev_cgroup {
	struct cgroup_subsys_state css;		/* must come first */
	struct list_head exceptions;		/* dev_exception_item list */
	enum devcg_behavior behavior;		/* default allow or deny */
	/* temporary list for pending propagation operations */
	struct list_head propagate_pending;
};

56 57 58 59 60
/* map a cgroup_subsys_state back to its embedding dev_cgroup */
static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
{
	return container_of(s, struct dev_cgroup, css);
}

61 62
/* fetch the dev_cgroup attached to @cgroup via the devices subsystem */
static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
{
	return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
}

66 67 68 69 70
/* fetch the dev_cgroup that @task currently belongs to */
static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
{
	return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
}

71 72
struct cgroup_subsys devices_subsys;

73 74
/*
 * Only the task itself or a CAP_SYS_ADMIN-capable caller may move tasks
 * into a device cgroup.
 *
 * NOTE(review): only the first task of the set is checked — presumably
 * all tasks in a taskset share the same mover; confirm against the
 * cgroup core's taskset semantics.
 */
static int devcgroup_can_attach(struct cgroup *new_cgrp,
				struct cgroup_taskset *set)
{
	struct task_struct *task = cgroup_taskset_first(set);

	if (current != task && !capable(CAP_SYS_ADMIN))
		return -EPERM;
	return 0;
}

/*
 * dev_exceptions_copy - duplicate every entry of @orig onto @dest
 *
 * called under devcgroup_mutex
 *
 * Returns 0 on success or -ENOMEM; on failure, every entry already
 * added to @dest is unlinked and freed again, leaving @dest as it was.
 */
static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
{
	struct dev_exception_item *ex, *tmp, *new;

	lockdep_assert_held(&devcgroup_mutex);

	list_for_each_entry(ex, orig, list) {
		new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
		if (!new)
			goto free_and_exit;
		list_add_tail(&new->list, dest);
	}

	return 0;

free_and_exit:
	/* roll back partial copy */
	list_for_each_entry_safe(ex, tmp, dest, list) {
		list_del(&ex->list);
		kfree(ex);
	}
	return -ENOMEM;
}

/*
 * dev_exception_add - add (or merge) an exception into @dev_cgroup's list
 *
 * called under devcgroup_mutex
 *
 * If an entry with the same type/major/minor already exists, the access
 * bits are OR-ed into it and no new entry is created.  Returns 0 on
 * success, -ENOMEM if the copy of @ex could not be allocated.
 */
static int dev_exception_add(struct dev_cgroup *dev_cgroup,
			     struct dev_exception_item *ex)
{
	struct dev_exception_item *excopy, *walk;

	lockdep_assert_held(&devcgroup_mutex);

	excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
	if (!excopy)
		return -ENOMEM;

	list_for_each_entry(walk, &dev_cgroup->exceptions, list) {
		if (walk->type != ex->type)
			continue;
		if (walk->major != ex->major)
			continue;
		if (walk->minor != ex->minor)
			continue;

		/* same device already listed: merge access bits instead */
		walk->access |= ex->access;
		kfree(excopy);
		excopy = NULL;
	}

	if (excopy != NULL)
		list_add_tail_rcu(&excopy->list, &dev_cgroup->exceptions);
	return 0;
}

/*
 * dev_exception_rm - strip @ex's access bits from the matching exception
 *
 * called under devcgroup_mutex
 *
 * Finds the entry with the same type/major/minor, clears the access bits
 * named by @ex, and removes the entry entirely once no bits remain.
 */
static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
			     struct dev_exception_item *ex)
{
	struct dev_exception_item *walk, *tmp;

	lockdep_assert_held(&devcgroup_mutex);

	list_for_each_entry_safe(walk, tmp, &dev_cgroup->exceptions, list) {
		if (walk->type != ex->type)
			continue;
		if (walk->major != ex->major)
			continue;
		if (walk->minor != ex->minor)
			continue;

		walk->access &= ~ex->access;
		if (!walk->access) {
			/* readers may still hold a reference: free via RCU */
			list_del_rcu(&walk->list);
			kfree_rcu(walk, rcu);
		}
	}
}

167 168 169 170 171 172 173 174 175 176
/*
 * Unlink and free every exception entry; freeing is RCU-deferred so
 * lockless readers traversing the list stay safe.
 */
static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
{
	struct dev_exception_item *entry, *next;

	list_for_each_entry_safe(entry, next, &dev_cgroup->exceptions, list) {
		list_del_rcu(&entry->list);
		kfree_rcu(entry, rcu);
	}
}

177
/**
 * dev_exception_clean - frees all entries of the exception list
 * @dev_cgroup: dev_cgroup with the exception list to be cleaned
 *
 * called under devcgroup_mutex
 */
static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
{
	lockdep_assert_held(&devcgroup_mutex);

	__dev_exception_clean(dev_cgroup);
}

190 191 192 193 194
/* a devcgroup is online once devcgroup_online() has set a real behavior */
static inline bool is_devcg_online(const struct dev_cgroup *devcg)
{
	return devcg->behavior != DEVCG_DEFAULT_NONE;
}

195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
/**
 * devcgroup_online - initializes devcgroup's behavior and exceptions based on
 * 		      parent's
 * @cgroup: cgroup getting online
 * returns 0 in case of success, error code otherwise
 *
 * The root cgroup (no parent) defaults to allow-all; children inherit
 * their parent's behavior and a copy of its exception list.
 */
static int devcgroup_online(struct cgroup *cgroup)
{
	struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL;
	int ret = 0;

	mutex_lock(&devcgroup_mutex);
	dev_cgroup = cgroup_to_devcgroup(cgroup);
	if (cgroup->parent)
		parent_dev_cgroup = cgroup_to_devcgroup(cgroup->parent);

	if (parent_dev_cgroup == NULL)
		dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
	else {
		ret = dev_exceptions_copy(&dev_cgroup->exceptions,
					  &parent_dev_cgroup->exceptions);
		/* behavior stays DEVCG_DEFAULT_NONE (offline) on failure */
		if (!ret)
			dev_cgroup->behavior = parent_dev_cgroup->behavior;
	}
	mutex_unlock(&devcgroup_mutex);

	return ret;
}

/* mark the cgroup offline; is_devcg_online() becomes false from here on */
static void devcgroup_offline(struct cgroup *cgroup)
{
	struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup);

	mutex_lock(&devcgroup_mutex);
	dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
	mutex_unlock(&devcgroup_mutex);
}

233 234 235
/*
 * called from kernel/cgroup.c with cgroup_lock() held.
 */
236
static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup)
237
{
238
	struct dev_cgroup *dev_cgroup;
239 240 241 242 243
	struct cgroup *parent_cgroup;

	dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
	if (!dev_cgroup)
		return ERR_PTR(-ENOMEM);
244
	INIT_LIST_HEAD(&dev_cgroup->exceptions);
245
	INIT_LIST_HEAD(&dev_cgroup->propagate_pending);
246
	dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
247 248 249 250 251
	parent_cgroup = cgroup->parent;

	return &dev_cgroup->css;
}

252
/* release everything devcgroup_css_alloc() set up */
static void devcgroup_css_free(struct cgroup *cgroup)
{
	struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup);

	__dev_exception_clean(dev_cgroup);
	kfree(dev_cgroup);
}

#define DEVCG_ALLOW 1
#define DEVCG_DENY 2
263 264
#define DEVCG_LIST 3

265
#define MAJMINLEN 13
266
#define ACCLEN 4
267 268 269 270

static void set_access(char *acc, short access)
{
	int idx = 0;
271
	memset(acc, 0, ACCLEN);
272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
	if (access & ACC_READ)
		acc[idx++] = 'r';
	if (access & ACC_WRITE)
		acc[idx++] = 'w';
	if (access & ACC_MKNOD)
		acc[idx++] = 'm';
}

/* map a DEV_* device type to its one-letter representation */
static char type_to_char(short type)
{
	switch (type) {
	case DEV_ALL:
		return 'a';
	case DEV_CHAR:
		return 'c';
	case DEV_BLOCK:
		return 'b';
	default:
		return 'X';
	}
}

291
/* render a major/minor number into @str; ~0 prints as the "*" wildcard */
static void set_majmin(char *str, unsigned m)
{
	if (m == ~0) {
		str[0] = '*';
		str[1] = '\0';
	} else {
		sprintf(str, "%u", m);
	}
}

299 300
/*
 * devcgroup_seq_read - implements the devices.list file
 *
 * Runs under rcu_read_lock() so a concurrent writer (holding
 * devcgroup_mutex) cannot free entries out from under us.
 */
static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
				struct seq_file *m)
{
	struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup);
	struct dev_exception_item *ex;
	char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];

	rcu_read_lock();
	/*
	 * To preserve the compatibility:
	 * - Only show the "all devices" when the default policy is to allow
	 * - List the exceptions in case the default policy is to deny
	 * This way, the file remains as a "whitelist of devices"
	 */
	if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
		set_access(acc, ACC_MASK);
		set_majmin(maj, ~0);
		set_majmin(min, ~0);
		seq_printf(m, "%c %s:%s %s\n", type_to_char(DEV_ALL),
			   maj, min, acc);
	} else {
		list_for_each_entry_rcu(ex, &devcgroup->exceptions, list) {
			set_access(acc, ex->access);
			set_majmin(maj, ex->major);
			set_majmin(min, ex->minor);
			seq_printf(m, "%c %s:%s %s\n", type_to_char(ex->type),
				   maj, min, acc);
		}
	}
	rcu_read_unlock();

	return 0;
}

333
/**
 * may_access - verifies if a new exception is part of what is allowed
 *		by a dev cgroup based on the default policy +
 *		exceptions. This is used to make sure a child cgroup
 *		won't have more privileges than its parent or to
 *		verify if a certain access is allowed.
 * @dev_cgroup: dev cgroup to be tested against
 * @refex: new exception
 * @behavior: behavior of the exception
 */
static bool may_access(struct dev_cgroup *dev_cgroup,
		       struct dev_exception_item *refex,
		       enum devcg_behavior behavior)
{
	struct dev_exception_item *ex;
	bool match = false;

	rcu_lockdep_assert(rcu_read_lock_held() ||
			   lockdep_is_held(&devcgroup_mutex),
			   "device_cgroup::may_access() called without proper synchronization");

	/* look for an exception covering @refex; ~0 major/minor = wildcard */
	list_for_each_entry_rcu(ex, &dev_cgroup->exceptions, list) {
		if ((refex->type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
			continue;
		if ((refex->type & DEV_CHAR) && !(ex->type & DEV_CHAR))
			continue;
		if (ex->major != ~0 && ex->major != refex->major)
			continue;
		if (ex->minor != ~0 && ex->minor != refex->minor)
			continue;
		/* @refex must not request access bits beyond what ex covers */
		if (refex->access & (~ex->access))
			continue;
		match = true;
		break;
	}

	if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
		if (behavior == DEVCG_DEFAULT_ALLOW) {
			/* the exception will deny access to certain devices */
			return true;
		} else {
			/* the exception will allow access to certain devices */
			if (match)
				/*
				 * a new exception allowing access shouldn't
				 * match an parent's exception
				 */
				return false;
			return true;
		}
	} else {
		/* only behavior == DEVCG_DEFAULT_DENY allowed here */
		if (match)
			/* parent has an exception that matches the proposed */
			return true;
		else
			return false;
	}
	return false;	/* not reachable; both branches above return */
}

/*
 * parent_has_perm:
 * when adding a new allow rule to a device exception list, the rule
 * must be allowed in the parent device
 *
 * The root cgroup has no parent, so anything is permitted there.
 */
static int parent_has_perm(struct dev_cgroup *childcg,
				  struct dev_exception_item *ex)
{
	struct cgroup *pcg = childcg->css.cgroup->parent;
	struct dev_cgroup *parent;

	if (!pcg)
		return 1;
	parent = cgroup_to_devcgroup(pcg);
	return may_access(parent, ex, childcg->behavior);
}

411 412 413 414 415 416 417 418
/**
 * may_allow_all - checks if it's possible to change the behavior to
 *		   allow based on parent's rules.
 * @parent: device cgroup's parent
 * returns: != 0 in case it's allowed, 0 otherwise
 */
static inline int may_allow_all(struct dev_cgroup *parent)
{
419 420
	if (!parent)
		return 1;
421 422 423
	return parent->behavior == DEVCG_DEFAULT_ALLOW;
}

424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528
/**
 * revalidate_active_exceptions - walks through the active exception list and
 * 				  revalidates the exceptions based on parent's
 * 				  behavior and exceptions. The exceptions that
 * 				  are no longer valid will be removed.
 * 				  Called with devcgroup_mutex held.
 * @devcg: cgroup which exceptions will be checked
 *
 * This is one of the three key functions for hierarchy implementation.
 * This function is responsible for re-evaluating all the cgroup's active
 * exceptions due to a parent's exception change.
 * Refer to Documentation/cgroups/devices.txt for more details.
 */
static void revalidate_active_exceptions(struct dev_cgroup *devcg)
{
	struct dev_exception_item *ex;
	struct list_head *this, *tmp;

	list_for_each_safe(this, tmp, &devcg->exceptions) {
		ex = container_of(this, struct dev_exception_item, list);
		if (!parent_has_perm(devcg, ex))
			dev_exception_rm(devcg, ex);
	}
}

/**
 * get_online_devcg - walks the cgroup tree and fills a list with the online
 * 		      groups
 * @root: cgroup used as starting point
 * @online: list that will be filled with online groups
 *
 * Must be called with devcgroup_mutex held. Grabs RCU lock.
 * Because devcgroup_mutex is held, no devcg will become online or offline
 * during the tree walk (see devcgroup_online, devcgroup_offline)
 * A separated list is needed because propagate_behavior() and
 * propagate_exception() need to allocate memory and can block.
 */
static void get_online_devcg(struct cgroup *root, struct list_head *online)
{
	struct cgroup *pos;
	struct dev_cgroup *devcg;

	lockdep_assert_held(&devcgroup_mutex);

	rcu_read_lock();
	cgroup_for_each_descendant_pre(pos, root) {
		devcg = cgroup_to_devcgroup(pos);
		/* propagate_pending doubles as the linkage for @online */
		if (is_devcg_online(devcg))
			list_add_tail(&devcg->propagate_pending, online);
	}
	rcu_read_unlock();
}

/**
 * propagate_exception - propagates a new exception to the children
 * @devcg_root: device cgroup that added a new exception
 * @ex: new exception to be propagated
 *
 * Called with devcgroup_mutex held (see devcgroup_update_access()).
 * returns: 0 in case of success, != 0 in case of error
 */
static int propagate_exception(struct dev_cgroup *devcg_root,
			       struct dev_exception_item *ex)
{
	struct cgroup *root = devcg_root->css.cgroup;
	struct dev_cgroup *devcg, *parent, *tmp;
	int rc = 0;
	LIST_HEAD(pending);

	get_online_devcg(root, &pending);

	list_for_each_entry_safe(devcg, tmp, &pending, propagate_pending) {
		parent = cgroup_to_devcgroup(devcg->css.cgroup->parent);

		/*
		 * in case both root's behavior and devcg is allow, a new
		 * restriction means adding to the exception list
		 */
		if (devcg_root->behavior == DEVCG_DEFAULT_ALLOW &&
		    devcg->behavior == DEVCG_DEFAULT_ALLOW) {
			rc = dev_exception_add(devcg, ex);
			if (rc)
				break;
		} else {
			/*
			 * in the other possible cases:
			 * root's behavior: allow, devcg's: deny
			 * root's behavior: deny, devcg's: deny
			 * the exception will be removed
			 */
			dev_exception_rm(devcg, ex);
		}
		/* drop any child exception the stricter parent now forbids */
		revalidate_active_exceptions(devcg);

		list_del_init(&devcg->propagate_pending);
	}
	return rc;
}

/* true if @devcgroup's cgroup has at least one child cgroup */
static inline bool has_children(struct dev_cgroup *devcgroup)
{
	return !list_empty(&devcgroup->css.cgroup->children);
}

529
/*
 * Modify the exception list using allow/deny rules.
 * CAP_SYS_ADMIN is needed for this.  It's at least separate from CAP_MKNOD
 * so we can give a container CAP_MKNOD to let it create devices but not
 * modify the exception list.
 * It seems likely we'll want to add a CAP_CONTAINER capability to allow
 * us to also grant CAP_SYS_ADMIN to containers without giving away the
 * device exception list controls, but for now we'll stick with CAP_SYS_ADMIN
 *
 * Taking rules away is always allowed (given CAP_SYS_ADMIN).  Granting
 * new access is only allowed if you're in the top-level cgroup, or your
 * parent cgroup has the access you're asking for.
 *
 * The rule format parsed from @buffer is:
 *	a			(all devices), or
 *	<b|c> <major|*>:<minor|*> [rwm]
 */
static int devcgroup_update_access(struct dev_cgroup *devcgroup,
				   int filetype, const char *buffer)
{
	const char *b;
	char temp[12];		/* 11 + 1 characters needed for a u32 */
	int count, rc = 0;
	struct dev_exception_item ex;
	struct cgroup *p = devcgroup->css.cgroup;
	struct dev_cgroup *parent = NULL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (p->parent)
		parent = cgroup_to_devcgroup(p->parent);

	memset(&ex, 0, sizeof(ex));
	b = buffer;

	/* read device type ('a' switches the whole default behavior) */
	switch (*b) {
	case 'a':
		switch (filetype) {
		case DEVCG_ALLOW:
			/* behavior changes are refused once children exist */
			if (has_children(devcgroup))
				return -EINVAL;

			if (!may_allow_all(parent))
				return -EPERM;
			dev_exception_clean(devcgroup);
			devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
			if (!parent)
				break;

			rc = dev_exceptions_copy(&devcgroup->exceptions,
						 &parent->exceptions);
			if (rc)
				return rc;
			break;
		case DEVCG_DENY:
			if (has_children(devcgroup))
				return -EINVAL;

			dev_exception_clean(devcgroup);
			devcgroup->behavior = DEVCG_DEFAULT_DENY;
			break;
		default:
			return -EINVAL;
		}
		return 0;
	case 'b':
		ex.type = DEV_BLOCK;
		break;
	case 'c':
		ex.type = DEV_CHAR;
		break;
	default:
		return -EINVAL;
	}
	b++;
	if (!isspace(*b))
		return -EINVAL;
	b++;
	/* read major */
	if (*b == '*') {
		ex.major = ~0;
		b++;
	} else if (isdigit(*b)) {
		memset(temp, 0, sizeof(temp));
		for (count = 0; count < sizeof(temp) - 1; count++) {
			temp[count] = *b;
			b++;
			if (!isdigit(*b))
				break;
		}
		rc = kstrtou32(temp, 10, &ex.major);
		if (rc)
			return -EINVAL;
	} else {
		return -EINVAL;
	}
	if (*b != ':')
		return -EINVAL;
	b++;

	/* read minor */
	if (*b == '*') {
		ex.minor = ~0;
		b++;
	} else if (isdigit(*b)) {
		memset(temp, 0, sizeof(temp));
		for (count = 0; count < sizeof(temp) - 1; count++) {
			temp[count] = *b;
			b++;
			if (!isdigit(*b))
				break;
		}
		rc = kstrtou32(temp, 10, &ex.minor);
		if (rc)
			return -EINVAL;
	} else {
		return -EINVAL;
	}
	if (!isspace(*b))
		return -EINVAL;
	/* read the access string: up to three of 'r', 'w', 'm' */
	for (b++, count = 0; count < 3; count++, b++) {
		switch (*b) {
		case 'r':
			ex.access |= ACC_READ;
			break;
		case 'w':
			ex.access |= ACC_WRITE;
			break;
		case 'm':
			ex.access |= ACC_MKNOD;
			break;
		case '\n':
		case '\0':
			count = 3;
			break;
		default:
			return -EINVAL;
		}
	}

	switch (filetype) {
	case DEVCG_ALLOW:
		if (!parent_has_perm(devcgroup, &ex))
			return -EPERM;
		/*
		 * If the default policy is to allow by default, try to remove
		 * an matching exception instead. And be silent about it: we
		 * don't want to break compatibility
		 */
		if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
			dev_exception_rm(devcgroup, &ex);
			return 0;
		}
		rc = dev_exception_add(devcgroup, &ex);
		break;
	case DEVCG_DENY:
		/*
		 * If the default policy is to deny by default, try to remove
		 * an matching exception instead. And be silent about it: we
		 * don't want to break compatibility
		 */
		if (devcgroup->behavior == DEVCG_DEFAULT_DENY)
			dev_exception_rm(devcgroup, &ex);
		else
			rc = dev_exception_add(devcgroup, &ex);

		if (rc)
			break;
		/* we only propagate new restrictions */
		rc = propagate_exception(devcgroup, &ex);
		break;
	default:
		rc = -EINVAL;
	}
	return rc;
}
701

702 703 704 705
/*
 * write handler for devices.allow / devices.deny; serializes all updates
 * behind devcgroup_mutex and dispatches on cft->private (DEVCG_ALLOW/DENY).
 */
static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
				  const char *buffer)
{
	int retval;

	mutex_lock(&devcgroup_mutex);
	retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
					 cft->private, buffer);
	mutex_unlock(&devcgroup_mutex);
	return retval;
}

/* control files exposed by this subsystem: devices.{allow,deny,list} */
static struct cftype dev_cgroup_files[] = {
	{
		.name = "allow",
		.write_string  = devcgroup_access_write,
		.private = DEVCG_ALLOW,
	},
	{
		.name = "deny",
		.write_string = devcgroup_access_write,
		.private = DEVCG_DENY,
	},
	{
		.name = "list",
		.read_seq_string = devcgroup_seq_read,
		.private = DEVCG_LIST,
	},
	{ }	/* terminate */
};
};

/* subsystem registration: callbacks wired into the cgroup core */
struct cgroup_subsys devices_subsys = {
	.name = "devices",
	.can_attach = devcgroup_can_attach,
	.css_alloc = devcgroup_css_alloc,
	.css_free = devcgroup_css_free,
	.css_online = devcgroup_online,
	.css_offline = devcgroup_offline,
	.subsys_id = devices_subsys_id,
	.base_cftypes = dev_cgroup_files,
};

744 745 746 747 748 749 750 751 752 753
/**
 * __devcgroup_check_permission - checks if an inode operation is permitted
 * @dev_cgroup: the dev cgroup to be tested against
 * @type: device type
 * @major: device major number
 * @minor: device minor number
 * @access: combination of ACC_WRITE, ACC_READ and ACC_MKNOD
 *
 * returns 0 on success, -EPERM case the operation is not permitted
 */
static int __devcgroup_check_permission(short type, u32 major, u32 minor,
				        short access)
{
	struct dev_cgroup *dev_cgroup;
	struct dev_exception_item ex;
	int rc;

	/* build a throwaway exception describing the requested access */
	memset(&ex, 0, sizeof(ex));
	ex.type = type;
	ex.major = major;
	ex.minor = minor;
	ex.access = access;

	rcu_read_lock();
	dev_cgroup = task_devcgroup(current);
	rc = may_access(dev_cgroup, &ex, dev_cgroup->behavior);
	rcu_read_unlock();

	if (!rc)
		return -EPERM;

	return 0;
}
777

778 779 780 781 782 783 784 785 786 787 788 789 790
/**
 * __devcgroup_inode_permission - check device-cgroup policy for an inode
 * @inode: inode of the device special file being accessed
 * @mask: MAY_READ/MAY_WRITE bits requested
 *
 * returns 0 on success, -EPERM otherwise
 */
int __devcgroup_inode_permission(struct inode *inode, int mask)
{
	/*
	 * Initialize @type: callers are expected to pass only block or
	 * char device inodes, but if neither S_ISBLK nor S_ISCHR matches
	 * the original code handed an uninitialized value to
	 * __devcgroup_check_permission().
	 */
	short type = 0, access = 0;

	if (S_ISBLK(inode->i_mode))
		type = DEV_BLOCK;
	if (S_ISCHR(inode->i_mode))
		type = DEV_CHAR;
	if (mask & MAY_WRITE)
		access |= ACC_WRITE;
	if (mask & MAY_READ)
		access |= ACC_READ;

	return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
			access);
}

/*
 * devcgroup_inode_mknod - check device-cgroup policy for mknod(2)
 * @mode: file mode of the node being created
 * @dev: device number being created
 *
 * Non-device nodes (fifos, sockets, regular files) are always allowed.
 * returns 0 on success, -EPERM otherwise.
 */
int devcgroup_inode_mknod(int mode, dev_t dev)
{
	short type;

	if (!S_ISBLK(mode) && !S_ISCHR(mode))
		return 0;

	if (S_ISBLK(mode))
		type = DEV_BLOCK;
	else
		type = DEV_CHAR;

	return __devcgroup_check_permission(type, MAJOR(dev), MINOR(dev),
			ACC_MKNOD);
}