device_cgroup.c 18.9 KB
Newer Older
1
/*
L
Lai Jiangshan 已提交
2
 * device_cgroup.c - device cgroup subsystem
3 4 5 6 7 8 9 10 11
 *
 * Copyright 2007 IBM Corp
 */

#include <linux/device_cgroup.h>
#include <linux/cgroup.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/uaccess.h>
12
#include <linux/seq_file.h>
13
#include <linux/slab.h>
L
Lai Jiangshan 已提交
14
#include <linux/rcupdate.h>
L
Li Zefan 已提交
15
#include <linux/mutex.h>
16 17 18 19 20 21 22 23 24 25

/* access-type bits for an exception; combinable into a mask */
#define ACC_MKNOD 1
#define ACC_READ  2
#define ACC_WRITE 4
#define ACC_MASK (ACC_MKNOD | ACC_READ | ACC_WRITE)

/* device-type bits for an exception */
#define DEV_BLOCK 1
#define DEV_CHAR  2
#define DEV_ALL   4  /* this represents all devices */

L
Li Zefan 已提交
26 27
/* serializes all updates (and sleeping readers) of every dev_cgroup */
static DEFINE_MUTEX(devcgroup_mutex);

/* default policy of a dev_cgroup when no exception matches */
enum devcg_behavior {
	DEVCG_DEFAULT_NONE,	/* not initialized yet (cgroup not online) */
	DEVCG_DEFAULT_ALLOW,	/* allow by default; exceptions deny */
	DEVCG_DEFAULT_DENY,	/* deny by default; exceptions allow */
};

34
/*
 * exception list locking rules:
 * hold devcgroup_mutex for update/read.
 * hold rcu_read_lock() for read.
 */

struct dev_exception_item {
	u32 major, minor;	/* device numbers; ~0 acts as a wildcard */
	short type;		/* DEV_BLOCK and/or DEV_CHAR */
	short access;		/* mask of ACC_MKNOD/ACC_READ/ACC_WRITE */
	struct list_head list;	/* linked on dev_cgroup->exceptions (RCU) */
	struct rcu_head rcu;	/* deferred free after an RCU grace period */
};

/* per-cgroup device access state */
struct dev_cgroup {
	struct cgroup_subsys_state css;
	struct list_head exceptions;	/* dev_exception_item list, RCU-read */
	enum devcg_behavior behavior;	/* default allow/deny policy */
	/* temporary list for pending propagation operations */
	struct list_head propagate_pending;
};

56 57 58 59 60
/* map a cgroup_subsys_state back to its embedding dev_cgroup */
static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
{
	return container_of(s, struct dev_cgroup, css);
}

61 62
/* fetch the devices-subsystem state attached to @cgroup */
static inline struct dev_cgroup *cgroup_to_devcgroup(struct cgroup *cgroup)
{
	return css_to_devcgroup(cgroup_subsys_state(cgroup, devices_subsys_id));
}

66 67 68 69 70
/* fetch the dev_cgroup @task currently belongs to */
static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
{
	return css_to_devcgroup(task_subsys_state(task, devices_subsys_id));
}

71 72
struct cgroup_subsys devices_subsys;

73 74
/*
 * Attach permission check: a task may always move itself; moving another
 * task requires CAP_SYS_ADMIN.  Only the first task of the set is checked
 * here — NOTE(review): presumably the whole set shares the same mover;
 * confirm against the cgroup core's taskset semantics.
 */
static int devcgroup_can_attach(struct cgroup *new_cgrp,
				struct cgroup_taskset *set)
{
	struct task_struct *task = cgroup_taskset_first(set);

	if (current != task && !capable(CAP_SYS_ADMIN))
		return -EPERM;
	return 0;
}

/*
 * dev_exceptions_copy - duplicate every exception on @orig onto @dest
 *
 * Returns 0 on success or -ENOMEM; on allocation failure every entry
 * already copied onto @dest is unlinked and freed again, leaving @dest
 * as it was found (assuming it started empty).
 *
 * called under devcgroup_mutex
 */
static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
{
	struct dev_exception_item *ex, *tmp, *new;

	lockdep_assert_held(&devcgroup_mutex);

	list_for_each_entry(ex, orig, list) {
		/* kmemdup also copies list/rcu fields; list_add_tail relinks */
		new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
		if (!new)
			goto free_and_exit;
		list_add_tail(&new->list, dest);
	}

	return 0;

free_and_exit:
	list_for_each_entry_safe(ex, tmp, dest, list) {
		list_del(&ex->list);
		kfree(ex);
	}
	return -ENOMEM;
}

/*
 * dev_exception_add - add an exception, or widen a matching one
 *
 * If an entry with the same type/major/minor already exists, its access
 * mask is OR-ed with @ex's and no new entry is created; otherwise a copy
 * of @ex is appended to the list in an RCU-safe manner.
 * Returns 0 on success, -ENOMEM on allocation failure.
 *
 * called under devcgroup_mutex
 */
static int dev_exception_add(struct dev_cgroup *dev_cgroup,
			     struct dev_exception_item *ex)
{
	struct dev_exception_item *excopy, *walk;

	lockdep_assert_held(&devcgroup_mutex);

	/* allocate up front so the merge loop cannot fail halfway */
	excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
	if (!excopy)
		return -ENOMEM;

	list_for_each_entry(walk, &dev_cgroup->exceptions, list) {
		if (walk->type != ex->type)
			continue;
		if (walk->major != ex->major)
			continue;
		if (walk->minor != ex->minor)
			continue;

		/* same device already listed: merge access bits in place */
		walk->access |= ex->access;
		kfree(excopy);
		excopy = NULL;
	}

	if (excopy != NULL)
		list_add_tail_rcu(&excopy->list, &dev_cgroup->exceptions);
	return 0;
}

/*
 * dev_exception_rm - narrow or remove an exception
 *
 * Clears @ex's access bits from any entry matching type/major/minor
 * exactly; an entry left with no access bits is unlinked (RCU) and
 * freed after a grace period.
 *
 * called under devcgroup_mutex
 */
static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
			     struct dev_exception_item *ex)
{
	struct dev_exception_item *walk, *tmp;

	lockdep_assert_held(&devcgroup_mutex);

	list_for_each_entry_safe(walk, tmp, &dev_cgroup->exceptions, list) {
		if (walk->type != ex->type)
			continue;
		if (walk->major != ex->major)
			continue;
		if (walk->minor != ex->minor)
			continue;

		walk->access &= ~ex->access;
		if (!walk->access) {
			/* no access left: drop the entry entirely */
			list_del_rcu(&walk->list);
			kfree_rcu(walk, rcu);
		}
	}
}

167 168 169 170 171 172 173 174 175 176
/* unlink and free every exception; concurrent readers are RCU-protected */
static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
{
	struct dev_exception_item *ex, *tmp;

	list_for_each_entry_safe(ex, tmp, &dev_cgroup->exceptions, list) {
		list_del_rcu(&ex->list);
		kfree_rcu(ex, rcu);
	}
}

177
/**
 * dev_exception_clean - frees all entries of the exception list
 * @dev_cgroup: dev_cgroup with the exception list to be cleaned
 *
 * called under devcgroup_mutex
 */
static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
{
	lockdep_assert_held(&devcgroup_mutex);

	__dev_exception_clean(dev_cgroup);
}

190 191 192 193 194
/* a devcg is online once devcgroup_online() has assigned a real behavior */
static inline bool is_devcg_online(const struct dev_cgroup *devcg)
{
	return devcg->behavior != DEVCG_DEFAULT_NONE;
}

195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232
/**
 * devcgroup_online - initializes devcgroup's behavior and exceptions based on
 * 		      parent's
 * @cgroup: cgroup getting online
 * returns 0 in case of success, error code otherwise
 */
static int devcgroup_online(struct cgroup *cgroup)
{
	struct dev_cgroup *dev_cgroup, *parent_dev_cgroup = NULL;
	int ret = 0;

	mutex_lock(&devcgroup_mutex);
	dev_cgroup = cgroup_to_devcgroup(cgroup);
	if (cgroup->parent)
		parent_dev_cgroup = cgroup_to_devcgroup(cgroup->parent);

	/* root gets "allow all"; children inherit the parent's state */
	if (parent_dev_cgroup == NULL)
		dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
	else {
		ret = dev_exceptions_copy(&dev_cgroup->exceptions,
					  &parent_dev_cgroup->exceptions);
		if (!ret)
			dev_cgroup->behavior = parent_dev_cgroup->behavior;
	}
	mutex_unlock(&devcgroup_mutex);

	return ret;
}

/* mark the devcg offline again (see is_devcg_online) */
static void devcgroup_offline(struct cgroup *cgroup)
{
	struct dev_cgroup *dev_cgroup = cgroup_to_devcgroup(cgroup);

	mutex_lock(&devcgroup_mutex);
	dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
	mutex_unlock(&devcgroup_mutex);
}

233 234 235
/*
 * called from kernel/cgroup.c with cgroup_lock() held.
 */
static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup)
{
	struct dev_cgroup *dev_cgroup;

	dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
	if (!dev_cgroup)
		return ERR_PTR(-ENOMEM);
	INIT_LIST_HEAD(&dev_cgroup->exceptions);
	INIT_LIST_HEAD(&dev_cgroup->propagate_pending);
	/* real behavior is chosen later, in devcgroup_online() */
	dev_cgroup->behavior = DEVCG_DEFAULT_NONE;

	return &dev_cgroup->css;
}

250
/* release the dev_cgroup and every exception it still holds */
static void devcgroup_css_free(struct cgroup *cgroup)
{
	struct dev_cgroup *dev_cgroup;

	dev_cgroup = cgroup_to_devcgroup(cgroup);
	__dev_exception_clean(dev_cgroup);
	kfree(dev_cgroup);
}

/* cftype->private tags identifying which control file is being accessed */
#define DEVCG_ALLOW 1
#define DEVCG_DENY 2
#define DEVCG_LIST 3

#define MAJMINLEN 13	/* buffer size for a decimal u32 or "*" */
#define ACCLEN 4	/* buffer size for "rwm" plus the NUL terminator */
265 266 267 268

static void set_access(char *acc, short access)
{
	int idx = 0;
269
	memset(acc, 0, ACCLEN);
270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
	if (access & ACC_READ)
		acc[idx++] = 'r';
	if (access & ACC_WRITE)
		acc[idx++] = 'w';
	if (access & ACC_MKNOD)
		acc[idx++] = 'm';
}

/* single-letter device-type tag used by the "devices.list" output */
static char type_to_char(short type)
{
	switch (type) {
	case DEV_ALL:
		return 'a';
	case DEV_CHAR:
		return 'c';
	case DEV_BLOCK:
		return 'b';
	default:
		return 'X';	/* should not happen for well-formed entries */
	}
}

289
/* format device number @m into @str; ~0 is printed as the "*" wildcard */
static void set_majmin(char *str, unsigned m)
{
	if (m != (unsigned)~0) {
		sprintf(str, "%u", m);
		return;
	}
	strcpy(str, "*");
}

297 298
/* seq_file show handler for the "devices.list" control file */
static int devcgroup_seq_read(struct cgroup *cgroup, struct cftype *cft,
				struct seq_file *m)
{
	struct dev_cgroup *devcgroup = cgroup_to_devcgroup(cgroup);
	struct dev_exception_item *ex;
	char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];

	rcu_read_lock();
	/*
	 * To preserve the compatibility:
	 * - Only show the "all devices" when the default policy is to allow
	 * - List the exceptions in case the default policy is to deny
	 * This way, the file remains as a "whitelist of devices"
	 */
	if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
		set_access(acc, ACC_MASK);
		set_majmin(maj, ~0);
		set_majmin(min, ~0);
		seq_printf(m, "%c %s:%s %s\n", type_to_char(DEV_ALL),
			   maj, min, acc);
	} else {
		list_for_each_entry_rcu(ex, &devcgroup->exceptions, list) {
			set_access(acc, ex->access);
			set_majmin(maj, ex->major);
			set_majmin(min, ex->minor);
			seq_printf(m, "%c %s:%s %s\n", type_to_char(ex->type),
				   maj, min, acc);
		}
	}
	rcu_read_unlock();

	return 0;
}

331
/**
 * may_access - verifies if a new exception is part of what is allowed
 *		by a dev cgroup based on the default policy +
 *		exceptions. This is used to make sure a child cgroup
 *		won't have more privileges than its parent or to
 *		verify if a certain access is allowed.
 * @dev_cgroup: dev cgroup to be tested against
 * @refex: new exception
 * @behavior: behavior of the exception
 */
static bool may_access(struct dev_cgroup *dev_cgroup,
		       struct dev_exception_item *refex,
		       enum devcg_behavior behavior)
{
	struct dev_exception_item *ex;
	bool match = false;

	rcu_lockdep_assert(rcu_read_lock_held() ||
			   lockdep_is_held(&devcgroup_mutex),
			   "device_cgroup::may_access() called without proper synchronization");

	/*
	 * Look for an existing exception that fully covers @refex:
	 * same device type, major/minor equal or wildcarded, and an
	 * access mask that is a superset of the requested one.
	 */
	list_for_each_entry_rcu(ex, &dev_cgroup->exceptions, list) {
		if ((refex->type & DEV_BLOCK) && !(ex->type & DEV_BLOCK))
			continue;
		if ((refex->type & DEV_CHAR) && !(ex->type & DEV_CHAR))
			continue;
		if (ex->major != ~0 && ex->major != refex->major)
			continue;
		if (ex->minor != ~0 && ex->minor != refex->minor)
			continue;
		if (refex->access & (~ex->access))
			continue;
		match = true;
		break;
	}

	if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
		if (behavior == DEVCG_DEFAULT_ALLOW) {
			/* the exception will deny access to certain devices */
			return true;
		} else {
			/* the exception will allow access to certain devices */
			if (match)
				/*
				 * a new exception allowing access shouldn't
				 * match an parent's exception
				 */
				return false;
			return true;
		}
	} else {
		/* only behavior == DEVCG_DEFAULT_DENY allowed here */
		if (match)
			/* parent has an exception that matches the proposed */
			return true;
		else
			return false;
	}
	return false;	/* unreachable: both branches above return */
}

/*
 * parent_has_perm:
 * when adding a new allow rule to a device exception list, the rule
 * must be allowed in the parent device
 */
static int parent_has_perm(struct dev_cgroup *childcg,
				  struct dev_exception_item *ex)
{
	struct cgroup *pcg = childcg->css.cgroup->parent;
	struct dev_cgroup *parent;

	/* the root cgroup has no parent to restrict it */
	if (!pcg)
		return 1;
	parent = cgroup_to_devcgroup(pcg);
	return may_access(parent, ex, childcg->behavior);
}

409 410 411 412 413 414 415 416
/**
 * may_allow_all - checks if it's possible to change the behavior to
 *		   allow based on parent's rules.
 * @parent: device cgroup's parent
 * returns: != 0 in case it's allowed, 0 otherwise
 */
static inline int may_allow_all(struct dev_cgroup *parent)
{
417 418
	if (!parent)
		return 1;
419 420 421
	return parent->behavior == DEVCG_DEFAULT_ALLOW;
}

422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526
/**
 * revalidate_active_exceptions - walks through the active exception list and
 * 				  revalidates the exceptions based on parent's
 * 				  behavior and exceptions. The exceptions that
 * 				  are no longer valid will be removed.
 * 				  Called with devcgroup_mutex held.
 * @devcg: cgroup which exceptions will be checked
 *
 * This is one of the three key functions for hierarchy implementation.
 * This function is responsible for re-evaluating all the cgroup's active
 * exceptions due to a parent's exception change.
 * Refer to Documentation/cgroups/devices.txt for more details.
 */
static void revalidate_active_exceptions(struct dev_cgroup *devcg)
{
	struct dev_exception_item *ex;
	struct list_head *this, *tmp;

	list_for_each_safe(this, tmp, &devcg->exceptions) {
		ex = container_of(this, struct dev_exception_item, list);
		if (!parent_has_perm(devcg, ex))
			dev_exception_rm(devcg, ex);
	}
}

/**
 * get_online_devcg - walks the cgroup tree and fills a list with the online
 * 		      groups
 * @root: cgroup used as starting point
 * @online: list that will be filled with online groups
 *
 * Must be called with devcgroup_mutex held. Grabs RCU lock.
 * Because devcgroup_mutex is held, no devcg will become online or offline
 * during the tree walk (see devcgroup_online, devcgroup_offline)
 * A separated list is needed because propagate_behavior() and
 * propagate_exception() need to allocate memory and can block.
 */
static void get_online_devcg(struct cgroup *root, struct list_head *online)
{
	struct cgroup *pos;
	struct dev_cgroup *devcg;

	lockdep_assert_held(&devcgroup_mutex);

	rcu_read_lock();
	cgroup_for_each_descendant_pre(pos, root) {
		devcg = cgroup_to_devcgroup(pos);
		if (is_devcg_online(devcg))
			/* reuses the devcg's own propagate_pending node */
			list_add_tail(&devcg->propagate_pending, online);
	}
	rcu_read_unlock();
}

/**
 * propagate_exception - propagates a new exception to the children
 * @devcg_root: device cgroup that added a new exception
 * @ex: new exception to be propagated
 *
 * returns: 0 in case of success, != 0 in case of error
 */
static int propagate_exception(struct dev_cgroup *devcg_root,
			       struct dev_exception_item *ex)
{
	struct cgroup *root = devcg_root->css.cgroup;
	struct dev_cgroup *devcg, *parent, *tmp;
	int rc = 0;
	LIST_HEAD(pending);

	get_online_devcg(root, &pending);

	list_for_each_entry_safe(devcg, tmp, &pending, propagate_pending) {
		parent = cgroup_to_devcgroup(devcg->css.cgroup->parent);

		/*
		 * in case both root's behavior and devcg is allow, a new
		 * restriction means adding to the exception list
		 */
		if (devcg_root->behavior == DEVCG_DEFAULT_ALLOW &&
		    devcg->behavior == DEVCG_DEFAULT_ALLOW) {
			rc = dev_exception_add(devcg, ex);
			if (rc)
				break;
		} else {
			/*
			 * in the other possible cases:
			 * root's behavior: allow, devcg's: deny
			 * root's behavior: deny, devcg's: deny
			 * the exception will be removed
			 */
			dev_exception_rm(devcg, ex);
		}
		revalidate_active_exceptions(devcg);

		/* release the node borrowed by get_online_devcg() */
		list_del_init(&devcg->propagate_pending);
	}
	return rc;
}

/* true iff @devcgroup's cgroup currently has child cgroups */
static inline bool has_children(struct dev_cgroup *devcgroup)
{
	return !list_empty(&devcgroup->css.cgroup->children);
}

527
/*
 * Modify the exception list using allow/deny rules.
 * CAP_SYS_ADMIN is needed for this.  It's at least separate from CAP_MKNOD
 * so we can give a container CAP_MKNOD to let it create devices but not
 * modify the exception list.
 * It seems likely we'll want to add a CAP_CONTAINER capability to allow
 * us to also grant CAP_SYS_ADMIN to containers without giving away the
 * device exception list controls, but for now we'll stick with CAP_SYS_ADMIN
 *
 * Taking rules away is always allowed (given CAP_SYS_ADMIN).  Granting
 * new access is only allowed if you're in the top-level cgroup, or your
 * parent cgroup has the access you're asking for.
 *
 * Input format: "a" (all), or "<b|c> <major|*>:<minor|*> [rwm]".
 */
static int devcgroup_update_access(struct dev_cgroup *devcgroup,
				   int filetype, const char *buffer)
{
	const char *b;
	char temp[12];		/* 11 + 1 characters needed for a u32 */
	int count, rc = 0;
	struct dev_exception_item ex;
	struct cgroup *p = devcgroup->css.cgroup;
	struct dev_cgroup *parent = NULL;

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

	if (p->parent)
		parent = cgroup_to_devcgroup(p->parent);

	memset(&ex, 0, sizeof(ex));
	b = buffer;

	/* parse the device type: 'a' = all, 'b' = block, 'c' = char */
	switch (*b) {
	case 'a':
		/* "a" switches the default behavior of the whole cgroup */
		switch (filetype) {
		case DEVCG_ALLOW:
			if (has_children(devcgroup))
				return -EINVAL;

			if (!may_allow_all(parent))
				return -EPERM;
			dev_exception_clean(devcgroup);
			devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
			if (!parent)
				break;

			/* re-inherit the parent's exceptions */
			rc = dev_exceptions_copy(&devcgroup->exceptions,
						 &parent->exceptions);
			if (rc)
				return rc;
			break;
		case DEVCG_DENY:
			if (has_children(devcgroup))
				return -EINVAL;

			dev_exception_clean(devcgroup);
			devcgroup->behavior = DEVCG_DEFAULT_DENY;
			break;
		default:
			return -EINVAL;
		}
		return 0;
	case 'b':
		ex.type = DEV_BLOCK;
		break;
	case 'c':
		ex.type = DEV_CHAR;
		break;
	default:
		return -EINVAL;
	}
	b++;
	if (!isspace(*b))
		return -EINVAL;
	b++;
	/* read major: '*' is the wildcard (~0), otherwise a decimal u32 */
	if (*b == '*') {
		ex.major = ~0;
		b++;
	} else if (isdigit(*b)) {
		memset(temp, 0, sizeof(temp));
		for (count = 0; count < sizeof(temp) - 1; count++) {
			temp[count] = *b;
			b++;
			if (!isdigit(*b))
				break;
		}
		rc = kstrtou32(temp, 10, &ex.major);
		if (rc)
			return -EINVAL;
	} else {
		return -EINVAL;
	}
	if (*b != ':')
		return -EINVAL;
	b++;

	/* read minor */
	if (*b == '*') {
		ex.minor = ~0;
		b++;
	} else if (isdigit(*b)) {
		memset(temp, 0, sizeof(temp));
		for (count = 0; count < sizeof(temp) - 1; count++) {
			temp[count] = *b;
			b++;
			if (!isdigit(*b))
				break;
		}
		rc = kstrtou32(temp, 10, &ex.minor);
		if (rc)
			return -EINVAL;
	} else {
		return -EINVAL;
	}
	if (!isspace(*b))
		return -EINVAL;
	/* read up to three access characters out of "rwm" */
	for (b++, count = 0; count < 3; count++, b++) {
		switch (*b) {
		case 'r':
			ex.access |= ACC_READ;
			break;
		case 'w':
			ex.access |= ACC_WRITE;
			break;
		case 'm':
			ex.access |= ACC_MKNOD;
			break;
		case '\n':
		case '\0':
			count = 3;
			break;
		default:
			return -EINVAL;
		}
	}

	switch (filetype) {
	case DEVCG_ALLOW:
		if (!parent_has_perm(devcgroup, &ex))
			return -EPERM;
		/*
		 * If the default policy is to allow by default, try to remove
		 * an matching exception instead. And be silent about it: we
		 * don't want to break compatibility
		 */
		if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
			dev_exception_rm(devcgroup, &ex);
			return 0;
		}
		rc = dev_exception_add(devcgroup, &ex);
		break;
	case DEVCG_DENY:
		/*
		 * If the default policy is to deny by default, try to remove
		 * an matching exception instead. And be silent about it: we
		 * don't want to break compatibility
		 */
		if (devcgroup->behavior == DEVCG_DEFAULT_DENY)
			dev_exception_rm(devcgroup, &ex);
		else
			rc = dev_exception_add(devcgroup, &ex);

		if (rc)
			break;
		/* we only propagate new restrictions */
		rc = propagate_exception(devcgroup, &ex);
		break;
	default:
		rc = -EINVAL;
	}
	return rc;
}
699

700 701 702 703
/* write handler shared by "devices.allow" and "devices.deny" */
static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,
				  const char *buffer)
{
	int retval;

	mutex_lock(&devcgroup_mutex);
	/* cft->private carries DEVCG_ALLOW or DEVCG_DENY */
	retval = devcgroup_update_access(cgroup_to_devcgroup(cgrp),
					 cft->private, buffer);
	mutex_unlock(&devcgroup_mutex);
	return retval;
}

/* control files exposed by the devices subsystem */
static struct cftype dev_cgroup_files[] = {
	{
		.name = "allow",
		.write_string  = devcgroup_access_write,
		.private = DEVCG_ALLOW,
	},
	{
		.name = "deny",
		.write_string = devcgroup_access_write,
		.private = DEVCG_DENY,
	},
	{
		.name = "list",
		.read_seq_string = devcgroup_seq_read,
		.private = DEVCG_LIST,
	},
	{ }	/* terminate */
};

/* registration of the "devices" cgroup subsystem with the cgroup core */
struct cgroup_subsys devices_subsys = {
	.name = "devices",
	.can_attach = devcgroup_can_attach,
	.css_alloc = devcgroup_css_alloc,
	.css_free = devcgroup_css_free,
	.css_online = devcgroup_online,
	.css_offline = devcgroup_offline,
	.subsys_id = devices_subsys_id,
	.base_cftypes = dev_cgroup_files,
};

742 743 744 745 746 747 748 749 750 751
/**
 * __devcgroup_check_permission - checks if an inode operation is permitted
 * @type: device type
 * @major: device major number
 * @minor: device minor number
 * @access: combination of ACC_WRITE, ACC_READ and ACC_MKNOD
 *
 * Tested against current's dev cgroup (the stale @dev_cgroup parameter
 * was dropped from the signature).
 *
 * returns 0 on success, -EPERM case the operation is not permitted
 */
static int __devcgroup_check_permission(short type, u32 major, u32 minor,
				        short access)
{
	struct dev_cgroup *dev_cgroup;
	struct dev_exception_item ex;
	int rc;

	/* build a reference exception describing the requested access */
	memset(&ex, 0, sizeof(ex));
	ex.type = type;
	ex.major = major;
	ex.minor = minor;
	ex.access = access;

	rcu_read_lock();
	dev_cgroup = task_devcgroup(current);
	rc = may_access(dev_cgroup, &ex, dev_cgroup->behavior);
	rcu_read_unlock();

	if (!rc)
		return -EPERM;

	return 0;
}
775

776 777 778 779 780 781 782 783 784 785 786 787 788
/*
 * __devcgroup_inode_permission - check read/write access to a device inode
 * @inode: inode being accessed
 * @mask: MAY_READ / MAY_WRITE bits requested
 *
 * returns 0 if the access is allowed, -EPERM otherwise
 */
int __devcgroup_inode_permission(struct inode *inode, int mask)
{
	/*
	 * Initialize type: previously it was left uninitialized (undefined
	 * behavior) when the inode was neither a block nor a char device.
	 */
	short type = 0, access = 0;

	if (S_ISBLK(inode->i_mode))
		type = DEV_BLOCK;
	if (S_ISCHR(inode->i_mode))
		type = DEV_CHAR;
	if (mask & MAY_WRITE)
		access |= ACC_WRITE;
	if (mask & MAY_READ)
		access |= ACC_READ;

	return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
			access);
}

/* check whether current's devcg allows mknod of the given device node */
int devcgroup_inode_mknod(int mode, dev_t dev)
{
	short type;

	/* not a device node: nothing to restrict */
	if (!S_ISBLK(mode) && !S_ISCHR(mode))
		return 0;

	if (S_ISBLK(mode))
		type = DEV_BLOCK;
	else
		type = DEV_CHAR;

	return __devcgroup_check_permission(type, MAJOR(dev), MINOR(dev),
			ACC_MKNOD);

}