// SPDX-License-Identifier: GPL-2.0
/*
 * device_cgroup.c - device cgroup subsystem
 *
 * Copyright 2007 IBM Corp
 */

#include <linux/device_cgroup.h>
#include <linux/cgroup.h>
#include <linux/ctype.h>
#include <linux/list.h>
#include <linux/uaccess.h>
13
#include <linux/seq_file.h>
14
#include <linux/slab.h>
L
Lai Jiangshan 已提交
15
#include <linux/rcupdate.h>
L
Li Zefan 已提交
16
#include <linux/mutex.h>
17

18 19 20 21
/* Access bits stored in an exception's 'access' field. */
#define DEVCG_ACC_MKNOD (1 << 0)
#define DEVCG_ACC_READ  (1 << 1)
#define DEVCG_ACC_WRITE (1 << 2)
/* Union of every access bit. */
#define DEVCG_ACC_MASK  (DEVCG_ACC_READ | DEVCG_ACC_WRITE | DEVCG_ACC_MKNOD)
22

23 24 25
/* Device-type bits stored in an exception's 'type' field. */
#define DEVCG_DEV_BLOCK (1 << 0)
#define DEVCG_DEV_CHAR  (1 << 1)
#define DEVCG_DEV_ALL   (1 << 2)  /* this represents all devices */
26

L
Li Zefan 已提交
27 28
/* Taken around every change to a device cgroup's behavior or exception list. */
static DEFINE_MUTEX(devcgroup_mutex);

29 30 31 32 33 34
/*
 * Default policy of a device cgroup. The exception list holds the devices
 * that deviate from this default.
 */
enum devcg_behavior {
	DEVCG_DEFAULT_NONE  = 0,	/* not online (set at alloc and offline) */
	DEVCG_DEFAULT_ALLOW = 1,	/* allow all, exceptions are denied */
	DEVCG_DEFAULT_DENY  = 2,	/* deny all, exceptions are allowed */
};

35
/*
 * exception list locking rules:
 * hold devcgroup_mutex for update/read.
 * hold rcu_read_lock() for read.
 */

41
struct dev_exception_item {
42 43 44 45
	u32 major, minor;
	short type;
	short access;
	struct list_head list;
46
	struct rcu_head rcu;
47 48 49 50
};

struct dev_cgroup {
	struct cgroup_subsys_state css;
51
	struct list_head exceptions;
52
	enum devcg_behavior behavior;
53 54
};

55 56
static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s)
{
57
	return s ? container_of(s, struct dev_cgroup, css) : NULL;
58 59
}

60 61
static inline struct dev_cgroup *task_devcgroup(struct task_struct *task)
{
62
	return css_to_devcgroup(task_css(task, devices_cgrp_id));
63 64
}

65
/*
L
Li Zefan 已提交
66
 * called under devcgroup_mutex
67
 */
68
static int dev_exceptions_copy(struct list_head *dest, struct list_head *orig)
69
{
70
	struct dev_exception_item *ex, *tmp, *new;
71

T
Tejun Heo 已提交
72 73
	lockdep_assert_held(&devcgroup_mutex);

74 75
	list_for_each_entry(ex, orig, list) {
		new = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
76 77 78 79 80 81 82 83
		if (!new)
			goto free_and_exit;
		list_add_tail(&new->list, dest);
	}

	return 0;

free_and_exit:
84 85 86
	list_for_each_entry_safe(ex, tmp, dest, list) {
		list_del(&ex->list);
		kfree(ex);
87 88 89 90 91
	}
	return -ENOMEM;
}

/*
L
Li Zefan 已提交
92
 * called under devcgroup_mutex
93
 */
94 95
static int dev_exception_add(struct dev_cgroup *dev_cgroup,
			     struct dev_exception_item *ex)
96
{
97
	struct dev_exception_item *excopy, *walk;
98

T
Tejun Heo 已提交
99 100
	lockdep_assert_held(&devcgroup_mutex);

101 102
	excopy = kmemdup(ex, sizeof(*ex), GFP_KERNEL);
	if (!excopy)
103 104
		return -ENOMEM;

105 106
	list_for_each_entry(walk, &dev_cgroup->exceptions, list) {
		if (walk->type != ex->type)
107
			continue;
108
		if (walk->major != ex->major)
109
			continue;
110
		if (walk->minor != ex->minor)
111 112
			continue;

113 114 115
		walk->access |= ex->access;
		kfree(excopy);
		excopy = NULL;
116 117
	}

118 119
	if (excopy != NULL)
		list_add_tail_rcu(&excopy->list, &dev_cgroup->exceptions);
120 121 122 123
	return 0;
}

/*
L
Li Zefan 已提交
124
 * called under devcgroup_mutex
125
 */
126 127
static void dev_exception_rm(struct dev_cgroup *dev_cgroup,
			     struct dev_exception_item *ex)
128
{
129
	struct dev_exception_item *walk, *tmp;
130

T
Tejun Heo 已提交
131 132
	lockdep_assert_held(&devcgroup_mutex);

133 134
	list_for_each_entry_safe(walk, tmp, &dev_cgroup->exceptions, list) {
		if (walk->type != ex->type)
135
			continue;
136
		if (walk->major != ex->major)
137
			continue;
138
		if (walk->minor != ex->minor)
139 140
			continue;

141
		walk->access &= ~ex->access;
142
		if (!walk->access) {
143
			list_del_rcu(&walk->list);
144
			kfree_rcu(walk, rcu);
145 146 147 148
		}
	}
}

149 150 151 152 153 154 155 156 157 158
static void __dev_exception_clean(struct dev_cgroup *dev_cgroup)
{
	struct dev_exception_item *ex, *tmp;

	list_for_each_entry_safe(ex, tmp, &dev_cgroup->exceptions, list) {
		list_del_rcu(&ex->list);
		kfree_rcu(ex, rcu);
	}
}

159
/**
160 161
 * dev_exception_clean - frees all entries of the exception list
 * @dev_cgroup: dev_cgroup with the exception list to be cleaned
162 163 164
 *
 * called under devcgroup_mutex
 */
165
static void dev_exception_clean(struct dev_cgroup *dev_cgroup)
166
{
T
Tejun Heo 已提交
167 168
	lockdep_assert_held(&devcgroup_mutex);

169
	__dev_exception_clean(dev_cgroup);
170 171
}

172 173 174 175 176
/* A device cgroup counts as online once its behavior is no longer NONE. */
static inline bool is_devcg_online(const struct dev_cgroup *devcg)
{
	return devcg->behavior != DEVCG_DEFAULT_NONE;
}

177 178 179
/**
 * devcgroup_online - initializes devcgroup's behavior and exceptions based on
 * 		      parent's
180
 * @css: css getting online
181 182
 * returns 0 in case of success, error code otherwise
 */
183
static int devcgroup_online(struct cgroup_subsys_state *css)
184
{
185
	struct dev_cgroup *dev_cgroup = css_to_devcgroup(css);
T
Tejun Heo 已提交
186
	struct dev_cgroup *parent_dev_cgroup = css_to_devcgroup(css->parent);
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
	int ret = 0;

	mutex_lock(&devcgroup_mutex);

	if (parent_dev_cgroup == NULL)
		dev_cgroup->behavior = DEVCG_DEFAULT_ALLOW;
	else {
		ret = dev_exceptions_copy(&dev_cgroup->exceptions,
					  &parent_dev_cgroup->exceptions);
		if (!ret)
			dev_cgroup->behavior = parent_dev_cgroup->behavior;
	}
	mutex_unlock(&devcgroup_mutex);

	return ret;
}

204
static void devcgroup_offline(struct cgroup_subsys_state *css)
205
{
206
	struct dev_cgroup *dev_cgroup = css_to_devcgroup(css);
207 208 209 210 211 212

	mutex_lock(&devcgroup_mutex);
	dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
	mutex_unlock(&devcgroup_mutex);
}

213 214 215
/*
 * called from kernel/cgroup.c with cgroup_lock() held.
 */
216 217
static struct cgroup_subsys_state *
devcgroup_css_alloc(struct cgroup_subsys_state *parent_css)
218
{
219
	struct dev_cgroup *dev_cgroup;
220 221 222 223

	dev_cgroup = kzalloc(sizeof(*dev_cgroup), GFP_KERNEL);
	if (!dev_cgroup)
		return ERR_PTR(-ENOMEM);
224
	INIT_LIST_HEAD(&dev_cgroup->exceptions);
225
	dev_cgroup->behavior = DEVCG_DEFAULT_NONE;
226 227 228 229

	return &dev_cgroup->css;
}

230
/* Release a device cgroup: drop its exceptions, then free the cgroup. */
static void devcgroup_css_free(struct cgroup_subsys_state *css)
{
	struct dev_cgroup *devcg = css_to_devcgroup(css);

	/* unlocked variant: devcgroup_mutex is not taken on this path */
	__dev_exception_clean(devcg);
	kfree(devcg);
}

/* cftype 'private' values identifying which control file is accessed */
#define DEVCG_ALLOW 1
#define DEVCG_DENY 2
#define DEVCG_LIST 3

/* buffer size used for a printed major or minor number */
#define MAJMINLEN 13
/* buffer size used for a printed access string ("rwm" plus NUL) */
#define ACCLEN 4
244 245 246 247

static void set_access(char *acc, short access)
{
	int idx = 0;
248
	memset(acc, 0, ACCLEN);
249
	if (access & DEVCG_ACC_READ)
250
		acc[idx++] = 'r';
251
	if (access & DEVCG_ACC_WRITE)
252
		acc[idx++] = 'w';
253
	if (access & DEVCG_ACC_MKNOD)
254 255 256 257 258
		acc[idx++] = 'm';
}

static char type_to_char(short type)
{
259
	if (type == DEVCG_DEV_ALL)
260
		return 'a';
261
	if (type == DEVCG_DEV_CHAR)
262
		return 'c';
263
	if (type == DEVCG_DEV_BLOCK)
264 265 266 267
		return 'b';
	return 'X';
}

268
/*
 * Print a major or minor number into @str: the wildcard value ~0 becomes
 * "*", anything else its unsigned decimal form. No bound is checked, so
 * @str must be large enough for the result.
 */
static void set_majmin(char *str, unsigned m)
{
	if (m != ~0) {
		sprintf(str, "%u", m);
		return;
	}

	strcpy(str, "*");
}

276
static int devcgroup_seq_show(struct seq_file *m, void *v)
277
{
278
	struct dev_cgroup *devcgroup = css_to_devcgroup(seq_css(m));
279
	struct dev_exception_item *ex;
280
	char maj[MAJMINLEN], min[MAJMINLEN], acc[ACCLEN];
281

282
	rcu_read_lock();
283 284 285 286 287 288
	/*
	 * To preserve the compatibility:
	 * - Only show the "all devices" when the default policy is to allow
	 * - List the exceptions in case the default policy is to deny
	 * This way, the file remains as a "whitelist of devices"
	 */
289
	if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
290
		set_access(acc, DEVCG_ACC_MASK);
291 292
		set_majmin(maj, ~0);
		set_majmin(min, ~0);
293
		seq_printf(m, "%c %s:%s %s\n", type_to_char(DEVCG_DEV_ALL),
294
			   maj, min, acc);
295
	} else {
296 297 298 299 300
		list_for_each_entry_rcu(ex, &devcgroup->exceptions, list) {
			set_access(acc, ex->access);
			set_majmin(maj, ex->major);
			set_majmin(min, ex->minor);
			seq_printf(m, "%c %s:%s %s\n", type_to_char(ex->type),
301 302
				   maj, min, acc);
		}
303
	}
304
	rcu_read_unlock();
305

306
	return 0;
307 308
}

309
/**
310
 * match_exception	- iterates the exception list trying to find a complete match
311
 * @exceptions: list of exceptions
312
 * @type: device type (DEVCG_DEV_BLOCK or DEVCG_DEV_CHAR)
313 314
 * @major: device file major number, ~0 to match all
 * @minor: device file minor number, ~0 to match all
315
 * @access: permission mask (DEVCG_ACC_READ, DEVCG_ACC_WRITE, DEVCG_ACC_MKNOD)
316
 *
317 318 319 320
 * It is considered a complete match if an exception is found that will
 * contain the entire range of provided parameters.
 *
 * Return: true in case it matches an exception completely
321
 */
322 323
static bool match_exception(struct list_head *exceptions, short type,
			    u32 major, u32 minor, short access)
324
{
325
	struct dev_exception_item *ex;
326

327
	list_for_each_entry_rcu(ex, exceptions, list) {
328
		if ((type & DEVCG_DEV_BLOCK) && !(ex->type & DEVCG_DEV_BLOCK))
329
			continue;
330
		if ((type & DEVCG_DEV_CHAR) && !(ex->type & DEVCG_DEV_CHAR))
331 332 333 334 335 336 337 338 339 340 341 342 343 344
			continue;
		if (ex->major != ~0 && ex->major != major)
			continue;
		if (ex->minor != ~0 && ex->minor != minor)
			continue;
		/* provided access cannot have more than the exception rule */
		if (access & (~ex->access))
			continue;
		return true;
	}
	return false;
}

/**
 * match_exception_partial - look for an exception that overlaps a request
 * @exceptions: list of exceptions
 * @type: device type (DEVCG_DEV_BLOCK or DEVCG_DEV_CHAR)
 * @major: device file major number, ~0 to match all
 * @minor: device file minor number, ~0 to match all
 * @access: permission mask (DEVCG_ACC_READ, DEVCG_ACC_WRITE, DEVCG_ACC_MKNOD)
 *
 * It is considered a partial match if an exception's range is found to
 * contain *any* of the devices specified by provided parameters. This is
 * used to make sure no extra access is being granted that is forbidden by
 * any of the exception list.
 *
 * Return: true in case the provided range matches an exception at least
 * partially
 */
static bool match_exception_partial(struct list_head *exceptions, short type,
				    u32 major, u32 minor, short access)
{
	struct dev_exception_item *ex;

	list_for_each_entry_rcu(ex, exceptions, list) {
		bool type_ok, major_ok, minor_ok;

		type_ok = (!(type & DEVCG_DEV_BLOCK) ||
			   (ex->type & DEVCG_DEV_BLOCK)) &&
			  (!(type & DEVCG_DEV_CHAR) ||
			   (ex->type & DEVCG_DEV_CHAR));
		/*
		 * A wildcard on either side overlaps everything; only two
		 * concrete, different numbers rule the exception out.
		 */
		major_ok = ex->major == ~0 || major == ~0 ||
			   ex->major == major;
		minor_ok = ex->minor == ~0 || minor == ~0 ||
			   ex->minor == minor;

		if (!type_ok || !major_ok || !minor_ok)
			continue;

		/* a single shared access bit is enough to overlap */
		if (access & ex->access)
			return true;
	}

	return false;
}

/**
390
 * verify_new_ex - verifies if a new exception is allowed by parent cgroup's permissions
391 392 393
 * @dev_cgroup: dev cgroup to be tested against
 * @refex: new exception
 * @behavior: behavior of the exception's dev_cgroup
394 395 396
 *
 * This is used to make sure a child cgroup won't have more privileges
 * than its parent
397 398 399 400 401 402 403
 */
static bool verify_new_ex(struct dev_cgroup *dev_cgroup,
		          struct dev_exception_item *refex,
		          enum devcg_behavior behavior)
{
	bool match = false;

404
	RCU_LOCKDEP_WARN(!rcu_read_lock_held() &&
405
			 !lockdep_is_held(&devcgroup_mutex),
406
			 "device_cgroup:verify_new_ex called without proper synchronization");
407

408 409
	if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW) {
		if (behavior == DEVCG_DEFAULT_ALLOW) {
410 411 412 413
			/*
			 * new exception in the child doesn't matter, only
			 * adding extra restrictions
			 */ 
414 415
			return true;
		} else {
416 417 418 419 420 421 422 423 424 425 426
			/*
			 * new exception in the child will add more devices
			 * that can be acessed, so it can't match any of
			 * parent's exceptions, even slightly
			 */ 
			match = match_exception_partial(&dev_cgroup->exceptions,
							refex->type,
							refex->major,
							refex->minor,
							refex->access);

427 428
			if (match)
				return false;
429
			return true;
430
		}
431
	} else {
432 433 434 435 436 437 438 439 440 441
		/*
		 * Only behavior == DEVCG_DEFAULT_DENY allowed here, therefore
		 * the new exception will add access to more devices and must
		 * be contained completely in an parent's exception to be
		 * allowed
		 */
		match = match_exception(&dev_cgroup->exceptions, refex->type,
					refex->major, refex->minor,
					refex->access);

442 443
		if (match)
			/* parent has an exception that matches the proposed */
444
			return true;
445 446
		else
			return false;
447 448
	}
	return false;
449 450 451 452
}

/*
 * parent_has_perm:
453
 * when adding a new allow rule to a device exception list, the rule
454 455
 * must be allowed in the parent device
 */
456
static int parent_has_perm(struct dev_cgroup *childcg,
457
				  struct dev_exception_item *ex)
458
{
T
Tejun Heo 已提交
459
	struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent);
460

T
Tejun Heo 已提交
461
	if (!parent)
462
		return 1;
463
	return verify_new_ex(parent, ex, childcg->behavior);
464 465
}

466 467 468 469 470 471 472 473 474 475 476 477 478 479
/**
 * parent_allows_removal - verify if it's ok to remove an exception
 * @childcg: child cgroup from where the exception will be removed
 * @ex: exception being removed
 *
 * When removing an exception in cgroups with default ALLOW policy, it must
 * be checked if removing it will give the child cgroup more access than the
 * parent.
 *
 * Return: true if it's ok to remove exception, false otherwise
 */
static bool parent_allows_removal(struct dev_cgroup *childcg,
				  struct dev_exception_item *ex)
{
	struct dev_cgroup *parent = css_to_devcgroup(childcg->css.parent);
	bool overlaps;

	/*
	 * No parent means nothing to violate, and under a default-deny
	 * policy a removal only takes access away, which is always allowed.
	 */
	if (!parent || childcg->behavior == DEVCG_DEFAULT_DENY)
		return true;

	/*
	 * Make sure you're not removing part or a whole exception existing in
	 * the parent cgroup
	 */
	overlaps = match_exception_partial(&parent->exceptions, ex->type,
					   ex->major, ex->minor, ex->access);
	return !overlaps;
}

497 498 499 500 501 502 503 504
/**
 * may_allow_all - checks if it's possible to change the behavior to
 *		   allow based on parent's rules.
 * @parent: device cgroup's parent
 * returns: != 0 in case it's allowed, 0 otherwise
 */
static inline int may_allow_all(struct dev_cgroup *parent)
{
505 506
	if (!parent)
		return 1;
507 508 509
	return parent->behavior == DEVCG_DEFAULT_ALLOW;
}

510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544
/**
 * revalidate_active_exceptions - walks through the active exception list and
 * 				  revalidates the exceptions based on parent's
 * 				  behavior and exceptions. The exceptions that
 * 				  are no longer valid will be removed.
 * 				  Called with devcgroup_mutex held.
 * @devcg: cgroup which exceptions will be checked
 *
 * This is one of the three key functions for hierarchy implementation.
 * This function is responsible for re-evaluating all the cgroup's active
 * exceptions due to a parent's exception change.
 * Refer to Documentation/cgroups/devices.txt for more details.
 */
static void revalidate_active_exceptions(struct dev_cgroup *devcg)
{
	struct dev_exception_item *ex;
	struct list_head *this, *tmp;

	list_for_each_safe(this, tmp, &devcg->exceptions) {
		ex = container_of(this, struct dev_exception_item, list);
		if (!parent_has_perm(devcg, ex))
			dev_exception_rm(devcg, ex);
	}
}

/**
 * propagate_exception - propagates a new exception to the children
 * @devcg_root: device cgroup that added a new exception
 * @ex: new exception to be propagated
 *
 * returns: 0 in case of success, != 0 in case of error
 */
static int propagate_exception(struct dev_cgroup *devcg_root,
			       struct dev_exception_item *ex)
{
545
	struct cgroup_subsys_state *pos;
546 547
	int rc = 0;

548
	rcu_read_lock();
549

550 551
	css_for_each_descendant_pre(pos, &devcg_root->css) {
		struct dev_cgroup *devcg = css_to_devcgroup(pos);
552 553 554 555 556 557 558

		/*
		 * Because devcgroup_mutex is held, no devcg will become
		 * online or offline during the tree walk (see on/offline
		 * methods), and online ones are safe to access outside RCU
		 * read lock without bumping refcnt.
		 */
559
		if (pos == &devcg_root->css || !is_devcg_online(devcg))
560 561 562
			continue;

		rcu_read_unlock();
563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578 579 580 581 582 583

		/*
		 * in case both root's behavior and devcg is allow, a new
		 * restriction means adding to the exception list
		 */
		if (devcg_root->behavior == DEVCG_DEFAULT_ALLOW &&
		    devcg->behavior == DEVCG_DEFAULT_ALLOW) {
			rc = dev_exception_add(devcg, ex);
			if (rc)
				break;
		} else {
			/*
			 * in the other possible cases:
			 * root's behavior: allow, devcg's: deny
			 * root's behavior: deny, devcg's: deny
			 * the exception will be removed
			 */
			dev_exception_rm(devcg, ex);
		}
		revalidate_active_exceptions(devcg);

584
		rcu_read_lock();
585
	}
586 587

	rcu_read_unlock();
588 589 590
	return rc;
}

591
/*
592
 * Modify the exception list using allow/deny rules.
593 594
 * CAP_SYS_ADMIN is needed for this.  It's at least separate from CAP_MKNOD
 * so we can give a container CAP_MKNOD to let it create devices but not
595
 * modify the exception list.
596 597
 * It seems likely we'll want to add a CAP_CONTAINER capability to allow
 * us to also grant CAP_SYS_ADMIN to containers without giving away the
598
 * device exception list controls, but for now we'll stick with CAP_SYS_ADMIN
599 600 601 602 603
 *
 * Taking rules away is always allowed (given CAP_SYS_ADMIN).  Granting
 * new access is only allowed if you're in the top-level cgroup, or your
 * parent cgroup has the access you're asking for.
 */
604
static int devcgroup_update_access(struct dev_cgroup *devcgroup,
605
				   int filetype, char *buffer)
606
{
607
	const char *b;
608
	char temp[12];		/* 11 + 1 characters needed for a u32 */
609
	int count, rc = 0;
610
	struct dev_exception_item ex;
T
Tejun Heo 已提交
611
	struct dev_cgroup *parent = css_to_devcgroup(devcgroup->css.parent);
612 613 614 615

	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;

616
	memset(&ex, 0, sizeof(ex));
617 618 619 620
	b = buffer;

	switch (*b) {
	case 'a':
621 622
		switch (filetype) {
		case DEVCG_ALLOW:
623
			if (css_has_online_children(&devcgroup->css))
624 625
				return -EINVAL;

626
			if (!may_allow_all(parent))
627
				return -EPERM;
628
			dev_exception_clean(devcgroup);
629 630 631 632
			devcgroup->behavior = DEVCG_DEFAULT_ALLOW;
			if (!parent)
				break;

633 634 635 636
			rc = dev_exceptions_copy(&devcgroup->exceptions,
						 &parent->exceptions);
			if (rc)
				return rc;
637 638
			break;
		case DEVCG_DENY:
639
			if (css_has_online_children(&devcgroup->css))
640 641
				return -EINVAL;

642
			dev_exception_clean(devcgroup);
643
			devcgroup->behavior = DEVCG_DEFAULT_DENY;
644 645 646 647 648
			break;
		default:
			return -EINVAL;
		}
		return 0;
649
	case 'b':
650
		ex.type = DEVCG_DEV_BLOCK;
651 652
		break;
	case 'c':
653
		ex.type = DEVCG_DEV_CHAR;
654 655
		break;
	default:
656
		return -EINVAL;
657 658
	}
	b++;
659 660
	if (!isspace(*b))
		return -EINVAL;
661 662
	b++;
	if (*b == '*') {
663
		ex.major = ~0;
664 665
		b++;
	} else if (isdigit(*b)) {
666 667 668 669 670 671 672 673 674 675
		memset(temp, 0, sizeof(temp));
		for (count = 0; count < sizeof(temp) - 1; count++) {
			temp[count] = *b;
			b++;
			if (!isdigit(*b))
				break;
		}
		rc = kstrtou32(temp, 10, &ex.major);
		if (rc)
			return -EINVAL;
676
	} else {
677
		return -EINVAL;
678
	}
679 680
	if (*b != ':')
		return -EINVAL;
681 682 683 684
	b++;

	/* read minor */
	if (*b == '*') {
685
		ex.minor = ~0;
686 687
		b++;
	} else if (isdigit(*b)) {
688 689 690 691 692 693 694 695 696 697
		memset(temp, 0, sizeof(temp));
		for (count = 0; count < sizeof(temp) - 1; count++) {
			temp[count] = *b;
			b++;
			if (!isdigit(*b))
				break;
		}
		rc = kstrtou32(temp, 10, &ex.minor);
		if (rc)
			return -EINVAL;
698
	} else {
699
		return -EINVAL;
700
	}
701 702
	if (!isspace(*b))
		return -EINVAL;
703 704 705
	for (b++, count = 0; count < 3; count++, b++) {
		switch (*b) {
		case 'r':
706
			ex.access |= DEVCG_ACC_READ;
707 708
			break;
		case 'w':
709
			ex.access |= DEVCG_ACC_WRITE;
710 711
			break;
		case 'm':
712
			ex.access |= DEVCG_ACC_MKNOD;
713 714 715 716 717 718
			break;
		case '\n':
		case '\0':
			count = 3;
			break;
		default:
719
			return -EINVAL;
720 721 722 723 724
		}
	}

	switch (filetype) {
	case DEVCG_ALLOW:
725 726 727 728 729
		/*
		 * If the default policy is to allow by default, try to remove
		 * an matching exception instead. And be silent about it: we
		 * don't want to break compatibility
		 */
730
		if (devcgroup->behavior == DEVCG_DEFAULT_ALLOW) {
731 732 733
			/* Check if the parent allows removing it first */
			if (!parent_allows_removal(devcgroup, &ex))
				return -EPERM;
734
			dev_exception_rm(devcgroup, &ex);
735
			break;
736
		}
737 738 739

		if (!parent_has_perm(devcgroup, &ex))
			return -EPERM;
740 741
		rc = dev_exception_add(devcgroup, &ex);
		break;
742
	case DEVCG_DENY:
743 744 745 746 747
		/*
		 * If the default policy is to deny by default, try to remove
		 * an matching exception instead. And be silent about it: we
		 * don't want to break compatibility
		 */
748
		if (devcgroup->behavior == DEVCG_DEFAULT_DENY)
749
			dev_exception_rm(devcgroup, &ex);
750 751 752 753 754 755 756 757
		else
			rc = dev_exception_add(devcgroup, &ex);

		if (rc)
			break;
		/* we only propagate new restrictions */
		rc = propagate_exception(devcgroup, &ex);
		break;
758
	default:
759
		rc = -EINVAL;
760
	}
761
	return rc;
762
}
763

764 765
/*
 * Write handler shared by the "allow" and "deny" files: strips surrounding
 * whitespace from the written rule and applies it under devcgroup_mutex.
 *
 * Returns @nbytes on success, otherwise the error from
 * devcgroup_update_access().
 */
static ssize_t devcgroup_access_write(struct kernfs_open_file *of,
				      char *buf, size_t nbytes, loff_t off)
{
	struct dev_cgroup *devcg;
	int err;

	mutex_lock(&devcgroup_mutex);
	devcg = css_to_devcgroup(of_css(of));
	err = devcgroup_update_access(devcg, of_cft(of)->private,
				      strstrip(buf));
	mutex_unlock(&devcgroup_mutex);

	return err ? err : nbytes;
}

static struct cftype dev_cgroup_files[] = {
	{
		.name = "allow",
779
		.write = devcgroup_access_write,
780 781 782 783
		.private = DEVCG_ALLOW,
	},
	{
		.name = "deny",
784
		.write = devcgroup_access_write,
785 786
		.private = DEVCG_DENY,
	},
787 788
	{
		.name = "list",
789
		.seq_show = devcgroup_seq_show,
790 791
		.private = DEVCG_LIST,
	},
792
	{ }	/* terminate */
793 794
};

795
struct cgroup_subsys devices_cgrp_subsys = {
796 797
	.css_alloc = devcgroup_css_alloc,
	.css_free = devcgroup_css_free,
798 799
	.css_online = devcgroup_online,
	.css_offline = devcgroup_offline,
800
	.legacy_cftypes = dev_cgroup_files,
801 802
};

803 804 805 806 807 808
/**
 * __devcgroup_check_permission - checks if an inode operation is permitted
 * @dev_cgroup: the dev cgroup to be tested against
 * @type: device type
 * @major: device major number
 * @minor: device minor number
809
 * @access: combination of DEVCG_ACC_WRITE, DEVCG_ACC_READ and DEVCG_ACC_MKNOD
810 811 812
 *
 * returns 0 on success, -EPERM case the operation is not permitted
 */
J
Jiri Slaby 已提交
813
static int __devcgroup_check_permission(short type, u32 major, u32 minor,
814
				        short access)
815
{
J
Jiri Slaby 已提交
816
	struct dev_cgroup *dev_cgroup;
817
	bool rc;
L
Li Zefan 已提交
818

819
	rcu_read_lock();
J
Jiri Slaby 已提交
820
	dev_cgroup = task_devcgroup(current);
821 822 823 824 825 826 827 828
	if (dev_cgroup->behavior == DEVCG_DEFAULT_ALLOW)
		/* Can't match any of the exceptions, even partially */
		rc = !match_exception_partial(&dev_cgroup->exceptions,
					      type, major, minor, access);
	else
		/* Need to match completely one exception to be allowed */
		rc = match_exception(&dev_cgroup->exceptions, type, major,
				     minor, access);
829
	rcu_read_unlock();
830

831 832
	if (!rc)
		return -EPERM;
L
Li Zefan 已提交
833

834 835
	return 0;
}
836

837 838 839 840 841
int __devcgroup_inode_permission(struct inode *inode, int mask)
{
	short type, access = 0;

	if (S_ISBLK(inode->i_mode))
842
		type = DEVCG_DEV_BLOCK;
843
	if (S_ISCHR(inode->i_mode))
844
		type = DEVCG_DEV_CHAR;
845
	if (mask & MAY_WRITE)
846
		access |= DEVCG_ACC_WRITE;
847
	if (mask & MAY_READ)
848
		access |= DEVCG_ACC_READ;
849

J
Jiri Slaby 已提交
850 851
	return __devcgroup_check_permission(type, imajor(inode), iminor(inode),
			access);
852 853 854 855
}

/*
 * Check whether the current task may create the device node @dev.
 *
 * @mode: file mode of the node being created
 * @dev: device number of the node
 *
 * Returns 0 if creation is permitted or @mode is not a block or char
 * device, -EPERM otherwise.
 */
int devcgroup_inode_mknod(int mode, dev_t dev)
{
	short type;

	if (S_ISBLK(mode))
		type = DEVCG_DEV_BLOCK;
	else if (S_ISCHR(mode))
		type = DEVCG_DEV_CHAR;
	else
		return 0;

	return __devcgroup_check_permission(type, MAJOR(dev), MINOR(dev),
			DEVCG_ACC_MKNOD);
}