diff --git a/Documentation/cgroups/devices.txt b/Documentation/cgroups/devices.txt index 16624a7f82224d2f79d2a7aa2999ebe66c063baa..3c1095ca02ea17b350179c3bf89ff885f875c60d 100644 --- a/Documentation/cgroups/devices.txt +++ b/Documentation/cgroups/devices.txt @@ -13,9 +13,7 @@ either an integer or * for all. Access is a composition of r The root device cgroup starts with rwm to 'all'. A child device cgroup gets a copy of the parent. Administrators can then remove devices from the whitelist or add new entries. A child cgroup can -never receive a device access which is denied by its parent. However -when a device access is removed from a parent it will not also be -removed from the child(ren). +never receive a device access which is denied by its parent. 2. User Interface @@ -50,3 +48,69 @@ task to a new cgroup. (Again we'll probably want to change that). A cgroup may not be granted more permissions than the cgroup's parent has. + +4. Hierarchy + +device cgroups maintain hierarchy by making sure a cgroup never has more +access permissions than its parent. Every time an entry is written to +a cgroup's devices.deny file, all its children will have that entry removed +from their whitelist and all the locally set whitelist entries will be +re-evaluated. In case one of the locally set whitelist entries would provide +more access than the cgroup's parent, it'll be removed from the whitelist. + +Example: + A + / \ + B + + group behavior exceptions + A allow "b 8:* rwm", "c 116:1 rw" + B deny "c 1:3 rwm", "c 116:2 rwm", "b 3:* rwm" + +If a device is denied in group A: + # echo "c 116:* r" > A/devices.deny +it'll propagate down and after revalidating B's entries, the whitelist entry +"c 116:2 rwm" will be removed: + + group whitelist entries denied devices + A all "b 8:* rwm", "c 116:* rw" + B "c 1:3 rwm", "b 3:* rwm" all the rest + +In case parent's exceptions change and local exceptions are not allowed +anymore, they'll be deleted. + +Notice that new whitelist entries will not be propagated: + A + / \ + B + + group whitelist entries denied devices + A "c 1:3 rwm", "c 1:5 r" all the rest + B "c 1:3 rwm", "c 1:5 r" all the rest + +when adding "c *:3 rwm": + # echo "c *:3 rwm" >A/devices.allow + +the result: + group whitelist entries denied devices + A "c *:3 rwm", "c 1:5 r" all the rest + B "c 1:3 rwm", "c 1:5 r" all the rest + +but now it'll be possible to add new entries to B: + # echo "c 2:3 rwm" >B/devices.allow + # echo "c 50:3 r" >B/devices.allow +or even + # echo "c *:3 rwm" >B/devices.allow + +Allowing or denying all by writing 'a' to devices.allow or devices.deny will +not be possible once the device cgroups has children. + +4.1 Hierarchy (internal implementation) + +device cgroups is implemented internally using a behavior (ALLOW, DENY) and a +list of exceptions. The internal state is controlled using the same user +interface to preserve compatibility with the previous whitelist-only +implementation. Removal or addition of exceptions that will reduce the access +to devices will be propagated down the hierarchy. +For every propagated exception, the effective rules will be re-evaluated based +on current parent's access rules. diff --git a/security/device_cgroup.c b/security/device_cgroup.c index 16c9e1069be6bb23a24116a32b7b658207c2a124..221967d4690c3699faf9d77e047e457f55f94de6 100644 --- a/security/device_cgroup.c +++ b/security/device_cgroup.c @@ -49,6 +49,8 @@ struct dev_cgroup { struct cgroup_subsys_state css; struct list_head exceptions; enum devcg_behavior behavior; + /* temporary list for pending propagation operations */ + struct list_head propagate_pending; }; static inline struct dev_cgroup *css_to_devcgroup(struct cgroup_subsys_state *s) @@ -185,6 +187,11 @@ static void dev_exception_clean(struct dev_cgroup *dev_cgroup) __dev_exception_clean(dev_cgroup); } +static inline bool is_devcg_online(const struct dev_cgroup *devcg) +{ + return (devcg->behavior != DEVCG_DEFAULT_NONE); +} + /** * devcgroup_online - initializes devcgroup's behavior and exceptions based on * parent's @@ -235,6 +242,7 @@ static struct cgroup_subsys_state *devcgroup_css_alloc(struct cgroup *cgroup) if (!dev_cgroup) return ERR_PTR(-ENOMEM); INIT_LIST_HEAD(&dev_cgroup->exceptions); + INIT_LIST_HEAD(&dev_cgroup->propagate_pending); dev_cgroup->behavior = DEVCG_DEFAULT_NONE; parent_cgroup = cgroup->parent; @@ -413,6 +421,111 @@ static inline int may_allow_all(struct dev_cgroup *parent) return parent->behavior == DEVCG_DEFAULT_ALLOW; } +/** + * revalidate_active_exceptions - walks through the active exception list and + * revalidates the exceptions based on parent's + * behavior and exceptions. The exceptions that + * are no longer valid will be removed. + * Called with devcgroup_mutex held. + * @devcg: cgroup which exceptions will be checked + * + * This is one of the three key functions for hierarchy implementation. + * This function is responsible for re-evaluating all the cgroup's active + * exceptions due to a parent's exception change. + * Refer to Documentation/cgroups/devices.txt for more details. + */ +static void revalidate_active_exceptions(struct dev_cgroup *devcg) +{ + struct dev_exception_item *ex; + struct list_head *this, *tmp; + + list_for_each_safe(this, tmp, &devcg->exceptions) { + ex = container_of(this, struct dev_exception_item, list); + if (!parent_has_perm(devcg, ex)) + dev_exception_rm(devcg, ex); + } +} + +/** + * get_online_devcg - walks the cgroup tree and fills a list with the online + * groups + * @root: cgroup used as starting point + * @online: list that will be filled with online groups + * + * Must be called with devcgroup_mutex held. Grabs RCU lock. + * Because devcgroup_mutex is held, no devcg will become online or offline + * during the tree walk (see devcgroup_online, devcgroup_offline) + * A separated list is needed because propagate_behavior() and + * propagate_exception() need to allocate memory and can block. + */ +static void get_online_devcg(struct cgroup *root, struct list_head *online) +{ + struct cgroup *pos; + struct dev_cgroup *devcg; + + lockdep_assert_held(&devcgroup_mutex); + + rcu_read_lock(); + cgroup_for_each_descendant_pre(pos, root) { + devcg = cgroup_to_devcgroup(pos); + if (is_devcg_online(devcg)) + list_add_tail(&devcg->propagate_pending, online); + } + rcu_read_unlock(); +} + +/** + * propagate_exception - propagates a new exception to the children + * @devcg_root: device cgroup that added a new exception + * @ex: new exception to be propagated + * + * returns: 0 in case of success, != 0 in case of error + */ +static int propagate_exception(struct dev_cgroup *devcg_root, + struct dev_exception_item *ex) +{ + struct cgroup *root = devcg_root->css.cgroup; + struct dev_cgroup *devcg, *parent, *tmp; + int rc = 0; + LIST_HEAD(pending); + + get_online_devcg(root, &pending); + + list_for_each_entry_safe(devcg, tmp, &pending, propagate_pending) { + parent = cgroup_to_devcgroup(devcg->css.cgroup->parent); + + /* + * in case both root's behavior and devcg is allow, a new + * restriction means adding to the exception list + */ + if (devcg_root->behavior == DEVCG_DEFAULT_ALLOW && + devcg->behavior == DEVCG_DEFAULT_ALLOW) { + rc = dev_exception_add(devcg, ex); + if (rc) + break; + } else { + /* + * in the other possible cases: + * root's behavior: allow, devcg's: deny + * root's behavior: deny, devcg's: deny + * the exception will be removed + */ + dev_exception_rm(devcg, ex); + } + revalidate_active_exceptions(devcg); + + list_del_init(&devcg->propagate_pending); + } + return rc; +} + +static inline bool has_children(struct dev_cgroup *devcgroup) +{ + struct cgroup *cgrp = devcgroup->css.cgroup; + + return !list_empty(&cgrp->children); +} + /* * Modify the exception list using allow/deny rules. * CAP_SYS_ADMIN is needed for this. It's at least separate from CAP_MKNOD @@ -449,6 +562,9 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, case 'a': switch (filetype) { case DEVCG_ALLOW: + if (has_children(devcgroup)) + return -EINVAL; + if (!may_allow_all(parent)) return -EPERM; dev_exception_clean(devcgroup); @@ -462,6 +578,9 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, return rc; break; case DEVCG_DENY: + if (has_children(devcgroup)) + return -EINVAL; + dev_exception_clean(devcgroup); devcgroup->behavior = DEVCG_DEFAULT_DENY; break; @@ -556,22 +675,28 @@ static int devcgroup_update_access(struct dev_cgroup *devcgroup, dev_exception_rm(devcgroup, &ex); return 0; } - return dev_exception_add(devcgroup, &ex); + rc = dev_exception_add(devcgroup, &ex); + break; case DEVCG_DENY: /* * If the default policy is to deny by default, try to remove * an matching exception instead. And be silent about it: we * don't want to break compatibility */ - if (devcgroup->behavior == DEVCG_DEFAULT_DENY) { + if (devcgroup->behavior == DEVCG_DEFAULT_DENY) dev_exception_rm(devcgroup, &ex); - return 0; - } - return dev_exception_add(devcgroup, &ex); + else + rc = dev_exception_add(devcgroup, &ex); + + if (rc) + break; + /* we only propagate new restrictions */ + rc = propagate_exception(devcgroup, &ex); + break; default: - return -EINVAL; + rc = -EINVAL; } - return 0; + return rc; } static int devcgroup_access_write(struct cgroup *cgrp, struct cftype *cft,