提交 1888e9b4 编写于 作者: L Linus Torvalds

Merge tag 'per-namespace-ipc-sysctls-for-v5.19' of...

Merge tag 'per-namespace-ipc-sysctls-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace

Pull ipc sysctl namespace updates from Eric Biederman:
 "This updates the ipc sysctls so that they are fundamentally per ipc
  namespace. Previously these sysctls depended upon a hack to simulate
  being per ipc namespace by looking up the ipc namespace in read or
  write. With this set of changes the ipc sysctls are registered per ipc
  namespace and open looks up the ipc namespace.

  Not only does this series of changes ensure the traditional binding at
  open time happens, but it sets a foundation for being able to relax
  the permission checks to allow a user namespace root to change the ipc
  sysctls for an ipc namespace that the user namespace root requires. To
  do this requires the ipc namespace to be known at open time"

* tag 'per-namespace-ipc-sysctls-for-v5.19' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm/user-namespace:
  ipc: Remove extra braces
  ipc: Check permissions for checkpoint_restart sysctls at open time
  ipc: Remove extra1 field abuse to pass ipc namespace
  ipc: Use the same namespace to modify and validate
  ipc: Store ipc sysctls in the ipc namespace
  ipc: Store mqueue sysctls in the ipc namespace
......@@ -10,6 +10,7 @@
#include <linux/ns_common.h>
#include <linux/refcount.h>
#include <linux/rhashtable-types.h>
#include <linux/sysctl.h>
struct user_namespace;
......@@ -63,6 +64,12 @@ struct ipc_namespace {
unsigned int mq_msg_default;
unsigned int mq_msgsize_default;
struct ctl_table_set mq_set;
struct ctl_table_header *mq_sysctls;
struct ctl_table_set ipc_set;
struct ctl_table_header *ipc_sysctls;
/* user_ns which owns the ipc ns */
struct user_namespace *user_ns;
struct ucounts *ucounts;
......@@ -169,15 +176,37 @@ static inline void put_ipc_ns(struct ipc_namespace *ns)
#ifdef CONFIG_POSIX_MQUEUE_SYSCTL
struct ctl_table_header;
extern struct ctl_table_header *mq_register_sysctl_table(void);
void retire_mq_sysctls(struct ipc_namespace *ns);
bool setup_mq_sysctls(struct ipc_namespace *ns);
#else /* CONFIG_POSIX_MQUEUE_SYSCTL */
static inline struct ctl_table_header *mq_register_sysctl_table(void)
static inline void retire_mq_sysctls(struct ipc_namespace *ns)
{
return NULL;
}
/*
 * Stub for builds without CONFIG_POSIX_MQUEUE_SYSCTL: nothing is registered
 * for @ns, so setup always reports success.
 */
static inline bool setup_mq_sysctls(struct ipc_namespace *ns)
{
	return true;
}
#endif /* CONFIG_POSIX_MQUEUE_SYSCTL */
#ifdef CONFIG_SYSVIPC_SYSCTL
bool setup_ipc_sysctls(struct ipc_namespace *ns);
void retire_ipc_sysctls(struct ipc_namespace *ns);
#else /* CONFIG_SYSVIPC_SYSCTL */
/*
 * Stub for builds without CONFIG_SYSVIPC_SYSCTL: there is nothing to tear
 * down for @ns, so this does nothing.
 */
static inline void retire_ipc_sysctls(struct ipc_namespace *ns)
{
}
/*
 * Stub for builds without CONFIG_SYSVIPC_SYSCTL: nothing is registered for
 * @ns, so setup always reports success.
 */
static inline bool setup_ipc_sysctls(struct ipc_namespace *ns)
{
	return true;
}
#endif /* CONFIG_SYSVIPC_SYSCTL */
#endif
......@@ -13,43 +13,17 @@
#include <linux/capability.h>
#include <linux/ipc_namespace.h>
#include <linux/msg.h>
#include <linux/slab.h>
#include "util.h"
/*
 * Rebase a sysctl entry's data pointer from init_ipc_ns onto the calling
 * task's ipc namespace.
 *
 * @table->data is expected to point at a field inside init_ipc_ns; the
 * returned pointer has the same byte offset inside current's ipc namespace.
 */
static void *get_ipc(struct ctl_table *table)
{
	struct ipc_namespace *cur_ns = current->nsproxy->ipc_ns;
	char *field = table->data;

	/* Keep the field offset, swap the base from init_ipc_ns to cur_ns. */
	return (field - (char *)&init_ipc_ns) + (char *)cur_ns;
}
/*
 * proc_dointvec() wrapper that works on a stack copy of @table whose data
 * pointer has been redirected by get_ipc(); @table itself is not modified.
 * Returns whatever proc_dointvec() returns.
 */
static int proc_ipc_dointvec(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table ns_table = *table;

	ns_table.data = get_ipc(table);

	return proc_dointvec(&ns_table, write, buffer, lenp, ppos);
}
/*
 * proc_dointvec_minmax() wrapper that works on a stack copy of @table whose
 * data pointer has been redirected by get_ipc(); @table itself is not
 * modified. Returns whatever proc_dointvec_minmax() returns.
 */
static int proc_ipc_dointvec_minmax(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table ns_table = *table;

	ns_table.data = get_ipc(table);

	return proc_dointvec_minmax(&ns_table, write, buffer, lenp, ppos);
}
static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct ipc_namespace *ns = current->nsproxy->ipc_ns;
int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos);
struct ipc_namespace *ns =
container_of(table->data, struct ipc_namespace, shm_rmid_forced);
int err;
err = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
if (err < 0)
return err;
......@@ -58,17 +32,6 @@ static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write,
return err;
}
/*
 * proc_doulongvec_minmax() wrapper that works on a stack copy of @table
 * whose data pointer has been redirected by get_ipc(); @table itself is not
 * modified. Returns whatever proc_doulongvec_minmax() returns.
 */
static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table ns_table = *table;

	ns_table.data = get_ipc(table);

	return proc_doulongvec_minmax(&ns_table, write, buffer, lenp, ppos);
}
static int proc_ipc_auto_msgmni(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
......@@ -87,14 +50,15 @@ static int proc_ipc_auto_msgmni(struct ctl_table *table, int write,
static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
struct ipc_namespace *ns =
container_of(table->data, struct ipc_namespace, sem_ctls);
int ret, semmni;
struct ipc_namespace *ns = current->nsproxy->ipc_ns;
semmni = ns->sem_ctls[3];
ret = proc_ipc_dointvec(table, write, buffer, lenp, ppos);
ret = proc_dointvec(table, write, buffer, lenp, ppos);
if (!ret)
ret = sem_check_semmni(current->nsproxy->ipc_ns);
ret = sem_check_semmni(ns);
/*
* Reset the semmni value if an error happens.
......@@ -104,44 +68,31 @@ static int proc_ipc_sem_dointvec(struct ctl_table *table, int write,
return ret;
}
#ifdef CONFIG_CHECKPOINT_RESTORE
/*
 * Handler for sysctls that may only be written by a task that passes
 * checkpoint_restore_ns_capable() for the user namespace owning the calling
 * task's ipc namespace.
 *
 * Returns -EPERM for a write by a task that fails that check; otherwise the
 * result of proc_ipc_dointvec_minmax(). Reads are never refused here.
 */
static int proc_ipc_dointvec_minmax_checkpoint_restore(struct ctl_table *table,
		int write, void *buffer, size_t *lenp, loff_t *ppos)
{
	struct user_namespace *owner_ns = current->nsproxy->ipc_ns->user_ns;
	bool denied = write && !checkpoint_restore_ns_capable(owner_ns);

	if (denied)
		return -EPERM;

	return proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos);
}
#endif
int ipc_mni = IPCMNI;
int ipc_mni_shift = IPCMNI_SHIFT;
int ipc_min_cycle = RADIX_TREE_MAP_SIZE;
static struct ctl_table ipc_kern_table[] = {
static struct ctl_table ipc_sysctls[] = {
{
.procname = "shmmax",
.data = &init_ipc_ns.shm_ctlmax,
.maxlen = sizeof(init_ipc_ns.shm_ctlmax),
.mode = 0644,
.proc_handler = proc_ipc_doulongvec_minmax,
.proc_handler = proc_doulongvec_minmax,
},
{
.procname = "shmall",
.data = &init_ipc_ns.shm_ctlall,
.maxlen = sizeof(init_ipc_ns.shm_ctlall),
.mode = 0644,
.proc_handler = proc_ipc_doulongvec_minmax,
.proc_handler = proc_doulongvec_minmax,
},
{
.procname = "shmmni",
.data = &init_ipc_ns.shm_ctlmni,
.maxlen = sizeof(init_ipc_ns.shm_ctlmni),
.mode = 0644,
.proc_handler = proc_ipc_dointvec_minmax,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &ipc_mni,
},
......@@ -159,7 +110,7 @@ static struct ctl_table ipc_kern_table[] = {
.data = &init_ipc_ns.msg_ctlmax,
.maxlen = sizeof(init_ipc_ns.msg_ctlmax),
.mode = 0644,
.proc_handler = proc_ipc_dointvec_minmax,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
},
......@@ -168,7 +119,7 @@ static struct ctl_table ipc_kern_table[] = {
.data = &init_ipc_ns.msg_ctlmni,
.maxlen = sizeof(init_ipc_ns.msg_ctlmni),
.mode = 0644,
.proc_handler = proc_ipc_dointvec_minmax,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = &ipc_mni,
},
......@@ -186,7 +137,7 @@ static struct ctl_table ipc_kern_table[] = {
.data = &init_ipc_ns.msg_ctlmnb,
.maxlen = sizeof(init_ipc_ns.msg_ctlmnb),
.mode = 0644,
.proc_handler = proc_ipc_dointvec_minmax,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
},
......@@ -202,8 +153,8 @@ static struct ctl_table ipc_kern_table[] = {
.procname = "sem_next_id",
.data = &init_ipc_ns.ids[IPC_SEM_IDS].next_id,
.maxlen = sizeof(init_ipc_ns.ids[IPC_SEM_IDS].next_id),
.mode = 0666,
.proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore,
.mode = 0444,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
},
......@@ -211,8 +162,8 @@ static struct ctl_table ipc_kern_table[] = {
.procname = "msg_next_id",
.data = &init_ipc_ns.ids[IPC_MSG_IDS].next_id,
.maxlen = sizeof(init_ipc_ns.ids[IPC_MSG_IDS].next_id),
.mode = 0666,
.proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore,
.mode = 0444,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
},
......@@ -220,8 +171,8 @@ static struct ctl_table ipc_kern_table[] = {
.procname = "shm_next_id",
.data = &init_ipc_ns.ids[IPC_SHM_IDS].next_id,
.maxlen = sizeof(init_ipc_ns.ids[IPC_SHM_IDS].next_id),
.mode = 0666,
.proc_handler = proc_ipc_dointvec_minmax_checkpoint_restore,
.mode = 0444,
.proc_handler = proc_dointvec_minmax,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_INT_MAX,
},
......@@ -229,18 +180,112 @@ static struct ctl_table ipc_kern_table[] = {
{}
};
static struct ctl_table ipc_root_table[] = {
{
.procname = "kernel",
.mode = 0555,
.child = ipc_kern_table,
},
{}
static struct ctl_table_set *set_lookup(struct ctl_table_root *root)
{
return &current->nsproxy->ipc_ns->ipc_set;
}
/*
 * Return non-zero when @set is the ipc sysctl set of the calling task's ipc
 * namespace, zero otherwise.
 */
static int set_is_seen(struct ctl_table_set *set)
{
	struct ctl_table_set *own_set = &current->nsproxy->ipc_ns->ipc_set;

	return own_set == set;
}
/*
 * ctl_table_root permissions callback for the ipc sysctls.
 *
 * Returns @table->mode, except that with CONFIG_CHECKPOINT_RESTORE the three
 * next_id entries of the calling task's ipc namespace are reported as 0666
 * when the caller passes checkpoint_restore_ns_capable() for the user
 * namespace that owns that ipc namespace. @head is not used.
 */
static int ipc_permissions(struct ctl_table_header *head, struct ctl_table *table)
{
#ifdef CONFIG_CHECKPOINT_RESTORE
	struct ipc_namespace *cur_ns = current->nsproxy->ipc_ns;
	void *target = table->data;
	bool is_next_id;

	/* Only the *_next_id knobs get the relaxed mode. */
	is_next_id = target == &cur_ns->ids[IPC_SEM_IDS].next_id ||
		     target == &cur_ns->ids[IPC_MSG_IDS].next_id ||
		     target == &cur_ns->ids[IPC_SHM_IDS].next_id;

	if (is_next_id && checkpoint_restore_ns_capable(cur_ns->user_ns))
		return 0666;
#endif
	return table->mode;
}
/*
 * Sysctl root passed to setup_sysctl_set() for each namespace's ipc_set:
 * lookups resolve to the calling task's ipc namespace (set_lookup) and entry
 * modes are computed by ipc_permissions.
 */
static struct ctl_table_root set_root = {
.lookup = set_lookup,
.permissions = ipc_permissions,
};
bool setup_ipc_sysctls(struct ipc_namespace *ns)
{
struct ctl_table *tbl;
setup_sysctl_set(&ns->ipc_set, &set_root, set_is_seen);
tbl = kmemdup(ipc_sysctls, sizeof(ipc_sysctls), GFP_KERNEL);
if (tbl) {
int i;
for (i = 0; i < ARRAY_SIZE(ipc_sysctls); i++) {
if (tbl[i].data == &init_ipc_ns.shm_ctlmax)
tbl[i].data = &ns->shm_ctlmax;
else if (tbl[i].data == &init_ipc_ns.shm_ctlall)
tbl[i].data = &ns->shm_ctlall;
else if (tbl[i].data == &init_ipc_ns.shm_ctlmni)
tbl[i].data = &ns->shm_ctlmni;
else if (tbl[i].data == &init_ipc_ns.shm_rmid_forced)
tbl[i].data = &ns->shm_rmid_forced;
else if (tbl[i].data == &init_ipc_ns.msg_ctlmax)
tbl[i].data = &ns->msg_ctlmax;
else if (tbl[i].data == &init_ipc_ns.msg_ctlmni)
tbl[i].data = &ns->msg_ctlmni;
else if (tbl[i].data == &init_ipc_ns.msg_ctlmnb)
tbl[i].data = &ns->msg_ctlmnb;
else if (tbl[i].data == &init_ipc_ns.sem_ctls)
tbl[i].data = &ns->sem_ctls;
#ifdef CONFIG_CHECKPOINT_RESTORE
else if (tbl[i].data == &init_ipc_ns.ids[IPC_SEM_IDS].next_id)
tbl[i].data = &ns->ids[IPC_SEM_IDS].next_id;
else if (tbl[i].data == &init_ipc_ns.ids[IPC_MSG_IDS].next_id)
tbl[i].data = &ns->ids[IPC_MSG_IDS].next_id;
else if (tbl[i].data == &init_ipc_ns.ids[IPC_SHM_IDS].next_id)
tbl[i].data = &ns->ids[IPC_SHM_IDS].next_id;
#endif
else
tbl[i].data = NULL;
}
ns->ipc_sysctls = __register_sysctl_table(&ns->ipc_set, "kernel", tbl);
}
if (!ns->ipc_sysctls) {
kfree(tbl);
retire_sysctl_set(&ns->ipc_set);
return false;
}
return true;
}
void retire_ipc_sysctls(struct ipc_namespace *ns)
{
struct ctl_table *tbl;
tbl = ns->ipc_sysctls->ctl_table_arg;
unregister_sysctl_table(ns->ipc_sysctls);
retire_sysctl_set(&ns->ipc_set);
kfree(tbl);
}
static int __init ipc_sysctl_init(void)
{
register_sysctl_table(ipc_root_table);
if (!setup_ipc_sysctls(&init_ipc_ns)) {
pr_warn("ipc sysctl registration failed\n");
return -ENOMEM;
}
return 0;
}
......
......@@ -9,39 +9,9 @@
#include <linux/ipc_namespace.h>
#include <linux/sysctl.h>
#ifdef CONFIG_PROC_SYSCTL
/*
 * Rebase a sysctl entry's data pointer from init_ipc_ns onto the calling
 * task's ipc namespace.
 *
 * @table->data is expected to point at a field inside init_ipc_ns; the
 * returned pointer has the same byte offset inside current's ipc namespace.
 */
static void *get_mq(struct ctl_table *table)
{
	struct ipc_namespace *cur_ns = current->nsproxy->ipc_ns;
	char *field = table->data;

	/* Keep the field offset, swap the base from init_ipc_ns to cur_ns. */
	return (field - (char *)&init_ipc_ns) + (char *)cur_ns;
}
/*
 * proc_dointvec() wrapper that works on a stack copy of @table whose data
 * pointer has been redirected by get_mq(); @table itself is not modified.
 * Returns whatever proc_dointvec() returns.
 */
static int proc_mq_dointvec(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table ns_table = *table;

	ns_table.data = get_mq(table);

	return proc_dointvec(&ns_table, write, buffer, lenp, ppos);
}
/*
 * proc_dointvec_minmax() wrapper that works on a stack copy of @table whose
 * data pointer has been redirected by get_mq(); @table itself is not
 * modified. Returns whatever proc_dointvec_minmax() returns.
 */
static int proc_mq_dointvec_minmax(struct ctl_table *table, int write,
		void *buffer, size_t *lenp, loff_t *ppos)
{
	struct ctl_table ns_table = *table;

	ns_table.data = get_mq(table);

	return proc_dointvec_minmax(&ns_table, write, buffer, lenp, ppos);
}
#else
#define proc_mq_dointvec NULL
#define proc_mq_dointvec_minmax NULL
#endif
#include <linux/stat.h>
#include <linux/capability.h>
#include <linux/slab.h>
static int msg_max_limit_min = MIN_MSGMAX;
static int msg_max_limit_max = HARD_MSGMAX;
......@@ -55,14 +25,14 @@ static struct ctl_table mq_sysctls[] = {
.data = &init_ipc_ns.mq_queues_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_mq_dointvec,
.proc_handler = proc_dointvec,
},
{
.procname = "msg_max",
.data = &init_ipc_ns.mq_msg_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_mq_dointvec_minmax,
.proc_handler = proc_dointvec_minmax,
.extra1 = &msg_max_limit_min,
.extra2 = &msg_max_limit_max,
},
......@@ -71,7 +41,7 @@ static struct ctl_table mq_sysctls[] = {
.data = &init_ipc_ns.mq_msgsize_max,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_mq_dointvec_minmax,
.proc_handler = proc_dointvec_minmax,
.extra1 = &msg_maxsize_limit_min,
.extra2 = &msg_maxsize_limit_max,
},
......@@ -80,7 +50,7 @@ static struct ctl_table mq_sysctls[] = {
.data = &init_ipc_ns.mq_msg_default,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_mq_dointvec_minmax,
.proc_handler = proc_dointvec_minmax,
.extra1 = &msg_max_limit_min,
.extra2 = &msg_max_limit_max,
},
......@@ -89,32 +59,73 @@ static struct ctl_table mq_sysctls[] = {
.data = &init_ipc_ns.mq_msgsize_default,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_mq_dointvec_minmax,
.proc_handler = proc_dointvec_minmax,
.extra1 = &msg_maxsize_limit_min,
.extra2 = &msg_maxsize_limit_max,
},
{}
};
static struct ctl_table mq_sysctl_dir[] = {
{
.procname = "mqueue",
.mode = 0555,
.child = mq_sysctls,
},
{}
};
static struct ctl_table_set *set_lookup(struct ctl_table_root *root)
{
return &current->nsproxy->ipc_ns->mq_set;
}
static struct ctl_table mq_sysctl_root[] = {
{
.procname = "fs",
.mode = 0555,
.child = mq_sysctl_dir,
},
{}
/*
 * Return non-zero when @set is the mqueue sysctl set of the calling task's
 * ipc namespace, zero otherwise.
 */
static int set_is_seen(struct ctl_table_set *set)
{
	struct ctl_table_set *own_set = &current->nsproxy->ipc_ns->mq_set;

	return own_set == set;
}
/*
 * Sysctl root passed to setup_sysctl_set() for each namespace's mq_set:
 * lookups resolve to the calling task's ipc namespace (set_lookup). No
 * permissions callback is set here.
 */
static struct ctl_table_root set_root = {
.lookup = set_lookup,
};
struct ctl_table_header *mq_register_sysctl_table(void)
bool setup_mq_sysctls(struct ipc_namespace *ns)
{
return register_sysctl_table(mq_sysctl_root);
struct ctl_table *tbl;
setup_sysctl_set(&ns->mq_set, &set_root, set_is_seen);
tbl = kmemdup(mq_sysctls, sizeof(mq_sysctls), GFP_KERNEL);
if (tbl) {
int i;
for (i = 0; i < ARRAY_SIZE(mq_sysctls); i++) {
if (tbl[i].data == &init_ipc_ns.mq_queues_max)
tbl[i].data = &ns->mq_queues_max;
else if (tbl[i].data == &init_ipc_ns.mq_msg_max)
tbl[i].data = &ns->mq_msg_max;
else if (tbl[i].data == &init_ipc_ns.mq_msgsize_max)
tbl[i].data = &ns->mq_msgsize_max;
else if (tbl[i].data == &init_ipc_ns.mq_msg_default)
tbl[i].data = &ns->mq_msg_default;
else if (tbl[i].data == &init_ipc_ns.mq_msgsize_default)
tbl[i].data = &ns->mq_msgsize_default;
else
tbl[i].data = NULL;
}
ns->mq_sysctls = __register_sysctl_table(&ns->mq_set, "fs/mqueue", tbl);
}
if (!ns->mq_sysctls) {
kfree(tbl);
retire_sysctl_set(&ns->mq_set);
return false;
}
return true;
}
void retire_mq_sysctls(struct ipc_namespace *ns)
{
struct ctl_table *tbl;
tbl = ns->mq_sysctls->ctl_table_arg;
unregister_sysctl_table(ns->mq_sysctls);
retire_sysctl_set(&ns->mq_set);
kfree(tbl);
}
......@@ -164,8 +164,6 @@ static void remove_notification(struct mqueue_inode_info *info);
static struct kmem_cache *mqueue_inode_cachep;
static struct ctl_table_header *mq_sysctl_table;
static inline struct mqueue_inode_info *MQUEUE_I(struct inode *inode)
{
return container_of(inode, struct mqueue_inode_info, vfs_inode);
......@@ -1727,8 +1725,10 @@ static int __init init_mqueue_fs(void)
if (mqueue_inode_cachep == NULL)
return -ENOMEM;
/* ignore failures - they are not fatal */
mq_sysctl_table = mq_register_sysctl_table();
if (!setup_mq_sysctls(&init_ipc_ns)) {
pr_warn("sysctl registration failed\n");
return -ENOMEM;
}
error = register_filesystem(&mqueue_fs_type);
if (error)
......@@ -1745,8 +1745,6 @@ static int __init init_mqueue_fs(void)
out_filesystem:
unregister_filesystem(&mqueue_fs_type);
out_sysctl:
if (mq_sysctl_table)
unregister_sysctl_table(mq_sysctl_table);
kmem_cache_destroy(mqueue_inode_cachep);
return error;
}
......
......@@ -59,6 +59,13 @@ static struct ipc_namespace *create_ipc_ns(struct user_namespace *user_ns,
if (err)
goto fail_put;
err = -ENOMEM;
if (!setup_mq_sysctls(ns))
goto fail_put;
if (!setup_ipc_sysctls(ns))
goto fail_put;
sem_init_ns(ns);
msg_init_ns(ns);
shm_init_ns(ns);
......@@ -125,6 +132,9 @@ static void free_ipc_ns(struct ipc_namespace *ns)
msg_exit_ns(ns);
shm_exit_ns(ns);
retire_mq_sysctls(ns);
retire_ipc_sysctls(ns);
dec_ipc_namespaces(ns->ucounts);
put_user_ns(ns->user_ns);
ns_free_inum(&ns->ns);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册