diff --git a/Documentation/sysctl/kernel.txt b/Documentation/sysctl/kernel.txt index 1c7fb0a94e28a84cf6a8c2b56c82babe9fa34d64..704e474a93df8539e093aa3569bb889faa1adb70 100644 --- a/Documentation/sysctl/kernel.txt +++ b/Documentation/sysctl/kernel.txt @@ -61,6 +61,7 @@ show up in /proc/sys/kernel: - rtsig-nr - sem - sg-big-buff [ generic SCSI device (sg) ] +- shm_rmid_forced - shmall - shmmax [ sysv ipc ] - shmmni @@ -518,6 +519,27 @@ kernel. This value defaults to SHMMAX. ============================================================== +shm_rmid_forced: + +Linux lets you set resource limits, including how much memory one +process can consume, via setrlimit(2). Unfortunately, shared memory +segments are allowed to exist without association with any process, and +thus might not be counted against any resource limits. If enabled, +shared memory segments are automatically destroyed when their attach +count becomes zero after a detach or a process termination. It will +also destroy segments that were created, but never attached to, on exit +from the process. The only use left for IPC_RMID is to immediately +destroy an unattached segment. Of course, this breaks the way things are +defined, so some applications might stop working. Note that this +feature will do you no good unless you also configure your resource +limits (in particular, RLIMIT_AS and RLIMIT_NPROC). Most systems don't +need this. + +Note that if you change this from 0 to 1, already created segments +without users and with a dead originative process will be destroyed. + +============================================================== + softlockup_thresh: This value can be used to lower the softlockup tolerance threshold. The diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index a6d1655f9607de872a10fecaa744e38412c8edb2..8a297a5e794cc8e51c22351098b80a35ce43ef09 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -44,6 +44,11 @@ struct ipc_namespace { size_t shm_ctlall; int shm_ctlmni; int shm_tot; + /* + * Defines whether IPC_RMID is forced for _all_ shm segments regardless + * of shmctl() + */ + int shm_rmid_forced; struct notifier_block ipcns_nb; @@ -72,6 +77,7 @@ extern int register_ipcns_notifier(struct ipc_namespace *); extern int cond_register_ipcns_notifier(struct ipc_namespace *); extern void unregister_ipcns_notifier(struct ipc_namespace *); extern int ipcns_notify(unsigned long); +extern void shm_destroy_orphaned(struct ipc_namespace *ns); #else /* CONFIG_SYSVIPC */ static inline int register_ipcns_notifier(struct ipc_namespace *ns) { return 0; } @@ -79,6 +85,7 @@ static inline int cond_register_ipcns_notifier(struct ipc_namespace *ns) { return 0; } static inline void unregister_ipcns_notifier(struct ipc_namespace *ns) { } static inline int ipcns_notify(unsigned long l) { return 0; } +static inline void shm_destroy_orphaned(struct ipc_namespace *ns) {} #endif /* CONFIG_SYSVIPC */ #ifdef CONFIG_POSIX_MQUEUE diff --git a/include/linux/shm.h b/include/linux/shm.h index eca6235a46c0b1b89d951a90216b132c6a21cfb7..7d27ffde01902a7a29687c5ef36b607d290c694d 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -106,6 +106,7 @@ struct shmid_kernel /* private to the kernel */ #ifdef CONFIG_SYSVIPC long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr); extern int is_file_shm_hugepages(struct file *file); +extern void exit_shm(struct task_struct *task); #else static inline long do_shmat(int shmid, char __user *shmaddr, int shmflg, unsigned long *addr) @@ -116,6 +117,9 @@ static inline int is_file_shm_hugepages(struct file *file) { return 0; } +static inline void exit_shm(struct task_struct *task) +{ +} #endif #endif /* __KERNEL__ */ diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 56410faa4550ee65d521aad623e0f52d5a522368..00fba2bab87d8019739e4645f98e21559688dcbe 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -31,12 +31,37 @@ static int proc_ipc_dointvec(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; + memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); } +static int proc_ipc_dointvec_minmax(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table ipc_table; + + memcpy(&ipc_table, table, sizeof(ipc_table)); + ipc_table.data = get_ipc(table); + + return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); +} + +static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ipc_namespace *ns = current->nsproxy->ipc_ns; + int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); + + if (err < 0) + return err; + if (ns->shm_rmid_forced) + shm_destroy_orphaned(ns); + return err; +} + static int proc_ipc_callback_dointvec(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -125,6 +150,8 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, #else #define proc_ipc_doulongvec_minmax NULL #define proc_ipc_dointvec NULL +#define proc_ipc_dointvec_minmax NULL +#define proc_ipc_dointvec_minmax_orphans NULL #define proc_ipc_callback_dointvec NULL #define proc_ipcauto_dointvec_minmax NULL #endif @@ -154,6 +181,15 @@ static struct ctl_table ipc_kern_table[] = { .mode = 0644, .proc_handler = proc_ipc_dointvec, }, + { + .procname = "shm_rmid_forced", + .data = &init_ipc_ns.shm_rmid_forced, + .maxlen = sizeof(init_ipc_ns.shm_rmid_forced), + .mode = 0644, + .proc_handler = proc_ipc_dointvec_minmax_orphans, + .extra1 = &zero, + .extra2 = &one, + }, { .procname = "msgmax", .data = &init_ipc_ns.msg_ctlmax, diff --git a/ipc/shm.c b/ipc/shm.c index 27884adb1a9072b6bea5caabdee25cf5b14a8465..3f5b14365f33e54408c5e89d3b15aacbc2c1435b 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -74,6 +74,7 @@ void shm_init_ns(struct ipc_namespace *ns) ns->shm_ctlmax = SHMMAX; ns->shm_ctlall = SHMALL; ns->shm_ctlmni = SHMMNI; + ns->shm_rmid_forced = 0; ns->shm_tot = 0; ipc_init_ids(&shm_ids(ns)); } @@ -186,6 +187,23 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) ipc_rcu_putref(shp); } +/* + * shm_may_destroy - identifies whether shm segment should be destroyed now + * + * Returns true if and only if there are no active users of the segment and + * one of the following is true: + * + * 1) shmctl(id, IPC_RMID, NULL) was called for this shp + * + * 2) sysctl kernel.shm_rmid_forced is set to 1. + */ +static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) +{ + return (shp->shm_nattch == 0) && + (ns->shm_rmid_forced || + (shp->shm_perm.mode & SHM_DEST)); +} + /* * remove the attach descriptor vma. * free memory for segment if it is marked destroyed. @@ -206,11 +224,83 @@ static void shm_close(struct vm_area_struct *vma) shp->shm_lprid = task_tgid_vnr(current); shp->shm_dtim = get_seconds(); shp->shm_nattch--; - if(shp->shm_nattch == 0 && - shp->shm_perm.mode & SHM_DEST) + if (shm_may_destroy(ns, shp)) + shm_destroy(ns, shp); + else + shm_unlock(shp); + up_write(&shm_ids(ns).rw_mutex); +} + +static int shm_try_destroy_current(int id, void *p, void *data) +{ + struct ipc_namespace *ns = data; + struct shmid_kernel *shp = shm_lock(ns, id); + + if (IS_ERR(shp)) + return 0; + + if (shp->shm_cprid != task_tgid_vnr(current)) { + shm_unlock(shp); + return 0; + } + + if (shm_may_destroy(ns, shp)) + shm_destroy(ns, shp); + else + shm_unlock(shp); + return 0; +} + +static int shm_try_destroy_orphaned(int id, void *p, void *data) +{ + struct ipc_namespace *ns = data; + struct shmid_kernel *shp = shm_lock(ns, id); + struct task_struct *task; + + if (IS_ERR(shp)) + return 0; + + /* + * We want to destroy segments without users and with already + * exit'ed originating process. + * + * XXX: the originating process may exist in another pid namespace. + */ + task = find_task_by_vpid(shp->shm_cprid); + if (task != NULL) { + shm_unlock(shp); + return 0; + } + + if (shm_may_destroy(ns, shp)) shm_destroy(ns, shp); else shm_unlock(shp); + return 0; +} + +void shm_destroy_orphaned(struct ipc_namespace *ns) +{ + down_write(&shm_ids(ns).rw_mutex); + idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); + up_write(&shm_ids(ns).rw_mutex); +} + + +void exit_shm(struct task_struct *task) +{ + struct nsproxy *nsp = task->nsproxy; + struct ipc_namespace *ns; + + if (!nsp) + return; + ns = nsp->ipc_ns; + if (!ns || !ns->shm_rmid_forced) + return; + + /* Destroy all already created segments, but not mapped yet */ + down_write(&shm_ids(ns).rw_mutex); + idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); up_write(&shm_ids(ns).rw_mutex); } @@ -950,8 +1040,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr) shp = shm_lock(ns, shmid); BUG_ON(IS_ERR(shp)); shp->shm_nattch--; - if(shp->shm_nattch == 0 && - shp->shm_perm.mode & SHM_DEST) + if (shm_may_destroy(ns, shp)) shm_destroy(ns, shp); else shm_unlock(shp); diff --git a/kernel/exit.c b/kernel/exit.c index 9ee58bb9e60fb0602755445c672c067db8f72ea4..2913b3509d4288026990cc087894e69eace6c564 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -980,6 +980,7 @@ NORET_TYPE void do_exit(long code) trace_sched_process_exit(tsk); exit_sem(tsk); + exit_shm(tsk); exit_files(tsk); exit_fs(tsk); check_stack_usage();