From b34a6b1da371ed8af1221459a18c67970f7e3d53 Mon Sep 17 00:00:00 2001 From: Vasiliy Kulikov Date: Tue, 26 Jul 2011 16:08:48 -0700 Subject: ipc: introduce shm_rmid_forced sysctl Add support for the shm_rmid_forced sysctl. If set to 1, all shared memory objects in the current ipc namespace will be automatically forced to use IPC_RMID. The POSIX way of handling shmem allows one to create shm objects and call shmdt(), leaving the shm object associated with no process, thus consuming memory not counted via rlimits. With shm_rmid_forced=1 the shared memory object is counted at least for one process, so the OOM killer may effectively kill the fat process holding the shared memory. It obviously breaks POSIX - some programs relying on the feature would stop working. So set shm_rmid_forced=1 only if you're sure nobody uses "orphaned" memory. Use shm_rmid_forced=0 by default for compatibility reasons. The feature was previously implemented in -ow as a configure option. [akpm@linux-foundation.org: fix documentation, per Randy] [akpm@linux-foundation.org: fix warning] [akpm@linux-foundation.org: readability/conventionality tweaks] [akpm@linux-foundation.org: fix shm_rmid_forced/shm_forced_rmid confusion, use standard comment layout] Signed-off-by: Vasiliy Kulikov Cc: Randy Dunlap Cc: "Eric W. Biederman" Cc: "Serge E. 
Hallyn" Cc: Daniel Lezcano Cc: Oleg Nesterov Cc: Tejun Heo Cc: Ingo Molnar Cc: Alan Cox Cc: Solar Designer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- ipc/ipc_sysctl.c | 36 +++++++++++++++++++++ ipc/shm.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 129 insertions(+), 4 deletions(-) (limited to 'ipc') diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index 56410faa4550..00fba2bab87d 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -31,12 +31,37 @@ static int proc_ipc_dointvec(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; + memcpy(&ipc_table, table, sizeof(ipc_table)); ipc_table.data = get_ipc(table); return proc_dointvec(&ipc_table, write, buffer, lenp, ppos); } +static int proc_ipc_dointvec_minmax(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ctl_table ipc_table; + + memcpy(&ipc_table, table, sizeof(ipc_table)); + ipc_table.data = get_ipc(table); + + return proc_dointvec_minmax(&ipc_table, write, buffer, lenp, ppos); +} + +static int proc_ipc_dointvec_minmax_orphans(ctl_table *table, int write, + void __user *buffer, size_t *lenp, loff_t *ppos) +{ + struct ipc_namespace *ns = current->nsproxy->ipc_ns; + int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); + + if (err < 0) + return err; + if (ns->shm_rmid_forced) + shm_destroy_orphaned(ns); + return err; +} + static int proc_ipc_callback_dointvec(ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos) { @@ -125,6 +150,8 @@ static int proc_ipcauto_dointvec_minmax(ctl_table *table, int write, #else #define proc_ipc_doulongvec_minmax NULL #define proc_ipc_dointvec NULL +#define proc_ipc_dointvec_minmax NULL +#define proc_ipc_dointvec_minmax_orphans NULL #define proc_ipc_callback_dointvec NULL #define proc_ipcauto_dointvec_minmax NULL #endif @@ -154,6 +181,15 @@ static struct ctl_table ipc_kern_table[] = { .mode = 
0644, .proc_handler = proc_ipc_dointvec, }, + { + .procname = "shm_rmid_forced", + .data = &init_ipc_ns.shm_rmid_forced, + .maxlen = sizeof(init_ipc_ns.shm_rmid_forced), + .mode = 0644, + .proc_handler = proc_ipc_dointvec_minmax_orphans, + .extra1 = &zero, + .extra2 = &one, + }, { .procname = "msgmax", .data = &init_ipc_ns.msg_ctlmax, diff --git a/ipc/shm.c b/ipc/shm.c index 27884adb1a90..3f5b14365f33 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -74,6 +74,7 @@ void shm_init_ns(struct ipc_namespace *ns) ns->shm_ctlmax = SHMMAX; ns->shm_ctlall = SHMALL; ns->shm_ctlmni = SHMMNI; + ns->shm_rmid_forced = 0; ns->shm_tot = 0; ipc_init_ids(&shm_ids(ns)); } @@ -186,6 +187,23 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) ipc_rcu_putref(shp); } +/* + * shm_may_destroy - identifies whether shm segment should be destroyed now + * + * Returns true if and only if there are no active users of the segment and + * one of the following is true: + * + * 1) shmctl(id, IPC_RMID, NULL) was called for this shp + * + * 2) sysctl kernel.shm_rmid_forced is set to 1. + */ +static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) +{ + return (shp->shm_nattch == 0) && + (ns->shm_rmid_forced || + (shp->shm_perm.mode & SHM_DEST)); +} + /* * remove the attach descriptor vma. * free memory for segment if it is marked destroyed. 
@@ -206,11 +224,83 @@ static void shm_close(struct vm_area_struct *vma) shp->shm_lprid = task_tgid_vnr(current); shp->shm_dtim = get_seconds(); shp->shm_nattch--; - if(shp->shm_nattch == 0 && - shp->shm_perm.mode & SHM_DEST) + if (shm_may_destroy(ns, shp)) + shm_destroy(ns, shp); + else + shm_unlock(shp); + up_write(&shm_ids(ns).rw_mutex); +} + +static int shm_try_destroy_current(int id, void *p, void *data) +{ + struct ipc_namespace *ns = data; + struct shmid_kernel *shp = shm_lock(ns, id); + + if (IS_ERR(shp)) + return 0; + + if (shp->shm_cprid != task_tgid_vnr(current)) { + shm_unlock(shp); + return 0; + } + + if (shm_may_destroy(ns, shp)) + shm_destroy(ns, shp); + else + shm_unlock(shp); + return 0; +} + +static int shm_try_destroy_orphaned(int id, void *p, void *data) +{ + struct ipc_namespace *ns = data; + struct shmid_kernel *shp = shm_lock(ns, id); + struct task_struct *task; + + if (IS_ERR(shp)) + return 0; + + /* + * We want to destroy segments without users and with already + * exit'ed originating process. + * + * XXX: the originating process may exist in another pid namespace. 
+ */ + task = find_task_by_vpid(shp->shm_cprid); + if (task != NULL) { + shm_unlock(shp); + return 0; + } + + if (shm_may_destroy(ns, shp)) shm_destroy(ns, shp); else shm_unlock(shp); + return 0; +} + +void shm_destroy_orphaned(struct ipc_namespace *ns) +{ + down_write(&shm_ids(ns).rw_mutex); + idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_orphaned, ns); + up_write(&shm_ids(ns).rw_mutex); +} + + +void exit_shm(struct task_struct *task) +{ + struct nsproxy *nsp = task->nsproxy; + struct ipc_namespace *ns; + + if (!nsp) + return; + ns = nsp->ipc_ns; + if (!ns || !ns->shm_rmid_forced) + return; + + /* Destroy all already created segments, but not mapped yet */ + down_write(&shm_ids(ns).rw_mutex); + idr_for_each(&shm_ids(ns).ipcs_idr, &shm_try_destroy_current, ns); up_write(&shm_ids(ns).rw_mutex); } @@ -950,8 +1040,7 @@ out_nattch: shp = shm_lock(ns, shmid); BUG_ON(IS_ERR(shp)); shp->shm_nattch--; - if(shp->shm_nattch == 0 && - shp->shm_perm.mode & SHM_DEST) + if (shm_may_destroy(ns, shp)) shm_destroy(ns, shp); else shm_unlock(shp); -- cgit v1.2.3-70-g09d2