diff options
Diffstat (limited to 'ipc/shm.c')
| -rw-r--r-- | ipc/shm.c | 189 | 
1 files changed, 143 insertions, 46 deletions
| diff --git a/ipc/shm.c b/ipc/shm.c index 4942bdd65748..b3048ebd5c31 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -62,9 +62,18 @@ struct shmid_kernel /* private to the kernel */  	struct pid		*shm_lprid;  	struct ucounts		*mlock_ucounts; -	/* The task created the shm object.  NULL if the task is dead. */ +	/* +	 * The task created the shm object, for +	 * task_lock(shp->shm_creator) +	 */  	struct task_struct	*shm_creator; -	struct list_head	shm_clist;	/* list by creator */ + +	/* +	 * List by creator. task_lock(->shm_creator) required for read/write. +	 * If list_empty(), then the creator is dead already. +	 */ +	struct list_head	shm_clist; +	struct ipc_namespace	*ns;  } __randomize_layout;  /* shm_mode upper byte flags */ @@ -115,6 +124,7 @@ static void do_shm_rmid(struct ipc_namespace *ns, struct kern_ipc_perm *ipcp)  	struct shmid_kernel *shp;  	shp = container_of(ipcp, struct shmid_kernel, shm_perm); +	WARN_ON(ns != shp->ns);  	if (shp->shm_nattch) {  		shp->shm_perm.mode |= SHM_DEST; @@ -225,10 +235,43 @@ static void shm_rcu_free(struct rcu_head *head)  	kfree(shp);  } -static inline void shm_rmid(struct ipc_namespace *ns, struct shmid_kernel *s) +/* + * It has to be called with shp locked. + * It must be called before ipc_rmid() + */ +static inline void shm_clist_rm(struct shmid_kernel *shp)  { -	list_del(&s->shm_clist); -	ipc_rmid(&shm_ids(ns), &s->shm_perm); +	struct task_struct *creator; + +	/* ensure that shm_creator does not disappear */ +	rcu_read_lock(); + +	/* +	 * A concurrent exit_shm may do a list_del_init() as well. +	 * Just do nothing if exit_shm already did the work +	 */ +	if (!list_empty(&shp->shm_clist)) { +		/* +		 * shp->shm_creator is guaranteed to be valid *only* +		 * if shp->shm_clist is not empty. +		 */ +		creator = shp->shm_creator; + +		task_lock(creator); +		/* +		 * list_del_init() is a nop if the entry was already removed +		 * from the list. 
+		 */ +		list_del_init(&shp->shm_clist); +		task_unlock(creator); +	} +	rcu_read_unlock(); +} + +static inline void shm_rmid(struct shmid_kernel *s) +{ +	shm_clist_rm(s); +	ipc_rmid(&shm_ids(s->ns), &s->shm_perm);  } @@ -283,7 +326,7 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)  	shm_file = shp->shm_file;  	shp->shm_file = NULL;  	ns->shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; -	shm_rmid(ns, shp); +	shm_rmid(shp);  	shm_unlock(shp);  	if (!is_file_hugepages(shm_file))  		shmem_lock(shm_file, 0, shp->mlock_ucounts); @@ -303,10 +346,10 @@ static void shm_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp)   *   * 2) sysctl kernel.shm_rmid_forced is set to 1.   */ -static bool shm_may_destroy(struct ipc_namespace *ns, struct shmid_kernel *shp) +static bool shm_may_destroy(struct shmid_kernel *shp)  {  	return (shp->shm_nattch == 0) && -	       (ns->shm_rmid_forced || +	       (shp->ns->shm_rmid_forced ||  		(shp->shm_perm.mode & SHM_DEST));  } @@ -337,7 +380,7 @@ static void shm_close(struct vm_area_struct *vma)  	ipc_update_pid(&shp->shm_lprid, task_tgid(current));  	shp->shm_dtim = ktime_get_real_seconds();  	shp->shm_nattch--; -	if (shm_may_destroy(ns, shp)) +	if (shm_may_destroy(shp))  		shm_destroy(ns, shp);  	else  		shm_unlock(shp); @@ -358,10 +401,10 @@ static int shm_try_destroy_orphaned(int id, void *p, void *data)  	 *  	 * As shp->* are changed under rwsem, it's safe to skip shp locking.  	 
*/ -	if (shp->shm_creator != NULL) +	if (!list_empty(&shp->shm_clist))  		return 0; -	if (shm_may_destroy(ns, shp)) { +	if (shm_may_destroy(shp)) {  		shm_lock_by_ptr(shp);  		shm_destroy(ns, shp);  	} @@ -379,48 +422,97 @@ void shm_destroy_orphaned(struct ipc_namespace *ns)  /* Locking assumes this will only be called with task == current */  void exit_shm(struct task_struct *task)  { -	struct ipc_namespace *ns = task->nsproxy->ipc_ns; -	struct shmid_kernel *shp, *n; +	for (;;) { +		struct shmid_kernel *shp; +		struct ipc_namespace *ns; -	if (list_empty(&task->sysvshm.shm_clist)) -		return; +		task_lock(task); + +		if (list_empty(&task->sysvshm.shm_clist)) { +			task_unlock(task); +			break; +		} + +		shp = list_first_entry(&task->sysvshm.shm_clist, struct shmid_kernel, +				shm_clist); -	/* -	 * If kernel.shm_rmid_forced is not set then only keep track of -	 * which shmids are orphaned, so that a later set of the sysctl -	 * can clean them up. -	 */ -	if (!ns->shm_rmid_forced) { -		down_read(&shm_ids(ns).rwsem); -		list_for_each_entry(shp, &task->sysvshm.shm_clist, shm_clist) -			shp->shm_creator = NULL;  		/* -		 * Only under read lock but we are only called on current -		 * so no entry on the list will be shared. +		 * 1) Get pointer to the ipc namespace. It is worth to say +		 * that this pointer is guaranteed to be valid because +		 * shp lifetime is always shorter than namespace lifetime +		 * in which shp lives. +		 * We taken task_lock it means that shp won't be freed.  		 */ -		list_del(&task->sysvshm.shm_clist); -		up_read(&shm_ids(ns).rwsem); -		return; -	} +		ns = shp->ns; -	/* -	 * Destroy all already created segments, that were not yet mapped, -	 * and mark any mapped as orphan to cover the sysctl toggling. -	 * Destroy is skipped if shm_may_destroy() returns false. 
-	 */ -	down_write(&shm_ids(ns).rwsem); -	list_for_each_entry_safe(shp, n, &task->sysvshm.shm_clist, shm_clist) { -		shp->shm_creator = NULL; +		/* +		 * 2) If kernel.shm_rmid_forced is not set then only keep track of +		 * which shmids are orphaned, so that a later set of the sysctl +		 * can clean them up. +		 */ +		if (!ns->shm_rmid_forced) +			goto unlink_continue; -		if (shm_may_destroy(ns, shp)) { -			shm_lock_by_ptr(shp); -			shm_destroy(ns, shp); +		/* +		 * 3) get a reference to the namespace. +		 *    The refcount could be already 0. If it is 0, then +		 *    the shm objects will be free by free_ipc_work(). +		 */ +		ns = get_ipc_ns_not_zero(ns); +		if (!ns) { +unlink_continue: +			list_del_init(&shp->shm_clist); +			task_unlock(task); +			continue;  		} -	} -	/* Remove the list head from any segments still attached. */ -	list_del(&task->sysvshm.shm_clist); -	up_write(&shm_ids(ns).rwsem); +		/* +		 * 4) get a reference to shp. +		 *   This cannot fail: shm_clist_rm() is called before +		 *   ipc_rmid(), thus the refcount cannot be 0. +		 */ +		WARN_ON(!ipc_rcu_getref(&shp->shm_perm)); + +		/* +		 * 5) unlink the shm segment from the list of segments +		 *    created by current. +		 *    This must be done last. After unlinking, +		 *    only the refcounts obtained above prevent IPC_RMID +		 *    from destroying the segment or the namespace. +		 */ +		list_del_init(&shp->shm_clist); + +		task_unlock(task); + +		/* +		 * 6) we have all references +		 *    Thus lock & if needed destroy shp. +		 */ +		down_write(&shm_ids(ns).rwsem); +		shm_lock_by_ptr(shp); +		/* +		 * rcu_read_lock was implicitly taken in shm_lock_by_ptr, it's +		 * safe to call ipc_rcu_putref here +		 */ +		ipc_rcu_putref(&shp->shm_perm, shm_rcu_free); + +		if (ipc_valid_object(&shp->shm_perm)) { +			if (shm_may_destroy(shp)) +				shm_destroy(ns, shp); +			else +				shm_unlock(shp); +		} else { +			/* +			 * Someone else deleted the shp from namespace +			 * idr/kht while we have waited. 
+			 * Just unlock and continue. +			 */ +			shm_unlock(shp); +		} + +		up_write(&shm_ids(ns).rwsem); +		put_ipc_ns(ns); /* paired with get_ipc_ns_not_zero */ +	}  }  static vm_fault_t shm_fault(struct vm_fault *vmf) @@ -676,7 +768,11 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params)  	if (error < 0)  		goto no_id; +	shp->ns = ns; + +	task_lock(current);  	list_add(&shp->shm_clist, &current->sysvshm.shm_clist); +	task_unlock(current);  	/*  	 * shmid gets reported as "inode#" in /proc/pid/maps. @@ -1567,7 +1663,8 @@ out_nattch:  	down_write(&shm_ids(ns).rwsem);  	shp = shm_lock(ns, shmid);  	shp->shm_nattch--; -	if (shm_may_destroy(ns, shp)) + +	if (shm_may_destroy(shp))  		shm_destroy(ns, shp);  	else  		shm_unlock(shp); | 
