From a2f2945a99057c7d44043465906c6bb63c3368a0 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Tue, 11 Nov 2008 22:02:54 +1100 Subject: The oomkiller calculations make decisions based on capabilities. Since these are not security decisions and LSMs should not record if they fall the request they should use the new has_capability_noaudit() interface so the denials will not be recorded. Signed-off-by: Eric Paris Acked-by: Stephen Smalley Signed-off-by: James Morris --- mm/oom_kill.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'mm') diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 64e5b4bcd964..34a458aa7997 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -129,8 +129,8 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) * Superuser processes are usually more important, so we make it * less likely that we kill those. */ - if (has_capability(p, CAP_SYS_ADMIN) || - has_capability(p, CAP_SYS_RESOURCE)) + if (has_capability_noaudit(p, CAP_SYS_ADMIN) || + has_capability_noaudit(p, CAP_SYS_RESOURCE)) points /= 4; /* @@ -139,7 +139,7 @@ unsigned long badness(struct task_struct *p, unsigned long uptime) * tend to only have this flag set on applications they think * of as important. */ - if (has_capability(p, CAP_SYS_RAWIO)) + if (has_capability_noaudit(p, CAP_SYS_RAWIO)) points /= 4; /* -- cgit v1.2.3-70-g09d2 From 76aac0e9a17742e60d408be1a706e9aaad370891 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:12 +1100 Subject: CRED: Wrap task credential accesses in the core kernel Wrap access to task credentials so that they can be separated more easily from the task_struct during the introduction of COW creds. Change most current->(|e|s|fs)[ug]id to current_(|e|s|fs)[ug]id(). Change some task->e?[ug]id to task_e?[ug]id(). In some places it makes more sense to use RCU directly rather than a convenient wrapper; these will be addressed by later patches. Signed-off-by: David Howells Reviewed-by: James Morris Acked-by: Serge Hallyn Cc: Al Viro Cc: linux-audit@redhat.com Cc: containers@lists.linux-foundation.org Cc: linux-mm@kvack.org Signed-off-by: James Morris --- kernel/acct.c | 7 +++---- kernel/auditsc.c | 6 ++++-- kernel/cgroup.c | 9 +++++---- kernel/futex.c | 8 +++++--- kernel/futex_compat.c | 3 ++- kernel/ptrace.c | 15 +++++++++------ kernel/sched.c | 11 +++++++---- kernel/signal.c | 15 +++++++++------ kernel/sys.c | 16 ++++++++-------- kernel/sysctl.c | 2 +- kernel/timer.c | 8 ++++---- kernel/user_namespace.c | 2 +- mm/mempolicy.c | 7 +++++-- mm/migrate.c | 7 +++++-- mm/shmem.c | 8 ++++---- 15 files changed, 72 insertions(+), 52 deletions(-) (limited to 'mm') diff --git a/kernel/acct.c b/kernel/acct.c index f6006a60df5d..d57b7cbb98b6 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -530,15 +530,14 @@ static void do_acct_process(struct bsd_acct_struct *acct, do_div(elapsed, AHZ); ac.ac_btime = get_seconds() - elapsed; /* we really need to bite the bullet and change layout */ - ac.ac_uid = current->uid; - ac.ac_gid = current->gid; + current_uid_gid(&ac.ac_uid, &ac.ac_gid); #if ACCT_VERSION==2 ac.ac_ahz = AHZ; #endif #if ACCT_VERSION==1 || ACCT_VERSION==2 /* backward-compatible 16 bit fields */ - ac.ac_uid16 = current->uid; - ac.ac_gid16 = current->gid; + ac.ac_uid16 = ac.ac_uid; + ac.ac_gid16 = ac.ac_gid; #endif #if ACCT_VERSION==3 ac.ac_pid = task_tgid_nr_ns(current, ns); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index cef34235b362..9c7e47ae4576 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2628,7 +2628,8 @@ void audit_core_dumps(long signr) { struct audit_buffer *ab; u32 sid; - uid_t auid = audit_get_loginuid(current); + uid_t auid = audit_get_loginuid(current), uid; + gid_t gid; unsigned int sessionid = audit_get_sessionid(current); if (!audit_enabled) @@ -2638,8 +2639,9 @@ void audit_core_dumps(long signr) return; ab = audit_log_start(NULL, GFP_KERNEL, AUDIT_ANOM_ABEND); + current_uid_gid(&uid, &gid); audit_log_format(ab, "auid=%u uid=%u gid=%u ses=%u", - auid, current->uid, current->gid, sessionid); + auid, uid, gid, sessionid); security_task_getsecid(current, &sid); if (sid) { char *ctx = NULL; diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 35eebd5510c2..78f9b310c4f3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -571,8 +571,8 @@ static struct inode *cgroup_new_inode(mode_t mode, struct super_block *sb) if (inode) { inode->i_mode = mode; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); inode->i_blocks = 0; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; inode->i_mapping->backing_dev_info = &cgroup_backing_dev_info; @@ -1279,6 +1279,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) { struct task_struct *tsk; + uid_t euid; int ret; if (pid) { @@ -1291,8 +1292,8 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) get_task_struct(tsk); rcu_read_unlock(); - if ((current->euid) && (current->euid != tsk->uid) - && (current->euid != tsk->suid)) { + euid = current_euid(); + if (euid && euid != tsk->uid && euid != tsk->suid) { put_task_struct(tsk); return -EACCES; } diff --git a/kernel/futex.c b/kernel/futex.c index 8af10027514b..e06962132aaf 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -439,10 +439,11 @@ static void free_pi_state(struct futex_pi_state *pi_state) static struct task_struct * futex_find_get_task(pid_t pid) { struct task_struct *p; + uid_t euid = current_euid(); rcu_read_lock(); p = find_task_by_vpid(pid); - if (!p || ((current->euid != p->euid) && (current->euid != p->uid))) + if (!p || (euid != p->euid && euid != p->uid)) p = ERR_PTR(-ESRCH); else get_task_struct(p); @@ -1829,6 +1830,7 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, { struct robust_list_head __user *head; unsigned long ret; + uid_t euid = current_euid(); if (!futex_cmpxchg_enabled) return -ENOSYS; @@ -1844,8 +1846,8 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if ((current->euid != p->euid) && (current->euid != p->uid) && - !capable(CAP_SYS_PTRACE)) + if (euid != p->euid && euid != p->uid && + !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->robust_list; rcu_read_unlock(); diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 04ac3a9e42cf..3254d4e41e88 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -135,6 +135,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, { struct compat_robust_list_head __user *head; unsigned long ret; + uid_t euid = current_euid(); if (!futex_cmpxchg_enabled) return -ENOSYS; @@ -150,7 +151,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if ((current->euid != p->euid) && (current->euid != p->uid) && + if (euid != p->euid && euid != p->uid && !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->compat_robust_list; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 1e68e4c39e2c..937f6b5b2008 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -123,16 +123,19 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) * because setting up the necessary parent/child relationship * or halting the specified task is impossible. */ + uid_t uid; + gid_t gid; int dumpable = 0; /* Don't let security modules deny introspection */ if (task == current) return 0; - if (((current->uid != task->euid) || - (current->uid != task->suid) || - (current->uid != task->uid) || - (current->gid != task->egid) || - (current->gid != task->sgid) || - (current->gid != task->gid)) && !capable(CAP_SYS_PTRACE)) + current_uid_gid(&uid, &gid); + if ((uid != task->euid || + uid != task->suid || + uid != task->uid || + gid != task->egid || + gid != task->sgid || + gid != task->gid) && !capable(CAP_SYS_PTRACE)) return -EPERM; smp_rmb(); if (task->mm) diff --git a/kernel/sched.c b/kernel/sched.c index e8819bc6f462..c3b8b1fcde0d 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -5128,6 +5128,7 @@ static int __sched_setscheduler(struct task_struct *p, int policy, unsigned long flags; const struct sched_class *prev_class = p->sched_class; struct rq *rq; + uid_t euid; /* may grab non-irq protected spin_locks */ BUG_ON(in_interrupt()); @@ -5180,8 +5181,9 @@ recheck: return -EPERM; /* can't change other user's priorities */ - if ((current->euid != p->euid) && - (current->euid != p->uid)) + euid = current_euid(); + if (euid != p->euid && + euid != p->uid) return -EPERM; } @@ -5392,6 +5394,7 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) cpumask_t cpus_allowed; cpumask_t new_mask = *in_mask; struct task_struct *p; + uid_t euid; int retval; get_online_cpus(); @@ -5412,9 +5415,9 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) get_task_struct(p); read_unlock(&tasklist_lock); + euid = current_euid(); retval = -EPERM; - if ((current->euid != p->euid) && (current->euid != p->uid) && - !capable(CAP_SYS_NICE)) + if (euid != p->euid && euid != p->uid && !capable(CAP_SYS_NICE)) goto out_unlock; retval = security_task_setscheduler(p, 0, NULL); diff --git a/kernel/signal.c b/kernel/signal.c index 4530fc654455..167b535fe1a9 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -567,6 +567,7 @@ static int check_kill_permission(int sig, struct siginfo *info, struct task_struct *t) { struct pid *sid; + uid_t uid, euid; int error; if (!valid_signal(sig)) @@ -579,8 +580,10 @@ static int check_kill_permission(int sig, struct siginfo *info, if (error) return error; - if ((current->euid ^ t->suid) && (current->euid ^ t->uid) && - (current->uid ^ t->suid) && (current->uid ^ t->uid) && + uid = current_uid(); + euid = current_euid(); + if ((euid ^ t->suid) && (euid ^ t->uid) && + (uid ^ t->suid) && (uid ^ t->uid) && !capable(CAP_KILL)) { switch (sig) { case SIGCONT: @@ -844,7 +847,7 @@ static int send_signal(int sig, struct siginfo *info, struct task_struct *t, q->info.si_errno = 0; q->info.si_code = SI_USER; q->info.si_pid = task_pid_vnr(current); - q->info.si_uid = current->uid; + q->info.si_uid = current_uid(); break; case (unsigned long) SEND_SIG_PRIV: q->info.si_signo = sig; @@ -1598,7 +1601,7 @@ void ptrace_notify(int exit_code) info.si_signo = SIGTRAP; info.si_code = exit_code; info.si_pid = task_pid_vnr(current); - info.si_uid = current->uid; + info.si_uid = current_uid(); /* Let the debugger run. */ spin_lock_irq(¤t->sighand->siglock); @@ -2211,7 +2214,7 @@ sys_kill(pid_t pid, int sig) info.si_errno = 0; info.si_code = SI_USER; info.si_pid = task_tgid_vnr(current); - info.si_uid = current->uid; + info.si_uid = current_uid(); return kill_something_info(sig, &info, pid); } @@ -2228,7 +2231,7 @@ static int do_tkill(pid_t tgid, pid_t pid, int sig) info.si_errno = 0; info.si_code = SI_TKILL; info.si_pid = task_tgid_vnr(current); - info.si_uid = current->uid; + info.si_uid = current_uid(); rcu_read_lock(); p = find_task_by_vpid(pid); diff --git a/kernel/sys.c b/kernel/sys.c index 31deba8f7d16..ed5c29c748ac 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -114,10 +114,10 @@ void (*pm_power_off_prepare)(void); static int set_one_prio(struct task_struct *p, int niceval, int error) { + uid_t euid = current_euid(); int no_nice; - if (p->uid != current->euid && - p->euid != current->euid && !capable(CAP_SYS_NICE)) { + if (p->uid != euid && p->euid != euid && !capable(CAP_SYS_NICE)) { error = -EPERM; goto out; } @@ -176,16 +176,16 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) case PRIO_USER: user = current->user; if (!who) - who = current->uid; + who = current_uid(); else - if ((who != current->uid) && !(user = find_user(who))) + if (who != current_uid() && !(user = find_user(who))) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) if (p->uid == who) error = set_one_prio(p, niceval, error); while_each_thread(g, p); - if (who != current->uid) + if (who != current_uid()) free_uid(user); /* For find_user() */ break; } @@ -238,9 +238,9 @@ asmlinkage long sys_getpriority(int which, int who) case PRIO_USER: user = current->user; if (!who) - who = current->uid; + who = current_uid(); else - if ((who != current->uid) && !(user = find_user(who))) + if (who != current_uid() && !(user = find_user(who))) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) @@ -250,7 +250,7 @@ asmlinkage long sys_getpriority(int which, int who) retval = niceval; } while_each_thread(g, p); - if (who != current->uid) + if (who != current_uid()) free_uid(user); /* for find_user() */ break; } diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 9d048fa2d902..511031381c33 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1641,7 +1641,7 @@ out: static int test_perm(int mode, int op) { - if (!current->euid) + if (!current_euid()) mode >>= 6; else if (in_egroup_p(0)) mode >>= 3; diff --git a/kernel/timer.c b/kernel/timer.c index 56becf373c58..b54e4646cee7 100644 --- a/kernel/timer.c +++ b/kernel/timer.c @@ -1123,25 +1123,25 @@ asmlinkage long sys_getppid(void) asmlinkage long sys_getuid(void) { /* Only we change this so SMP safe */ - return current->uid; + return current_uid(); } asmlinkage long sys_geteuid(void) { /* Only we change this so SMP safe */ - return current->euid; + return current_euid(); } asmlinkage long sys_getgid(void) { /* Only we change this so SMP safe */ - return current->gid; + return current_gid(); } asmlinkage long sys_getegid(void) { /* Only we change this so SMP safe */ - return current->egid; + return current_egid(); } #endif diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 532858fa5b88..f82730adea00 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -38,7 +38,7 @@ static struct user_namespace *clone_user_ns(struct user_namespace *old_ns) } /* Reset current->user with a new one */ - new_user = alloc_uid(ns, current->uid); + new_user = alloc_uid(ns, current_uid()); if (!new_user) { free_uid(ns->root_user); kfree(ns); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 36f42573a335..07a96474077d 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1115,6 +1115,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, nodemask_t old; nodemask_t new; nodemask_t task_nodes; + uid_t uid, euid; int err; err = get_nodes(&old, old_nodes, maxnode); @@ -1144,8 +1145,10 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, * capabilities, superuser privileges or the same * userid as the target process. */ - if ((current->euid != task->suid) && (current->euid != task->uid) && - (current->uid != task->suid) && (current->uid != task->uid) && + uid = current_uid(); + euid = current_euid(); + if (euid != task->suid && euid != task->uid && + uid != task->suid && uid != task->uid && !capable(CAP_SYS_NICE)) { err = -EPERM; goto out; diff --git a/mm/migrate.c b/mm/migrate.c index 6602941bfab0..6263c24c4afe 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1048,6 +1048,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, struct task_struct *task; struct mm_struct *mm; int err; + uid_t uid, euid; /* Check flags */ if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) @@ -1075,8 +1076,10 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, * capabilities, superuser privileges or the same * userid as the target process. */ - if ((current->euid != task->suid) && (current->euid != task->uid) && - (current->uid != task->suid) && (current->uid != task->uid) && + uid = current_uid(); + euid = current_euid(); + if (euid != task->suid && euid != task->uid && + uid != task->suid && uid != task->uid && !capable(CAP_SYS_NICE)) { err = -EPERM; goto out; diff --git a/mm/shmem.c b/mm/shmem.c index 0ed075215e5f..f1b0d4871f3a 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1513,8 +1513,8 @@ shmem_get_inode(struct super_block *sb, int mode, dev_t dev) inode = new_inode(sb); if (inode) { inode->i_mode = mode; - inode->i_uid = current->fsuid; - inode->i_gid = current->fsgid; + inode->i_uid = current_fsuid(); + inode->i_gid = current_fsgid(); inode->i_blocks = 0; inode->i_mapping->backing_dev_info = &shmem_backing_dev_info; inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME; @@ -2278,8 +2278,8 @@ static int shmem_fill_super(struct super_block *sb, sbinfo->max_blocks = 0; sbinfo->max_inodes = 0; sbinfo->mode = S_IRWXUGO | S_ISVTX; - sbinfo->uid = current->fsuid; - sbinfo->gid = current->fsgid; + sbinfo->uid = current_fsuid(); + sbinfo->gid = current_fsgid(); sbinfo->mpol = NULL; sb->s_fs_info = sbinfo; -- cgit v1.2.3-70-g09d2 From b6dff3ec5e116e3af6f537d4caedcad6b9e5082a Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:16 +1100 Subject: CRED: Separate task security context from task_struct Separate the task security context from task_struct. At this point, the security data is temporarily embedded in the task_struct with two pointers pointing to it. Note that the Alpha arch is altered as it refers to (E)UID and (E)GID in entry.S via asm-offsets. With comment fixes Signed-off-by: Marc Dionne Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- arch/alpha/kernel/asm-offsets.c | 11 +- arch/alpha/kernel/entry.S | 10 +- arch/ia64/ia32/sys_ia32.c | 8 +- arch/mips/kernel/kspd.c | 4 +- arch/s390/kernel/compat_linux.c | 28 ++--- drivers/connector/cn_proc.c | 8 +- fs/binfmt_elf.c | 12 +- fs/binfmt_elf_fdpic.c | 12 +- fs/exec.c | 4 +- fs/fcntl.c | 4 +- fs/file_table.c | 4 +- fs/fuse/dir.c | 12 +- fs/hugetlbfs/inode.c | 4 +- fs/ioprio.c | 12 +- fs/nfsd/auth.c | 22 ++-- fs/nfsd/nfs4recover.c | 12 +- fs/nfsd/nfsfh.c | 6 +- fs/open.c | 17 +-- fs/proc/array.c | 18 +-- fs/proc/base.c | 16 +-- fs/xfs/linux-2.6/xfs_cred.h | 6 +- fs/xfs/linux-2.6/xfs_globals.h | 2 +- fs/xfs/xfs_inode.h | 2 +- fs/xfs/xfs_vnodeops.h | 10 +- include/linux/cred.h | 155 +++++++++++++++++++---- include/linux/init_task.h | 24 ++-- include/linux/sched.h | 52 +------- include/linux/securebits.h | 2 +- ipc/mqueue.c | 2 +- ipc/shm.c | 4 +- kernel/auditsc.c | 52 ++++---- kernel/capability.c | 4 +- kernel/cgroup.c | 4 +- kernel/exit.c | 10 +- kernel/fork.c | 24 ++-- kernel/futex.c | 6 +- kernel/futex_compat.c | 5 +- kernel/ptrace.c | 19 +-- kernel/sched.c | 10 +- kernel/signal.c | 16 +-- kernel/sys.c | 266 ++++++++++++++++++++++----------------- kernel/trace/trace.c | 2 +- kernel/tsacct.c | 4 +- kernel/uid16.c | 28 ++--- kernel/user.c | 4 +- mm/mempolicy.c | 10 +- mm/migrate.c | 10 +- mm/oom_kill.c | 2 +- net/core/scm.c | 10 +- net/sunrpc/auth.c | 2 +- security/commoncap.c | 161 +++++++++++++----------- security/keys/keyctl.c | 25 ++-- security/keys/permission.c | 11 +- security/keys/process_keys.c | 98 ++++++++------- security/keys/request_key.c | 18 +-- security/keys/request_key_auth.c | 12 +- security/selinux/exports.c | 2 +- security/selinux/hooks.c | 116 ++++++++--------- security/selinux/selinuxfs.c | 2 +- security/selinux/xfrm.c | 6 +- security/smack/smack_access.c | 4 +- security/smack/smack_lsm.c | 77 ++++++------ security/smack/smackfs.c | 6 +- 63 files changed, 832 insertions(+), 677 deletions(-) (limited to 'mm') diff --git a/arch/alpha/kernel/asm-offsets.c b/arch/alpha/kernel/asm-offsets.c index 4b18cd94d59d..6ff8886e7e22 100644 --- a/arch/alpha/kernel/asm-offsets.c +++ b/arch/alpha/kernel/asm-offsets.c @@ -19,15 +19,18 @@ void foo(void) BLANK(); DEFINE(TASK_BLOCKED, offsetof(struct task_struct, blocked)); - DEFINE(TASK_UID, offsetof(struct task_struct, uid)); - DEFINE(TASK_EUID, offsetof(struct task_struct, euid)); - DEFINE(TASK_GID, offsetof(struct task_struct, gid)); - DEFINE(TASK_EGID, offsetof(struct task_struct, egid)); + DEFINE(TASK_CRED, offsetof(struct task_struct, cred)); DEFINE(TASK_REAL_PARENT, offsetof(struct task_struct, real_parent)); DEFINE(TASK_GROUP_LEADER, offsetof(struct task_struct, group_leader)); DEFINE(TASK_TGID, offsetof(struct task_struct, tgid)); BLANK(); + DEFINE(CRED_UID, offsetof(struct cred, uid)); + DEFINE(CRED_EUID, offsetof(struct cred, euid)); + DEFINE(CRED_GID, offsetof(struct cred, gid)); + DEFINE(CRED_EGID, offsetof(struct cred, egid)); + BLANK(); + DEFINE(SIZEOF_PT_REGS, sizeof(struct pt_regs)); DEFINE(PT_PTRACED, PT_PTRACED); DEFINE(CLONE_VM, CLONE_VM); diff --git a/arch/alpha/kernel/entry.S b/arch/alpha/kernel/entry.S index 5fc61e281ac7..f77345bc66a9 100644 --- a/arch/alpha/kernel/entry.S +++ b/arch/alpha/kernel/entry.S @@ -850,8 +850,9 @@ osf_getpriority: sys_getxuid: .prologue 0 ldq $2, TI_TASK($8) - ldl $0, TASK_UID($2) - ldl $1, TASK_EUID($2) + ldq $3, TASK_CRED($2) + ldl $0, CRED_UID($3) + ldl $1, CRED_EUID($3) stq $1, 80($sp) ret .end sys_getxuid @@ -862,8 +863,9 @@ sys_getxuid: sys_getxgid: .prologue 0 ldq $2, TI_TASK($8) - ldl $0, TASK_GID($2) - ldl $1, TASK_EGID($2) + ldq $3, TASK_CRED($2) + ldl $0, CRED_GID($3) + ldl $1, CRED_EGID($3) stq $1, 80($sp) ret .end sys_getxgid diff --git a/arch/ia64/ia32/sys_ia32.c b/arch/ia64/ia32/sys_ia32.c index 5e92ae00bdbb..2445a9d3488e 100644 --- a/arch/ia64/ia32/sys_ia32.c +++ b/arch/ia64/ia32/sys_ia32.c @@ -1772,20 +1772,20 @@ sys32_getgroups16 (int gidsetsize, short __user *grouplist) if (gidsetsize < 0) return -EINVAL; - get_group_info(current->group_info); - i = current->group_info->ngroups; + get_group_info(current->cred->group_info); + i = current->cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->group_info)) { + if (groups16_to_user(grouplist, current->cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->group_info); + put_group_info(current->cred->group_info); return i; } diff --git a/arch/mips/kernel/kspd.c b/arch/mips/kernel/kspd.c index b0591ae0ce56..fd6e51224034 100644 --- a/arch/mips/kernel/kspd.c +++ b/arch/mips/kernel/kspd.c @@ -174,8 +174,8 @@ static unsigned int translate_open_flags(int flags) static void sp_setfsuidgid( uid_t uid, gid_t gid) { - current->fsuid = uid; - current->fsgid = gid; + current->cred->fsuid = uid; + current->cred->fsgid = gid; key_fsuid_changed(current); key_fsgid_changed(current); diff --git a/arch/s390/kernel/compat_linux.c b/arch/s390/kernel/compat_linux.c index 4646382af34f..6cc87d8c8682 100644 --- a/arch/s390/kernel/compat_linux.c +++ b/arch/s390/kernel/compat_linux.c @@ -148,9 +148,9 @@ asmlinkage long sys32_getresuid16(u16 __user *ruid, u16 __user *euid, u16 __user { int retval; - if (!(retval = put_user(high2lowuid(current->uid), ruid)) && - !(retval = put_user(high2lowuid(current->euid), euid))) - retval = put_user(high2lowuid(current->suid), suid); + if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) && + !(retval = put_user(high2lowuid(current->cred->euid), euid))) + retval = put_user(high2lowuid(current->cred->suid), suid); return retval; } @@ -165,9 +165,9 @@ asmlinkage long sys32_getresgid16(u16 __user *rgid, u16 __user *egid, u16 __user { int retval; - if (!(retval = put_user(high2lowgid(current->gid), rgid)) && - !(retval = put_user(high2lowgid(current->egid), egid))) - retval = put_user(high2lowgid(current->sgid), sgid); + if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) && + !(retval = put_user(high2lowgid(current->cred->egid), egid))) + retval = put_user(high2lowgid(current->cred->sgid), sgid); return retval; } @@ -217,20 +217,20 @@ asmlinkage long sys32_getgroups16(int gidsetsize, u16 __user *grouplist) if (gidsetsize < 0) return -EINVAL; - get_group_info(current->group_info); - i = current->group_info->ngroups; + get_group_info(current->cred->group_info); + i = current->cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->group_info)) { + if (groups16_to_user(grouplist, current->cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->group_info); + put_group_info(current->cred->group_info); return i; } @@ -261,22 +261,22 @@ asmlinkage long sys32_setgroups16(int gidsetsize, u16 __user *grouplist) asmlinkage long sys32_getuid16(void) { - return high2lowuid(current->uid); + return high2lowuid(current->cred->uid); } asmlinkage long sys32_geteuid16(void) { - return high2lowuid(current->euid); + return high2lowuid(current->cred->euid); } asmlinkage long sys32_getgid16(void) { - return high2lowgid(current->gid); + return high2lowgid(current->cred->gid); } asmlinkage long sys32_getegid16(void) { - return high2lowgid(current->egid); + return high2lowgid(current->cred->egid); } /* diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 5c9f67f98d10..354c1ff17159 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -116,11 +116,11 @@ void proc_id_connector(struct task_struct *task, int which_id) ev->event_data.id.process_pid = task->pid; ev->event_data.id.process_tgid = task->tgid; if (which_id == PROC_EVENT_UID) { - ev->event_data.id.r.ruid = task->uid; - ev->event_data.id.e.euid = task->euid; + ev->event_data.id.r.ruid = task->cred->uid; + ev->event_data.id.e.euid = task->cred->euid; } else if (which_id == PROC_EVENT_GID) { - ev->event_data.id.r.rgid = task->gid; - ev->event_data.id.e.egid = task->egid; + ev->event_data.id.r.rgid = task->cred->gid; + ev->event_data.id.e.egid = task->cred->egid; } else return; get_seq(&msg->seq, &ev->cpu); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 8fcfa398d350..7a52477ce493 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -223,10 +223,10 @@ create_elf_tables(struct linux_binprm *bprm, struct elfhdr *exec, NEW_AUX_ENT(AT_BASE, interp_load_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec->e_entry); - NEW_AUX_ENT(AT_UID, tsk->uid); - NEW_AUX_ENT(AT_EUID, tsk->euid); - NEW_AUX_ENT(AT_GID, tsk->gid); - NEW_AUX_ENT(AT_EGID, tsk->egid); + NEW_AUX_ENT(AT_UID, tsk->cred->uid); + NEW_AUX_ENT(AT_EUID, tsk->cred->euid); + NEW_AUX_ENT(AT_GID, tsk->cred->gid); + NEW_AUX_ENT(AT_EGID, tsk->cred->egid); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_EXECFN, bprm->exec); if (k_platform) { @@ -1388,8 +1388,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_zomb = psinfo->pr_sname == 'Z'; psinfo->pr_nice = task_nice(p); psinfo->pr_flag = p->flags; - SET_UID(psinfo->pr_uid, p->uid); - SET_GID(psinfo->pr_gid, p->gid); + SET_UID(psinfo->pr_uid, p->cred->uid); + SET_GID(psinfo->pr_gid, p->cred->gid); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); return 0; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 488584c87512..9f67054c2c4e 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -623,10 +623,10 @@ static int create_elf_fdpic_tables(struct linux_binprm *bprm, NEW_AUX_ENT(AT_BASE, interp_params->elfhdr_addr); NEW_AUX_ENT(AT_FLAGS, 0); NEW_AUX_ENT(AT_ENTRY, exec_params->entry_addr); - NEW_AUX_ENT(AT_UID, (elf_addr_t) current_uid()); - NEW_AUX_ENT(AT_EUID, (elf_addr_t) current_euid()); - NEW_AUX_ENT(AT_GID, (elf_addr_t) current_gid()); - NEW_AUX_ENT(AT_EGID, (elf_addr_t) current_egid()); + NEW_AUX_ENT(AT_UID, (elf_addr_t) current->cred->uid); + NEW_AUX_ENT(AT_EUID, (elf_addr_t) current->cred->euid); + NEW_AUX_ENT(AT_GID, (elf_addr_t) current->cred->gid); + NEW_AUX_ENT(AT_EGID, (elf_addr_t) current->cred->egid); NEW_AUX_ENT(AT_SECURE, security_bprm_secureexec(bprm)); NEW_AUX_ENT(AT_EXECFN, bprm->exec); @@ -1440,8 +1440,8 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_zomb = psinfo->pr_sname == 'Z'; psinfo->pr_nice = task_nice(p); psinfo->pr_flag = p->flags; - SET_UID(psinfo->pr_uid, p->uid); - SET_GID(psinfo->pr_gid, p->gid); + SET_UID(psinfo->pr_uid, p->cred->uid); + SET_GID(psinfo->pr_gid, p->cred->gid); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); return 0; diff --git a/fs/exec.c b/fs/exec.c index 604834f3b208..31149e430a89 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1738,7 +1738,7 @@ int do_coredump(long signr, int exit_code, struct pt_regs * regs) */ if (get_dumpable(mm) == 2) { /* Setuid core dump mode */ flag = O_EXCL; /* Stop rewrite attacks */ - current->fsuid = 0; /* Dump root private */ + current->cred->fsuid = 0; /* Dump root private */ } retval = coredump_wait(exit_code, &core_state); @@ -1834,7 +1834,7 @@ fail_unlock: if (helper_argv) argv_free(helper_argv); - current->fsuid = fsuid; + current->cred->fsuid = fsuid; coredump_finish(mm); fail: return retval; diff --git a/fs/fcntl.c b/fs/fcntl.c index bf049a805e59..63964d863ad6 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -401,8 +401,8 @@ static inline int sigio_perm(struct task_struct *p, struct fown_struct *fown, int sig) { return (((fown->euid == 0) || - (fown->euid == p->suid) || (fown->euid == p->uid) || - (fown->uid == p->suid) || (fown->uid == p->uid)) && + (fown->euid == p->cred->suid) || (fown->euid == p->cred->uid) || + (fown->uid == p->cred->suid) || (fown->uid == p->cred->uid)) && !security_file_send_sigiotask(p, fown, sig)); } diff --git a/fs/file_table.c b/fs/file_table.c index 5ad0eca6eea2..3152b53cfab0 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -122,8 +122,8 @@ struct file *get_empty_filp(void) INIT_LIST_HEAD(&f->f_u.fu_list); atomic_long_set(&f->f_count, 1); rwlock_init(&f->f_owner.lock); - f->f_uid = tsk->fsuid; - f->f_gid = tsk->fsgid; + f->f_uid = tsk->cred->fsuid; + f->f_gid = tsk->cred->fsgid; eventpoll_init_file(f); /* f->f_version: 0 */ return f; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index fd03330cadeb..e97a98981862 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -872,12 +872,12 @@ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) if (fc->flags & FUSE_ALLOW_OTHER) return 1; - if (task->euid == fc->user_id && - task->suid == fc->user_id && - task->uid == fc->user_id && - task->egid == fc->group_id && - task->sgid == fc->group_id && - task->gid == fc->group_id) + if (task->cred->euid == fc->user_id && + task->cred->suid == fc->user_id && + task->cred->uid == fc->user_id && + task->cred->egid == fc->group_id && + task->cred->sgid == fc->group_id && + task->cred->gid == fc->group_id) return 1; return 0; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 08ad76c79b49..870a721b8bd2 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -958,7 +958,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size) if (!can_do_hugetlb_shm()) return ERR_PTR(-EPERM); - if (!user_shm_lock(size, current->user)) + if (!user_shm_lock(size, current->cred->user)) return ERR_PTR(-ENOMEM); root = hugetlbfs_vfsmount->mnt_root; @@ -998,7 +998,7 @@ out_inode: out_dentry: dput(dentry); out_shm_unlock: - user_shm_unlock(size, current->user); + user_shm_unlock(size, current->cred->user); return ERR_PTR(error); } diff --git a/fs/ioprio.c b/fs/ioprio.c index 68d2cd807118..bb5210af77c2 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -32,8 +32,8 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) int err; struct io_context *ioc; - if (task->uid != current_euid() && - task->uid != current_uid() && !capable(CAP_SYS_NICE)) + if (task->cred->uid != current_euid() && + task->cred->uid != current_uid() && !capable(CAP_SYS_NICE)) return -EPERM; err = security_task_setioprio(task, ioprio); @@ -123,7 +123,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) break; case IOPRIO_WHO_USER: if (!who) - user = current->user; + user = current->cred->user; else user = find_user(who); @@ -131,7 +131,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) break; do_each_thread(g, p) { - if (p->uid != who) + if (p->cred->uid != who) continue; ret = set_task_ioprio(p, ioprio); if (ret) @@ -216,7 +216,7 @@ asmlinkage long sys_ioprio_get(int which, int who) break; case IOPRIO_WHO_USER: if (!who) - user = current->user; + user = current->cred->user; else user = find_user(who); @@ -224,7 +224,7 @@ asmlinkage long sys_ioprio_get(int which, int who) break; do_each_thread(g, p) { - if (p->uid != user->uid) + if (p->cred->uid != user->uid) continue; tmpio = get_task_ioprio(p); if (tmpio < 0) diff --git a/fs/nfsd/auth.c b/fs/nfsd/auth.c index 294992e9bf69..808fc03a6fbd 100644 --- a/fs/nfsd/auth.c +++ b/fs/nfsd/auth.c @@ -27,6 +27,7 @@ int nfsexp_flags(struct svc_rqst *rqstp, struct svc_export *exp) int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) { + struct cred *act_as = current->cred ; struct svc_cred cred = rqstp->rq_cred; int i; int flags = nfsexp_flags(rqstp, exp); @@ -55,25 +56,26 @@ int nfsd_setuser(struct svc_rqst *rqstp, struct svc_export *exp) get_group_info(cred.cr_group_info); if (cred.cr_uid != (uid_t) -1) - current->fsuid = cred.cr_uid; + act_as->fsuid = cred.cr_uid; else - current->fsuid = exp->ex_anon_uid; + act_as->fsuid = exp->ex_anon_uid; if (cred.cr_gid != (gid_t) -1) - current->fsgid = cred.cr_gid; + act_as->fsgid = cred.cr_gid; else - current->fsgid = exp->ex_anon_gid; + act_as->fsgid = exp->ex_anon_gid; if (!cred.cr_group_info) return -ENOMEM; - ret = set_current_groups(cred.cr_group_info); + ret = set_groups(act_as, cred.cr_group_info); put_group_info(cred.cr_group_info); if ((cred.cr_uid)) { - current->cap_effective = - cap_drop_nfsd_set(current->cap_effective); + act_as->cap_effective = + cap_drop_nfsd_set(act_as->cap_effective); } else { - current->cap_effective = - cap_raise_nfsd_set(current->cap_effective, - current->cap_permitted); + act_as->cap_effective = + cap_raise_nfsd_set(act_as->cap_effective, + act_as->cap_permitted); } return ret; } + diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index bb93946ace22..a5e14e8695ea 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -57,17 +57,17 @@ static int rec_dir_init = 0; static void nfs4_save_user(uid_t *saveuid, gid_t *savegid) { - *saveuid = current->fsuid; - *savegid = current->fsgid; - current->fsuid = 0; - current->fsgid = 0; + *saveuid = current->cred->fsuid; + *savegid = current->cred->fsgid; + current->cred->fsuid = 0; + current->cred->fsgid = 0; } static void nfs4_reset_user(uid_t saveuid, gid_t savegid) { - current->fsuid = saveuid; - current->fsgid = savegid; + current->cred->fsuid = saveuid; + current->cred->fsgid = savegid; } static void diff --git a/fs/nfsd/nfsfh.c b/fs/nfsd/nfsfh.c index cd25d91895a1..e67cfaea0865 100644 --- a/fs/nfsd/nfsfh.c +++ b/fs/nfsd/nfsfh.c @@ -186,9 +186,9 @@ static __be32 nfsd_set_fh_dentry(struct svc_rqst *rqstp, struct svc_fh *fhp) * access control settings being in effect, we cannot * fix that case easily. */ - current->cap_effective = - cap_raise_nfsd_set(current->cap_effective, - current->cap_permitted); + current->cred->cap_effective = + cap_raise_nfsd_set(current->cred->cap_effective, + current->cred->cap_permitted); } else { error = nfsd_setuser_and_check_port(rqstp, exp); if (error) diff --git a/fs/open.c b/fs/open.c index 500cc0c54762..b1238e195e7e 100644 --- a/fs/open.c +++ b/fs/open.c @@ -425,6 +425,7 @@ out: */ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) { + struct cred *cred = current->cred; struct path path; struct inode *inode; int old_fsuid, old_fsgid; @@ -434,18 +435,18 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) if (mode & ~S_IRWXO) /* where's F_OK, X_OK, W_OK, R_OK? */ return -EINVAL; - old_fsuid = current->fsuid; - old_fsgid = current->fsgid; + old_fsuid = cred->fsuid; + old_fsgid = cred->fsgid; - current->fsuid = current->uid; - current->fsgid = current->gid; + cred->fsuid = cred->uid; + cred->fsgid = cred->gid; if (!issecure(SECURE_NO_SETUID_FIXUP)) { /* Clear the capabilities if we switch to a non-root user */ - if (current->uid) + if (current->cred->uid) old_cap = cap_set_effective(__cap_empty_set); else - old_cap = cap_set_effective(current->cap_permitted); + old_cap = cap_set_effective(cred->cap_permitted); } res = user_path_at(dfd, filename, LOOKUP_FOLLOW, &path); @@ -484,8 +485,8 @@ asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode) out_path_release: path_put(&path); out: - current->fsuid = old_fsuid; - current->fsgid = old_fsgid; + cred->fsuid = old_fsuid; + cred->fsgid = old_fsgid; if (!issecure(SECURE_NO_SETUID_FIXUP)) cap_set_effective(old_cap); diff --git a/fs/proc/array.c b/fs/proc/array.c index 6af7fba7abb1..62fe9b2009b6 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -182,8 +182,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, task_tgid_nr_ns(p, ns), pid_nr_ns(pid, ns), ppid, tpid, - p->uid, p->euid, p->suid, p->fsuid, - p->gid, p->egid, p->sgid, p->fsgid); + p->cred->uid, p->cred->euid, p->cred->suid, p->cred->fsuid, + p->cred->gid, p->cred->egid, p->cred->sgid, p->cred->fsgid); task_lock(p); if (p->files) @@ -194,7 +194,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, fdt ? fdt->max_fds : 0); rcu_read_unlock(); - group_info = p->group_info; + group_info = p->cred->group_info; get_group_info(group_info); task_unlock(p); @@ -262,7 +262,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) blocked = p->blocked; collect_sigign_sigcatch(p, &ignored, &caught); num_threads = atomic_read(&p->signal->count); - qsize = atomic_read(&p->user->sigpending); + qsize = atomic_read(&p->cred->user->sigpending); qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; unlock_task_sighand(p, &flags); } @@ -293,10 +293,12 @@ static void render_cap_t(struct seq_file *m, const char *header, static inline void task_cap(struct seq_file *m, struct task_struct *p) { - render_cap_t(m, "CapInh:\t", &p->cap_inheritable); - render_cap_t(m, "CapPrm:\t", &p->cap_permitted); - render_cap_t(m, "CapEff:\t", &p->cap_effective); - render_cap_t(m, "CapBnd:\t", &p->cap_bset); + struct cred *cred = p->cred; + + render_cap_t(m, "CapInh:\t", &cred->cap_inheritable); + render_cap_t(m, "CapPrm:\t", &cred->cap_permitted); + render_cap_t(m, "CapEff:\t", &cred->cap_effective); + render_cap_t(m, "CapBnd:\t", &cred->cap_bset); } static inline void task_context_switch_counts(struct seq_file *m, diff --git a/fs/proc/base.c b/fs/proc/base.c index 486cf3fe7139..6862b360c36c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1428,8 +1428,8 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st inode->i_uid = 0; inode->i_gid = 0; if (task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + inode->i_uid = task->cred->euid; + inode->i_gid = task->cred->egid; } security_task_to_inode(task, inode); @@ -1454,8 +1454,8 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - stat->uid = task->euid; - stat->gid = task->egid; + stat->uid = task->cred->euid; + stat->gid = task->cred->egid; } } rcu_read_unlock(); @@ -1486,8 +1486,8 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + inode->i_uid = task->cred->euid; + inode->i_gid = task->cred->egid; } else { inode->i_uid = 0; inode->i_gid = 0; @@ -1658,8 +1658,8 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) rcu_read_unlock(); put_files_struct(files); if (task_dumpable(task)) { - inode->i_uid = task->euid; - inode->i_gid = task->egid; + inode->i_uid = task->cred->euid; + inode->i_gid = task->cred->egid; } else { inode->i_uid = 0; inode->i_gid = 0; diff --git a/fs/xfs/linux-2.6/xfs_cred.h b/fs/xfs/linux-2.6/xfs_cred.h index 293043a5573a..8c022cd0ad67 100644 --- a/fs/xfs/linux-2.6/xfs_cred.h +++ b/fs/xfs/linux-2.6/xfs_cred.h @@ -23,11 +23,9 @@ /* * Credentials */ -typedef struct cred { - /* EMPTY */ -} cred_t; +typedef const struct cred cred_t; -extern struct cred *sys_cred; +extern cred_t *sys_cred; /* this is a hack.. (assumes sys_cred is the only cred_t in the system) */ static inline int capable_cred(cred_t *cr, int cid) diff --git a/fs/xfs/linux-2.6/xfs_globals.h b/fs/xfs/linux-2.6/xfs_globals.h index 2770b0085ee8..6eda8a3eb6f1 100644 --- a/fs/xfs/linux-2.6/xfs_globals.h +++ b/fs/xfs/linux-2.6/xfs_globals.h @@ -19,6 +19,6 @@ #define __XFS_GLOBALS_H__ extern uint64_t xfs_panic_mask; /* set to cause more panics */ -extern struct cred *sys_cred; +extern cred_t *sys_cred; #endif /* __XFS_GLOBALS_H__ */ diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 1420c49674d7..6be310d41daf 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -497,7 +497,7 @@ int xfs_iread(struct xfs_mount *, struct xfs_trans *, xfs_ino_t, xfs_inode_t **, xfs_daddr_t, uint); int xfs_iread_extents(struct xfs_trans *, xfs_inode_t *, int); int xfs_ialloc(struct xfs_trans *, xfs_inode_t *, mode_t, - xfs_nlink_t, xfs_dev_t, struct cred *, xfs_prid_t, + xfs_nlink_t, xfs_dev_t, cred_t *, xfs_prid_t, int, struct xfs_buf **, boolean_t *, xfs_inode_t **); void xfs_dinode_from_disk(struct xfs_icdinode *, struct xfs_dinode_core *); diff --git a/fs/xfs/xfs_vnodeops.h b/fs/xfs/xfs_vnodeops.h index e932a96bec54..7b0c2ab88333 100644 --- a/fs/xfs/xfs_vnodeops.h +++ b/fs/xfs/xfs_vnodeops.h @@ -16,7 +16,7 @@ struct xfs_iomap; int xfs_open(struct xfs_inode *ip); int xfs_setattr(struct xfs_inode *ip, struct iattr *vap, int flags, - struct cred *credp); + cred_t *credp); #define XFS_ATTR_DMI 0x01 /* invocation from a DMI function */ #define XFS_ATTR_NONBLOCK 0x02 /* return EAGAIN if operation would block */ #define XFS_ATTR_NOLOCK 0x04 /* Don't grab any conflicting locks */ @@ -28,24 +28,24 @@ int xfs_inactive(struct xfs_inode *ip); int xfs_lookup(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode **ipp, struct xfs_name *ci_name); int xfs_create(struct xfs_inode *dp, struct xfs_name *name, mode_t mode, - xfs_dev_t rdev, struct xfs_inode **ipp, struct cred *credp); + xfs_dev_t rdev, struct xfs_inode **ipp, cred_t *credp); int xfs_remove(struct xfs_inode *dp, struct xfs_name *name, struct xfs_inode *ip); int xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip, struct xfs_name *target_name); int xfs_mkdir(struct xfs_inode *dp, struct xfs_name *dir_name, - mode_t mode, struct xfs_inode **ipp, struct cred *credp); + mode_t mode, struct xfs_inode **ipp, cred_t *credp); int xfs_readdir(struct xfs_inode *dp, void *dirent, size_t bufsize, xfs_off_t *offset, filldir_t filldir); int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name, const char *target_path, mode_t mode, struct xfs_inode **ipp, - struct cred *credp); + cred_t *credp); int xfs_inode_flush(struct xfs_inode *ip, int flags); int xfs_set_dmattrs(struct xfs_inode *ip, u_int evmask, u_int16_t state); int xfs_reclaim(struct xfs_inode *ip); int xfs_change_file_space(struct xfs_inode *ip, int cmd, xfs_flock64_t *bf, xfs_off_t offset, - struct cred *credp, int attr_flags); + cred_t *credp, int attr_flags); int xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name, struct xfs_inode *src_ip, struct xfs_inode *target_dp, struct xfs_name *target_name, struct xfs_inode *target_ip); diff --git a/include/linux/cred.h b/include/linux/cred.h index b69222cc1fd2..3e65587a72e5 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -12,39 +12,150 @@ #ifndef _LINUX_CRED_H #define _LINUX_CRED_H -#define get_current_user() (get_uid(current->user)) - -#define task_uid(task) ((task)->uid) -#define task_gid(task) ((task)->gid) -#define task_euid(task) ((task)->euid) -#define task_egid(task) ((task)->egid) - -#define current_uid() (current->uid) -#define current_gid() (current->gid) -#define current_euid() (current->euid) -#define current_egid() (current->egid) -#define current_suid() (current->suid) -#define current_sgid() (current->sgid) -#define current_fsuid() (current->fsuid) -#define current_fsgid() (current->fsgid) -#define current_cap() (current->cap_effective) +#include +#include +#include + +struct user_struct; +struct cred; + +/* + * COW Supplementary groups list + */ +#define NGROUPS_SMALL 32 +#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) + +struct group_info { + atomic_t usage; + int ngroups; + int nblocks; + gid_t small_block[NGROUPS_SMALL]; + gid_t *blocks[0]; +}; + +/** + * get_group_info - Get a reference to a group info structure + * @group_info: The group info to reference + * + * This must be called with the owning task locked (via task_lock()) when task + * != current. The reason being that the vast majority of callers are looking + * at current->group_info, which can not be changed except by the current task. + * Changing current->group_info requires the task lock, too. + */ +#define get_group_info(group_info) \ +do { \ + atomic_inc(&(group_info)->usage); \ +} while (0) + +/** + * put_group_info - Release a reference to a group info structure + * @group_info: The group info to release + */ +#define put_group_info(group_info) \ +do { \ + if (atomic_dec_and_test(&(group_info)->usage)) \ + groups_free(group_info); \ +} while (0) + +extern struct group_info *groups_alloc(int); +extern void groups_free(struct group_info *); +extern int set_current_groups(struct group_info *); +extern int set_groups(struct cred *, struct group_info *); +extern int groups_search(struct group_info *, gid_t); + +/* access the groups "array" with this macro */ +#define GROUP_AT(gi, i) \ + ((gi)->blocks[(i) / NGROUPS_PER_BLOCK][(i) % NGROUPS_PER_BLOCK]) + +extern int in_group_p(gid_t); +extern int in_egroup_p(gid_t); + +/* + * The security context of a task + * + * The parts of the context break down into two categories: + * + * (1) The objective context of a task. These parts are used when some other + * task is attempting to affect this one. + * + * (2) The subjective context. These details are used when the task is acting + * upon another object, be that a file, a task, a key or whatever. + * + * Note that some members of this structure belong to both categories - the + * LSM security pointer for instance. + * + * A task has two security pointers. task->real_cred points to the objective + * context that defines that task's actual details. The objective part of this + * context is used whenever that task is acted upon. + * + * task->cred points to the subjective context that defines the details of how + * that task is going to act upon another object. This may be overridden + * temporarily to point to another security context, but normally points to the + * same context as task->real_cred. + */ +struct cred { + atomic_t usage; + uid_t uid; /* real UID of the task */ + gid_t gid; /* real GID of the task */ + uid_t suid; /* saved UID of the task */ + gid_t sgid; /* saved GID of the task */ + uid_t euid; /* effective UID of the task */ + gid_t egid; /* effective GID of the task */ + uid_t fsuid; /* UID for VFS ops */ + gid_t fsgid; /* GID for VFS ops */ + unsigned securebits; /* SUID-less security management */ + kernel_cap_t cap_inheritable; /* caps our children can inherit */ + kernel_cap_t cap_permitted; /* caps we're permitted */ + kernel_cap_t cap_effective; /* caps we can actually use */ + kernel_cap_t cap_bset; /* capability bounding set */ +#ifdef CONFIG_KEYS + unsigned char jit_keyring; /* default keyring to attach requested + * keys to */ + struct key *thread_keyring; /* keyring private to this thread */ + struct key *request_key_auth; /* assumed request_key authority */ +#endif +#ifdef CONFIG_SECURITY + void *security; /* subjective LSM security */ +#endif + struct user_struct *user; /* real user ID subscription */ + struct group_info *group_info; /* supplementary groups for euid/fsgid */ + struct rcu_head rcu; /* RCU deletion hook */ + spinlock_t lock; /* lock for pointer changes */ +}; + +#define get_current_user() (get_uid(current->cred->user)) + +#define task_uid(task) ((task)->cred->uid) +#define task_gid(task) ((task)->cred->gid) +#define task_euid(task) ((task)->cred->euid) +#define task_egid(task) ((task)->cred->egid) + +#define current_uid() (current->cred->uid) +#define current_gid() (current->cred->gid) +#define current_euid() (current->cred->euid) +#define current_egid() (current->cred->egid) +#define current_suid() (current->cred->suid) +#define current_sgid() (current->cred->sgid) +#define current_fsuid() (current->cred->fsuid) +#define current_fsgid() (current->cred->fsgid) +#define current_cap() (current->cred->cap_effective) #define current_uid_gid(_uid, _gid) \ do { \ - *(_uid) = current->uid; \ - *(_gid) = current->gid; \ + *(_uid) = current->cred->uid; \ + *(_gid) = current->cred->gid; \ } while(0) #define current_euid_egid(_uid, _gid) \ do { \ - *(_uid) = current->euid; \ - *(_gid) = current->egid; \ + *(_uid) = current->cred->euid; \ + *(_gid) = current->cred->egid; \ } while(0) #define current_fsuid_fsgid(_uid, _gid) \ do { \ - *(_uid) = current->fsuid; \ - *(_gid) = current->fsgid; \ + *(_uid) = current->cred->fsuid; \ + *(_gid) = current->cred->fsgid; \ } while(0) #endif /* _LINUX_CRED_H */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 23fd8909b9e5..9de41ccd67b5 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -113,6 +113,21 @@ extern struct group_info init_groups; # define CAP_INIT_BSET CAP_INIT_EFF_SET #endif +extern struct cred init_cred; + +#define INIT_CRED(p) \ +{ \ + .usage = ATOMIC_INIT(3), \ + .securebits = SECUREBITS_DEFAULT, \ + .cap_inheritable = CAP_INIT_INH_SET, \ + .cap_permitted = CAP_FULL_SET, \ + .cap_effective = CAP_INIT_EFF_SET, \ + .cap_bset = CAP_INIT_BSET, \ + .user = INIT_USER, \ + .group_info = &init_groups, \ + .lock = __SPIN_LOCK_UNLOCKED(p.lock), \ +} + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -147,13 +162,8 @@ extern struct group_info init_groups; .children = LIST_HEAD_INIT(tsk.children), \ .sibling = LIST_HEAD_INIT(tsk.sibling), \ .group_leader = &tsk, \ - .group_info = &init_groups, \ - .cap_effective = CAP_INIT_EFF_SET, \ - .cap_inheritable = CAP_INIT_INH_SET, \ - .cap_permitted = CAP_FULL_SET, \ - .cap_bset = CAP_INIT_BSET, \ - .securebits = SECUREBITS_DEFAULT, \ - .user = INIT_USER, \ + .__temp_cred = INIT_CRED(tsk.__temp_cred), \ + .cred = &tsk.__temp_cred, \ .comm = "swapper", \ .thread = INIT_THREAD, \ .fs = &init_fs, \ diff --git a/include/linux/sched.h b/include/linux/sched.h index b483f39a7112..c8b92502354d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -660,6 +660,7 @@ extern struct user_struct *find_user(uid_t); extern struct user_struct root_user; #define INIT_USER (&root_user) + struct backing_dev_info; struct reclaim_state; @@ -883,38 +884,7 @@ partition_sched_domains(int ndoms_new, cpumask_t *doms_new, #endif /* !CONFIG_SMP */ struct io_context; /* See blkdev.h */ -#define NGROUPS_SMALL 32 -#define NGROUPS_PER_BLOCK ((unsigned int)(PAGE_SIZE / sizeof(gid_t))) -struct group_info { - int ngroups; - atomic_t usage; - gid_t small_block[NGROUPS_SMALL]; - int nblocks; - gid_t *blocks[0]; -}; - -/* - * get_group_info() must be called with the owning task locked (via task_lock()) - * when task != current. The reason being that the vast majority of callers are - * looking at current->group_info, which can not be changed except by the - * current task. Changing current->group_info requires the task lock, too. - */ -#define get_group_info(group_info) do { \ - atomic_inc(&(group_info)->usage); \ -} while (0) -#define put_group_info(group_info) do { \ - if (atomic_dec_and_test(&(group_info)->usage)) \ - groups_free(group_info); \ -} while (0) - -extern struct group_info *groups_alloc(int gidsetsize); -extern void groups_free(struct group_info *group_info); -extern int set_current_groups(struct group_info *group_info); -extern int groups_search(struct group_info *group_info, gid_t grp); -/* access the groups "array" with this macro */ -#define GROUP_AT(gi, i) \ - ((gi)->blocks[(i)/NGROUPS_PER_BLOCK][(i)%NGROUPS_PER_BLOCK]) #ifdef ARCH_HAS_PREFETCH_SWITCH_STACK extern void prefetch_stack(struct task_struct *t); @@ -1181,17 +1151,9 @@ struct task_struct { struct list_head cpu_timers[3]; /* process credentials */ - uid_t uid,euid,suid,fsuid; - gid_t gid,egid,sgid,fsgid; - struct group_info *group_info; - kernel_cap_t cap_effective, cap_inheritable, cap_permitted, cap_bset; - struct user_struct *user; - unsigned securebits; -#ifdef CONFIG_KEYS - unsigned char jit_keyring; /* default keyring to attach requested keys to */ - struct key *request_key_auth; /* assumed request_key authority */ - struct key *thread_keyring; /* keyring private to this thread */ -#endif + struct cred __temp_cred __deprecated; /* temporary credentials to be removed */ + struct cred *cred; /* actual/objective task credentials */ + char comm[TASK_COMM_LEN]; /* executable name excluding path - access with [gs]et_task_comm (which lock it with task_lock()) @@ -1228,9 +1190,6 @@ struct task_struct { int (*notifier)(void *priv); void *notifier_data; sigset_t *notifier_mask; -#ifdef CONFIG_SECURITY - void *security; -#endif struct audit_context *audit_context; #ifdef CONFIG_AUDITSYSCALL uid_t loginuid; @@ -1787,9 +1746,6 @@ extern void wake_up_new_task(struct task_struct *tsk, extern void sched_fork(struct task_struct *p, int clone_flags); extern void sched_dead(struct task_struct *p); -extern int in_group_p(gid_t); -extern int in_egroup_p(gid_t); - extern void proc_caches_init(void); extern void flush_signals(struct task_struct *); extern void ignore_signals(struct task_struct *); diff --git a/include/linux/securebits.h b/include/linux/securebits.h index 92f09bdf1175..6d389491bfa2 100644 --- a/include/linux/securebits.h +++ b/include/linux/securebits.h @@ -32,7 +32,7 @@ setting is locked or not. A setting which is locked cannot be changed from user-level. */ #define issecure_mask(X) (1 << (X)) -#define issecure(X) (issecure_mask(X) & current->securebits) +#define issecure(X) (issecure_mask(X) & current->cred->securebits) #define SECURE_ALL_BITS (issecure_mask(SECURE_NOROOT) | \ issecure_mask(SECURE_NO_SETUID_FIXUP) | \ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index abda5991d7e3..e1885b494bac 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -126,7 +126,7 @@ static struct inode *mqueue_get_inode(struct super_block *sb, int mode, if (S_ISREG(mode)) { struct mqueue_inode_info *info; struct task_struct *p = current; - struct user_struct *u = p->user; + struct user_struct *u = p->cred->user; unsigned long mq_bytes, mq_msg_tblsz; inode->i_fop = &mqueue_file_operations; diff --git a/ipc/shm.c b/ipc/shm.c index 0c3debbe32d5..264a9d33c5dd 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -366,7 +366,7 @@ static int newseg(struct ipc_namespace *ns, struct ipc_params *params) if (shmflg & SHM_HUGETLB) { /* hugetlb_file_setup takes care of mlock user accounting */ file = hugetlb_file_setup(name, size); - shp->mlock_user = current->user; + shp->mlock_user = current->cred->user; } else { int acctflag = VM_ACCOUNT; /* @@ -767,7 +767,7 @@ asmlinkage long sys_shmctl(int shmid, int cmd, struct shmid_ds __user *buf) goto out_unlock; if(cmd==SHM_LOCK) { - struct user_struct * user = current->user; + struct user_struct *user = current->cred->user; if (!is_file_hugepages(shp->shm_file)) { err = shmem_lock(shp->shm_file, 1, user); if (!err && !(shp->shm_perm.mode & SHM_LOCKED)){ diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 9c7e47ae4576..2febf5165fad 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -447,6 +447,7 @@ static int audit_filter_rules(struct task_struct *tsk, struct audit_names *name, enum audit_state *state) { + struct cred *cred = tsk->cred; int i, j, need_sid = 1; u32 sid; @@ -466,28 +467,28 @@ static int audit_filter_rules(struct task_struct *tsk, } break; case AUDIT_UID: - result = audit_comparator(tsk->uid, f->op, f->val); + result = audit_comparator(cred->uid, f->op, f->val); break; case AUDIT_EUID: - result = audit_comparator(tsk->euid, f->op, f->val); + result = audit_comparator(cred->euid, f->op, f->val); break; case AUDIT_SUID: - result = audit_comparator(tsk->suid, f->op, f->val); + result = audit_comparator(cred->suid, f->op, f->val); break; case AUDIT_FSUID: - result = audit_comparator(tsk->fsuid, f->op, f->val); + result = audit_comparator(cred->fsuid, f->op, f->val); break; case AUDIT_GID: - result = audit_comparator(tsk->gid, f->op, f->val); + result = audit_comparator(cred->gid, f->op, f->val); break; case AUDIT_EGID: - result = audit_comparator(tsk->egid, f->op, f->val); + result = audit_comparator(cred->egid, f->op, f->val); break; case AUDIT_SGID: - result = audit_comparator(tsk->sgid, f->op, f->val); + result = audit_comparator(cred->sgid, f->op, f->val); break; case AUDIT_FSGID: - result = audit_comparator(tsk->fsgid, f->op, f->val); + result = audit_comparator(cred->fsgid, f->op, f->val); break; case AUDIT_PERS: result = audit_comparator(tsk->personality, f->op, f->val); @@ -1228,6 +1229,7 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { + struct cred *cred = tsk->cred; int i, call_panic = 0; struct audit_buffer *ab; struct audit_aux_data *aux; @@ -1237,14 +1239,14 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts context->pid = tsk->pid; if (!context->ppid) context->ppid = sys_getppid(); - context->uid = tsk->uid; - context->gid = tsk->gid; - context->euid = tsk->euid; - context->suid = tsk->suid; - context->fsuid = tsk->fsuid; - context->egid = tsk->egid; - context->sgid = tsk->sgid; - context->fsgid = tsk->fsgid; + context->uid = cred->uid; + context->gid = cred->gid; + context->euid = cred->euid; + context->suid = cred->suid; + context->fsuid = cred->fsuid; + context->egid = cred->egid; + context->sgid = cred->sgid; + context->fsgid = cred->fsgid; context->personality = tsk->personality; ab = audit_log_start(context, GFP_KERNEL, AUDIT_SYSCALL); @@ -2086,7 +2088,7 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid) audit_log_format(ab, "login pid=%d uid=%u " "old auid=%u new auid=%u" " old ses=%u new ses=%u", - task->pid, task->uid, + task->pid, task->cred->uid, task->loginuid, loginuid, task->sessionid, sessionid); audit_log_end(ab); @@ -2469,7 +2471,7 @@ void __audit_ptrace(struct task_struct *t) context->target_pid = t->pid; context->target_auid = audit_get_loginuid(t); - context->target_uid = t->uid; + context->target_uid = t->cred->uid; context->target_sessionid = audit_get_sessionid(t); security_task_getsecid(t, &context->target_sid); memcpy(context->target_comm, t->comm, TASK_COMM_LEN); @@ -2495,7 +2497,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (tsk->loginuid != -1) audit_sig_uid = tsk->loginuid; else - audit_sig_uid = tsk->uid; + audit_sig_uid = tsk->cred->uid; security_task_getsecid(tsk, &audit_sig_sid); } if (!audit_signals || audit_dummy_context()) @@ -2507,7 +2509,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (!ctx->target_pid) { ctx->target_pid = t->tgid; ctx->target_auid = audit_get_loginuid(t); - ctx->target_uid = t->uid; + ctx->target_uid = t->cred->uid; ctx->target_sessionid = audit_get_sessionid(t); security_task_getsecid(t, &ctx->target_sid); memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN); @@ -2528,7 +2530,7 @@ int __audit_signal_info(int sig, struct task_struct *t) axp->target_pid[axp->pid_count] = t->tgid; axp->target_auid[axp->pid_count] = audit_get_loginuid(t); - axp->target_uid[axp->pid_count] = t->uid; + axp->target_uid[axp->pid_count] = t->cred->uid; axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t); security_task_getsecid(t, &axp->target_sid[axp->pid_count]); memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN); @@ -2575,12 +2577,12 @@ void __audit_log_bprm_fcaps(struct linux_binprm *bprm, kernel_cap_t *pP, kernel_ ax->fcap_ver = (vcaps.magic_etc & VFS_CAP_REVISION_MASK) >> VFS_CAP_REVISION_SHIFT; ax->old_pcap.permitted = *pP; - ax->old_pcap.inheritable = current->cap_inheritable; + ax->old_pcap.inheritable = current->cred->cap_inheritable; ax->old_pcap.effective = *pE; - ax->new_pcap.permitted = current->cap_permitted; - ax->new_pcap.inheritable = current->cap_inheritable; - ax->new_pcap.effective = current->cap_effective; + ax->new_pcap.permitted = current->cred->cap_permitted; + ax->new_pcap.inheritable = current->cred->cap_inheritable; + ax->new_pcap.effective = current->cred->cap_effective; } /** diff --git a/kernel/capability.c b/kernel/capability.c index 58b00519624a..a404b980b1bd 100644 --- a/kernel/capability.c +++ b/kernel/capability.c @@ -171,8 +171,8 @@ kernel_cap_t cap_set_effective(const kernel_cap_t pE_new) spin_lock(&task_capability_lock); - pE_old = current->cap_effective; - current->cap_effective = pE_new; + pE_old = current->cred->cap_effective; + current->cred->cap_effective = pE_new; spin_unlock(&task_capability_lock); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 78f9b310c4f3..e210526e6401 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1293,7 +1293,9 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) rcu_read_unlock(); euid = current_euid(); - if (euid && euid != tsk->uid && euid != tsk->suid) { + if (euid && + euid != tsk->cred->uid && + euid != tsk->cred->suid) { put_task_struct(tsk); return -EACCES; } diff --git a/kernel/exit.c b/kernel/exit.c index 80137a5d9467..e0f6e1892fb9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -160,7 +160,7 @@ void release_task(struct task_struct * p) int zap_leader; repeat: tracehook_prepare_release_task(p); - atomic_dec(&p->user->processes); + atomic_dec(&p->cred->user->processes); proc_flush_task(p); write_lock_irq(&tasklist_lock); tracehook_finish_release_task(p); @@ -1272,7 +1272,7 @@ static int wait_task_zombie(struct task_struct *p, int options, return 0; if (unlikely(options & WNOWAIT)) { - uid_t uid = p->uid; + uid_t uid = p->cred->uid; int exit_code = p->exit_code; int why, status; @@ -1393,7 +1393,7 @@ static int wait_task_zombie(struct task_struct *p, int options, if (!retval && infop) retval = put_user(pid, &infop->si_pid); if (!retval && infop) - retval = put_user(p->uid, &infop->si_uid); + retval = put_user(p->cred->uid, &infop->si_uid); if (!retval) retval = pid; @@ -1458,7 +1458,7 @@ static int wait_task_stopped(int ptrace, struct task_struct *p, if (!unlikely(options & WNOWAIT)) p->exit_code = 0; - uid = p->uid; + uid = p->cred->uid; unlock_sig: spin_unlock_irq(&p->sighand->siglock); if (!exit_code) @@ -1535,7 +1535,7 @@ static int wait_task_continued(struct task_struct *p, int options, spin_unlock_irq(&p->sighand->siglock); pid = task_pid_vnr(p); - uid = p->uid; + uid = p->cred->uid; get_task_struct(p); read_unlock(&tasklist_lock); diff --git a/kernel/fork.c b/kernel/fork.c index f6083561dfe0..81fdc7733908 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -147,8 +147,8 @@ void __put_task_struct(struct task_struct *tsk) WARN_ON(tsk == current); security_task_free(tsk); - free_uid(tsk->user); - put_group_info(tsk->group_info); + free_uid(tsk->__temp_cred.user); + put_group_info(tsk->__temp_cred.group_info); delayacct_tsk_free(tsk); if (!profile_handoff_task(tsk)) @@ -969,17 +969,18 @@ static struct task_struct *copy_process(unsigned long clone_flags, DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled); DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled); #endif + p->cred = &p->__temp_cred; retval = -EAGAIN; - if (atomic_read(&p->user->processes) >= + if (atomic_read(&p->cred->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && - p->user != current->nsproxy->user_ns->root_user) + p->cred->user != current->nsproxy->user_ns->root_user) goto bad_fork_free; } - atomic_inc(&p->user->__count); - atomic_inc(&p->user->processes); - get_group_info(p->group_info); + atomic_inc(&p->cred->user->__count); + atomic_inc(&p->cred->user->processes); + get_group_info(p->cred->group_info); /* * If multiple threads are within copy_process(), then this check @@ -1035,9 +1036,8 @@ static struct task_struct *copy_process(unsigned long clone_flags, p->real_start_time = p->start_time; monotonic_to_bootbased(&p->real_start_time); #ifdef CONFIG_SECURITY - p->security = NULL; + p->cred->security = NULL; #endif - p->cap_bset = current->cap_bset; p->io_context = NULL; p->audit_context = NULL; cgroup_fork(p); @@ -1298,9 +1298,9 @@ bad_fork_cleanup_cgroup: bad_fork_cleanup_put_domain: module_put(task_thread_info(p)->exec_domain->module); bad_fork_cleanup_count: - put_group_info(p->group_info); - atomic_dec(&p->user->processes); - free_uid(p->user); + put_group_info(p->cred->group_info); + atomic_dec(&p->cred->user->processes); + free_uid(p->cred->user); bad_fork_free: free_task(p); fork_out: diff --git a/kernel/futex.c b/kernel/futex.c index e06962132aaf..28421d8210b8 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -443,7 +443,8 @@ static struct task_struct * futex_find_get_task(pid_t pid) rcu_read_lock(); p = find_task_by_vpid(pid); - if (!p || (euid != p->euid && euid != p->uid)) + if (!p || (euid != p->cred->euid && + euid != p->cred->uid)) p = ERR_PTR(-ESRCH); else get_task_struct(p); @@ -1846,7 +1847,8 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if (euid != p->euid && euid != p->uid && + if (euid != p->cred->euid && + euid != p->cred->uid && !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->robust_list; diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 3254d4e41e88..2c3fd5ed34f5 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -151,8 +151,9 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if (euid != p->euid && euid != p->uid && - !capable(CAP_SYS_PTRACE)) + if (euid != p->cred->euid && + euid != p->cred->uid && + !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->compat_robust_list; read_unlock(&tasklist_lock); diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 937f6b5b2008..49849d12dd12 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -115,6 +115,8 @@ int ptrace_check_attach(struct task_struct *child, int kill) int __ptrace_may_access(struct task_struct *task, unsigned int mode) { + struct cred *cred = current->cred, *tcred = task->cred; + /* May we inspect the given task? * This check is used both for attaching with ptrace * and for allowing access to sensitive information in /proc. @@ -123,19 +125,18 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) * because setting up the necessary parent/child relationship * or halting the specified task is impossible. */ - uid_t uid; - gid_t gid; + uid_t uid = cred->uid; + gid_t gid = cred->gid; int dumpable = 0; /* Don't let security modules deny introspection */ if (task == current) return 0; - current_uid_gid(&uid, &gid); - if ((uid != task->euid || - uid != task->suid || - uid != task->uid || - gid != task->egid || - gid != task->sgid || - gid != task->gid) && !capable(CAP_SYS_PTRACE)) + if ((uid != tcred->euid || + uid != tcred->suid || + uid != tcred->uid || + gid != tcred->egid || + gid != tcred->sgid || + gid != tcred->gid) && !capable(CAP_SYS_PTRACE)) return -EPERM; smp_rmb(); if (task->mm) diff --git a/kernel/sched.c b/kernel/sched.c index c3b8b1fcde0d..733c59e645aa 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -345,7 +345,7 @@ static inline struct task_group *task_group(struct task_struct *p) struct task_group *tg; #ifdef CONFIG_USER_SCHED - tg = p->user->tg; + tg = p->cred->user->tg; #elif defined(CONFIG_CGROUP_SCHED) tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), struct task_group, css); @@ -5182,8 +5182,8 @@ recheck: /* can't change other user's priorities */ euid = current_euid(); - if (euid != p->euid && - euid != p->uid) + if (euid != p->cred->euid && + euid != p->cred->uid) return -EPERM; } @@ -5417,7 +5417,9 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) euid = current_euid(); retval = -EPERM; - if (euid != p->euid && euid != p->uid && !capable(CAP_SYS_NICE)) + if (euid != p->cred->euid && + euid != p->cred->uid && + !capable(CAP_SYS_NICE)) goto out_unlock; retval = security_task_setscheduler(p, 0, NULL); diff --git a/kernel/signal.c b/kernel/signal.c index 167b535fe1a9..80e8a6489f97 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -187,7 +187,7 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, * In order to avoid problems with "switch_user()", we want to make * sure that the compiler doesn't re-load "t->user" */ - user = t->user; + user = t->cred->user; barrier(); atomic_inc(&user->sigpending); if (override_rlimit || @@ -582,8 +582,8 @@ static int check_kill_permission(int sig, struct siginfo *info, uid = current_uid(); euid = current_euid(); - if ((euid ^ t->suid) && (euid ^ t->uid) && - (uid ^ t->suid) && (uid ^ t->uid) && + if ((euid ^ t->cred->suid) && (euid ^ t->cred->uid) && + (uid ^ t->cred->suid) && (uid ^ t->cred->uid) && !capable(CAP_KILL)) { switch (sig) { case SIGCONT: @@ -1100,8 +1100,8 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, goto out_unlock; } if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) - && (euid != p->suid) && (euid != p->uid) - && (uid != p->suid) && (uid != p->uid)) { + && (euid != p->cred->suid) && (euid != p->cred->uid) + && (uid != p->cred->suid) && (uid != p->cred->uid)) { ret = -EPERM; goto out_unlock; } @@ -1374,7 +1374,7 @@ int do_notify_parent(struct task_struct *tsk, int sig) info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); rcu_read_unlock(); - info.si_uid = tsk->uid; + info.si_uid = tsk->cred->uid; thread_group_cputime(tsk, &cputime); info.si_utime = cputime_to_jiffies(cputime.utime); @@ -1445,7 +1445,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); rcu_read_unlock(); - info.si_uid = tsk->uid; + info.si_uid = tsk->cred->uid; info.si_utime = cputime_to_clock_t(tsk->utime); info.si_stime = cputime_to_clock_t(tsk->stime); @@ -1713,7 +1713,7 @@ static int ptrace_signal(int signr, siginfo_t *info, info->si_errno = 0; info->si_code = SI_USER; info->si_pid = task_pid_vnr(current->parent); - info->si_uid = current->parent->uid; + info->si_uid = current->parent->cred->uid; } /* If the (new) signal is now blocked, requeue it. */ diff --git a/kernel/sys.c b/kernel/sys.c index ed5c29c748ac..5d81f07c0150 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -117,7 +117,9 @@ static int set_one_prio(struct task_struct *p, int niceval, int error) uid_t euid = current_euid(); int no_nice; - if (p->uid != euid && p->euid != euid && !capable(CAP_SYS_NICE)) { + if (p->cred->uid != euid && + p->cred->euid != euid && + !capable(CAP_SYS_NICE)) { error = -EPERM; goto out; } @@ -174,7 +176,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = current->user; + user = current->cred->user; if (!who) who = current_uid(); else @@ -182,7 +184,7 @@ asmlinkage long sys_setpriority(int which, int who, int niceval) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) - if (p->uid == who) + if (p->cred->uid == who) error = set_one_prio(p, niceval, error); while_each_thread(g, p); if (who != current_uid()) @@ -236,7 +238,7 @@ asmlinkage long sys_getpriority(int which, int who) } while_each_pid_thread(pgrp, PIDTYPE_PGID, p); break; case PRIO_USER: - user = current->user; + user = current->cred->user; if (!who) who = current_uid(); else @@ -244,7 +246,7 @@ asmlinkage long sys_getpriority(int which, int who) goto out_unlock; /* No processes for this user */ do_each_thread(g, p) - if (p->uid == who) { + if (p->cred->uid == who) { niceval = 20 - task_nice(p); if (niceval > retval) retval = niceval; @@ -472,8 +474,9 @@ void ctrl_alt_del(void) */ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) { - int old_rgid = current->gid; - int old_egid = current->egid; + struct cred *cred = current->cred; + int old_rgid = cred->gid; + int old_egid = cred->egid; int new_rgid = old_rgid; int new_egid = old_egid; int retval; @@ -484,7 +487,7 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) if (rgid != (gid_t) -1) { if ((old_rgid == rgid) || - (current->egid==rgid) || + (cred->egid == rgid) || capable(CAP_SETGID)) new_rgid = rgid; else @@ -492,8 +495,8 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) } if (egid != (gid_t) -1) { if ((old_rgid == egid) || - (current->egid == egid) || - (current->sgid == egid) || + (cred->egid == egid) || + (cred->sgid == egid) || capable(CAP_SETGID)) new_egid = egid; else @@ -505,10 +508,10 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) } if (rgid != (gid_t) -1 || (egid != (gid_t) -1 && egid != old_rgid)) - current->sgid = new_egid; - current->fsgid = new_egid; - current->egid = new_egid; - current->gid = new_rgid; + cred->sgid = new_egid; + cred->fsgid = new_egid; + cred->egid = new_egid; + cred->gid = new_rgid; key_fsgid_changed(current); proc_id_connector(current, PROC_EVENT_GID); return 0; @@ -521,7 +524,8 @@ asmlinkage long sys_setregid(gid_t rgid, gid_t egid) */ asmlinkage long sys_setgid(gid_t gid) { - int old_egid = current->egid; + struct cred *cred = current->cred; + int old_egid = cred->egid; int retval; retval = security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_ID); @@ -533,13 +537,13 @@ asmlinkage long sys_setgid(gid_t gid) set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->gid = current->egid = current->sgid = current->fsgid = gid; - } else if ((gid == current->gid) || (gid == current->sgid)) { + cred->gid = cred->egid = cred->sgid = cred->fsgid = gid; + } else if ((gid == cred->gid) || (gid == cred->sgid)) { if (old_egid != gid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->egid = current->fsgid = gid; + cred->egid = cred->fsgid = gid; } else return -EPERM; @@ -570,7 +574,7 @@ static int set_user(uid_t new_ruid, int dumpclear) set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->uid = new_ruid; + current->cred->uid = new_ruid; return 0; } @@ -591,6 +595,7 @@ static int set_user(uid_t new_ruid, int dumpclear) */ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) { + struct cred *cred = current->cred; int old_ruid, old_euid, old_suid, new_ruid, new_euid; int retval; @@ -598,14 +603,14 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) if (retval) return retval; - new_ruid = old_ruid = current->uid; - new_euid = old_euid = current->euid; - old_suid = current->suid; + new_ruid = old_ruid = cred->uid; + new_euid = old_euid = cred->euid; + old_suid = cred->suid; if (ruid != (uid_t) -1) { new_ruid = ruid; if ((old_ruid != ruid) && - (current->euid != ruid) && + (cred->euid != ruid) && !capable(CAP_SETUID)) return -EPERM; } @@ -613,8 +618,8 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) if (euid != (uid_t) -1) { new_euid = euid; if ((old_ruid != euid) && - (current->euid != euid) && - (current->suid != euid) && + (cred->euid != euid) && + (cred->suid != euid) && !capable(CAP_SETUID)) return -EPERM; } @@ -626,11 +631,11 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->fsuid = current->euid = new_euid; + cred->fsuid = cred->euid = new_euid; if (ruid != (uid_t) -1 || (euid != (uid_t) -1 && euid != old_ruid)) - current->suid = current->euid; - current->fsuid = current->euid; + cred->suid = cred->euid; + cred->fsuid = cred->euid; key_fsuid_changed(current); proc_id_connector(current, PROC_EVENT_UID); @@ -653,7 +658,8 @@ asmlinkage long sys_setreuid(uid_t ruid, uid_t euid) */ asmlinkage long sys_setuid(uid_t uid) { - int old_euid = current->euid; + struct cred *cred = current->cred; + int old_euid = cred->euid; int old_ruid, old_suid, new_suid; int retval; @@ -661,23 +667,23 @@ asmlinkage long sys_setuid(uid_t uid) if (retval) return retval; - old_ruid = current->uid; - old_suid = current->suid; + old_ruid = cred->uid; + old_suid = cred->suid; new_suid = old_suid; if (capable(CAP_SETUID)) { if (uid != old_ruid && set_user(uid, old_euid != uid) < 0) return -EAGAIN; new_suid = uid; - } else if ((uid != current->uid) && (uid != new_suid)) + } else if ((uid != cred->uid) && (uid != new_suid)) return -EPERM; if (old_euid != uid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->fsuid = current->euid = uid; - current->suid = new_suid; + cred->fsuid = cred->euid = uid; + cred->suid = new_suid; key_fsuid_changed(current); proc_id_connector(current, PROC_EVENT_UID); @@ -692,9 +698,10 @@ asmlinkage long sys_setuid(uid_t uid) */ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) { - int old_ruid = current->uid; - int old_euid = current->euid; - int old_suid = current->suid; + struct cred *cred = current->cred; + int old_ruid = cred->uid; + int old_euid = cred->euid; + int old_suid = cred->suid; int retval; retval = security_task_setuid(ruid, euid, suid, LSM_SETID_RES); @@ -702,30 +709,31 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) return retval; if (!capable(CAP_SETUID)) { - if ((ruid != (uid_t) -1) && (ruid != current->uid) && - (ruid != current->euid) && (ruid != current->suid)) + if ((ruid != (uid_t) -1) && (ruid != cred->uid) && + (ruid != cred->euid) && (ruid != cred->suid)) return -EPERM; - if ((euid != (uid_t) -1) && (euid != current->uid) && - (euid != current->euid) && (euid != current->suid)) + if ((euid != (uid_t) -1) && (euid != cred->uid) && + (euid != cred->euid) && (euid != cred->suid)) return -EPERM; - if ((suid != (uid_t) -1) && (suid != current->uid) && - (suid != current->euid) && (suid != current->suid)) + if ((suid != (uid_t) -1) && (suid != cred->uid) && + (suid != cred->euid) && (suid != cred->suid)) return -EPERM; } if (ruid != (uid_t) -1) { - if (ruid != current->uid && set_user(ruid, euid != current->euid) < 0) + if (ruid != cred->uid && + set_user(ruid, euid != cred->euid) < 0) return -EAGAIN; } if (euid != (uid_t) -1) { - if (euid != current->euid) { + if (euid != cred->euid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->euid = euid; + cred->euid = euid; } - current->fsuid = current->euid; + cred->fsuid = cred->euid; if (suid != (uid_t) -1) - current->suid = suid; + cred->suid = suid; key_fsuid_changed(current); proc_id_connector(current, PROC_EVENT_UID); @@ -735,11 +743,12 @@ asmlinkage long sys_setresuid(uid_t ruid, uid_t euid, uid_t suid) asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __user *suid) { + struct cred *cred = current->cred; int retval; - if (!(retval = put_user(current->uid, ruid)) && - !(retval = put_user(current->euid, euid))) - retval = put_user(current->suid, suid); + if (!(retval = put_user(cred->uid, ruid)) && + !(retval = put_user(cred->euid, euid))) + retval = put_user(cred->suid, suid); return retval; } @@ -749,6 +758,7 @@ asmlinkage long sys_getresuid(uid_t __user *ruid, uid_t __user *euid, uid_t __us */ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) { + struct cred *cred = current->cred; int retval; retval = security_task_setgid(rgid, egid, sgid, LSM_SETID_RES); @@ -756,28 +766,28 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) return retval; if (!capable(CAP_SETGID)) { - if ((rgid != (gid_t) -1) && (rgid != current->gid) && - (rgid != current->egid) && (rgid != current->sgid)) + if ((rgid != (gid_t) -1) && (rgid != cred->gid) && + (rgid != cred->egid) && (rgid != cred->sgid)) return -EPERM; - if ((egid != (gid_t) -1) && (egid != current->gid) && - (egid != current->egid) && (egid != current->sgid)) + if ((egid != (gid_t) -1) && (egid != cred->gid) && + (egid != cred->egid) && (egid != cred->sgid)) return -EPERM; - if ((sgid != (gid_t) -1) && (sgid != current->gid) && - (sgid != current->egid) && (sgid != current->sgid)) + if ((sgid != (gid_t) -1) && (sgid != cred->gid) && + (sgid != cred->egid) && (sgid != cred->sgid)) return -EPERM; } if (egid != (gid_t) -1) { - if (egid != current->egid) { + if (egid != cred->egid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->egid = egid; + cred->egid = egid; } - current->fsgid = current->egid; + cred->fsgid = cred->egid; if (rgid != (gid_t) -1) - current->gid = rgid; + cred->gid = rgid; if (sgid != (gid_t) -1) - current->sgid = sgid; + cred->sgid = sgid; key_fsgid_changed(current); proc_id_connector(current, PROC_EVENT_GID); @@ -786,11 +796,12 @@ asmlinkage long sys_setresgid(gid_t rgid, gid_t egid, gid_t sgid) asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __user *sgid) { + struct cred *cred = current->cred; int retval; - if (!(retval = put_user(current->gid, rgid)) && - !(retval = put_user(current->egid, egid))) - retval = put_user(current->sgid, sgid); + if (!(retval = put_user(cred->gid, rgid)) && + !(retval = put_user(cred->egid, egid))) + retval = put_user(cred->sgid, sgid); return retval; } @@ -804,20 +815,21 @@ asmlinkage long sys_getresgid(gid_t __user *rgid, gid_t __user *egid, gid_t __us */ asmlinkage long sys_setfsuid(uid_t uid) { + struct cred *cred = current->cred; int old_fsuid; - old_fsuid = current->fsuid; + old_fsuid = cred->fsuid; if (security_task_setuid(uid, (uid_t)-1, (uid_t)-1, LSM_SETID_FS)) return old_fsuid; - if (uid == current->uid || uid == current->euid || - uid == current->suid || uid == current->fsuid || + if (uid == cred->uid || uid == cred->euid || + uid == cred->suid || uid == cred->fsuid || capable(CAP_SETUID)) { if (uid != old_fsuid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->fsuid = uid; + cred->fsuid = uid; } key_fsuid_changed(current); @@ -833,20 +845,21 @@ asmlinkage long sys_setfsuid(uid_t uid) */ asmlinkage long sys_setfsgid(gid_t gid) { + struct cred *cred = current->cred; int old_fsgid; - old_fsgid = current->fsgid; + old_fsgid = cred->fsgid; if (security_task_setgid(gid, (gid_t)-1, (gid_t)-1, LSM_SETID_FS)) return old_fsgid; - if (gid == current->gid || gid == current->egid || - gid == current->sgid || gid == current->fsgid || + if (gid == cred->gid || gid == cred->egid || + gid == cred->sgid || gid == cred->fsgid || capable(CAP_SETGID)) { if (gid != old_fsgid) { set_dumpable(current->mm, suid_dumpable); smp_wmb(); } - current->fsgid = gid; + cred->fsgid = gid; key_fsgid_changed(current); proc_id_connector(current, PROC_EVENT_GID); } @@ -1208,8 +1221,15 @@ int groups_search(struct group_info *group_info, gid_t grp) return 0; } -/* validate and set current->group_info */ -int set_current_groups(struct group_info *group_info) +/** + * set_groups - Change a group subscription in a security record + * @sec: The security record to alter + * @group_info: The group list to impose + * + * Validate a group subscription and, if valid, impose it upon a task security + * record. + */ +int set_groups(struct cred *cred, struct group_info *group_info) { int retval; struct group_info *old_info; @@ -1221,20 +1241,34 @@ int set_current_groups(struct group_info *group_info) groups_sort(group_info); get_group_info(group_info); - task_lock(current); - old_info = current->group_info; - current->group_info = group_info; - task_unlock(current); + spin_lock(&cred->lock); + old_info = cred->group_info; + cred->group_info = group_info; + spin_unlock(&cred->lock); put_group_info(old_info); - return 0; } +EXPORT_SYMBOL(set_groups); + +/** + * set_current_groups - Change current's group subscription + * @group_info: The group list to impose + * + * Validate a group subscription and, if valid, impose it upon current's task + * security record. + */ +int set_current_groups(struct group_info *group_info) +{ + return set_groups(current->cred, group_info); +} + EXPORT_SYMBOL(set_current_groups); asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) { + struct cred *cred = current->cred; int i = 0; /* @@ -1246,13 +1280,13 @@ asmlinkage long sys_getgroups(int gidsetsize, gid_t __user *grouplist) return -EINVAL; /* no need to grab task_lock here; it cannot change */ - i = current->group_info->ngroups; + i = cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups_to_user(grouplist, current->group_info)) { + if (groups_to_user(grouplist, cred->group_info)) { i = -EFAULT; goto out; } @@ -1296,9 +1330,10 @@ asmlinkage long sys_setgroups(int gidsetsize, gid_t __user *grouplist) */ int in_group_p(gid_t grp) { + struct cred *cred = current->cred; int retval = 1; - if (grp != current->fsgid) - retval = groups_search(current->group_info, grp); + if (grp != cred->fsgid) + retval = groups_search(cred->group_info, grp); return retval; } @@ -1306,9 +1341,10 @@ EXPORT_SYMBOL(in_group_p); int in_egroup_p(gid_t grp) { + struct cred *cred = current->cred; int retval = 1; - if (grp != current->egid) - retval = groups_search(current->group_info, grp); + if (grp != cred->egid) + retval = groups_search(cred->group_info, grp); return retval; } @@ -1624,7 +1660,9 @@ asmlinkage long sys_umask(int mask) asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5) { - long error = 0; + struct task_struct *me = current; + unsigned char comm[sizeof(me->comm)]; + long error; if (security_task_prctl(option, arg2, arg3, arg4, arg5, &error)) return error; @@ -1635,39 +1673,41 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, error = -EINVAL; break; } - current->pdeath_signal = arg2; + me->pdeath_signal = arg2; + error = 0; break; case PR_GET_PDEATHSIG: - error = put_user(current->pdeath_signal, (int __user *)arg2); + error = put_user(me->pdeath_signal, (int __user *)arg2); break; case PR_GET_DUMPABLE: - error = get_dumpable(current->mm); + error = get_dumpable(me->mm); break; case PR_SET_DUMPABLE: if (arg2 < 0 || arg2 > 1) { error = -EINVAL; break; } - set_dumpable(current->mm, arg2); + set_dumpable(me->mm, arg2); + error = 0; break; case PR_SET_UNALIGN: - error = SET_UNALIGN_CTL(current, arg2); + error = SET_UNALIGN_CTL(me, arg2); break; case PR_GET_UNALIGN: - error = GET_UNALIGN_CTL(current, arg2); + error = GET_UNALIGN_CTL(me, arg2); break; case PR_SET_FPEMU: - error = SET_FPEMU_CTL(current, arg2); + error = SET_FPEMU_CTL(me, arg2); break; case PR_GET_FPEMU: - error = GET_FPEMU_CTL(current, arg2); + error = GET_FPEMU_CTL(me, arg2); break; case PR_SET_FPEXC: - error = SET_FPEXC_CTL(current, arg2); + error = SET_FPEXC_CTL(me, arg2); break; case PR_GET_FPEXC: - error = GET_FPEXC_CTL(current, arg2); + error = GET_FPEXC_CTL(me, arg2); break; case PR_GET_TIMING: error = PR_TIMING_STATISTICAL; @@ -1675,33 +1715,28 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, case PR_SET_TIMING: if (arg2 != PR_TIMING_STATISTICAL) error = -EINVAL; + else + error = 0; break; - case PR_SET_NAME: { - struct task_struct *me = current; - unsigned char ncomm[sizeof(me->comm)]; - - ncomm[sizeof(me->comm)-1] = 0; - if (strncpy_from_user(ncomm, (char __user *)arg2, - sizeof(me->comm)-1) < 0) + case PR_SET_NAME: + comm[sizeof(me->comm)-1] = 0; + if (strncpy_from_user(comm, (char __user *)arg2, + sizeof(me->comm) - 1) < 0) return -EFAULT; - set_task_comm(me, ncomm); + set_task_comm(me, comm); return 0; - } - case PR_GET_NAME: { - struct task_struct *me = current; - unsigned char tcomm[sizeof(me->comm)]; - - get_task_comm(tcomm, me); - if (copy_to_user((char __user *)arg2, tcomm, sizeof(tcomm))) + case PR_GET_NAME: + get_task_comm(comm, me); + if (copy_to_user((char __user *)arg2, comm, + sizeof(comm))) return -EFAULT; return 0; - } case PR_GET_ENDIAN: - error = GET_ENDIAN(current, arg2); + error = GET_ENDIAN(me, arg2); break; case PR_SET_ENDIAN: - error = SET_ENDIAN(current, arg2); + error = SET_ENDIAN(me, arg2); break; case PR_GET_SECCOMP: @@ -1725,6 +1760,7 @@ asmlinkage long sys_prctl(int option, unsigned long arg2, unsigned long arg3, current->default_timer_slack_ns; else current->timer_slack_ns = arg2; + error = 0; break; default: error = -EINVAL; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 9f3b478f9171..5c97c5b4ea8f 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -246,7 +246,7 @@ __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu) memcpy(data->comm, tsk->comm, TASK_COMM_LEN); data->pid = tsk->pid; - data->uid = tsk->uid; + data->uid = task_uid(tsk); data->nice = tsk->static_prio - 20 - MAX_RT_PRIO; data->policy = tsk->policy; data->rt_priority = tsk->rt_priority; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 8ebcd8532dfb..6d1ed07bf312 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -53,8 +53,8 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) stats->ac_flag |= AXSIG; stats->ac_nice = task_nice(tsk); stats->ac_sched = tsk->policy; - stats->ac_uid = tsk->uid; - stats->ac_gid = tsk->gid; + stats->ac_uid = tsk->cred->uid; + stats->ac_gid = tsk->cred->gid; stats->ac_pid = tsk->pid; rcu_read_lock(); stats->ac_ppid = pid_alive(tsk) ? diff --git a/kernel/uid16.c b/kernel/uid16.c index 3e41c1673e2f..71f07fc39fea 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -86,9 +86,9 @@ asmlinkage long sys_getresuid16(old_uid_t __user *ruid, old_uid_t __user *euid, { int retval; - if (!(retval = put_user(high2lowuid(current->uid), ruid)) && - !(retval = put_user(high2lowuid(current->euid), euid))) - retval = put_user(high2lowuid(current->suid), suid); + if (!(retval = put_user(high2lowuid(current->cred->uid), ruid)) && + !(retval = put_user(high2lowuid(current->cred->euid), euid))) + retval = put_user(high2lowuid(current->cred->suid), suid); return retval; } @@ -106,9 +106,9 @@ asmlinkage long sys_getresgid16(old_gid_t __user *rgid, old_gid_t __user *egid, { int retval; - if (!(retval = put_user(high2lowgid(current->gid), rgid)) && - !(retval = put_user(high2lowgid(current->egid), egid))) - retval = put_user(high2lowgid(current->sgid), sgid); + if (!(retval = put_user(high2lowgid(current->cred->gid), rgid)) && + !(retval = put_user(high2lowgid(current->cred->egid), egid))) + retval = put_user(high2lowgid(current->cred->sgid), sgid); return retval; } @@ -166,20 +166,20 @@ asmlinkage long sys_getgroups16(int gidsetsize, old_gid_t __user *grouplist) if (gidsetsize < 0) return -EINVAL; - get_group_info(current->group_info); - i = current->group_info->ngroups; + get_group_info(current->cred->group_info); + i = current->cred->group_info->ngroups; if (gidsetsize) { if (i > gidsetsize) { i = -EINVAL; goto out; } - if (groups16_to_user(grouplist, current->group_info)) { + if (groups16_to_user(grouplist, current->cred->group_info)) { i = -EFAULT; goto out; } } out: - put_group_info(current->group_info); + put_group_info(current->cred->group_info); return i; } @@ -210,20 +210,20 @@ asmlinkage long sys_setgroups16(int gidsetsize, old_gid_t __user *grouplist) asmlinkage long sys_getuid16(void) { - return high2lowuid(current->uid); + return high2lowuid(current->cred->uid); } asmlinkage long sys_geteuid16(void) { - return high2lowuid(current->euid); + return high2lowuid(current->cred->euid); } asmlinkage long sys_getgid16(void) { - return high2lowgid(current->gid); + return high2lowgid(current->cred->gid); } asmlinkage long sys_getegid16(void) { - return high2lowgid(current->egid); + return high2lowgid(current->cred->egid); } diff --git a/kernel/user.c b/kernel/user.c index 39d6159fae43..104d22ac84d5 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -457,11 +457,11 @@ void switch_uid(struct user_struct *new_user) * cheaply with the new uid cache, so if it matters * we should be checking for it. -DaveM */ - old_user = current->user; + old_user = current->cred->user; atomic_inc(&new_user->processes); atomic_dec(&old_user->processes); switch_uid_keyring(new_user); - current->user = new_user; + current->cred->user = new_user; sched_switch_user(current); /* diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 07a96474077d..b23492ee3e50 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1110,12 +1110,12 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, const unsigned long __user *old_nodes, const unsigned long __user *new_nodes) { + struct cred *cred, *tcred; struct mm_struct *mm; struct task_struct *task; nodemask_t old; nodemask_t new; nodemask_t task_nodes; - uid_t uid, euid; int err; err = get_nodes(&old, old_nodes, maxnode); @@ -1145,10 +1145,10 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, * capabilities, superuser privileges or the same * userid as the target process. */ - uid = current_uid(); - euid = current_euid(); - if (euid != task->suid && euid != task->uid && - uid != task->suid && uid != task->uid && + cred = current->cred; + tcred = task->cred; + if (cred->euid != tcred->suid && cred->euid != tcred->uid && + cred->uid != tcred->suid && cred->uid != tcred->uid && !capable(CAP_SYS_NICE)) { err = -EPERM; goto out; diff --git a/mm/migrate.c b/mm/migrate.c index 6263c24c4afe..794443da1b4f 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1045,10 +1045,10 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, const int __user *nodes, int __user *status, int flags) { + struct cred *cred, *tcred; struct task_struct *task; struct mm_struct *mm; int err; - uid_t uid, euid; /* Check flags */ if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL)) @@ -1076,10 +1076,10 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, * capabilities, superuser privileges or the same * userid as the target process. */ - uid = current_uid(); - euid = current_euid(); - if (euid != task->suid && euid != task->uid && - uid != task->suid && uid != task->uid && + cred = current->cred; + tcred = task->cred; + if (cred->euid != tcred->suid && cred->euid != tcred->uid && + cred->uid != tcred->suid && cred->uid != tcred->uid && !capable(CAP_SYS_NICE)) { err = -EPERM; goto out; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 34a458aa7997..3af787ba2077 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -298,7 +298,7 @@ static void dump_tasks(const struct mem_cgroup *mem) task_lock(p); printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", - p->pid, p->uid, p->tgid, p->mm->total_vm, + p->pid, p->cred->uid, p->tgid, p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p), p->oomkilladj, p->comm); task_unlock(p); diff --git a/net/core/scm.c b/net/core/scm.c index 4681d8f9b45b..c28ca32a7d93 100644 --- a/net/core/scm.c +++ b/net/core/scm.c @@ -44,11 +44,13 @@ static __inline__ int scm_check_creds(struct ucred *creds) { + struct cred *cred = current->cred; + if ((creds->pid == task_tgid_vnr(current) || capable(CAP_SYS_ADMIN)) && - ((creds->uid == current_uid() || creds->uid == current_euid() || - creds->uid == current_suid()) || capable(CAP_SETUID)) && - ((creds->gid == current_gid() || creds->gid == current_egid() || - creds->gid == current_sgid()) || capable(CAP_SETGID))) { + ((creds->uid == cred->uid || creds->uid == cred->euid || + creds->uid == cred->suid) || capable(CAP_SETUID)) && + ((creds->gid == cred->gid || creds->gid == cred->egid || + creds->gid == cred->sgid) || capable(CAP_SETGID))) { return 0; } return -EPERM; diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index 8fc380578807..c79543212602 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -353,7 +353,7 @@ rpcauth_lookupcred(struct rpc_auth *auth, int flags) struct auth_cred acred = { .uid = current_fsuid(), .gid = current_fsgid(), - .group_info = current->group_info, + .group_info = current->cred->group_info, }; struct rpc_cred *ret; diff --git a/security/commoncap.c b/security/commoncap.c index fb4e240720d8..fa61679f8c73 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -30,7 +30,7 @@ int cap_netlink_send(struct sock *sk, struct sk_buff *skb) { - NETLINK_CB(skb).eff_cap = current->cap_effective; + NETLINK_CB(skb).eff_cap = current_cap(); return 0; } @@ -52,7 +52,7 @@ EXPORT_SYMBOL(cap_netlink_recv); int cap_capable(struct task_struct *tsk, int cap, int audit) { /* Derived from include/linux/sched.h:capable. */ - if (cap_raised(tsk->cap_effective, cap)) + if (cap_raised(tsk->cred->cap_effective, cap)) return 0; return -EPERM; } @@ -67,7 +67,8 @@ int cap_settime(struct timespec *ts, struct timezone *tz) int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) { /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ - if (cap_issubset(child->cap_permitted, current->cap_permitted)) + if (cap_issubset(child->cred->cap_permitted, + current->cred->cap_permitted)) return 0; if (capable(CAP_SYS_PTRACE)) return 0; @@ -76,8 +77,8 @@ int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) int cap_ptrace_traceme(struct task_struct *parent) { - /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ - if (cap_issubset(current->cap_permitted, parent->cap_permitted)) + if (cap_issubset(current->cred->cap_permitted, + parent->cred->cap_permitted)) return 0; if (has_capability(parent, CAP_SYS_PTRACE)) return 0; @@ -87,10 +88,12 @@ int cap_ptrace_traceme(struct task_struct *parent) int cap_capget (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { + struct cred *cred = target->cred; + /* Derived from kernel/capability.c:sys_capget. */ - *effective = target->cap_effective; - *inheritable = target->cap_inheritable; - *permitted = target->cap_permitted; + *effective = cred->cap_effective; + *inheritable = cred->cap_inheritable; + *permitted = cred->cap_permitted; return 0; } @@ -122,24 +125,26 @@ int cap_capset_check(const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) { + const struct cred *cred = current->cred; + if (cap_inh_is_capped() && !cap_issubset(*inheritable, - cap_combine(current->cap_inheritable, - current->cap_permitted))) { + cap_combine(cred->cap_inheritable, + cred->cap_permitted))) { /* incapable of using this inheritable set */ return -EPERM; } if (!cap_issubset(*inheritable, - cap_combine(current->cap_inheritable, - current->cap_bset))) { + cap_combine(cred->cap_inheritable, + cred->cap_bset))) { /* no new pI capabilities outside bounding set */ return -EPERM; } /* verify restrictions on target's new Permitted set */ if (!cap_issubset (*permitted, - cap_combine (current->cap_permitted, - current->cap_permitted))) { + cap_combine (cred->cap_permitted, + cred->cap_permitted))) { return -EPERM; } @@ -155,9 +160,11 @@ void cap_capset_set(const kernel_cap_t *effective, const kernel_cap_t *inheritable, const kernel_cap_t *permitted) { - current->cap_effective = *effective; - current->cap_inheritable = *inheritable; - current->cap_permitted = *permitted; + struct cred *cred = current->cred; + + cred->cap_effective = *effective; + cred->cap_inheritable = *inheritable; + cred->cap_permitted = *permitted; } static inline void bprm_clear_caps(struct linux_binprm *bprm) @@ -211,8 +218,8 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps, * pP' = (X & fP) | (pI & fI) */ bprm->cap_post_exec_permitted.cap[i] = - (current->cap_bset.cap[i] & permitted) | - (current->cap_inheritable.cap[i] & inheritable); + (current->cred->cap_bset.cap[i] & permitted) | + (current->cred->cap_inheritable.cap[i] & inheritable); if (permitted & ~bprm->cap_post_exec_permitted.cap[i]) { /* @@ -354,8 +361,8 @@ int cap_bprm_set_security (struct linux_binprm *bprm) if (bprm->e_uid == 0 || current_uid() == 0) { /* pP' = (cap_bset & ~0) | (pI & ~0) */ bprm->cap_post_exec_permitted = cap_combine( - current->cap_bset, current->cap_inheritable - ); + current->cred->cap_bset, + current->cred->cap_inheritable); bprm->cap_effective = (bprm->e_uid == 0); ret = 0; } @@ -366,44 +373,39 @@ int cap_bprm_set_security (struct linux_binprm *bprm) void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) { - kernel_cap_t pP = current->cap_permitted; - kernel_cap_t pE = current->cap_effective; - uid_t uid; - gid_t gid; + struct cred *cred = current->cred; - current_uid_gid(&uid, &gid); - - if (bprm->e_uid != uid || bprm->e_gid != gid || + if (bprm->e_uid != cred->uid || bprm->e_gid != cred->gid || !cap_issubset(bprm->cap_post_exec_permitted, - current->cap_permitted)) { + cred->cap_permitted)) { set_dumpable(current->mm, suid_dumpable); current->pdeath_signal = 0; if (unsafe & ~LSM_UNSAFE_PTRACE_CAP) { if (!capable(CAP_SETUID)) { - bprm->e_uid = uid; - bprm->e_gid = gid; + bprm->e_uid = cred->uid; + bprm->e_gid = cred->gid; } if (cap_limit_ptraced_target()) { bprm->cap_post_exec_permitted = cap_intersect( bprm->cap_post_exec_permitted, - current->cap_permitted); + cred->cap_permitted); } } } - current->suid = current->euid = current->fsuid = bprm->e_uid; - current->sgid = current->egid = current->fsgid = bprm->e_gid; + cred->suid = cred->euid = cred->fsuid = bprm->e_uid; + cred->sgid = cred->egid = cred->fsgid = bprm->e_gid; /* For init, we want to retain the capabilities set * in the init_task struct. Thus we skip the usual * capability rules */ if (!is_global_init(current)) { - current->cap_permitted = bprm->cap_post_exec_permitted; + cred->cap_permitted = bprm->cap_post_exec_permitted; if (bprm->cap_effective) - current->cap_effective = bprm->cap_post_exec_permitted; + cred->cap_effective = bprm->cap_post_exec_permitted; else - cap_clear(current->cap_effective); + cap_clear(cred->cap_effective); } /* @@ -418,27 +420,30 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) * Number 1 above might fail if you don't have a full bset, but I think * that is interesting information to audit. */ - if (!cap_isclear(current->cap_effective)) { - if (!cap_issubset(CAP_FULL_SET, current->cap_effective) || - (bprm->e_uid != 0) || (current->uid != 0) || + if (!cap_isclear(cred->cap_effective)) { + if (!cap_issubset(CAP_FULL_SET, cred->cap_effective) || + (bprm->e_uid != 0) || (cred->uid != 0) || issecure(SECURE_NOROOT)) - audit_log_bprm_fcaps(bprm, &pP, &pE); + audit_log_bprm_fcaps(bprm, &cred->cap_permitted, + &cred->cap_effective); } - current->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); + cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); } int cap_bprm_secureexec (struct linux_binprm *bprm) { - if (current_uid() != 0) { + const struct cred *cred = current->cred; + + if (cred->uid != 0) { if (bprm->cap_effective) return 1; if (!cap_isclear(bprm->cap_post_exec_permitted)) return 1; } - return (current_euid() != current_uid() || - current_egid() != current_gid()); + return (cred->euid != cred->uid || + cred->egid != cred->gid); } int cap_inode_setxattr(struct dentry *dentry, const char *name, @@ -501,25 +506,27 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name) static inline void cap_emulate_setxuid (int old_ruid, int old_euid, int old_suid) { - uid_t euid = current_euid(); + struct cred *cred = current->cred; if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && - (current_uid() != 0 && euid != 0 && current_suid() != 0) && + (cred->uid != 0 && cred->euid != 0 && cred->suid != 0) && !issecure(SECURE_KEEP_CAPS)) { - cap_clear (current->cap_permitted); - cap_clear (current->cap_effective); + cap_clear (cred->cap_permitted); + cap_clear (cred->cap_effective); } - if (old_euid == 0 && euid != 0) { - cap_clear (current->cap_effective); + if (old_euid == 0 && cred->euid != 0) { + cap_clear (cred->cap_effective); } - if (old_euid != 0 && euid == 0) { - current->cap_effective = current->cap_permitted; + if (old_euid != 0 && cred->euid == 0) { + cred->cap_effective = cred->cap_permitted; } } int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, int flags) { + struct cred *cred = current->cred; + switch (flags) { case LSM_SETID_RE: case LSM_SETID_ID: @@ -541,16 +548,16 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, */ if (!issecure (SECURE_NO_SETUID_FIXUP)) { - if (old_fsuid == 0 && current_fsuid() != 0) { - current->cap_effective = + if (old_fsuid == 0 && cred->fsuid != 0) { + cred->cap_effective = cap_drop_fs_set( - current->cap_effective); + cred->cap_effective); } - if (old_fsuid != 0 && current_fsuid() == 0) { - current->cap_effective = + if (old_fsuid != 0 && cred->fsuid == 0) { + cred->cap_effective = cap_raise_fs_set( - current->cap_effective, - current->cap_permitted); + cred->cap_effective, + cred->cap_permitted); } } break; @@ -575,7 +582,8 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, */ static int cap_safe_nice(struct task_struct *p) { - if (!cap_issubset(p->cap_permitted, current->cap_permitted) && + if (!cap_issubset(p->cred->cap_permitted, + current->cred->cap_permitted) && !capable(CAP_SYS_NICE)) return -EPERM; return 0; @@ -610,7 +618,7 @@ static long cap_prctl_drop(unsigned long cap) return -EPERM; if (!cap_valid(cap)) return -EINVAL; - cap_lower(current->cap_bset, cap); + cap_lower(current->cred->cap_bset, cap); return 0; } @@ -633,6 +641,7 @@ int cap_task_setnice (struct task_struct *p, int nice) int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4, unsigned long arg5, long *rc_p) { + struct cred *cred = current->cred; long error = 0; switch (option) { @@ -640,7 +649,7 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, if (!cap_valid(arg2)) error = -EINVAL; else - error = !!cap_raised(current->cap_bset, arg2); + error = !!cap_raised(cred->cap_bset, arg2); break; #ifdef CONFIG_SECURITY_FILE_CAPABILITIES case PR_CAPBSET_DROP: @@ -667,9 +676,9 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, * capability-based-privilege environment. */ case PR_SET_SECUREBITS: - if ((((current->securebits & SECURE_ALL_LOCKS) >> 1) - & (current->securebits ^ arg2)) /*[1]*/ - || ((current->securebits & SECURE_ALL_LOCKS + if ((((cred->securebits & SECURE_ALL_LOCKS) >> 1) + & (cred->securebits ^ arg2)) /*[1]*/ + || ((cred->securebits & SECURE_ALL_LOCKS & ~arg2)) /*[2]*/ || (arg2 & ~(SECURE_ALL_LOCKS | SECURE_ALL_BITS)) /*[3]*/ || (cap_capable(current, CAP_SETPCAP, SECURITY_CAP_AUDIT) != 0)) { /*[4]*/ @@ -682,11 +691,11 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, */ error = -EPERM; /* cannot change a locked bit */ } else { - current->securebits = arg2; + cred->securebits = arg2; } break; case PR_GET_SECUREBITS: - error = current->securebits; + error = cred->securebits; break; #endif /* def CONFIG_SECURITY_FILE_CAPABILITIES */ @@ -701,10 +710,9 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, else if (issecure(SECURE_KEEP_CAPS_LOCKED)) error = -EPERM; else if (arg2) - current->securebits |= issecure_mask(SECURE_KEEP_CAPS); + cred->securebits |= issecure_mask(SECURE_KEEP_CAPS); else - current->securebits &= - ~issecure_mask(SECURE_KEEP_CAPS); + cred->securebits &= ~issecure_mask(SECURE_KEEP_CAPS); break; default: @@ -719,11 +727,12 @@ int cap_task_prctl(int option, unsigned long arg2, unsigned long arg3, void cap_task_reparent_to_init (struct task_struct *p) { - cap_set_init_eff(p->cap_effective); - cap_clear(p->cap_inheritable); - cap_set_full(p->cap_permitted); - p->securebits = SECUREBITS_DEFAULT; - return; + struct cred *cred = p->cred; + + cap_set_init_eff(cred->cap_effective); + cap_clear(cred->cap_inheritable); + cap_set_full(cred->cap_permitted); + p->cred->securebits = SECUREBITS_DEFAULT; } int cap_syslog (int type) diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index fcce331eca72..8833b447adef 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -889,7 +889,7 @@ long keyctl_instantiate_key(key_serial_t id, /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; - instkey = current->request_key_auth; + instkey = current->cred->request_key_auth; if (!instkey) goto error; @@ -932,8 +932,8 @@ long keyctl_instantiate_key(key_serial_t id, /* discard the assumed authority if it's just been disabled by * instantiation of the key */ if (ret == 0) { - key_put(current->request_key_auth); - current->request_key_auth = NULL; + key_put(current->cred->request_key_auth); + current->cred->request_key_auth = NULL; } error2: @@ -960,7 +960,7 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) /* the appropriate instantiation authorisation key must have been * assumed before calling this */ ret = -EPERM; - instkey = current->request_key_auth; + instkey = current->cred->request_key_auth; if (!instkey) goto error; @@ -983,8 +983,8 @@ long keyctl_negate_key(key_serial_t id, unsigned timeout, key_serial_t ringid) /* discard the assumed authority if it's just been disabled by * instantiation of the key */ if (ret == 0) { - key_put(current->request_key_auth); - current->request_key_auth = NULL; + key_put(current->cred->request_key_auth); + current->cred->request_key_auth = NULL; } error: @@ -999,6 +999,7 @@ error: */ long keyctl_set_reqkey_keyring(int reqkey_defl) { + struct cred *cred = current->cred; int ret; switch (reqkey_defl) { @@ -1018,10 +1019,10 @@ long keyctl_set_reqkey_keyring(int reqkey_defl) case KEY_REQKEY_DEFL_USER_KEYRING: case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: set: - current->jit_keyring = reqkey_defl; + cred->jit_keyring = reqkey_defl; case KEY_REQKEY_DEFL_NO_CHANGE: - return current->jit_keyring; + return cred->jit_keyring; case KEY_REQKEY_DEFL_GROUP_KEYRING: default: @@ -1086,8 +1087,8 @@ long keyctl_assume_authority(key_serial_t id) /* we divest ourselves of authority if given an ID of 0 */ if (id == 0) { - key_put(current->request_key_auth); - current->request_key_auth = NULL; + key_put(current->cred->request_key_auth); + current->cred->request_key_auth = NULL; ret = 0; goto error; } @@ -1103,8 +1104,8 @@ long keyctl_assume_authority(key_serial_t id) goto error; } - key_put(current->request_key_auth); - current->request_key_auth = authkey; + key_put(current->cred->request_key_auth); + current->cred->request_key_auth = authkey; ret = authkey->serial; error: diff --git a/security/keys/permission.c b/security/keys/permission.c index 3b41f9b52537..baf3d5f31e71 100644 --- a/security/keys/permission.c +++ b/security/keys/permission.c @@ -22,6 +22,7 @@ int key_task_permission(const key_ref_t key_ref, struct task_struct *context, key_perm_t perm) { + struct cred *cred = context->cred; struct key *key; key_perm_t kperm; int ret; @@ -29,7 +30,7 @@ int key_task_permission(const key_ref_t key_ref, key = key_ref_to_ptr(key_ref); /* use the second 8-bits of permissions for keys the caller owns */ - if (key->uid == context->fsuid) { + if (key->uid == cred->fsuid) { kperm = key->perm >> 16; goto use_these_perms; } @@ -37,14 +38,14 @@ int key_task_permission(const key_ref_t key_ref, /* use the third 8-bits of permissions for keys the caller has a group * membership in common with */ if (key->gid != -1 && key->perm & KEY_GRP_ALL) { - if (key->gid == context->fsgid) { + if (key->gid == cred->fsgid) { kperm = key->perm >> 8; goto use_these_perms; } - task_lock(context); - ret = groups_search(context->group_info, key->gid); - task_unlock(context); + spin_lock(&cred->lock); + ret = groups_search(cred->group_info, key->gid); + spin_unlock(&cred->lock); if (ret) { kperm = key->perm >> 8; diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index 1c793b7090a7..b0904cdda2e7 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -42,7 +42,7 @@ struct key_user root_key_user = { */ int install_user_keyrings(void) { - struct user_struct *user = current->user; + struct user_struct *user = current->cred->user; struct key *uid_keyring, *session_keyring; char buf[20]; int ret; @@ -156,7 +156,7 @@ int install_thread_keyring(void) sprintf(buf, "_tid.%u", tsk->pid); - keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk, + keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, KEY_ALLOC_QUOTA_OVERRUN, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); @@ -164,8 +164,8 @@ int install_thread_keyring(void) } task_lock(tsk); - old = tsk->thread_keyring; - tsk->thread_keyring = keyring; + old = tsk->cred->thread_keyring; + tsk->cred->thread_keyring = keyring; task_unlock(tsk); ret = 0; @@ -192,7 +192,7 @@ int install_process_keyring(void) if (!tsk->signal->process_keyring) { sprintf(buf, "_pid.%u", tsk->tgid); - keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk, + keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, KEY_ALLOC_QUOTA_OVERRUN, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); @@ -238,7 +238,7 @@ static int install_session_keyring(struct key *keyring) if (tsk->signal->session_keyring) flags = KEY_ALLOC_IN_QUOTA; - keyring = keyring_alloc(buf, tsk->uid, tsk->gid, tsk, + keyring = keyring_alloc(buf, tsk->cred->uid, tsk->cred->gid, tsk, flags, NULL); if (IS_ERR(keyring)) return PTR_ERR(keyring); @@ -292,14 +292,14 @@ int copy_thread_group_keys(struct task_struct *tsk) */ int copy_keys(unsigned long clone_flags, struct task_struct *tsk) { - key_check(tsk->thread_keyring); - key_check(tsk->request_key_auth); + key_check(tsk->cred->thread_keyring); + key_check(tsk->cred->request_key_auth); /* no thread keyring yet */ - tsk->thread_keyring = NULL; + tsk->cred->thread_keyring = NULL; /* copy the request_key() authorisation for this thread */ - key_get(tsk->request_key_auth); + key_get(tsk->cred->request_key_auth); return 0; @@ -322,8 +322,8 @@ void exit_thread_group_keys(struct signal_struct *tg) */ void exit_keys(struct task_struct *tsk) { - key_put(tsk->thread_keyring); - key_put(tsk->request_key_auth); + key_put(tsk->cred->thread_keyring); + key_put(tsk->cred->request_key_auth); } /* end exit_keys() */ @@ -337,8 +337,8 @@ int exec_keys(struct task_struct *tsk) /* newly exec'd tasks don't get a thread keyring */ task_lock(tsk); - old = tsk->thread_keyring; - tsk->thread_keyring = NULL; + old = tsk->cred->thread_keyring; + tsk->cred->thread_keyring = NULL; task_unlock(tsk); key_put(old); @@ -373,10 +373,11 @@ int suid_keys(struct task_struct *tsk) void key_fsuid_changed(struct task_struct *tsk) { /* update the ownership of the thread keyring */ - if (tsk->thread_keyring) { - down_write(&tsk->thread_keyring->sem); - tsk->thread_keyring->uid = tsk->fsuid; - up_write(&tsk->thread_keyring->sem); + BUG_ON(!tsk->cred); + if (tsk->cred->thread_keyring) { + down_write(&tsk->cred->thread_keyring->sem); + tsk->cred->thread_keyring->uid = tsk->cred->fsuid; + up_write(&tsk->cred->thread_keyring->sem); } } /* end key_fsuid_changed() */ @@ -388,10 +389,11 @@ void key_fsuid_changed(struct task_struct *tsk) void key_fsgid_changed(struct task_struct *tsk) { /* update the ownership of the thread keyring */ - if (tsk->thread_keyring) { - down_write(&tsk->thread_keyring->sem); - tsk->thread_keyring->gid = tsk->fsgid; - up_write(&tsk->thread_keyring->sem); + BUG_ON(!tsk->cred); + if (tsk->cred->thread_keyring) { + down_write(&tsk->cred->thread_keyring->sem); + tsk->cred->thread_keyring->gid = tsk->cred->fsgid; + up_write(&tsk->cred->thread_keyring->sem); } } /* end key_fsgid_changed() */ @@ -426,9 +428,9 @@ key_ref_t search_process_keyrings(struct key_type *type, err = ERR_PTR(-EAGAIN); /* search the thread keyring first */ - if (context->thread_keyring) { + if (context->cred->thread_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->thread_keyring, 1), + make_key_ref(context->cred->thread_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -493,9 +495,9 @@ key_ref_t search_process_keyrings(struct key_type *type, } } /* or search the user-session keyring */ - else if (context->user->session_keyring) { + else if (context->cred->user->session_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->user->session_keyring, 1), + make_key_ref(context->cred->user->session_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -517,20 +519,20 @@ key_ref_t search_process_keyrings(struct key_type *type, * search the keyrings of the process mentioned there * - we don't permit access to request_key auth keys via this method */ - if (context->request_key_auth && + if (context->cred->request_key_auth && context == current && type != &key_type_request_key_auth ) { /* defend against the auth key being revoked */ - down_read(&context->request_key_auth->sem); + down_read(&context->cred->request_key_auth->sem); - if (key_validate(context->request_key_auth) == 0) { - rka = context->request_key_auth->payload.data; + if (key_validate(context->cred->request_key_auth) == 0) { + rka = context->cred->request_key_auth->payload.data; key_ref = search_process_keyrings(type, description, match, rka->context); - up_read(&context->request_key_auth->sem); + up_read(&context->cred->request_key_auth->sem); if (!IS_ERR(key_ref)) goto found; @@ -547,7 +549,7 @@ key_ref_t search_process_keyrings(struct key_type *type, break; } } else { - up_read(&context->request_key_auth->sem); + up_read(&context->cred->request_key_auth->sem); } } @@ -580,15 +582,16 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, { struct request_key_auth *rka; struct task_struct *t = current; - key_ref_t key_ref, skey_ref; + struct cred *cred = t->cred; struct key *key; + key_ref_t key_ref, skey_ref; int ret; key_ref = ERR_PTR(-ENOKEY); switch (id) { case KEY_SPEC_THREAD_KEYRING: - if (!t->thread_keyring) { + if (!cred->thread_keyring) { if (!create) goto error; @@ -599,7 +602,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, } } - key = t->thread_keyring; + key = cred->thread_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; @@ -628,7 +631,8 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, ret = install_user_keyrings(); if (ret < 0) goto error; - ret = install_session_keyring(t->user->session_keyring); + ret = install_session_keyring( + cred->user->session_keyring); if (ret < 0) goto error; } @@ -641,25 +645,25 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, break; case KEY_SPEC_USER_KEYRING: - if (!t->user->uid_keyring) { + if (!cred->user->uid_keyring) { ret = install_user_keyrings(); if (ret < 0) goto error; } - key = t->user->uid_keyring; + key = cred->user->uid_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; case KEY_SPEC_USER_SESSION_KEYRING: - if (!t->user->session_keyring) { + if (!cred->user->session_keyring) { ret = install_user_keyrings(); if (ret < 0) goto error; } - key = t->user->session_keyring; + key = cred->user->session_keyring; atomic_inc(&key->usage); key_ref = make_key_ref(key, 1); break; @@ -670,7 +674,7 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, goto error; case KEY_SPEC_REQKEY_AUTH_KEY: - key = t->request_key_auth; + key = cred->request_key_auth; if (!key) goto error; @@ -679,19 +683,19 @@ key_ref_t lookup_user_key(key_serial_t id, int create, int partial, break; case KEY_SPEC_REQUESTOR_KEYRING: - if (!t->request_key_auth) + if (!cred->request_key_auth) goto error; - down_read(&t->request_key_auth->sem); - if (t->request_key_auth->flags & KEY_FLAG_REVOKED) { + down_read(&cred->request_key_auth->sem); + if (cred->request_key_auth->flags & KEY_FLAG_REVOKED) { key_ref = ERR_PTR(-EKEYREVOKED); key = NULL; } else { - rka = t->request_key_auth->payload.data; + rka = cred->request_key_auth->payload.data; key = rka->dest_keyring; atomic_inc(&key->usage); } - up_read(&t->request_key_auth->sem); + up_read(&cred->request_key_auth->sem); if (!key) goto error; key_ref = make_key_ref(key, 1); @@ -791,7 +795,7 @@ long join_session_keyring(const char *name) keyring = find_keyring_by_name(name, false); if (PTR_ERR(keyring) == -ENOKEY) { /* not found - try and create a new one */ - keyring = keyring_alloc(name, tsk->uid, tsk->gid, tsk, + keyring = keyring_alloc(name, tsk->cred->uid, tsk->cred->gid, tsk, KEY_ALLOC_IN_QUOTA, NULL); if (IS_ERR(keyring)) { ret = PTR_ERR(keyring); diff --git a/security/keys/request_key.c b/security/keys/request_key.c index 8e9d93b4a402..3e9b9eb1dd28 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -104,7 +104,8 @@ static int call_sbin_request_key(struct key_construction *cons, /* we specify the process's default keyrings */ sprintf(keyring_str[0], "%d", - tsk->thread_keyring ? tsk->thread_keyring->serial : 0); + tsk->cred->thread_keyring ? + tsk->cred->thread_keyring->serial : 0); prkey = 0; if (tsk->signal->process_keyring) @@ -117,7 +118,7 @@ static int call_sbin_request_key(struct key_construction *cons, sskey = rcu_dereference(tsk->signal->session_keyring)->serial; rcu_read_unlock(); } else { - sskey = tsk->user->session_keyring->serial; + sskey = tsk->cred->user->session_keyring->serial; } sprintf(keyring_str[2], "%d", sskey); @@ -232,11 +233,11 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) } else { /* use a default keyring; falling through the cases until we * find one that we actually have */ - switch (tsk->jit_keyring) { + switch (tsk->cred->jit_keyring) { case KEY_REQKEY_DEFL_DEFAULT: case KEY_REQKEY_DEFL_REQUESTOR_KEYRING: - if (tsk->request_key_auth) { - authkey = tsk->request_key_auth; + if (tsk->cred->request_key_auth) { + authkey = tsk->cred->request_key_auth; down_read(&authkey->sem); rka = authkey->payload.data; if (!test_bit(KEY_FLAG_REVOKED, @@ -249,7 +250,7 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) } case KEY_REQKEY_DEFL_THREAD_KEYRING: - dest_keyring = key_get(tsk->thread_keyring); + dest_keyring = key_get(tsk->cred->thread_keyring); if (dest_keyring) break; @@ -268,11 +269,12 @@ static void construct_get_dest_keyring(struct key **_dest_keyring) break; case KEY_REQKEY_DEFL_USER_SESSION_KEYRING: - dest_keyring = key_get(tsk->user->session_keyring); + dest_keyring = + key_get(tsk->cred->user->session_keyring); break; case KEY_REQKEY_DEFL_USER_KEYRING: - dest_keyring = key_get(tsk->user->uid_keyring); + dest_keyring = key_get(tsk->cred->user->uid_keyring); break; case KEY_REQKEY_DEFL_GROUP_KEYRING: diff --git a/security/keys/request_key_auth.c b/security/keys/request_key_auth.c index 1762d44711d5..2125579d5d73 100644 --- a/security/keys/request_key_auth.c +++ b/security/keys/request_key_auth.c @@ -164,22 +164,22 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, /* see if the calling process is already servicing the key request of * another process */ - if (current->request_key_auth) { + if (current->cred->request_key_auth) { /* it is - use that instantiation context here too */ - down_read(¤t->request_key_auth->sem); + down_read(¤t->cred->request_key_auth->sem); /* if the auth key has been revoked, then the key we're * servicing is already instantiated */ if (test_bit(KEY_FLAG_REVOKED, - ¤t->request_key_auth->flags)) + ¤t->cred->request_key_auth->flags)) goto auth_key_revoked; - irka = current->request_key_auth->payload.data; + irka = current->cred->request_key_auth->payload.data; rka->context = irka->context; rka->pid = irka->pid; get_task_struct(rka->context); - up_read(¤t->request_key_auth->sem); + up_read(¤t->cred->request_key_auth->sem); } else { /* it isn't - use this process as the context */ @@ -214,7 +214,7 @@ struct key *request_key_auth_new(struct key *target, const void *callout_info, return authkey; auth_key_revoked: - up_read(¤t->request_key_auth->sem); + up_read(¤t->cred->request_key_auth->sem); kfree(rka->callout_info); kfree(rka); kleave("= -EKEYREVOKED"); diff --git a/security/selinux/exports.c b/security/selinux/exports.c index 64af2d3409ef..cf02490cd1eb 100644 --- a/security/selinux/exports.c +++ b/security/selinux/exports.c @@ -39,7 +39,7 @@ EXPORT_SYMBOL_GPL(selinux_string_to_sid); int selinux_secmark_relabel_packet_permission(u32 sid) { if (selinux_enabled) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; return avc_has_perm(tsec->sid, sid, SECCLASS_PACKET, PACKET__RELABELTO, NULL); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 9f6da154cc82..328308f2882a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -167,21 +167,21 @@ static int task_alloc_security(struct task_struct *task) return -ENOMEM; tsec->osid = tsec->sid = SECINITSID_UNLABELED; - task->security = tsec; + task->cred->security = tsec; return 0; } static void task_free_security(struct task_struct *task) { - struct task_security_struct *tsec = task->security; - task->security = NULL; + struct task_security_struct *tsec = task->cred->security; + task->cred->security = NULL; kfree(tsec); } static int inode_alloc_security(struct inode *inode) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct inode_security_struct *isec; isec = kmem_cache_zalloc(sel_inode_cache, GFP_NOFS); @@ -215,7 +215,7 @@ static void inode_free_security(struct inode *inode) static int file_alloc_security(struct file *file) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct file_security_struct *fsec; fsec = kzalloc(sizeof(struct file_security_struct), GFP_KERNEL); @@ -554,7 +554,7 @@ static int selinux_set_mnt_opts(struct super_block *sb, struct security_mnt_opts *opts) { int rc = 0, i; - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct superblock_security_struct *sbsec = sb->s_security; const char *name = sb->s_type->name; struct inode *inode = sbsec->sb->s_root->d_inode; @@ -1353,8 +1353,8 @@ static int task_has_perm(struct task_struct *tsk1, { struct task_security_struct *tsec1, *tsec2; - tsec1 = tsk1->security; - tsec2 = tsk2->security; + tsec1 = tsk1->cred->security; + tsec2 = tsk2->cred->security; return avc_has_perm(tsec1->sid, tsec2->sid, SECCLASS_PROCESS, perms, NULL); } @@ -1374,7 +1374,7 @@ static int task_has_capability(struct task_struct *tsk, u32 av = CAP_TO_MASK(cap); int rc; - tsec = tsk->security; + tsec = tsk->cred->security; AVC_AUDIT_DATA_INIT(&ad, CAP); ad.tsk = tsk; @@ -1405,7 +1405,7 @@ static int task_has_system(struct task_struct *tsk, { struct task_security_struct *tsec; - tsec = tsk->security; + tsec = tsk->cred->security; return avc_has_perm(tsec->sid, SECINITSID_KERNEL, SECCLASS_SYSTEM, perms, NULL); @@ -1426,7 +1426,7 @@ static int inode_has_perm(struct task_struct *tsk, if (unlikely(IS_PRIVATE(inode))) return 0; - tsec = tsk->security; + tsec = tsk->cred->security; isec = inode->i_security; if (!adp) { @@ -1466,7 +1466,7 @@ static int file_has_perm(struct task_struct *tsk, struct file *file, u32 av) { - struct task_security_struct *tsec = tsk->security; + struct task_security_struct *tsec = tsk->cred->security; struct file_security_struct *fsec = file->f_security; struct inode *inode = file->f_path.dentry->d_inode; struct avc_audit_data ad; @@ -1503,7 +1503,7 @@ static int may_create(struct inode *dir, struct avc_audit_data ad; int rc; - tsec = current->security; + tsec = current->cred->security; dsec = dir->i_security; sbsec = dir->i_sb->s_security; @@ -1540,7 +1540,7 @@ static int may_create_key(u32 ksid, { struct task_security_struct *tsec; - tsec = ctx->security; + tsec = ctx->cred->security; return avc_has_perm(tsec->sid, ksid, SECCLASS_KEY, KEY__CREATE, NULL); } @@ -1561,7 +1561,7 @@ static int may_link(struct inode *dir, u32 av; int rc; - tsec = current->security; + tsec = current->cred->security; dsec = dir->i_security; isec = dentry->d_inode->i_security; @@ -1606,7 +1606,7 @@ static inline int may_rename(struct inode *old_dir, int old_is_dir, new_is_dir; int rc; - tsec = current->security; + tsec = current->cred->security; old_dsec = old_dir->i_security; old_isec = old_dentry->d_inode->i_security; old_is_dir = S_ISDIR(old_dentry->d_inode->i_mode); @@ -1659,7 +1659,7 @@ static int superblock_has_perm(struct task_struct *tsk, struct task_security_struct *tsec; struct superblock_security_struct *sbsec; - tsec = tsk->security; + tsec = tsk->cred->security; sbsec = sb->s_security; return avc_has_perm(tsec->sid, sbsec->sid, SECCLASS_FILESYSTEM, perms, ad); @@ -1758,8 +1758,8 @@ static int selinux_ptrace_may_access(struct task_struct *child, return rc; if (mode == PTRACE_MODE_READ) { - struct task_security_struct *tsec = current->security; - struct task_security_struct *csec = child->security; + struct task_security_struct *tsec = current->cred->security; + struct task_security_struct *csec = child->cred->security; return avc_has_perm(tsec->sid, csec->sid, SECCLASS_FILE, FILE__READ, NULL); } @@ -1874,7 +1874,7 @@ static int selinux_sysctl(ctl_table *table, int op) if (rc) return rc; - tsec = current->security; + tsec = current->cred->security; rc = selinux_sysctl_get_sid(table, (op == 0001) ? SECCLASS_DIR : SECCLASS_FILE, &tsid); @@ -2025,7 +2025,7 @@ static int selinux_bprm_set_security(struct linux_binprm *bprm) if (bsec->set) return 0; - tsec = current->security; + tsec = current->cred->security; isec = inode->i_security; /* Default to the current task SID. */ @@ -2090,7 +2090,7 @@ static int selinux_bprm_check_security(struct linux_binprm *bprm) static int selinux_bprm_secureexec(struct linux_binprm *bprm) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; int atsecure = 0; if (tsec->osid != tsec->sid) { @@ -2214,7 +2214,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) secondary_ops->bprm_apply_creds(bprm, unsafe); - tsec = current->security; + tsec = current->cred->security; bsec = bprm->security; sid = bsec->sid; @@ -2243,7 +2243,7 @@ static void selinux_bprm_apply_creds(struct linux_binprm *bprm, int unsafe) rcu_read_lock(); tracer = tracehook_tracer_task(current); if (likely(tracer != NULL)) { - sec = tracer->security; + sec = tracer->cred->security; ptsid = sec->sid; } rcu_read_unlock(); @@ -2274,7 +2274,7 @@ static void selinux_bprm_post_apply_creds(struct linux_binprm *bprm) int rc, i; unsigned long flags; - tsec = current->security; + tsec = current->cred->security; bsec = bprm->security; if (bsec->unsafe) { @@ -2521,7 +2521,7 @@ static int selinux_inode_init_security(struct inode *inode, struct inode *dir, int rc; char *namep = NULL, *context; - tsec = current->security; + tsec = current->cred->security; dsec = dir->i_security; sbsec = dir->i_sb->s_security; @@ -2706,7 +2706,7 @@ static int selinux_inode_setotherxattr(struct dentry *dentry, const char *name) static int selinux_inode_setxattr(struct dentry *dentry, const char *name, const void *value, size_t size, int flags) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct inode *inode = dentry->d_inode; struct inode_security_struct *isec = inode->i_security; struct superblock_security_struct *sbsec; @@ -2918,7 +2918,7 @@ static int selinux_revalidate_file_permission(struct file *file, int mask) static int selinux_file_permission(struct file *file, int mask) { struct inode *inode = file->f_path.dentry->d_inode; - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct file_security_struct *fsec = file->f_security; struct inode_security_struct *isec = inode->i_security; @@ -2995,7 +2995,8 @@ static int selinux_file_mmap(struct file *file, unsigned long reqprot, unsigned long addr, unsigned long addr_only) { int rc = 0; - u32 sid = ((struct task_security_struct *)(current->security))->sid; + u32 sid = ((struct task_security_struct *) + (current->cred->security))->sid; if (addr < mmap_min_addr) rc = avc_has_perm(sid, sid, SECCLASS_MEMPROTECT, @@ -3107,7 +3108,7 @@ static int selinux_file_set_fowner(struct file *file) struct task_security_struct *tsec; struct file_security_struct *fsec; - tsec = current->security; + tsec = current->cred->security; fsec = file->f_security; fsec->fown_sid = tsec->sid; @@ -3125,7 +3126,7 @@ static int selinux_file_send_sigiotask(struct task_struct *tsk, /* struct fown_struct is never outside the context of a struct file */ file = container_of(fown, struct file, f_owner); - tsec = tsk->security; + tsec = tsk->cred->security; fsec = file->f_security; if (!signum) @@ -3188,12 +3189,12 @@ static int selinux_task_alloc_security(struct task_struct *tsk) struct task_security_struct *tsec1, *tsec2; int rc; - tsec1 = current->security; + tsec1 = current->cred->security; rc = task_alloc_security(tsk); if (rc) return rc; - tsec2 = tsk->security; + tsec2 = tsk->cred->security; tsec2->osid = tsec1->osid; tsec2->sid = tsec1->sid; @@ -3251,7 +3252,7 @@ static int selinux_task_getsid(struct task_struct *p) static void selinux_task_getsecid(struct task_struct *p, u32 *secid) { - struct task_security_struct *tsec = p->security; + struct task_security_struct *tsec = p->cred->security; *secid = tsec->sid; } @@ -3343,7 +3344,7 @@ static int selinux_task_kill(struct task_struct *p, struct siginfo *info, perm = PROCESS__SIGNULL; /* null signal; existence test */ else perm = signal_to_av(sig); - tsec = p->security; + tsec = p->cred->security; if (secid) rc = avc_has_perm(secid, tsec->sid, SECCLASS_PROCESS, perm, NULL); else @@ -3375,7 +3376,7 @@ static void selinux_task_reparent_to_init(struct task_struct *p) secondary_ops->task_reparent_to_init(p); - tsec = p->security; + tsec = p->cred->security; tsec->osid = tsec->sid; tsec->sid = SECINITSID_KERNEL; return; @@ -3384,7 +3385,7 @@ static void selinux_task_reparent_to_init(struct task_struct *p) static void selinux_task_to_inode(struct task_struct *p, struct inode *inode) { - struct task_security_struct *tsec = p->security; + struct task_security_struct *tsec = p->cred->security; struct inode_security_struct *isec = inode->i_security; isec->sid = tsec->sid; @@ -3632,7 +3633,7 @@ static int socket_has_perm(struct task_struct *task, struct socket *sock, struct avc_audit_data ad; int err = 0; - tsec = task->security; + tsec = task->cred->security; isec = SOCK_INODE(sock)->i_security; if (isec->sid == SECINITSID_KERNEL) @@ -3656,7 +3657,7 @@ static int selinux_socket_create(int family, int type, if (kern) goto out; - tsec = current->security; + tsec = current->cred->security; newsid = tsec->sockcreate_sid ? : tsec->sid; err = avc_has_perm(tsec->sid, newsid, socket_type_to_security_class(family, type, @@ -3677,7 +3678,7 @@ static int selinux_socket_post_create(struct socket *sock, int family, isec = SOCK_INODE(sock)->i_security; - tsec = current->security; + tsec = current->cred->security; newsid = tsec->sockcreate_sid ? : tsec->sid; isec->sclass = socket_type_to_security_class(family, type, protocol); isec->sid = kern ? SECINITSID_KERNEL : newsid; @@ -3723,7 +3724,7 @@ static int selinux_socket_bind(struct socket *sock, struct sockaddr *address, in struct sock *sk = sock->sk; u32 sid, node_perm; - tsec = current->security; + tsec = current->cred->security; isec = SOCK_INODE(sock)->i_security; if (family == PF_INET) { @@ -4764,7 +4765,7 @@ static int ipc_alloc_security(struct task_struct *task, struct kern_ipc_perm *perm, u16 sclass) { - struct task_security_struct *tsec = task->security; + struct task_security_struct *tsec = task->cred->security; struct ipc_security_struct *isec; isec = kzalloc(sizeof(struct ipc_security_struct), GFP_KERNEL); @@ -4814,7 +4815,7 @@ static int ipc_has_perm(struct kern_ipc_perm *ipc_perms, struct ipc_security_struct *isec; struct avc_audit_data ad; - tsec = current->security; + tsec = current->cred->security; isec = ipc_perms->security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -4845,7 +4846,7 @@ static int selinux_msg_queue_alloc_security(struct msg_queue *msq) if (rc) return rc; - tsec = current->security; + tsec = current->cred->security; isec = msq->q_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -4871,7 +4872,7 @@ static int selinux_msg_queue_associate(struct msg_queue *msq, int msqflg) struct ipc_security_struct *isec; struct avc_audit_data ad; - tsec = current->security; + tsec = current->cred->security; isec = msq->q_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -4917,7 +4918,7 @@ static int selinux_msg_queue_msgsnd(struct msg_queue *msq, struct msg_msg *msg, struct avc_audit_data ad; int rc; - tsec = current->security; + tsec = current->cred->security; isec = msq->q_perm.security; msec = msg->security; @@ -4965,7 +4966,7 @@ static int selinux_msg_queue_msgrcv(struct msg_queue *msq, struct msg_msg *msg, struct avc_audit_data ad; int rc; - tsec = target->security; + tsec = target->cred->security; isec = msq->q_perm.security; msec = msg->security; @@ -4992,7 +4993,7 @@ static int selinux_shm_alloc_security(struct shmid_kernel *shp) if (rc) return rc; - tsec = current->security; + tsec = current->cred->security; isec = shp->shm_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -5018,7 +5019,7 @@ static int selinux_shm_associate(struct shmid_kernel *shp, int shmflg) struct ipc_security_struct *isec; struct avc_audit_data ad; - tsec = current->security; + tsec = current->cred->security; isec = shp->shm_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -5091,7 +5092,7 @@ static int selinux_sem_alloc_security(struct sem_array *sma) if (rc) return rc; - tsec = current->security; + tsec = current->cred->security; isec = sma->sem_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -5117,7 +5118,7 @@ static int selinux_sem_associate(struct sem_array *sma, int semflg) struct ipc_security_struct *isec; struct avc_audit_data ad; - tsec = current->security; + tsec = current->cred->security; isec = sma->sem_perm.security; AVC_AUDIT_DATA_INIT(&ad, IPC); @@ -5224,7 +5225,7 @@ static int selinux_getprocattr(struct task_struct *p, return error; } - tsec = p->security; + tsec = p->cred->security; if (!strcmp(name, "current")) sid = tsec->sid; @@ -5308,7 +5309,7 @@ static int selinux_setprocattr(struct task_struct *p, operation. See selinux_bprm_set_security for the execve checks and may_create for the file creation checks. The operation will then fail if the context is not permitted. */ - tsec = p->security; + tsec = p->cred->security; if (!strcmp(name, "exec")) tsec->exec_sid = sid; else if (!strcmp(name, "fscreate")) @@ -5361,7 +5362,8 @@ boundary_ok: rcu_read_lock(); tracer = tracehook_tracer_task(p); if (tracer != NULL) { - struct task_security_struct *ptsec = tracer->security; + struct task_security_struct *ptsec = + tracer->cred->security; u32 ptsid = ptsec->sid; rcu_read_unlock(); error = avc_has_perm_noaudit(ptsid, sid, @@ -5405,7 +5407,7 @@ static void selinux_release_secctx(char *secdata, u32 seclen) static int selinux_key_alloc(struct key *k, struct task_struct *tsk, unsigned long flags) { - struct task_security_struct *tsec = tsk->security; + struct task_security_struct *tsec = tsk->cred->security; struct key_security_struct *ksec; ksec = kzalloc(sizeof(struct key_security_struct), GFP_KERNEL); @@ -5439,7 +5441,7 @@ static int selinux_key_permission(key_ref_t key_ref, key = key_ref_to_ptr(key_ref); - tsec = ctx->security; + tsec = ctx->cred->security; ksec = key->security; /* if no specific permissions are requested, we skip the @@ -5683,7 +5685,7 @@ static __init int selinux_init(void) /* Set the security state for the initial task. */ if (task_alloc_security(current)) panic("SELinux: Failed to initialize initial task.\n"); - tsec = current->security; + tsec = current->cred->security; tsec->osid = tsec->sid = SECINITSID_KERNEL; sel_inode_cache = kmem_cache_create("selinux_inode_security", diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 69c9dccc8cf0..10715d1330b9 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -97,7 +97,7 @@ static int task_has_security(struct task_struct *tsk, { struct task_security_struct *tsec; - tsec = tsk->security; + tsec = tsk->cred->security; if (!tsec) return -EACCES; diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 8f17f542a116..d7db76617b0e 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -197,7 +197,7 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *uctx, u32 sid) { int rc = 0; - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct xfrm_sec_ctx *ctx = NULL; char *ctx_str = NULL; u32 str_len; @@ -333,7 +333,7 @@ void selinux_xfrm_policy_free(struct xfrm_sec_ctx *ctx) */ int selinux_xfrm_policy_delete(struct xfrm_sec_ctx *ctx) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; int rc = 0; if (ctx) { @@ -378,7 +378,7 @@ void selinux_xfrm_state_free(struct xfrm_state *x) */ int selinux_xfrm_state_delete(struct xfrm_state *x) { - struct task_security_struct *tsec = current->security; + struct task_security_struct *tsec = current->cred->security; struct xfrm_sec_ctx *ctx = x->security; int rc = 0; diff --git a/security/smack/smack_access.c b/security/smack/smack_access.c index 79ff21ed4c3b..b6dd4fc0fb0b 100644 --- a/security/smack/smack_access.c +++ b/security/smack/smack_access.c @@ -164,7 +164,7 @@ int smk_curacc(char *obj_label, u32 mode) { int rc; - rc = smk_access(current->security, obj_label, mode); + rc = smk_access(current->cred->security, obj_label, mode); if (rc == 0) return 0; @@ -173,7 +173,7 @@ int smk_curacc(char *obj_label, u32 mode) * only one that gets privilege and current does not * have that label. */ - if (smack_onlycap != NULL && smack_onlycap != current->security) + if (smack_onlycap != NULL && smack_onlycap != current->cred->security) return rc; if (capable(CAP_MAC_OVERRIDE)) diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index 6e2dc0bab70d..791da238d049 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -102,7 +102,8 @@ static int smack_ptrace_may_access(struct task_struct *ctp, unsigned int mode) if (rc != 0) return rc; - rc = smk_access(current->security, ctp->security, MAY_READWRITE); + rc = smk_access(current->cred->security, ctp->cred->security, + MAY_READWRITE); if (rc != 0 && capable(CAP_MAC_OVERRIDE)) return 0; return rc; @@ -124,7 +125,8 @@ static int smack_ptrace_traceme(struct task_struct *ptp) if (rc != 0) return rc; - rc = smk_access(ptp->security, current->security, MAY_READWRITE); + rc = smk_access(ptp->cred->security, current->cred->security, + MAY_READWRITE); if (rc != 0 && has_capability(ptp, CAP_MAC_OVERRIDE)) return 0; return rc; @@ -141,7 +143,7 @@ static int smack_ptrace_traceme(struct task_struct *ptp) static int smack_syslog(int type) { int rc; - char *sp = current->security; + char *sp = current->cred->security; rc = cap_syslog(type); if (rc != 0) @@ -373,7 +375,7 @@ static int smack_sb_umount(struct vfsmount *mnt, int flags) */ static int smack_inode_alloc_security(struct inode *inode) { - inode->i_security = new_inode_smack(current->security); + inode->i_security = new_inode_smack(current->cred->security); if (inode->i_security == NULL) return -ENOMEM; return 0; @@ -818,7 +820,7 @@ static int smack_file_permission(struct file *file, int mask) */ static int smack_file_alloc_security(struct file *file) { - file->f_security = current->security; + file->f_security = current->cred->security; return 0; } @@ -916,7 +918,7 @@ static int smack_file_fcntl(struct file *file, unsigned int cmd, */ static int smack_file_set_fowner(struct file *file) { - file->f_security = current->security; + file->f_security = current->cred->security; return 0; } @@ -941,7 +943,7 @@ static int smack_file_send_sigiotask(struct task_struct *tsk, * struct fown_struct is never outside the context of a struct file */ file = container_of(fown, struct file, f_owner); - rc = smk_access(file->f_security, tsk->security, MAY_WRITE); + rc = smk_access(file->f_security, tsk->cred->security, MAY_WRITE); if (rc != 0 && has_capability(tsk, CAP_MAC_OVERRIDE)) return 0; return rc; @@ -984,7 +986,7 @@ static int smack_file_receive(struct file *file) */ static int smack_task_alloc_security(struct task_struct *tsk) { - tsk->security = current->security; + tsk->cred->security = current->cred->security; return 0; } @@ -999,7 +1001,7 @@ static int smack_task_alloc_security(struct task_struct *tsk) */ static void smack_task_free_security(struct task_struct *task) { - task->security = NULL; + task->cred->security = NULL; } /** @@ -1011,7 +1013,7 @@ static void smack_task_free_security(struct task_struct *task) */ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) { - return smk_curacc(p->security, MAY_WRITE); + return smk_curacc(p->cred->security, MAY_WRITE); } /** @@ -1022,7 +1024,7 @@ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) */ static int smack_task_getpgid(struct task_struct *p) { - return smk_curacc(p->security, MAY_READ); + return smk_curacc(p->cred->security, MAY_READ); } /** @@ -1033,7 +1035,7 @@ static int smack_task_getpgid(struct task_struct *p) */ static int smack_task_getsid(struct task_struct *p) { - return smk_curacc(p->security, MAY_READ); + return smk_curacc(p->cred->security, MAY_READ); } /** @@ -1045,7 +1047,7 @@ static int smack_task_getsid(struct task_struct *p) */ static void smack_task_getsecid(struct task_struct *p, u32 *secid) { - *secid = smack_to_secid(p->security); + *secid = smack_to_secid(p->cred->security); } /** @@ -1061,7 +1063,7 @@ static int smack_task_setnice(struct task_struct *p, int nice) rc = cap_task_setnice(p, nice); if (rc == 0) - rc = smk_curacc(p->security, MAY_WRITE); + rc = smk_curacc(p->cred->security, MAY_WRITE); return rc; } @@ -1078,7 +1080,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) rc = cap_task_setioprio(p, ioprio); if (rc == 0) - rc = smk_curacc(p->security, MAY_WRITE); + rc = smk_curacc(p->cred->security, MAY_WRITE); return rc; } @@ -1090,7 +1092,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) */ static int smack_task_getioprio(struct task_struct *p) { - return smk_curacc(p->security, MAY_READ); + return smk_curacc(p->cred->security, MAY_READ); } /** @@ -1108,7 +1110,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, rc = cap_task_setscheduler(p, policy, lp); if (rc == 0) - rc = smk_curacc(p->security, MAY_WRITE); + rc = smk_curacc(p->cred->security, MAY_WRITE); return rc; } @@ -1120,7 +1122,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, */ static int smack_task_getscheduler(struct task_struct *p) { - return smk_curacc(p->security, MAY_READ); + return smk_curacc(p->cred->security, MAY_READ); } /** @@ -1131,7 +1133,7 @@ static int smack_task_getscheduler(struct task_struct *p) */ static int smack_task_movememory(struct task_struct *p) { - return smk_curacc(p->security, MAY_WRITE); + return smk_curacc(p->cred->security, MAY_WRITE); } /** @@ -1154,13 +1156,13 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info, * can write the receiver. */ if (secid == 0) - return smk_curacc(p->security, MAY_WRITE); + return smk_curacc(p->cred->security, MAY_WRITE); /* * If the secid isn't 0 we're dealing with some USB IO * specific behavior. This is not clean. For one thing * we can't take privilege into account. */ - return smk_access(smack_from_secid(secid), p->security, MAY_WRITE); + return smk_access(smack_from_secid(secid), p->cred->security, MAY_WRITE); } /** @@ -1173,7 +1175,7 @@ static int smack_task_wait(struct task_struct *p) { int rc; - rc = smk_access(current->security, p->security, MAY_WRITE); + rc = smk_access(current->cred->security, p->cred->security, MAY_WRITE); if (rc == 0) return 0; @@ -1204,7 +1206,7 @@ static int smack_task_wait(struct task_struct *p) static void smack_task_to_inode(struct task_struct *p, struct inode *inode) { struct inode_smack *isp = inode->i_security; - isp->smk_inode = p->security; + isp->smk_inode = p->cred->security; } /* @@ -1223,7 +1225,7 @@ static void smack_task_to_inode(struct task_struct *p, struct inode *inode) */ static int smack_sk_alloc_security(struct sock *sk, int family, gfp_t gfp_flags) { - char *csp = current->security; + char *csp = current->cred->security; struct socket_smack *ssp; ssp = kzalloc(sizeof(struct socket_smack), gfp_flags); @@ -1448,7 +1450,7 @@ static int smack_flags_to_may(int flags) */ static int smack_msg_msg_alloc_security(struct msg_msg *msg) { - msg->security = current->security; + msg->security = current->cred->security; return 0; } @@ -1484,7 +1486,7 @@ static int smack_shm_alloc_security(struct shmid_kernel *shp) { struct kern_ipc_perm *isp = &shp->shm_perm; - isp->security = current->security; + isp->security = current->cred->security; return 0; } @@ -1593,7 +1595,7 @@ static int smack_sem_alloc_security(struct sem_array *sma) { struct kern_ipc_perm *isp = &sma->sem_perm; - isp->security = current->security; + isp->security = current->cred->security; return 0; } @@ -1697,7 +1699,7 @@ static int smack_msg_queue_alloc_security(struct msg_queue *msq) { struct kern_ipc_perm *kisp = &msq->q_perm; - kisp->security = current->security; + kisp->security = current->cred->security; return 0; } @@ -1852,7 +1854,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode) struct super_block *sbp; struct superblock_smack *sbsp; struct inode_smack *isp; - char *csp = current->security; + char *csp = current->cred->security; char *fetched; char *final; struct dentry *dp; @@ -2009,7 +2011,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value) if (strcmp(name, "current") != 0) return -EINVAL; - cp = kstrdup(p->security, GFP_KERNEL); + cp = kstrdup(p->cred->security, GFP_KERNEL); if (cp == NULL) return -ENOMEM; @@ -2055,7 +2057,7 @@ static int smack_setprocattr(struct task_struct *p, char *name, if (newsmack == NULL) return -EINVAL; - p->security = newsmack; + p->cred->security = newsmack; return size; } @@ -2288,8 +2290,8 @@ static void smack_sock_graft(struct sock *sk, struct socket *parent) return; ssp = sk->sk_security; - ssp->smk_in = current->security; - ssp->smk_out = current->security; + ssp->smk_in = current->cred->security; + ssp->smk_out = current->cred->security; ssp->smk_packet[0] = '\0'; rc = smack_netlabel(sk); @@ -2362,7 +2364,7 @@ static int smack_inet_conn_request(struct sock *sk, struct sk_buff *skb, static int smack_key_alloc(struct key *key, struct task_struct *tsk, unsigned long flags) { - key->security = tsk->security; + key->security = tsk->cred->security; return 0; } @@ -2403,10 +2405,11 @@ static int smack_key_permission(key_ref_t key_ref, /* * This should not occur */ - if (context->security == NULL) + if (context->cred->security == NULL) return -EACCES; - return smk_access(context->security, keyp->security, MAY_READWRITE); + return smk_access(context->cred->security, keyp->security, + MAY_READWRITE); } #endif /* CONFIG_KEYS */ @@ -2726,7 +2729,7 @@ static __init int smack_init(void) /* * Set the security state for the initial task. */ - current->security = &smack_known_floor.smk_known; + current->cred->security = &smack_known_floor.smk_known; /* * Initialize locks diff --git a/security/smack/smackfs.c b/security/smack/smackfs.c index c21d8c8bf0c7..c5ca279e0506 100644 --- a/security/smack/smackfs.c +++ b/security/smack/smackfs.c @@ -336,7 +336,7 @@ static void smk_cipso_doi(void) audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = smack_to_secid(current->security); + audit_info.secid = smack_to_secid(current->cred->security); rc = netlbl_cfg_map_del(NULL, &audit_info); if (rc != 0) @@ -371,7 +371,7 @@ static void smk_unlbl_ambient(char *oldambient) audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); - audit_info.secid = smack_to_secid(current->security); + audit_info.secid = smack_to_secid(current->cred->security); if (oldambient != NULL) { rc = netlbl_cfg_map_del(oldambient, &audit_info); @@ -843,7 +843,7 @@ static ssize_t smk_write_onlycap(struct file *file, const char __user *buf, size_t count, loff_t *ppos) { char in[SMK_LABELLEN]; - char *sp = current->security; + char *sp = current->cred->security; if (!capable(CAP_MAC_ADMIN)) return -EPERM; -- cgit v1.2.3-70-g09d2 From c69e8d9c01db2adc503464993c358901c9af9de4 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 14 Nov 2008 10:39:19 +1100 Subject: CRED: Use RCU to access another task's creds and to release a task's own creds Use RCU to access another task's creds and to release a task's own creds. This means that it will be possible for the credentials of a task to be replaced without another task (a) requiring a full lock to read them, and (b) seeing deallocated memory. Signed-off-by: David Howells Acked-by: James Morris Acked-by: Serge Hallyn Signed-off-by: James Morris --- arch/ia64/kernel/perfmon.c | 32 +++++++++++++--------- drivers/connector/cn_proc.c | 16 +++++++---- fs/binfmt_elf.c | 8 ++++-- fs/binfmt_elf_fdpic.c | 8 ++++-- fs/fcntl.c | 15 ++++++++--- fs/fuse/dir.c | 23 ++++++++++------ fs/ioprio.c | 14 +++++++--- fs/proc/array.c | 32 ++++++++++++++-------- fs/proc/base.c | 32 ++++++++++++++++------ include/linux/cred.h | 3 ++- kernel/auditsc.c | 33 +++++++++++++---------- kernel/cgroup.c | 16 +++++------ kernel/exit.c | 14 ++++++---- kernel/futex.c | 22 +++++++++------ kernel/futex_compat.c | 7 ++--- kernel/ptrace.c | 22 ++++++++------- kernel/sched.c | 31 ++++++++++++++------- kernel/signal.c | 49 ++++++++++++++++++++------------- kernel/sys.c | 11 +++++--- kernel/tsacct.c | 6 +++-- mm/mempolicy.c | 8 +++--- mm/migrate.c | 8 +++--- mm/oom_kill.c | 6 ++--- security/commoncap.c | 64 +++++++++++++++++++++++++++----------------- security/keys/permission.c | 10 ++++--- security/keys/process_keys.c | 24 ++++++++++------- security/selinux/selinuxfs.c | 13 ++++++--- security/smack/smack_lsm.c | 32 +++++++++++----------- 28 files changed, 355 insertions(+), 204 deletions(-) (limited to 'mm') diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index dd38db46a77a..0e499757309b 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -2399,25 +2399,33 @@ error_kmem: static int pfm_bad_permissions(struct task_struct *task) { + const struct cred *tcred; uid_t uid = current_uid(); gid_t gid = current_gid(); + int ret; + + rcu_read_lock(); + tcred = __task_cred(task); /* inspired by ptrace_attach() */ DPRINT(("cur: uid=%d gid=%d task: euid=%d suid=%d uid=%d egid=%d sgid=%d\n", uid, gid, - task->euid, - task->suid, - task->uid, - task->egid, - task->sgid)); - - return (uid != task->euid) - || (uid != task->suid) - || (uid != task->uid) - || (gid != task->egid) - || (gid != task->sgid) - || (gid != task->gid)) && !capable(CAP_SYS_PTRACE); + tcred->euid, + tcred->suid, + tcred->uid, + tcred->egid, + tcred->sgid)); + + ret = ((uid != tcred->euid) + || (uid != tcred->suid) + || (uid != tcred->uid) + || (gid != tcred->egid) + || (gid != tcred->sgid) + || (gid != tcred->gid)) && !capable(CAP_SYS_PTRACE); + + rcu_read_unlock(); + return ret; } static int diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 354c1ff17159..c5afc98e2675 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -106,6 +106,7 @@ void proc_id_connector(struct task_struct *task, int which_id) struct proc_event *ev; __u8 buffer[CN_PROC_MSG_SIZE]; struct timespec ts; + const struct cred *cred; if (atomic_read(&proc_event_num_listeners) < 1) return; @@ -115,14 +116,19 @@ void proc_id_connector(struct task_struct *task, int which_id) ev->what = which_id; ev->event_data.id.process_pid = task->pid; ev->event_data.id.process_tgid = task->tgid; + rcu_read_lock(); + cred = __task_cred(task); if (which_id == PROC_EVENT_UID) { - ev->event_data.id.r.ruid = task->cred->uid; - ev->event_data.id.e.euid = task->cred->euid; + ev->event_data.id.r.ruid = cred->uid; + ev->event_data.id.e.euid = cred->euid; } else if (which_id == PROC_EVENT_GID) { - ev->event_data.id.r.rgid = task->cred->gid; - ev->event_data.id.e.egid = task->cred->egid; - } else + ev->event_data.id.r.rgid = cred->gid; + ev->event_data.id.e.egid = cred->egid; + } else { + rcu_read_unlock(); return; + } + rcu_read_unlock(); get_seq(&msg->seq, &ev->cpu); ktime_get_ts(&ts); /* get high res monotonic timestamp */ put_unaligned(timespec_to_ns(&ts), (__u64 *)&ev->timestamp_ns); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 0e6655613169..9142ff5dc8e6 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1361,6 +1361,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus, static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, struct mm_struct *mm) { + const struct cred *cred; unsigned int i, len; /* first copy the parameters from user space */ @@ -1388,8 +1389,11 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_zomb = psinfo->pr_sname == 'Z'; psinfo->pr_nice = task_nice(p); psinfo->pr_flag = p->flags; - SET_UID(psinfo->pr_uid, p->cred->uid); - SET_GID(psinfo->pr_gid, p->cred->gid); + rcu_read_lock(); + cred = __task_cred(p); + SET_UID(psinfo->pr_uid, cred->uid); + SET_GID(psinfo->pr_gid, cred->gid); + rcu_read_unlock(); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); return 0; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 1f6e8c023b4c..45dabd59936f 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1414,6 +1414,7 @@ static void fill_prstatus(struct elf_prstatus *prstatus, static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, struct mm_struct *mm) { + const struct cred *cred; unsigned int i, len; /* first copy the parameters from user space */ @@ -1441,8 +1442,11 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_zomb = psinfo->pr_sname == 'Z'; psinfo->pr_nice = task_nice(p); psinfo->pr_flag = p->flags; - SET_UID(psinfo->pr_uid, p->cred->uid); - SET_GID(psinfo->pr_gid, p->cred->gid); + rcu_read_lock(); + cred = __task_cred(p); + SET_UID(psinfo->pr_uid, cred->uid); + SET_GID(psinfo->pr_gid, cred->gid); + rcu_read_unlock(); strncpy(psinfo->pr_fname, p->comm, sizeof(psinfo->pr_fname)); return 0; diff --git a/fs/fcntl.c b/fs/fcntl.c index c594cc0e40fb..87c39f1f0817 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -401,10 +401,17 @@ static const long band_table[NSIGPOLL] = { static inline int sigio_perm(struct task_struct *p, struct fown_struct *fown, int sig) { - return (((fown->euid == 0) || - (fown->euid == p->cred->suid) || (fown->euid == p->cred->uid) || - (fown->uid == p->cred->suid) || (fown->uid == p->cred->uid)) && - !security_file_send_sigiotask(p, fown, sig)); + const struct cred *cred; + int ret; + + rcu_read_lock(); + cred = __task_cred(p); + ret = ((fown->euid == 0 || + fown->euid == cred->suid || fown->euid == cred->uid || + fown->uid == cred->suid || fown->uid == cred->uid) && + !security_file_send_sigiotask(p, fown, sig)); + rcu_read_unlock(); + return ret; } static void send_sigio_to_task(struct task_struct *p, diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index e97a98981862..95bc22bdd060 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -869,18 +869,25 @@ int fuse_update_attributes(struct inode *inode, struct kstat *stat, */ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) { + const struct cred *cred; + int ret; + if (fc->flags & FUSE_ALLOW_OTHER) return 1; - if (task->cred->euid == fc->user_id && - task->cred->suid == fc->user_id && - task->cred->uid == fc->user_id && - task->cred->egid == fc->group_id && - task->cred->sgid == fc->group_id && - task->cred->gid == fc->group_id) - return 1; + rcu_read_lock(); + ret = 0; + cred = __task_cred(task); + if (cred->euid == fc->user_id && + cred->suid == fc->user_id && + cred->uid == fc->user_id && + cred->egid == fc->group_id && + cred->sgid == fc->group_id && + cred->gid == fc->group_id) + ret = 1; + rcu_read_unlock(); - return 0; + return ret; } static int fuse_access(struct inode *inode, int mask) diff --git a/fs/ioprio.c b/fs/ioprio.c index 5112554fd210..3569e0ad86a2 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -31,10 +31,16 @@ static int set_task_ioprio(struct task_struct *task, int ioprio) { int err; struct io_context *ioc; + const struct cred *cred = current_cred(), *tcred; - if (task->cred->uid != current_euid() && - task->cred->uid != current_uid() && !capable(CAP_SYS_NICE)) + rcu_read_lock(); + tcred = __task_cred(task); + if (tcred->uid != cred->euid && + tcred->uid != cred->uid && !capable(CAP_SYS_NICE)) { + rcu_read_unlock(); return -EPERM; + } + rcu_read_unlock(); err = security_task_setioprio(task, ioprio); if (err) @@ -131,7 +137,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) break; do_each_thread(g, p) { - if (p->cred->uid != who) + if (__task_cred(p)->uid != who) continue; ret = set_task_ioprio(p, ioprio); if (ret) @@ -224,7 +230,7 @@ asmlinkage long sys_ioprio_get(int which, int who) break; do_each_thread(g, p) { - if (p->cred->uid != user->uid) + if (__task_cred(p)->uid != user->uid) continue; tmpio = get_task_ioprio(p); if (tmpio < 0) diff --git a/fs/proc/array.c b/fs/proc/array.c index 62fe9b2009b6..7e4877d9dcb5 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -159,6 +159,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, struct group_info *group_info; int g; struct fdtable *fdt = NULL; + const struct cred *cred; pid_t ppid, tpid; rcu_read_lock(); @@ -170,6 +171,7 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, if (tracer) tpid = task_pid_nr_ns(tracer, ns); } + cred = get_cred((struct cred *) __task_cred(p)); seq_printf(m, "State:\t%s\n" "Tgid:\t%d\n" @@ -182,8 +184,8 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, task_tgid_nr_ns(p, ns), pid_nr_ns(pid, ns), ppid, tpid, - p->cred->uid, p->cred->euid, p->cred->suid, p->cred->fsuid, - p->cred->gid, p->cred->egid, p->cred->sgid, p->cred->fsgid); + cred->uid, cred->euid, cred->suid, cred->fsuid, + cred->gid, cred->egid, cred->sgid, cred->fsgid); task_lock(p); if (p->files) @@ -194,13 +196,12 @@ static inline void task_state(struct seq_file *m, struct pid_namespace *ns, fdt ? fdt->max_fds : 0); rcu_read_unlock(); - group_info = p->cred->group_info; - get_group_info(group_info); + group_info = cred->group_info; task_unlock(p); for (g = 0; g < min(group_info->ngroups, NGROUPS_SMALL); g++) seq_printf(m, "%d ", GROUP_AT(group_info, g)); - put_group_info(group_info); + put_cred(cred); seq_printf(m, "\n"); } @@ -262,7 +263,7 @@ static inline void task_sig(struct seq_file *m, struct task_struct *p) blocked = p->blocked; collect_sigign_sigcatch(p, &ignored, &caught); num_threads = atomic_read(&p->signal->count); - qsize = atomic_read(&p->cred->user->sigpending); + qsize = atomic_read(&__task_cred(p)->user->sigpending); qlim = p->signal->rlim[RLIMIT_SIGPENDING].rlim_cur; unlock_task_sighand(p, &flags); } @@ -293,12 +294,21 @@ static void render_cap_t(struct seq_file *m, const char *header, static inline void task_cap(struct seq_file *m, struct task_struct *p) { - struct cred *cred = p->cred; + const struct cred *cred; + kernel_cap_t cap_inheritable, cap_permitted, cap_effective, cap_bset; - render_cap_t(m, "CapInh:\t", &cred->cap_inheritable); - render_cap_t(m, "CapPrm:\t", &cred->cap_permitted); - render_cap_t(m, "CapEff:\t", &cred->cap_effective); - render_cap_t(m, "CapBnd:\t", &cred->cap_bset); + rcu_read_lock(); + cred = __task_cred(p); + cap_inheritable = cred->cap_inheritable; + cap_permitted = cred->cap_permitted; + cap_effective = cred->cap_effective; + cap_bset = cred->cap_bset; + rcu_read_unlock(); + + render_cap_t(m, "CapInh:\t", &cap_inheritable); + render_cap_t(m, "CapPrm:\t", &cap_permitted); + render_cap_t(m, "CapEff:\t", &cap_effective); + render_cap_t(m, "CapBnd:\t", &cap_bset); } static inline void task_context_switch_counts(struct seq_file *m, diff --git a/fs/proc/base.c b/fs/proc/base.c index 6862b360c36c..cf42c42cbfbb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1406,6 +1406,7 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st { struct inode * inode; struct proc_inode *ei; + const struct cred *cred; /* We need a new inode */ @@ -1428,8 +1429,11 @@ static struct inode *proc_pid_make_inode(struct super_block * sb, struct task_st inode->i_uid = 0; inode->i_gid = 0; if (task_dumpable(task)) { - inode->i_uid = task->cred->euid; - inode->i_gid = task->cred->egid; + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); } security_task_to_inode(task, inode); @@ -1445,6 +1449,8 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat { struct inode *inode = dentry->d_inode; struct task_struct *task; + const struct cred *cred; + generic_fillattr(inode, stat); rcu_read_lock(); @@ -1454,8 +1460,9 @@ static int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - stat->uid = task->cred->euid; - stat->gid = task->cred->egid; + cred = __task_cred(task); + stat->uid = cred->euid; + stat->gid = cred->egid; } } rcu_read_unlock(); @@ -1483,11 +1490,16 @@ static int pid_revalidate(struct dentry *dentry, struct nameidata *nd) { struct inode *inode = dentry->d_inode; struct task_struct *task = get_proc_task(inode); + const struct cred *cred; + if (task) { if ((inode->i_mode == (S_IFDIR|S_IRUGO|S_IXUGO)) || task_dumpable(task)) { - inode->i_uid = task->cred->euid; - inode->i_gid = task->cred->egid; + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); } else { inode->i_uid = 0; inode->i_gid = 0; @@ -1649,6 +1661,7 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) struct task_struct *task = get_proc_task(inode); int fd = proc_fd(inode); struct files_struct *files; + const struct cred *cred; if (task) { files = get_files_struct(task); @@ -1658,8 +1671,11 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) rcu_read_unlock(); put_files_struct(files); if (task_dumpable(task)) { - inode->i_uid = task->cred->euid; - inode->i_gid = task->cred->egid; + rcu_read_lock(); + cred = __task_cred(task); + inode->i_uid = cred->euid; + inode->i_gid = cred->egid; + rcu_read_unlock(); } else { inode->i_uid = 0; inode->i_gid = 0; diff --git a/include/linux/cred.h b/include/linux/cred.h index 4221ec6000c1..166ce4ddba64 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -147,8 +147,9 @@ static inline struct cred *get_cred(struct cred *cred) * Release a reference to a set of credentials, deleting them when the last ref * is released. */ -static inline void put_cred(struct cred *cred) +static inline void put_cred(const struct cred *_cred) { + struct cred *cred = (struct cred *) _cred; if (atomic_dec_and_test(&(cred)->usage)) __put_cred(cred); } diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 2febf5165fad..ae8ef88ade3f 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -447,7 +447,7 @@ static int audit_filter_rules(struct task_struct *tsk, struct audit_names *name, enum audit_state *state) { - struct cred *cred = tsk->cred; + const struct cred *cred = get_task_cred(tsk); int i, j, need_sid = 1; u32 sid; @@ -642,8 +642,10 @@ static int audit_filter_rules(struct task_struct *tsk, break; } - if (!result) + if (!result) { + put_cred(cred); return 0; + } } if (rule->filterkey && ctx) ctx->filterkey = kstrdup(rule->filterkey, GFP_ATOMIC); @@ -651,6 +653,7 @@ static int audit_filter_rules(struct task_struct *tsk, case AUDIT_NEVER: *state = AUDIT_DISABLED; break; case AUDIT_ALWAYS: *state = AUDIT_RECORD_CONTEXT; break; } + put_cred(cred); return 1; } @@ -1229,7 +1232,7 @@ static void audit_log_fcaps(struct audit_buffer *ab, struct audit_names *name) static void audit_log_exit(struct audit_context *context, struct task_struct *tsk) { - struct cred *cred = tsk->cred; + const struct cred *cred; int i, call_panic = 0; struct audit_buffer *ab; struct audit_aux_data *aux; @@ -1239,13 +1242,14 @@ static void audit_log_exit(struct audit_context *context, struct task_struct *ts context->pid = tsk->pid; if (!context->ppid) context->ppid = sys_getppid(); - context->uid = cred->uid; - context->gid = cred->gid; - context->euid = cred->euid; - context->suid = cred->suid; + cred = current_cred(); + context->uid = cred->uid; + context->gid = cred->gid; + context->euid = cred->euid; + context->suid = cred->suid; context->fsuid = cred->fsuid; - context->egid = cred->egid; - context->sgid = cred->sgid; + context->egid = cred->egid; + context->sgid = cred->sgid; context->fsgid = cred->fsgid; context->personality = tsk->personality; @@ -2088,7 +2092,7 @@ int audit_set_loginuid(struct task_struct *task, uid_t loginuid) audit_log_format(ab, "login pid=%d uid=%u " "old auid=%u new auid=%u" " old ses=%u new ses=%u", - task->pid, task->cred->uid, + task->pid, task_uid(task), task->loginuid, loginuid, task->sessionid, sessionid); audit_log_end(ab); @@ -2471,7 +2475,7 @@ void __audit_ptrace(struct task_struct *t) context->target_pid = t->pid; context->target_auid = audit_get_loginuid(t); - context->target_uid = t->cred->uid; + context->target_uid = task_uid(t); context->target_sessionid = audit_get_sessionid(t); security_task_getsecid(t, &context->target_sid); memcpy(context->target_comm, t->comm, TASK_COMM_LEN); @@ -2490,6 +2494,7 @@ int __audit_signal_info(int sig, struct task_struct *t) struct audit_aux_data_pids *axp; struct task_struct *tsk = current; struct audit_context *ctx = tsk->audit_context; + uid_t uid = current_uid(), t_uid = task_uid(t); if (audit_pid && t->tgid == audit_pid) { if (sig == SIGTERM || sig == SIGHUP || sig == SIGUSR1 || sig == SIGUSR2) { @@ -2497,7 +2502,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (tsk->loginuid != -1) audit_sig_uid = tsk->loginuid; else - audit_sig_uid = tsk->cred->uid; + audit_sig_uid = uid; security_task_getsecid(tsk, &audit_sig_sid); } if (!audit_signals || audit_dummy_context()) @@ -2509,7 +2514,7 @@ int __audit_signal_info(int sig, struct task_struct *t) if (!ctx->target_pid) { ctx->target_pid = t->tgid; ctx->target_auid = audit_get_loginuid(t); - ctx->target_uid = t->cred->uid; + ctx->target_uid = t_uid; ctx->target_sessionid = audit_get_sessionid(t); security_task_getsecid(t, &ctx->target_sid); memcpy(ctx->target_comm, t->comm, TASK_COMM_LEN); @@ -2530,7 +2535,7 @@ int __audit_signal_info(int sig, struct task_struct *t) axp->target_pid[axp->pid_count] = t->tgid; axp->target_auid[axp->pid_count] = audit_get_loginuid(t); - axp->target_uid[axp->pid_count] = t->cred->uid; + axp->target_uid[axp->pid_count] = t_uid; axp->target_sessionid[axp->pid_count] = audit_get_sessionid(t); security_task_getsecid(t, &axp->target_sid[axp->pid_count]); memcpy(axp->target_comm[axp->pid_count], t->comm, TASK_COMM_LEN); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e210526e6401..a512a75a5560 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1279,7 +1279,7 @@ int cgroup_attach_task(struct cgroup *cgrp, struct task_struct *tsk) static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) { struct task_struct *tsk; - uid_t euid; + const struct cred *cred = current_cred(), *tcred; int ret; if (pid) { @@ -1289,16 +1289,16 @@ static int attach_task_by_pid(struct cgroup *cgrp, u64 pid) rcu_read_unlock(); return -ESRCH; } - get_task_struct(tsk); - rcu_read_unlock(); - euid = current_euid(); - if (euid && - euid != tsk->cred->uid && - euid != tsk->cred->suid) { - put_task_struct(tsk); + tcred = __task_cred(tsk); + if (cred->euid && + cred->euid != tcred->uid && + cred->euid != tcred->suid) { + rcu_read_unlock(); return -EACCES; } + get_task_struct(tsk); + rcu_read_unlock(); } else { tsk = current; get_task_struct(tsk); diff --git a/kernel/exit.c b/kernel/exit.c index e0f6e1892fb9..bbc22530f2c1 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -160,7 +160,10 @@ void release_task(struct task_struct * p) int zap_leader; repeat: tracehook_prepare_release_task(p); - atomic_dec(&p->cred->user->processes); + /* don't need to get the RCU readlock here - the process is dead and + * can't be modifying its own credentials */ + atomic_dec(&__task_cred(p)->user->processes); + proc_flush_task(p); write_lock_irq(&tasklist_lock); tracehook_finish_release_task(p); @@ -1267,12 +1270,12 @@ static int wait_task_zombie(struct task_struct *p, int options, unsigned long state; int retval, status, traced; pid_t pid = task_pid_vnr(p); + uid_t uid = __task_cred(p)->uid; if (!likely(options & WEXITED)) return 0; if (unlikely(options & WNOWAIT)) { - uid_t uid = p->cred->uid; int exit_code = p->exit_code; int why, status; @@ -1393,7 +1396,7 @@ static int wait_task_zombie(struct task_struct *p, int options, if (!retval && infop) retval = put_user(pid, &infop->si_pid); if (!retval && infop) - retval = put_user(p->cred->uid, &infop->si_uid); + retval = put_user(uid, &infop->si_uid); if (!retval) retval = pid; @@ -1458,7 +1461,8 @@ static int wait_task_stopped(int ptrace, struct task_struct *p, if (!unlikely(options & WNOWAIT)) p->exit_code = 0; - uid = p->cred->uid; + /* don't need the RCU readlock here as we're holding a spinlock */ + uid = __task_cred(p)->uid; unlock_sig: spin_unlock_irq(&p->sighand->siglock); if (!exit_code) @@ -1532,10 +1536,10 @@ static int wait_task_continued(struct task_struct *p, int options, } if (!unlikely(options & WNOWAIT)) p->signal->flags &= ~SIGNAL_STOP_CONTINUED; + uid = __task_cred(p)->uid; spin_unlock_irq(&p->sighand->siglock); pid = task_pid_vnr(p); - uid = p->cred->uid; get_task_struct(p); read_unlock(&tasklist_lock); diff --git a/kernel/futex.c b/kernel/futex.c index 28421d8210b8..4fe790e89d0f 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -439,15 +439,20 @@ static void free_pi_state(struct futex_pi_state *pi_state) static struct task_struct * futex_find_get_task(pid_t pid) { struct task_struct *p; - uid_t euid = current_euid(); + const struct cred *cred = current_cred(), *pcred; rcu_read_lock(); p = find_task_by_vpid(pid); - if (!p || (euid != p->cred->euid && - euid != p->cred->uid)) + if (!p) { p = ERR_PTR(-ESRCH); - else - get_task_struct(p); + } else { + pcred = __task_cred(p); + if (cred->euid != pcred->euid && + cred->euid != pcred->uid) + p = ERR_PTR(-ESRCH); + else + get_task_struct(p); + } rcu_read_unlock(); @@ -1831,7 +1836,7 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, { struct robust_list_head __user *head; unsigned long ret; - uid_t euid = current_euid(); + const struct cred *cred = current_cred(), *pcred; if (!futex_cmpxchg_enabled) return -ENOSYS; @@ -1847,8 +1852,9 @@ sys_get_robust_list(int pid, struct robust_list_head __user * __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if (euid != p->cred->euid && - euid != p->cred->uid && + pcred = __task_cred(p); + if (cred->euid != pcred->euid && + cred->euid != pcred->uid && !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->robust_list; diff --git a/kernel/futex_compat.c b/kernel/futex_compat.c index 2c3fd5ed34f5..d607a5b9ee29 100644 --- a/kernel/futex_compat.c +++ b/kernel/futex_compat.c @@ -135,7 +135,7 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, { struct compat_robust_list_head __user *head; unsigned long ret; - uid_t euid = current_euid(); + const struct cred *cred = current_cred(), *pcred; if (!futex_cmpxchg_enabled) return -ENOSYS; @@ -151,8 +151,9 @@ compat_sys_get_robust_list(int pid, compat_uptr_t __user *head_ptr, if (!p) goto err_unlock; ret = -EPERM; - if (euid != p->cred->euid && - euid != p->cred->uid && + pcred = __task_cred(p); + if (cred->euid != pcred->euid && + cred->euid != pcred->uid && !capable(CAP_SYS_PTRACE)) goto err_unlock; head = p->compat_robust_list; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 49849d12dd12..b9d5f4e4f6a4 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -115,7 +115,7 @@ int ptrace_check_attach(struct task_struct *child, int kill) int __ptrace_may_access(struct task_struct *task, unsigned int mode) { - struct cred *cred = current->cred, *tcred = task->cred; + const struct cred *cred = current_cred(), *tcred; /* May we inspect the given task? * This check is used both for attaching with ptrace @@ -125,19 +125,23 @@ int __ptrace_may_access(struct task_struct *task, unsigned int mode) * because setting up the necessary parent/child relationship * or halting the specified task is impossible. */ - uid_t uid = cred->uid; - gid_t gid = cred->gid; int dumpable = 0; /* Don't let security modules deny introspection */ if (task == current) return 0; - if ((uid != tcred->euid || - uid != tcred->suid || - uid != tcred->uid || - gid != tcred->egid || - gid != tcred->sgid || - gid != tcred->gid) && !capable(CAP_SYS_PTRACE)) + rcu_read_lock(); + tcred = __task_cred(task); + if ((cred->uid != tcred->euid || + cred->uid != tcred->suid || + cred->uid != tcred->uid || + cred->gid != tcred->egid || + cred->gid != tcred->sgid || + cred->gid != tcred->gid) && + !capable(CAP_SYS_PTRACE)) { + rcu_read_unlock(); return -EPERM; + } + rcu_read_unlock(); smp_rmb(); if (task->mm) dumpable = get_dumpable(task->mm); diff --git a/kernel/sched.c b/kernel/sched.c index 733c59e645aa..92992e287b10 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -345,7 +345,9 @@ static inline struct task_group *task_group(struct task_struct *p) struct task_group *tg; #ifdef CONFIG_USER_SCHED - tg = p->cred->user->tg; + rcu_read_lock(); + tg = __task_cred(p)->user->tg; + rcu_read_unlock(); #elif defined(CONFIG_CGROUP_SCHED) tg = container_of(task_subsys_state(p, cpu_cgroup_subsys_id), struct task_group, css); @@ -5121,6 +5123,22 @@ __setscheduler(struct rq *rq, struct task_struct *p, int policy, int prio) set_load_weight(p); } +/* + * check the target process has a UID that matches the current process's + */ +static bool check_same_owner(struct task_struct *p) +{ + const struct cred *cred = current_cred(), *pcred; + bool match; + + rcu_read_lock(); + pcred = __task_cred(p); + match = (cred->euid == pcred->euid || + cred->euid == pcred->uid); + rcu_read_unlock(); + return match; +} + static int __sched_setscheduler(struct task_struct *p, int policy, struct sched_param *param, bool user) { @@ -5128,7 +5146,6 @@ static int __sched_setscheduler(struct task_struct *p, int policy, unsigned long flags; const struct sched_class *prev_class = p->sched_class; struct rq *rq; - uid_t euid; /* may grab non-irq protected spin_locks */ BUG_ON(in_interrupt()); @@ -5181,9 +5198,7 @@ recheck: return -EPERM; /* can't change other user's priorities */ - euid = current_euid(); - if (euid != p->cred->euid && - euid != p->cred->uid) + if (!check_same_owner(p)) return -EPERM; } @@ -5394,7 +5409,6 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) cpumask_t cpus_allowed; cpumask_t new_mask = *in_mask; struct task_struct *p; - uid_t euid; int retval; get_online_cpus(); @@ -5415,11 +5429,8 @@ long sched_setaffinity(pid_t pid, const cpumask_t *in_mask) get_task_struct(p); read_unlock(&tasklist_lock); - euid = current_euid(); retval = -EPERM; - if (euid != p->cred->euid && - euid != p->cred->uid && - !capable(CAP_SYS_NICE)) + if (!check_same_owner(p) && !capable(CAP_SYS_NICE)) goto out_unlock; retval = security_task_setscheduler(p, 0, NULL); diff --git a/kernel/signal.c b/kernel/signal.c index 80e8a6489f97..84989124bafb 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -177,6 +177,11 @@ int next_signal(struct sigpending *pending, sigset_t *mask) return sig; } +/* + * allocate a new signal queue record + * - this may be called without locks if and only if t == current, otherwise an + * appopriate lock must be held to protect t's user_struct + */ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, int override_rlimit) { @@ -184,11 +189,12 @@ static struct sigqueue *__sigqueue_alloc(struct task_struct *t, gfp_t flags, struct user_struct *user; /* - * In order to avoid problems with "switch_user()", we want to make - * sure that the compiler doesn't re-load "t->user" + * We won't get problems with the target's UID changing under us + * because changing it requires RCU be used, and if t != current, the + * caller must be holding the RCU readlock (by way of a spinlock) and + * we use RCU protection here */ - user = t->cred->user; - barrier(); + user = __task_cred(t)->user; atomic_inc(&user->sigpending); if (override_rlimit || atomic_read(&user->sigpending) <= @@ -562,12 +568,13 @@ static int rm_from_queue(unsigned long mask, struct sigpending *s) /* * Bad permissions for sending the signal + * - the caller must hold at least the RCU read lock */ static int check_kill_permission(int sig, struct siginfo *info, struct task_struct *t) { + const struct cred *cred = current_cred(), *tcred; struct pid *sid; - uid_t uid, euid; int error; if (!valid_signal(sig)) @@ -580,10 +587,11 @@ static int check_kill_permission(int sig, struct siginfo *info, if (error) return error; - uid = current_uid(); - euid = current_euid(); - if ((euid ^ t->cred->suid) && (euid ^ t->cred->uid) && - (uid ^ t->cred->suid) && (uid ^ t->cred->uid) && + tcred = __task_cred(t); + if ((cred->euid ^ tcred->suid) && + (cred->euid ^ tcred->uid) && + (cred->uid ^ tcred->suid) && + (cred->uid ^ tcred->uid) && !capable(CAP_KILL)) { switch (sig) { case SIGCONT: @@ -1011,6 +1019,10 @@ struct sighand_struct *lock_task_sighand(struct task_struct *tsk, unsigned long return sighand; } +/* + * send signal info to all the members of a group + * - the caller must hold the RCU read lock at least + */ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) { unsigned long flags; @@ -1032,8 +1044,8 @@ int group_send_sig_info(int sig, struct siginfo *info, struct task_struct *p) /* * __kill_pgrp_info() sends a signal to a process group: this is what the tty * control characters do (^C, ^Z etc) + * - the caller must hold at least a readlock on tasklist_lock */ - int __kill_pgrp_info(int sig, struct siginfo *info, struct pid *pgrp) { struct task_struct *p = NULL; @@ -1089,6 +1101,7 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, { int ret = -EINVAL; struct task_struct *p; + const struct cred *pcred; if (!valid_signal(sig)) return ret; @@ -1099,9 +1112,11 @@ int kill_pid_info_as_uid(int sig, struct siginfo *info, struct pid *pid, ret = -ESRCH; goto out_unlock; } - if ((info == SEND_SIG_NOINFO || (!is_si_special(info) && SI_FROMUSER(info))) - && (euid != p->cred->suid) && (euid != p->cred->uid) - && (uid != p->cred->suid) && (uid != p->cred->uid)) { + pcred = __task_cred(p); + if ((info == SEND_SIG_NOINFO || + (!is_si_special(info) && SI_FROMUSER(info))) && + euid != pcred->suid && euid != pcred->uid && + uid != pcred->suid && uid != pcred->uid) { ret = -EPERM; goto out_unlock; } @@ -1372,10 +1387,9 @@ int do_notify_parent(struct task_struct *tsk, int sig) */ rcu_read_lock(); info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); + info.si_uid = __task_cred(tsk)->uid; rcu_read_unlock(); - info.si_uid = tsk->cred->uid; - thread_group_cputime(tsk, &cputime); info.si_utime = cputime_to_jiffies(cputime.utime); info.si_stime = cputime_to_jiffies(cputime.stime); @@ -1443,10 +1457,9 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, int why) */ rcu_read_lock(); info.si_pid = task_pid_nr_ns(tsk, tsk->parent->nsproxy->pid_ns); + info.si_uid = __task_cred(tsk)->uid; rcu_read_unlock(); - info.si_uid = tsk->cred->uid; - info.si_utime = cputime_to_clock_t(tsk->utime); info.si_stime = cputime_to_clock_t(tsk->stime); @@ -1713,7 +1726,7 @@ static int ptrace_signal(int signr, siginfo_t *info, info->si_errno = 0; info->si_code = SI_USER; info->si_pid = task_pid_vnr(current->parent); - info->si_uid = current->parent->cred->uid; + info->si_uid = task_uid(current->parent); } /* If the (new) signal is now blocked, requeue it. */ diff --git a/kernel/sys.c b/kernel/sys.c index c4d6b59553e9..ccc9eb736d35 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -112,14 +112,17 @@ EXPORT_SYMBOL(cad_pid); void (*pm_power_off_prepare)(void); +/* + * set the priority of a task + * - the caller must hold the RCU read lock + */ static int set_one_prio(struct task_struct *p, int niceval, int error) { - uid_t euid = current_euid(); + const struct cred *cred = current_cred(), *pcred = __task_cred(p); int no_nice; - if (p->cred->uid != euid && - p->cred->euid != euid && - !capable(CAP_SYS_NICE)) { + if (pcred->uid != cred->euid && + pcred->euid != cred->euid && !capable(CAP_SYS_NICE)) { error = -EPERM; goto out; } diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 6d1ed07bf312..2dc06ab35716 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -27,6 +27,7 @@ */ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) { + const struct cred *tcred; struct timespec uptime, ts; u64 ac_etime; @@ -53,10 +54,11 @@ void bacct_add_tsk(struct taskstats *stats, struct task_struct *tsk) stats->ac_flag |= AXSIG; stats->ac_nice = task_nice(tsk); stats->ac_sched = tsk->policy; - stats->ac_uid = tsk->cred->uid; - stats->ac_gid = tsk->cred->gid; stats->ac_pid = tsk->pid; rcu_read_lock(); + tcred = __task_cred(tsk); + stats->ac_uid = tcred->uid; + stats->ac_gid = tcred->gid; stats->ac_ppid = pid_alive(tsk) ? rcu_dereference(tsk->real_parent)->tgid : 0; rcu_read_unlock(); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index b23492ee3e50..7555219c535b 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1110,7 +1110,7 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, const unsigned long __user *old_nodes, const unsigned long __user *new_nodes) { - struct cred *cred, *tcred; + const struct cred *cred = current_cred(), *tcred; struct mm_struct *mm; struct task_struct *task; nodemask_t old; @@ -1145,14 +1145,16 @@ asmlinkage long sys_migrate_pages(pid_t pid, unsigned long maxnode, * capabilities, superuser privileges or the same * userid as the target process. */ - cred = current->cred; - tcred = task->cred; + rcu_read_lock(); + tcred = __task_cred(task); if (cred->euid != tcred->suid && cred->euid != tcred->uid && cred->uid != tcred->suid && cred->uid != tcred->uid && !capable(CAP_SYS_NICE)) { + rcu_read_unlock(); err = -EPERM; goto out; } + rcu_read_unlock(); task_nodes = cpuset_mems_allowed(task); /* Is the user allowed to access the target nodes? */ diff --git a/mm/migrate.c b/mm/migrate.c index 794443da1b4f..142284229ce2 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1045,7 +1045,7 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, const int __user *nodes, int __user *status, int flags) { - struct cred *cred, *tcred; + const struct cred *cred = current_cred(), *tcred; struct task_struct *task; struct mm_struct *mm; int err; @@ -1076,14 +1076,16 @@ asmlinkage long sys_move_pages(pid_t pid, unsigned long nr_pages, * capabilities, superuser privileges or the same * userid as the target process. */ - cred = current->cred; - tcred = task->cred; + rcu_read_lock(); + tcred = __task_cred(task); if (cred->euid != tcred->suid && cred->euid != tcred->uid && cred->uid != tcred->suid && cred->uid != tcred->uid && !capable(CAP_SYS_NICE)) { + rcu_read_unlock(); err = -EPERM; goto out; } + rcu_read_unlock(); err = security_task_movememory(task); if (err) diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 3af787ba2077..0e0b282a2073 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -298,9 +298,9 @@ static void dump_tasks(const struct mem_cgroup *mem) task_lock(p); printk(KERN_INFO "[%5d] %5d %5d %8lu %8lu %3d %3d %s\n", - p->pid, p->cred->uid, p->tgid, p->mm->total_vm, - get_mm_rss(p->mm), (int)task_cpu(p), p->oomkilladj, - p->comm); + p->pid, __task_cred(p)->uid, p->tgid, + p->mm->total_vm, get_mm_rss(p->mm), (int)task_cpu(p), + p->oomkilladj, p->comm); task_unlock(p); } while_each_thread(g, p); } diff --git a/security/commoncap.c b/security/commoncap.c index 61307f590003..0384bf95db68 100644 --- a/security/commoncap.c +++ b/security/commoncap.c @@ -51,10 +51,13 @@ EXPORT_SYMBOL(cap_netlink_recv); */ int cap_capable(struct task_struct *tsk, int cap, int audit) { + __u32 cap_raised; + /* Derived from include/linux/sched.h:capable. */ - if (cap_raised(tsk->cred->cap_effective, cap)) - return 0; - return -EPERM; + rcu_read_lock(); + cap_raised = cap_raised(__task_cred(tsk)->cap_effective, cap); + rcu_read_unlock(); + return cap_raised ? 0 : -EPERM; } int cap_settime(struct timespec *ts, struct timezone *tz) @@ -66,34 +69,42 @@ int cap_settime(struct timespec *ts, struct timezone *tz) int cap_ptrace_may_access(struct task_struct *child, unsigned int mode) { - /* Derived from arch/i386/kernel/ptrace.c:sys_ptrace. */ - if (cap_issubset(child->cred->cap_permitted, - current->cred->cap_permitted)) - return 0; - if (capable(CAP_SYS_PTRACE)) - return 0; - return -EPERM; + int ret = 0; + + rcu_read_lock(); + if (!cap_issubset(child->cred->cap_permitted, + current->cred->cap_permitted) && + !capable(CAP_SYS_PTRACE)) + ret = -EPERM; + rcu_read_unlock(); + return ret; } int cap_ptrace_traceme(struct task_struct *parent) { - if (cap_issubset(current->cred->cap_permitted, - parent->cred->cap_permitted)) - return 0; - if (has_capability(parent, CAP_SYS_PTRACE)) - return 0; - return -EPERM; + int ret = 0; + + rcu_read_lock(); + if (!cap_issubset(current->cred->cap_permitted, + parent->cred->cap_permitted) && + !has_capability(parent, CAP_SYS_PTRACE)) + ret = -EPERM; + rcu_read_unlock(); + return ret; } int cap_capget (struct task_struct *target, kernel_cap_t *effective, kernel_cap_t *inheritable, kernel_cap_t *permitted) { - struct cred *cred = target->cred; + const struct cred *cred; /* Derived from kernel/capability.c:sys_capget. */ + rcu_read_lock(); + cred = __task_cred(target); *effective = cred->cap_effective; *inheritable = cred->cap_inheritable; *permitted = cred->cap_permitted; + rcu_read_unlock(); return 0; } @@ -433,7 +444,7 @@ void cap_bprm_apply_creds (struct linux_binprm *bprm, int unsafe) int cap_bprm_secureexec (struct linux_binprm *bprm) { - const struct cred *cred = current->cred; + const struct cred *cred = current_cred(); if (cred->uid != 0) { if (bprm->cap_effective) @@ -511,11 +522,11 @@ static inline void cap_emulate_setxuid (int old_ruid, int old_euid, if ((old_ruid == 0 || old_euid == 0 || old_suid == 0) && (cred->uid != 0 && cred->euid != 0 && cred->suid != 0) && !issecure(SECURE_KEEP_CAPS)) { - cap_clear (cred->cap_permitted); - cap_clear (cred->cap_effective); + cap_clear(cred->cap_permitted); + cap_clear(cred->cap_effective); } if (old_euid == 0 && cred->euid != 0) { - cap_clear (cred->cap_effective); + cap_clear(cred->cap_effective); } if (old_euid != 0 && cred->euid == 0) { cred->cap_effective = cred->cap_permitted; @@ -582,9 +593,14 @@ int cap_task_post_setuid (uid_t old_ruid, uid_t old_euid, uid_t old_suid, */ static int cap_safe_nice(struct task_struct *p) { - if (!cap_issubset(p->cred->cap_permitted, - current->cred->cap_permitted) && - !capable(CAP_SYS_NICE)) + int is_subset; + + rcu_read_lock(); + is_subset = cap_issubset(__task_cred(p)->cap_permitted, + current_cred()->cap_permitted); + rcu_read_unlock(); + + if (!is_subset && !capable(CAP_SYS_NICE)) return -EPERM; return 0; } diff --git a/security/keys/permission.c b/security/keys/permission.c index baf3d5f31e71..13c36164f284 100644 --- a/security/keys/permission.c +++ b/security/keys/permission.c @@ -22,13 +22,16 @@ int key_task_permission(const key_ref_t key_ref, struct task_struct *context, key_perm_t perm) { - struct cred *cred = context->cred; + const struct cred *cred; struct key *key; key_perm_t kperm; int ret; key = key_ref_to_ptr(key_ref); + rcu_read_lock(); + cred = __task_cred(context); + /* use the second 8-bits of permissions for keys the caller owns */ if (key->uid == cred->fsuid) { kperm = key->perm >> 16; @@ -43,10 +46,7 @@ int key_task_permission(const key_ref_t key_ref, goto use_these_perms; } - spin_lock(&cred->lock); ret = groups_search(cred->group_info, key->gid); - spin_unlock(&cred->lock); - if (ret) { kperm = key->perm >> 8; goto use_these_perms; @@ -57,6 +57,8 @@ int key_task_permission(const key_ref_t key_ref, kperm = key->perm; use_these_perms: + rcu_read_lock(); + /* use the top 8-bits of permissions for keys the caller possesses * - possessor permissions are additive with other permissions */ diff --git a/security/keys/process_keys.c b/security/keys/process_keys.c index ce8ac6073d57..212601ebaa46 100644 --- a/security/keys/process_keys.c +++ b/security/keys/process_keys.c @@ -412,10 +412,13 @@ key_ref_t search_process_keyrings(struct key_type *type, struct task_struct *context) { struct request_key_auth *rka; + struct cred *cred; key_ref_t key_ref, ret, err; might_sleep(); + cred = get_task_cred(context); + /* we want to return -EAGAIN or -ENOKEY if any of the keyrings were * searchable, but we failed to find a key or we found a negative key; * otherwise we want to return a sample error (probably -EACCES) if @@ -428,9 +431,9 @@ key_ref_t search_process_keyrings(struct key_type *type, err = ERR_PTR(-EAGAIN); /* search the thread keyring first */ - if (context->cred->thread_keyring) { + if (cred->thread_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->cred->thread_keyring, 1), + make_key_ref(cred->thread_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -495,9 +498,9 @@ key_ref_t search_process_keyrings(struct key_type *type, } } /* or search the user-session keyring */ - else if (context->cred->user->session_keyring) { + else if (cred->user->session_keyring) { key_ref = keyring_search_aux( - make_key_ref(context->cred->user->session_keyring, 1), + make_key_ref(cred->user->session_keyring, 1), context, type, description, match); if (!IS_ERR(key_ref)) goto found; @@ -519,20 +522,20 @@ key_ref_t search_process_keyrings(struct key_type *type, * search the keyrings of the process mentioned there * - we don't permit access to request_key auth keys via this method */ - if (context->cred->request_key_auth && + if (cred->request_key_auth && context == current && type != &key_type_request_key_auth ) { /* defend against the auth key being revoked */ - down_read(&context->cred->request_key_auth->sem); + down_read(&cred->request_key_auth->sem); - if (key_validate(context->cred->request_key_auth) == 0) { - rka = context->cred->request_key_auth->payload.data; + if (key_validate(cred->request_key_auth) == 0) { + rka = cred->request_key_auth->payload.data; key_ref = search_process_keyrings(type, description, match, rka->context); - up_read(&context->cred->request_key_auth->sem); + up_read(&cred->request_key_auth->sem); if (!IS_ERR(key_ref)) goto found; @@ -549,7 +552,7 @@ key_ref_t search_process_keyrings(struct key_type *type, break; } } else { - up_read(&context->cred->request_key_auth->sem); + up_read(&cred->request_key_auth->sem); } } @@ -557,6 +560,7 @@ key_ref_t search_process_keyrings(struct key_type *type, key_ref = ret ? ret : err; found: + put_cred(cred); return key_ref; } /* end search_process_keyrings() */ diff --git a/security/selinux/selinuxfs.c b/security/selinux/selinuxfs.c index 10715d1330b9..c86303638235 100644 --- a/security/selinux/selinuxfs.c +++ b/security/selinux/selinuxfs.c @@ -95,13 +95,18 @@ extern void selnl_notify_setenforce(int val); static int task_has_security(struct task_struct *tsk, u32 perms) { - struct task_security_struct *tsec; - - tsec = tsk->cred->security; + const struct task_security_struct *tsec; + u32 sid = 0; + + rcu_read_lock(); + tsec = __task_cred(tsk)->security; + if (tsec) + sid = tsec->sid; + rcu_read_unlock(); if (!tsec) return -EACCES; - return avc_has_perm(tsec->sid, SECINITSID_SECURITY, + return avc_has_perm(sid, SECINITSID_SECURITY, SECCLASS_SECURITY, perms, NULL); } diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index e8a4fcb1ad04..11167fd567b9 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -30,6 +30,8 @@ #include "smack.h" +#define task_security(task) (task_cred_xxx((task), security)) + /* * I hope these are the hokeyist lines of code in the module. Casey. */ @@ -1012,7 +1014,7 @@ static void smack_cred_free(struct cred *cred) */ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) { - return smk_curacc(p->cred->security, MAY_WRITE); + return smk_curacc(task_security(p), MAY_WRITE); } /** @@ -1023,7 +1025,7 @@ static int smack_task_setpgid(struct task_struct *p, pid_t pgid) */ static int smack_task_getpgid(struct task_struct *p) { - return smk_curacc(p->cred->security, MAY_READ); + return smk_curacc(task_security(p), MAY_READ); } /** @@ -1034,7 +1036,7 @@ static int smack_task_getpgid(struct task_struct *p) */ static int smack_task_getsid(struct task_struct *p) { - return smk_curacc(p->cred->security, MAY_READ); + return smk_curacc(task_security(p), MAY_READ); } /** @@ -1046,7 +1048,7 @@ static int smack_task_getsid(struct task_struct *p) */ static void smack_task_getsecid(struct task_struct *p, u32 *secid) { - *secid = smack_to_secid(p->cred->security); + *secid = smack_to_secid(task_security(p)); } /** @@ -1062,7 +1064,7 @@ static int smack_task_setnice(struct task_struct *p, int nice) rc = cap_task_setnice(p, nice); if (rc == 0) - rc = smk_curacc(p->cred->security, MAY_WRITE); + rc = smk_curacc(task_security(p), MAY_WRITE); return rc; } @@ -1079,7 +1081,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) rc = cap_task_setioprio(p, ioprio); if (rc == 0) - rc = smk_curacc(p->cred->security, MAY_WRITE); + rc = smk_curacc(task_security(p), MAY_WRITE); return rc; } @@ -1091,7 +1093,7 @@ static int smack_task_setioprio(struct task_struct *p, int ioprio) */ static int smack_task_getioprio(struct task_struct *p) { - return smk_curacc(p->cred->security, MAY_READ); + return smk_curacc(task_security(p), MAY_READ); } /** @@ -1109,7 +1111,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, rc = cap_task_setscheduler(p, policy, lp); if (rc == 0) - rc = smk_curacc(p->cred->security, MAY_WRITE); + rc = smk_curacc(task_security(p), MAY_WRITE); return rc; } @@ -1121,7 +1123,7 @@ static int smack_task_setscheduler(struct task_struct *p, int policy, */ static int smack_task_getscheduler(struct task_struct *p) { - return smk_curacc(p->cred->security, MAY_READ); + return smk_curacc(task_security(p), MAY_READ); } /** @@ -1132,7 +1134,7 @@ static int smack_task_getscheduler(struct task_struct *p) */ static int smack_task_movememory(struct task_struct *p) { - return smk_curacc(p->cred->security, MAY_WRITE); + return smk_curacc(task_security(p), MAY_WRITE); } /** @@ -1155,13 +1157,13 @@ static int smack_task_kill(struct task_struct *p, struct siginfo *info, * can write the receiver. */ if (secid == 0) - return smk_curacc(p->cred->security, MAY_WRITE); + return smk_curacc(task_security(p), MAY_WRITE); /* * If the secid isn't 0 we're dealing with some USB IO * specific behavior. This is not clean. For one thing * we can't take privilege into account. */ - return smk_access(smack_from_secid(secid), p->cred->security, MAY_WRITE); + return smk_access(smack_from_secid(secid), task_security(p), MAY_WRITE); } /** @@ -1174,7 +1176,7 @@ static int smack_task_wait(struct task_struct *p) { int rc; - rc = smk_access(current->cred->security, p->cred->security, MAY_WRITE); + rc = smk_access(current_security(), task_security(p), MAY_WRITE); if (rc == 0) return 0; @@ -1205,7 +1207,7 @@ static int smack_task_wait(struct task_struct *p) static void smack_task_to_inode(struct task_struct *p, struct inode *inode) { struct inode_smack *isp = inode->i_security; - isp->smk_inode = p->cred->security; + isp->smk_inode = task_security(p); } /* @@ -2010,7 +2012,7 @@ static int smack_getprocattr(struct task_struct *p, char *name, char **value) if (strcmp(name, "current") != 0) return -EINVAL; - cp = kstrdup(p->cred->security, GFP_KERNEL); + cp = kstrdup(task_security(p), GFP_KERNEL); if (cp == NULL) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From 5f3ea37c7716db4e894a480e0c18b24399595b6b Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 30 Oct 2008 08:34:33 +0100 Subject: blktrace: port to tracepoints This was a forward port of work done by Mathieu Desnoyers, I changed it to encode the 'what' parameter on the tracepoint name, so that one can register interest in specific events and not on classes of events to then check the 'what' parameter. Signed-off-by: Arnaldo Carvalho de Melo Signed-off-by: Jens Axboe Signed-off-by: Ingo Molnar --- block/Kconfig | 1 + block/blk-core.c | 33 ++--- block/blktrace.c | 332 ++++++++++++++++++++++++++++++++++++++++++- block/elevator.c | 7 +- drivers/md/dm.c | 6 +- fs/bio.c | 3 +- include/linux/blktrace_api.h | 172 +--------------------- include/trace/block.h | 60 ++++++++ mm/bounce.c | 3 +- 9 files changed, 418 insertions(+), 199 deletions(-) create mode 100644 include/trace/block.h (limited to 'mm') diff --git a/block/Kconfig b/block/Kconfig index 1ab7c15c8d7a..290b219fad9c 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -47,6 +47,7 @@ config BLK_DEV_IO_TRACE depends on SYSFS select RELAY select DEBUG_FS + select TRACEPOINTS help Say Y here if you want to be able to trace the block layer actions on a given queue. Tracing allows you to see any traffic happening diff --git a/block/blk-core.c b/block/blk-core.c index 10e8a64a5a5b..04267d66a2b9 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -28,6 +28,7 @@ #include #include #include +#include #include "blk.h" @@ -205,7 +206,7 @@ void blk_plug_device(struct request_queue *q) if (!queue_flag_test_and_set(QUEUE_FLAG_PLUGGED, q)) { mod_timer(&q->unplug_timer, jiffies + q->unplug_delay); - blk_add_trace_generic(q, NULL, 0, BLK_TA_PLUG); + trace_block_plug(q); } } EXPORT_SYMBOL(blk_plug_device); @@ -292,9 +293,7 @@ void blk_unplug_work(struct work_struct *work) struct request_queue *q = container_of(work, struct request_queue, unplug_work); - blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, - q->rq.count[READ] + q->rq.count[WRITE]); - + trace_block_unplug_io(q); q->unplug_fn(q); } @@ -302,9 +301,7 @@ void blk_unplug_timeout(unsigned long data) { struct request_queue *q = (struct request_queue *)data; - blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_TIMER, NULL, - q->rq.count[READ] + q->rq.count[WRITE]); - + trace_block_unplug_timer(q); kblockd_schedule_work(q, &q->unplug_work); } @@ -314,9 +311,7 @@ void blk_unplug(struct request_queue *q) * devices don't necessarily have an ->unplug_fn defined */ if (q->unplug_fn) { - blk_add_trace_pdu_int(q, BLK_TA_UNPLUG_IO, NULL, - q->rq.count[READ] + q->rq.count[WRITE]); - + trace_block_unplug_io(q); q->unplug_fn(q); } } @@ -822,7 +817,7 @@ rq_starved: if (ioc_batching(q, ioc)) ioc->nr_batch_requests--; - blk_add_trace_generic(q, bio, rw, BLK_TA_GETRQ); + trace_block_getrq(q, bio, rw); out: return rq; } @@ -848,7 +843,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, prepare_to_wait_exclusive(&rl->wait[rw], &wait, TASK_UNINTERRUPTIBLE); - blk_add_trace_generic(q, bio, rw, BLK_TA_SLEEPRQ); + trace_block_sleeprq(q, bio, rw); __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); @@ -928,7 +923,7 @@ void blk_requeue_request(struct request_queue *q, struct request *rq) { blk_delete_timer(rq); blk_clear_rq_complete(rq); - blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); + trace_block_rq_requeue(q, rq); if (blk_rq_tagged(rq)) blk_queue_end_tag(q, rq); @@ -1167,7 +1162,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) if (!ll_back_merge_fn(q, req, bio)) break; - blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); + trace_block_bio_backmerge(q, bio); req->biotail->bi_next = bio; req->biotail = bio; @@ -1186,7 +1181,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) if (!ll_front_merge_fn(q, req, bio)) break; - blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); + trace_block_bio_frontmerge(q, bio); bio->bi_next = req->bio; req->bio = bio; @@ -1269,7 +1264,7 @@ static inline void blk_partition_remap(struct bio *bio) bio->bi_sector += p->start_sect; bio->bi_bdev = bdev->bd_contains; - blk_add_trace_remap(bdev_get_queue(bio->bi_bdev), bio, + trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, bdev->bd_dev, bio->bi_sector, bio->bi_sector - p->start_sect); } @@ -1441,10 +1436,10 @@ end_io: goto end_io; if (old_sector != -1) - blk_add_trace_remap(q, bio, old_dev, bio->bi_sector, + trace_block_remap(q, bio, old_dev, bio->bi_sector, old_sector); - blk_add_trace_bio(q, bio, BLK_TA_QUEUE); + trace_block_bio_queue(q, bio); old_sector = bio->bi_sector; old_dev = bio->bi_bdev->bd_dev; @@ -1656,7 +1651,7 @@ static int __end_that_request_first(struct request *req, int error, int total_bytes, bio_nbytes, next_idx = 0; struct bio *bio; - blk_add_trace_rq(req->q, req, BLK_TA_COMPLETE); + trace_block_rq_complete(req->q, req); /* * for a REQ_TYPE_BLOCK_PC request, we want to carry any eventual diff --git a/block/blktrace.c b/block/blktrace.c index 85049a7e7a17..b0a2cae886db 100644 --- a/block/blktrace.c +++ b/block/blktrace.c @@ -23,10 +23,18 @@ #include #include #include +#include #include static unsigned int blktrace_seq __read_mostly = 1; +/* Global reference count of probes */ +static DEFINE_MUTEX(blk_probe_mutex); +static atomic_t blk_probes_ref = ATOMIC_INIT(0); + +static int blk_register_tracepoints(void); +static void blk_unregister_tracepoints(void); + /* * Send out a notify message. */ @@ -119,7 +127,7 @@ static u32 ddir_act[2] __read_mostly = { BLK_TC_ACT(BLK_TC_READ), BLK_TC_ACT(BLK * The worker for the various blk_add_trace*() types. Fills out a * blk_io_trace structure and places it in a per-cpu subbuffer. */ -void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, +static void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, int rw, u32 what, int error, int pdu_len, void *pdu_data) { struct task_struct *tsk = current; @@ -177,8 +185,6 @@ void __blk_add_trace(struct blk_trace *bt, sector_t sector, int bytes, local_irq_restore(flags); } -EXPORT_SYMBOL_GPL(__blk_add_trace); - static struct dentry *blk_tree_root; static DEFINE_MUTEX(blk_tree_mutex); static unsigned int root_users; @@ -237,6 +243,10 @@ static void blk_trace_cleanup(struct blk_trace *bt) free_percpu(bt->sequence); free_percpu(bt->msg_data); kfree(bt); + mutex_lock(&blk_probe_mutex); + if (atomic_dec_and_test(&blk_probes_ref)) + blk_unregister_tracepoints(); + mutex_unlock(&blk_probe_mutex); } int blk_trace_remove(struct request_queue *q) @@ -428,6 +438,14 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, bt->pid = buts->pid; bt->trace_state = Blktrace_setup; + mutex_lock(&blk_probe_mutex); + if (atomic_add_return(1, &blk_probes_ref) == 1) { + ret = blk_register_tracepoints(); + if (ret) + goto probe_err; + } + mutex_unlock(&blk_probe_mutex); + ret = -EBUSY; old_bt = xchg(&q->blk_trace, bt); if (old_bt) { @@ -436,6 +454,9 @@ int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, } return 0; +probe_err: + atomic_dec(&blk_probes_ref); + mutex_unlock(&blk_probe_mutex); err: if (dir) blk_remove_tree(dir); @@ -562,3 +583,308 @@ void blk_trace_shutdown(struct request_queue *q) blk_trace_remove(q); } } + +/* + * blktrace probes + */ + +/** + * blk_add_trace_rq - Add a trace for a request oriented action + * @q: queue the io is for + * @rq: the source request + * @what: the action + * + * Description: + * Records an action against a request. Will log the bio offset + size. + * + **/ +static void blk_add_trace_rq(struct request_queue *q, struct request *rq, + u32 what) +{ + struct blk_trace *bt = q->blk_trace; + int rw = rq->cmd_flags & 0x03; + + if (likely(!bt)) + return; + + if (blk_discard_rq(rq)) + rw |= (1 << BIO_RW_DISCARD); + + if (blk_pc_request(rq)) { + what |= BLK_TC_ACT(BLK_TC_PC); + __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, + sizeof(rq->cmd), rq->cmd); + } else { + what |= BLK_TC_ACT(BLK_TC_FS); + __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, + rw, what, rq->errors, 0, NULL); + } +} + +static void blk_add_trace_rq_abort(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_ABORT); +} + +static void blk_add_trace_rq_insert(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_INSERT); +} + +static void blk_add_trace_rq_issue(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_ISSUE); +} + +static void blk_add_trace_rq_requeue(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); +} + +static void blk_add_trace_rq_complete(struct request_queue *q, struct request *rq) +{ + blk_add_trace_rq(q, rq, BLK_TA_COMPLETE); +} + +/** + * blk_add_trace_bio - Add a trace for a bio oriented action + * @q: queue the io is for + * @bio: the source bio + * @what: the action + * + * Description: + * Records an action against a bio. Will log the bio offset + size. + * + **/ +static void blk_add_trace_bio(struct request_queue *q, struct bio *bio, + u32 what) +{ + struct blk_trace *bt = q->blk_trace; + + if (likely(!bt)) + return; + + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, + !bio_flagged(bio, BIO_UPTODATE), 0, NULL); +} + +static void blk_add_trace_bio_bounce(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_BOUNCE); +} + +static void blk_add_trace_bio_complete(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_COMPLETE); +} + +static void blk_add_trace_bio_backmerge(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_BACKMERGE); +} + +static void blk_add_trace_bio_frontmerge(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_FRONTMERGE); +} + +static void blk_add_trace_bio_queue(struct request_queue *q, struct bio *bio) +{ + blk_add_trace_bio(q, bio, BLK_TA_QUEUE); +} + +static void blk_add_trace_getrq(struct request_queue *q, struct bio *bio, int rw) +{ + if (bio) + blk_add_trace_bio(q, bio, BLK_TA_GETRQ); + else { + struct blk_trace *bt = q->blk_trace; + + if (bt) + __blk_add_trace(bt, 0, 0, rw, BLK_TA_GETRQ, 0, 0, NULL); + } +} + + +static void blk_add_trace_sleeprq(struct request_queue *q, struct bio *bio, int rw) +{ + if (bio) + blk_add_trace_bio(q, bio, BLK_TA_SLEEPRQ); + else { + struct blk_trace *bt = q->blk_trace; + + if (bt) + __blk_add_trace(bt, 0, 0, rw, BLK_TA_SLEEPRQ, 0, 0, NULL); + } +} + +static void blk_add_trace_plug(struct request_queue *q) +{ + struct blk_trace *bt = q->blk_trace; + + if (bt) + __blk_add_trace(bt, 0, 0, 0, BLK_TA_PLUG, 0, 0, NULL); +} + +static void blk_add_trace_unplug_io(struct request_queue *q) +{ + struct blk_trace *bt = q->blk_trace; + + if (bt) { + unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; + __be64 rpdu = cpu_to_be64(pdu); + + __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_IO, 0, + sizeof(rpdu), &rpdu); + } +} + +static void blk_add_trace_unplug_timer(struct request_queue *q) +{ + struct blk_trace *bt = q->blk_trace; + + if (bt) { + unsigned int pdu = q->rq.count[READ] + q->rq.count[WRITE]; + __be64 rpdu = cpu_to_be64(pdu); + + __blk_add_trace(bt, 0, 0, 0, BLK_TA_UNPLUG_TIMER, 0, + sizeof(rpdu), &rpdu); + } +} + +static void blk_add_trace_split(struct request_queue *q, struct bio *bio, + unsigned int pdu) +{ + struct blk_trace *bt = q->blk_trace; + + if (bt) { + __be64 rpdu = cpu_to_be64(pdu); + + __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, + BLK_TA_SPLIT, !bio_flagged(bio, BIO_UPTODATE), + sizeof(rpdu), &rpdu); + } +} + +/** + * blk_add_trace_remap - Add a trace for a remap operation + * @q: queue the io is for + * @bio: the source bio + * @dev: target device + * @from: source sector + * @to: target sector + * + * Description: + * Device mapper or raid target sometimes need to split a bio because + * it spans a stripe (or similar). Add a trace for that action. + * + **/ +static void blk_add_trace_remap(struct request_queue *q, struct bio *bio, + dev_t dev, sector_t from, sector_t to) +{ + struct blk_trace *bt = q->blk_trace; + struct blk_io_trace_remap r; + + if (likely(!bt)) + return; + + r.device = cpu_to_be32(dev); + r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); + r.sector = cpu_to_be64(to); + + __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, + !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); +} + +/** + * blk_add_driver_data - Add binary message with driver-specific data + * @q: queue the io is for + * @rq: io request + * @data: driver-specific data + * @len: length of driver-specific data + * + * Description: + * Some drivers might want to write driver-specific data per request. + * + **/ +void blk_add_driver_data(struct request_queue *q, + struct request *rq, + void *data, size_t len) +{ + struct blk_trace *bt = q->blk_trace; + + if (likely(!bt)) + return; + + if (blk_pc_request(rq)) + __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, + rq->errors, len, data); + else + __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, + 0, BLK_TA_DRV_DATA, rq->errors, len, data); +} +EXPORT_SYMBOL_GPL(blk_add_driver_data); + +static int blk_register_tracepoints(void) +{ + int ret; + + ret = register_trace_block_rq_abort(blk_add_trace_rq_abort); + WARN_ON(ret); + ret = register_trace_block_rq_insert(blk_add_trace_rq_insert); + WARN_ON(ret); + ret = register_trace_block_rq_issue(blk_add_trace_rq_issue); + WARN_ON(ret); + ret = register_trace_block_rq_requeue(blk_add_trace_rq_requeue); + WARN_ON(ret); + ret = register_trace_block_rq_complete(blk_add_trace_rq_complete); + WARN_ON(ret); + ret = register_trace_block_bio_bounce(blk_add_trace_bio_bounce); + WARN_ON(ret); + ret = register_trace_block_bio_complete(blk_add_trace_bio_complete); + WARN_ON(ret); + ret = register_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); + WARN_ON(ret); + ret = register_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); + WARN_ON(ret); + ret = register_trace_block_bio_queue(blk_add_trace_bio_queue); + WARN_ON(ret); + ret = register_trace_block_getrq(blk_add_trace_getrq); + WARN_ON(ret); + ret = register_trace_block_sleeprq(blk_add_trace_sleeprq); + WARN_ON(ret); + ret = register_trace_block_plug(blk_add_trace_plug); + WARN_ON(ret); + ret = register_trace_block_unplug_timer(blk_add_trace_unplug_timer); + WARN_ON(ret); + ret = register_trace_block_unplug_io(blk_add_trace_unplug_io); + WARN_ON(ret); + ret = register_trace_block_split(blk_add_trace_split); + WARN_ON(ret); + ret = register_trace_block_remap(blk_add_trace_remap); + WARN_ON(ret); + return 0; +} + +static void blk_unregister_tracepoints(void) +{ + unregister_trace_block_remap(blk_add_trace_remap); + unregister_trace_block_split(blk_add_trace_split); + unregister_trace_block_unplug_io(blk_add_trace_unplug_io); + unregister_trace_block_unplug_timer(blk_add_trace_unplug_timer); + unregister_trace_block_plug(blk_add_trace_plug); + unregister_trace_block_sleeprq(blk_add_trace_sleeprq); + unregister_trace_block_getrq(blk_add_trace_getrq); + unregister_trace_block_bio_queue(blk_add_trace_bio_queue); + unregister_trace_block_bio_frontmerge(blk_add_trace_bio_frontmerge); + unregister_trace_block_bio_backmerge(blk_add_trace_bio_backmerge); + unregister_trace_block_bio_complete(blk_add_trace_bio_complete); + unregister_trace_block_bio_bounce(blk_add_trace_bio_bounce); + unregister_trace_block_rq_complete(blk_add_trace_rq_complete); + unregister_trace_block_rq_requeue(blk_add_trace_rq_requeue); + unregister_trace_block_rq_issue(blk_add_trace_rq_issue); + unregister_trace_block_rq_insert(blk_add_trace_rq_insert); + unregister_trace_block_rq_abort(blk_add_trace_rq_abort); + + tracepoint_synchronize_unregister(); +} diff --git a/block/elevator.c b/block/elevator.c index 9ac82dde99dd..530fcfe2ef07 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include @@ -586,7 +587,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) unsigned ordseq; int unplug_it = 1; - blk_add_trace_rq(q, rq, BLK_TA_INSERT); + trace_block_rq_insert(q, rq); rq->q = q; @@ -772,7 +773,7 @@ struct request *elv_next_request(struct request_queue *q) * not be passed by new incoming requests */ rq->cmd_flags |= REQ_STARTED; - blk_add_trace_rq(q, rq, BLK_TA_ISSUE); + trace_block_rq_issue(q, rq); } if (!q->boundary_rq || q->boundary_rq == rq) { @@ -921,7 +922,7 @@ void elv_abort_queue(struct request_queue *q) while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); rq->cmd_flags |= REQ_QUIET; - blk_add_trace_rq(q, rq, BLK_TA_ABORT); + trace_block_rq_abort(q, rq); __blk_end_request(rq, -EIO, blk_rq_bytes(rq)); } } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index c99e4728ff41..d23fda178163 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -21,6 +21,7 @@ #include #include #include +#include #define DM_MSG_PREFIX "core" @@ -504,8 +505,7 @@ static void dec_pending(struct dm_io *io, int error) end_io_acct(io); if (io->error != DM_ENDIO_REQUEUE) { - blk_add_trace_bio(io->md->queue, io->bio, - BLK_TA_COMPLETE); + trace_block_bio_complete(io->md->queue, io->bio); bio_endio(io->bio, io->error); } @@ -598,7 +598,7 @@ static void __map_bio(struct dm_target *ti, struct bio *clone, if (r == DM_MAPIO_REMAPPED) { /* the bio has been remapped so dispatch it */ - blk_add_trace_remap(bdev_get_queue(clone->bi_bdev), clone, + trace_block_remap(bdev_get_queue(clone->bi_bdev), clone, tio->io->bio->bi_bdev->bd_dev, clone->bi_sector, sector); diff --git a/fs/bio.c b/fs/bio.c index 77a55bcceedb..060859c69092 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -26,6 +26,7 @@ #include #include #include +#include #include /* for struct sg_iovec */ static struct kmem_cache *bio_slab __read_mostly; @@ -1263,7 +1264,7 @@ struct bio_pair *bio_split(struct bio *bi, int first_sectors) if (!bp) return bp; - blk_add_trace_pdu_int(bdev_get_queue(bi->bi_bdev), BLK_TA_SPLIT, bi, + trace_block_split(bdev_get_queue(bi->bi_bdev), bi, bi->bi_sector + first_sectors); BUG_ON(bi->bi_vcnt != 1); diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index bdf505d33e77..1dba3493d520 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -160,7 +160,6 @@ struct blk_trace { extern int blk_trace_ioctl(struct block_device *, unsigned, char __user *); extern void blk_trace_shutdown(struct request_queue *); -extern void __blk_add_trace(struct blk_trace *, sector_t, int, int, u32, int, int, void *); extern int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev, struct blk_user_trace_setup *buts); extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); @@ -186,168 +185,8 @@ extern void __trace_note_message(struct blk_trace *, const char *fmt, ...); } while (0) #define BLK_TN_MAX_MSG 128 -/** - * blk_add_trace_rq - Add a trace for a request oriented action - * @q: queue the io is for - * @rq: the source request - * @what: the action - * - * Description: - * Records an action against a request. Will log the bio offset + size. - * - **/ -static inline void blk_add_trace_rq(struct request_queue *q, struct request *rq, - u32 what) -{ - struct blk_trace *bt = q->blk_trace; - int rw = rq->cmd_flags & 0x03; - - if (likely(!bt)) - return; - - if (blk_discard_rq(rq)) - rw |= (1 << BIO_RW_DISCARD); - - if (blk_pc_request(rq)) { - what |= BLK_TC_ACT(BLK_TC_PC); - __blk_add_trace(bt, 0, rq->data_len, rw, what, rq->errors, sizeof(rq->cmd), rq->cmd); - } else { - what |= BLK_TC_ACT(BLK_TC_FS); - __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, rw, what, rq->errors, 0, NULL); - } -} - -/** - * blk_add_trace_bio - Add a trace for a bio oriented action - * @q: queue the io is for - * @bio: the source bio - * @what: the action - * - * Description: - * Records an action against a bio. Will log the bio offset + size. - * - **/ -static inline void blk_add_trace_bio(struct request_queue *q, struct bio *bio, - u32 what) -{ - struct blk_trace *bt = q->blk_trace; - - if (likely(!bt)) - return; - - __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), 0, NULL); -} - -/** - * blk_add_trace_generic - Add a trace for a generic action - * @q: queue the io is for - * @bio: the source bio - * @rw: the data direction - * @what: the action - * - * Description: - * Records a simple trace - * - **/ -static inline void blk_add_trace_generic(struct request_queue *q, - struct bio *bio, int rw, u32 what) -{ - struct blk_trace *bt = q->blk_trace; - - if (likely(!bt)) - return; - - if (bio) - blk_add_trace_bio(q, bio, what); - else - __blk_add_trace(bt, 0, 0, rw, what, 0, 0, NULL); -} - -/** - * blk_add_trace_pdu_int - Add a trace for a bio with an integer payload - * @q: queue the io is for - * @what: the action - * @bio: the source bio - * @pdu: the integer payload - * - * Description: - * Adds a trace with some integer payload. This might be an unplug - * option given as the action, with the depth at unplug time given - * as the payload - * - **/ -static inline void blk_add_trace_pdu_int(struct request_queue *q, u32 what, - struct bio *bio, unsigned int pdu) -{ - struct blk_trace *bt = q->blk_trace; - __be64 rpdu = cpu_to_be64(pdu); - - if (likely(!bt)) - return; - - if (bio) - __blk_add_trace(bt, bio->bi_sector, bio->bi_size, bio->bi_rw, what, !bio_flagged(bio, BIO_UPTODATE), sizeof(rpdu), &rpdu); - else - __blk_add_trace(bt, 0, 0, 0, what, 0, sizeof(rpdu), &rpdu); -} - -/** - * blk_add_trace_remap - Add a trace for a remap operation - * @q: queue the io is for - * @bio: the source bio - * @dev: target device - * @from: source sector - * @to: target sector - * - * Description: - * Device mapper or raid target sometimes need to split a bio because - * it spans a stripe (or similar). Add a trace for that action. - * - **/ -static inline void blk_add_trace_remap(struct request_queue *q, struct bio *bio, - dev_t dev, sector_t from, sector_t to) -{ - struct blk_trace *bt = q->blk_trace; - struct blk_io_trace_remap r; - - if (likely(!bt)) - return; - - r.device = cpu_to_be32(dev); - r.device_from = cpu_to_be32(bio->bi_bdev->bd_dev); - r.sector = cpu_to_be64(to); - - __blk_add_trace(bt, from, bio->bi_size, bio->bi_rw, BLK_TA_REMAP, !bio_flagged(bio, BIO_UPTODATE), sizeof(r), &r); -} - -/** - * blk_add_driver_data - Add binary message with driver-specific data - * @q: queue the io is for - * @rq: io request - * @data: driver-specific data - * @len: length of driver-specific data - * - * Description: - * Some drivers might want to write driver-specific data per request. - * - **/ -static inline void blk_add_driver_data(struct request_queue *q, - struct request *rq, - void *data, size_t len) -{ - struct blk_trace *bt = q->blk_trace; - - if (likely(!bt)) - return; - - if (blk_pc_request(rq)) - __blk_add_trace(bt, 0, rq->data_len, 0, BLK_TA_DRV_DATA, - rq->errors, len, data); - else - __blk_add_trace(bt, rq->hard_sector, rq->hard_nr_sectors << 9, - 0, BLK_TA_DRV_DATA, rq->errors, len, data); -} - +extern void blk_add_driver_data(struct request_queue *q, struct request *rq, + void *data, size_t len); extern int blk_trace_setup(struct request_queue *q, char *name, dev_t dev, char __user *arg); extern int blk_trace_startstop(struct request_queue *q, int start); @@ -356,13 +195,8 @@ extern int blk_trace_remove(struct request_queue *q); #else /* !CONFIG_BLK_DEV_IO_TRACE */ #define blk_trace_ioctl(bdev, cmd, arg) (-ENOTTY) #define blk_trace_shutdown(q) do { } while (0) -#define blk_add_trace_rq(q, rq, what) do { } while (0) -#define blk_add_trace_bio(q, rq, what) do { } while (0) -#define blk_add_trace_generic(q, rq, rw, what) do { } while (0) -#define blk_add_trace_pdu_int(q, what, bio, pdu) do { } while (0) -#define blk_add_trace_remap(q, bio, dev, f, t) do {} while (0) -#define blk_add_driver_data(q, rq, data, len) do {} while (0) #define do_blk_trace_setup(q, name, dev, buts) (-ENOTTY) +#define blk_add_driver_data(q, rq, data, len) do {} while (0) #define blk_trace_setup(q, name, dev, arg) (-ENOTTY) #define blk_trace_startstop(q, start) (-ENOTTY) #define blk_trace_remove(q) (-ENOTTY) diff --git a/include/trace/block.h b/include/trace/block.h new file mode 100644 index 000000000000..3cc2675ebf01 --- /dev/null +++ b/include/trace/block.h @@ -0,0 +1,60 @@ +#ifndef _TRACE_BLOCK_H +#define _TRACE_BLOCK_H + +#include +#include + +DEFINE_TRACE(block_rq_abort, + TPPROTO(struct request_queue *q, struct request *rq), + TPARGS(q, rq)); +DEFINE_TRACE(block_rq_insert, + TPPROTO(struct request_queue *q, struct request *rq), + TPARGS(q, rq)); +DEFINE_TRACE(block_rq_issue, + TPPROTO(struct request_queue *q, struct request *rq), + TPARGS(q, rq)); +DEFINE_TRACE(block_rq_requeue, + TPPROTO(struct request_queue *q, struct request *rq), + TPARGS(q, rq)); +DEFINE_TRACE(block_rq_complete, + TPPROTO(struct request_queue *q, struct request *rq), + TPARGS(q, rq)); +DEFINE_TRACE(block_bio_bounce, + TPPROTO(struct request_queue *q, struct bio *bio), + TPARGS(q, bio)); +DEFINE_TRACE(block_bio_complete, + TPPROTO(struct request_queue *q, struct bio *bio), + TPARGS(q, bio)); +DEFINE_TRACE(block_bio_backmerge, + TPPROTO(struct request_queue *q, struct bio *bio), + TPARGS(q, bio)); +DEFINE_TRACE(block_bio_frontmerge, + TPPROTO(struct request_queue *q, struct bio *bio), + TPARGS(q, bio)); +DEFINE_TRACE(block_bio_queue, + TPPROTO(struct request_queue *q, struct bio *bio), + TPARGS(q, bio)); +DEFINE_TRACE(block_getrq, + TPPROTO(struct request_queue *q, struct bio *bio, int rw), + TPARGS(q, bio, rw)); +DEFINE_TRACE(block_sleeprq, + TPPROTO(struct request_queue *q, struct bio *bio, int rw), + TPARGS(q, bio, rw)); +DEFINE_TRACE(block_plug, + TPPROTO(struct request_queue *q), + TPARGS(q)); +DEFINE_TRACE(block_unplug_timer, + TPPROTO(struct request_queue *q), + TPARGS(q)); +DEFINE_TRACE(block_unplug_io, + TPPROTO(struct request_queue *q), + TPARGS(q)); +DEFINE_TRACE(block_split, + TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), + TPARGS(q, bio, pdu)); +DEFINE_TRACE(block_remap, + TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev, + sector_t from, sector_t to), + TPARGS(q, bio, dev, from, to)); + +#endif diff --git a/mm/bounce.c b/mm/bounce.c index 06722c403058..bd1caaa582b8 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #define POOL_SIZE 64 @@ -222,7 +223,7 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig, if (!bio) return; - blk_add_trace_bio(q, *bio_orig, BLK_TA_BOUNCE); + trace_block_bio_bounce(q, *bio_orig); /* * at least one page was bounced, fill in possible non-highmem -- cgit v1.2.3-70-g09d2 From 0bfc24559d7945506184d86739fe365a181f06b7 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 26 Nov 2008 11:59:56 +0100 Subject: blktrace: port to tracepoints, update Port to the new tracepoints API: split DEFINE_TRACE() and DECLARE_TRACE() sites. Spread them out to the usage sites, as suggested by Mathieu Desnoyers. Signed-off-by: Ingo Molnar Acked-by: Mathieu Desnoyers --- block/blk-core.c | 13 ++++++++ block/elevator.c | 5 +++ drivers/md/dm.c | 2 ++ fs/bio.c | 2 ++ include/trace/block.h | 84 ++++++++++++++++++++++++++++++--------------------- mm/bounce.c | 2 ++ 6 files changed, 74 insertions(+), 34 deletions(-) (limited to 'mm') diff --git a/block/blk-core.c b/block/blk-core.c index 04267d66a2b9..0c06cf5aaaf8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -32,6 +32,19 @@ #include "blk.h" +DEFINE_TRACE(block_plug); +DEFINE_TRACE(block_unplug_io); +DEFINE_TRACE(block_unplug_timer); +DEFINE_TRACE(block_getrq); +DEFINE_TRACE(block_sleeprq); +DEFINE_TRACE(block_rq_requeue); +DEFINE_TRACE(block_bio_backmerge); +DEFINE_TRACE(block_bio_frontmerge); +DEFINE_TRACE(block_bio_queue); +DEFINE_TRACE(block_rq_complete); +DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */ +EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); + static int __make_request(struct request_queue *q, struct bio *bio); /* diff --git a/block/elevator.c b/block/elevator.c index 530fcfe2ef07..e5677fe4f412 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -42,6 +42,8 @@ static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); +DEFINE_TRACE(block_rq_abort); + /* * Merge hash stuff. */ @@ -53,6 +55,9 @@ static const int elv_hash_shift = 6; #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) +DEFINE_TRACE(block_rq_insert); +DEFINE_TRACE(block_rq_issue); + /* * Query io scheduler to see if the current process issuing bio may be * merged with rq. diff --git a/drivers/md/dm.c b/drivers/md/dm.c index d23fda178163..343094c3feeb 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -52,6 +52,8 @@ struct dm_target_io { union map_info info; }; +DEFINE_TRACE(block_bio_complete); + union map_info *dm_get_mapinfo(struct bio *bio) { if (bio && bio->bi_private) diff --git a/fs/bio.c b/fs/bio.c index 060859c69092..df99c882b807 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -29,6 +29,8 @@ #include #include /* for struct sg_iovec */ +DEFINE_TRACE(block_split); + static struct kmem_cache *bio_slab __read_mostly; static mempool_t *bio_split_pool __read_mostly; diff --git a/include/trace/block.h b/include/trace/block.h index 3cc2675ebf01..25c6a1fd5b77 100644 --- a/include/trace/block.h +++ b/include/trace/block.h @@ -4,57 +4,73 @@ #include #include -DEFINE_TRACE(block_rq_abort, +DECLARE_TRACE(block_rq_abort, TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); -DEFINE_TRACE(block_rq_insert, + TPARGS(q, rq)); + +DECLARE_TRACE(block_rq_insert, TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); -DEFINE_TRACE(block_rq_issue, + TPARGS(q, rq)); + +DECLARE_TRACE(block_rq_issue, TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); -DEFINE_TRACE(block_rq_requeue, + TPARGS(q, rq)); + +DECLARE_TRACE(block_rq_requeue, TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); -DEFINE_TRACE(block_rq_complete, + TPARGS(q, rq)); + +DECLARE_TRACE(block_rq_complete, TPPROTO(struct request_queue *q, struct request *rq), - TPARGS(q, rq)); -DEFINE_TRACE(block_bio_bounce, + TPARGS(q, rq)); + +DECLARE_TRACE(block_bio_bounce, TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); -DEFINE_TRACE(block_bio_complete, + TPARGS(q, bio)); + +DECLARE_TRACE(block_bio_complete, TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); -DEFINE_TRACE(block_bio_backmerge, + TPARGS(q, bio)); + +DECLARE_TRACE(block_bio_backmerge, TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); -DEFINE_TRACE(block_bio_frontmerge, + TPARGS(q, bio)); + +DECLARE_TRACE(block_bio_frontmerge, TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); -DEFINE_TRACE(block_bio_queue, + TPARGS(q, bio)); + +DECLARE_TRACE(block_bio_queue, TPPROTO(struct request_queue *q, struct bio *bio), - TPARGS(q, bio)); -DEFINE_TRACE(block_getrq, + TPARGS(q, bio)); + +DECLARE_TRACE(block_getrq, TPPROTO(struct request_queue *q, struct bio *bio, int rw), - TPARGS(q, bio, rw)); -DEFINE_TRACE(block_sleeprq, + TPARGS(q, bio, rw)); + +DECLARE_TRACE(block_sleeprq, TPPROTO(struct request_queue *q, struct bio *bio, int rw), - TPARGS(q, bio, rw)); -DEFINE_TRACE(block_plug, + TPARGS(q, bio, rw)); + +DECLARE_TRACE(block_plug, TPPROTO(struct request_queue *q), - TPARGS(q)); -DEFINE_TRACE(block_unplug_timer, + TPARGS(q)); + +DECLARE_TRACE(block_unplug_timer, TPPROTO(struct request_queue *q), - TPARGS(q)); -DEFINE_TRACE(block_unplug_io, + TPARGS(q)); + +DECLARE_TRACE(block_unplug_io, TPPROTO(struct request_queue *q), - TPARGS(q)); -DEFINE_TRACE(block_split, + TPARGS(q)); + +DECLARE_TRACE(block_split, TPPROTO(struct request_queue *q, struct bio *bio, unsigned int pdu), - TPARGS(q, bio, pdu)); -DEFINE_TRACE(block_remap, + TPARGS(q, bio, pdu)); + +DECLARE_TRACE(block_remap, TPPROTO(struct request_queue *q, struct bio *bio, dev_t dev, sector_t from, sector_t to), - TPARGS(q, bio, dev, from, to)); + TPARGS(q, bio, dev, from, to)); #endif diff --git a/mm/bounce.c b/mm/bounce.c index bd1caaa582b8..bf0cf7c8387b 100644 --- a/mm/bounce.c +++ b/mm/bounce.c @@ -22,6 +22,8 @@ static mempool_t *page_pool, *isa_page_pool; +DEFINE_TRACE(block_bio_bounce); + #ifdef CONFIG_HIGHMEM static __init int init_emergency_pool(void) { -- cgit v1.2.3-70-g09d2 From 5e18e2b8b3d453e68accc3e295643fe4b5bbc295 Mon Sep 17 00:00:00 2001 From: Catalin Marinas Date: Mon, 15 Dec 2008 13:54:16 -0800 Subject: slob: do not pass the SLAB flags as GFP in kmem_cache_create() The kmem_cache_create() function in the slob allocator passes the SLAB flags as GFP flags to the slob_alloc() function. The patch changes this call to pass GFP_KERNEL as the other allocators seem to do. Signed-off-by: Catalin Marinas Acked-by: Matt Mackall Cc: Cyrill Gorcunov Cc: Christoph Lameter Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- mm/slob.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/slob.c b/mm/slob.c index cb675d126791..bf7e8fc3aed8 100644 --- a/mm/slob.c +++ b/mm/slob.c @@ -535,7 +535,7 @@ struct kmem_cache *kmem_cache_create(const char *name, size_t size, struct kmem_cache *c; c = slob_alloc(sizeof(struct kmem_cache), - flags, ARCH_KMALLOC_MINALIGN, -1); + GFP_KERNEL, ARCH_KMALLOC_MINALIGN, -1); if (c) { c->name = name; -- cgit v1.2.3-70-g09d2 From c095adbc211f9f4e990eac7d6cb440de35e4f05f Mon Sep 17 00:00:00 2001 From: KOSAKI Motohiro Date: Tue, 16 Dec 2008 16:06:43 +0900 Subject: mm: Don't touch uninitialized variable in do_pages_stat_array() Commit 80bba1290ab5122c60cdb73332b26d288dc8aedd removed one necessary variable initialization. As a result following warning happened: CC mm/migrate.o mm/migrate.c: In function 'sys_move_pages': mm/migrate.c:1001: warning: 'err' may be used uninitialized in this function More unfortunately, if find_vma() failed, kernel read uninitialized memory. Signed-off-by: KOSAKI Motohiro CC: Brice Goglin Cc: Christoph Lameter Cc: KAMEZAWA Hiroyuki Cc: Nick Piggin Cc: Hugh Dickins Signed-off-by: Linus Torvalds --- mm/migrate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'mm') diff --git a/mm/migrate.c b/mm/migrate.c index d8f07667fc80..037b0967c1e3 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -998,7 +998,7 @@ static void do_pages_stat_array(struct mm_struct *mm, unsigned long nr_pages, unsigned long addr = (unsigned long)(*pages); struct vm_area_struct *vma; struct page *page; - int err; + int err = -EFAULT; vma = find_vma(mm, addr); if (!vma) -- cgit v1.2.3-70-g09d2 From 1796316a8b028a148be48ba5d4e7be493a39d173 Mon Sep 17 00:00:00 2001 From: Jan Beulich Date: Tue, 16 Dec 2008 11:35:24 +0000 Subject: x86: consolidate __swp_XXX() macros Impact: cleanup, code robustization The __swp_...() macros silently relied upon which bits are used for _PAGE_FILE and _PAGE_PROTNONE. After having changed _PAGE_PROTNONE in our Xen kernel to no longer overlap _PAGE_PAT, live locks and crashes were reported that could have been avoided if these macros properly used the symbolic constants. Since, as pointed out earlier, for Xen Dom0 support mainline likewise will need to eliminate the conflict between _PAGE_PAT and _PAGE_PROTNONE, this patch does all the necessary adjustments, plus it introduces a mechanism to check consistency between MAX_SWAPFILES_SHIFT and the actual encoding macros. This also fixes a latent bug in that x86-64 used a 6-bit mask in __swp_type(), and if MAX_SWAPFILES_SHIFT was increased beyond 5 in (the seemingly unrelated) linux/swap.h, this would have resulted in a collision with _PAGE_FILE. Non-PAE 32-bit code gets similarly adjusted for its pte_to_pgoff() and pgoff_to_pte() calculations. Signed-off-by: Jan Beulich Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable-2level.h | 50 ++++++++++++++++++++++++++++------- arch/x86/include/asm/pgtable-3level.h | 1 + arch/x86/include/asm/pgtable.h | 14 +++++----- arch/x86/include/asm/pgtable_64.h | 20 +++++++++++--- mm/swapfile.c | 9 +++++++ 5 files changed, 75 insertions(+), 19 deletions(-) (limited to 'mm') diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index b17edfd23628..e0d199fe1d83 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -56,23 +56,55 @@ static inline pte_t native_ptep_get_and_clear(pte_t *xp) #define pte_none(x) (!(x).pte_low) /* - * Bits 0, 6 and 7 are taken, split up the 29 bits of offset - * into this range: + * Bits _PAGE_BIT_PRESENT, _PAGE_BIT_FILE and _PAGE_BIT_PROTNONE are taken, + * split up the 29 bits of offset into this range: */ #define PTE_FILE_MAX_BITS 29 +#define PTE_FILE_SHIFT1 (_PAGE_BIT_PRESENT + 1) +#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE +#define PTE_FILE_SHIFT2 (_PAGE_BIT_FILE + 1) +#define PTE_FILE_SHIFT3 (_PAGE_BIT_PROTNONE + 1) +#else +#define PTE_FILE_SHIFT2 (_PAGE_BIT_PROTNONE + 1) +#define PTE_FILE_SHIFT3 (_PAGE_BIT_FILE + 1) +#endif +#define PTE_FILE_BITS1 (PTE_FILE_SHIFT2 - PTE_FILE_SHIFT1 - 1) +#define PTE_FILE_BITS2 (PTE_FILE_SHIFT3 - PTE_FILE_SHIFT2 - 1) #define pte_to_pgoff(pte) \ - ((((pte).pte_low >> 1) & 0x1f) + (((pte).pte_low >> 8) << 5)) + ((((pte).pte_low >> PTE_FILE_SHIFT1) \ + & ((1U << PTE_FILE_BITS1) - 1)) \ + + ((((pte).pte_low >> PTE_FILE_SHIFT2) \ + & ((1U << PTE_FILE_BITS2) - 1)) << PTE_FILE_BITS1) \ + + (((pte).pte_low >> PTE_FILE_SHIFT3) \ + << (PTE_FILE_BITS1 + PTE_FILE_BITS2))) #define pgoff_to_pte(off) \ - ((pte_t) { .pte_low = (((off) & 0x1f) << 1) + \ - (((off) >> 5) << 8) + _PAGE_FILE }) + ((pte_t) { .pte_low = \ + (((off) & ((1U << PTE_FILE_BITS1) - 1)) << PTE_FILE_SHIFT1) \ + + ((((off) >> PTE_FILE_BITS1) & ((1U << PTE_FILE_BITS2) - 1)) \ + << PTE_FILE_SHIFT2) \ + + (((off) >> (PTE_FILE_BITS1 + PTE_FILE_BITS2)) \ + << PTE_FILE_SHIFT3) \ + + _PAGE_FILE }) /* Encode and de-code a swap entry */ -#define __swp_type(x) (((x).val >> 1) & 0x1f) -#define __swp_offset(x) ((x).val >> 8) -#define __swp_entry(type, offset) \ - ((swp_entry_t) { ((type) << 1) | ((offset) << 8) }) +#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE +#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) +#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) +#else +#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1) +#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1) +#endif + +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) + +#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ + & ((1U << SWP_TYPE_BITS) - 1)) +#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) +#define __swp_entry(type, offset) ((swp_entry_t) { \ + ((type) << (_PAGE_BIT_PRESENT + 1)) \ + | ((offset) << SWP_OFFSET_SHIFT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { (pte).pte_low }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 52597aeadfff..447da43cddb3 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -166,6 +166,7 @@ static inline int pte_none(pte_t pte) #define PTE_FILE_MAX_BITS 32 /* Encode and de-code a swap entry */ +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) #define __swp_type(x) (((x).val) & 0x1f) #define __swp_offset(x) ((x).val >> 5) #define __swp_entry(type, offset) ((swp_entry_t){(type) | (offset) << 5}) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index c012f3b11671..b7c2ecdb7658 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -10,7 +10,6 @@ #define _PAGE_BIT_PCD 4 /* page cache disabled */ #define _PAGE_BIT_ACCESSED 5 /* was accessed (raised by CPU) */ #define _PAGE_BIT_DIRTY 6 /* was written to (raised by CPU) */ -#define _PAGE_BIT_FILE 6 #define _PAGE_BIT_PSE 7 /* 4 MB (or 2MB) page */ #define _PAGE_BIT_PAT 7 /* on 4KB pages */ #define _PAGE_BIT_GLOBAL 8 /* Global TLB entry PPro+ */ @@ -22,6 +21,12 @@ #define _PAGE_BIT_CPA_TEST _PAGE_BIT_UNUSED1 #define _PAGE_BIT_NX 63 /* No execute: only valid after cpuid check */ +/* If _PAGE_BIT_PRESENT is clear, we use these: */ +/* - if the user mapped it with PROT_NONE; pte_present gives true */ +#define _PAGE_BIT_PROTNONE _PAGE_BIT_GLOBAL +/* - set: nonlinear file mapping, saved PTE; unset:swap */ +#define _PAGE_BIT_FILE _PAGE_BIT_DIRTY + #define _PAGE_PRESENT (_AT(pteval_t, 1) << _PAGE_BIT_PRESENT) #define _PAGE_RW (_AT(pteval_t, 1) << _PAGE_BIT_RW) #define _PAGE_USER (_AT(pteval_t, 1) << _PAGE_BIT_USER) @@ -46,11 +51,8 @@ #define _PAGE_NX (_AT(pteval_t, 0)) #endif -/* If _PAGE_PRESENT is clear, we use these: */ -#define _PAGE_FILE _PAGE_DIRTY /* nonlinear file mapping, - * saved PTE; unset:swap */ -#define _PAGE_PROTNONE _PAGE_PSE /* if the user mapped it with PROT_NONE; - pte_present gives true */ +#define _PAGE_FILE (_AT(pteval_t, 1) << _PAGE_BIT_FILE) +#define _PAGE_PROTNONE (_AT(pteval_t, 1) << _PAGE_BIT_PROTNONE) #define _PAGE_TABLE (_PAGE_PRESENT | _PAGE_RW | _PAGE_USER | \ _PAGE_ACCESSED | _PAGE_DIRTY) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 545a0e042bb2..65b6be6677c7 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -250,10 +250,22 @@ static inline int pud_large(pud_t pte) extern int direct_gbpages; /* Encode and de-code a swap entry */ -#define __swp_type(x) (((x).val >> 1) & 0x3f) -#define __swp_offset(x) ((x).val >> 8) -#define __swp_entry(type, offset) ((swp_entry_t) { ((type) << 1) | \ - ((offset) << 8) }) +#if _PAGE_BIT_FILE < _PAGE_BIT_PROTNONE +#define SWP_TYPE_BITS (_PAGE_BIT_FILE - _PAGE_BIT_PRESENT - 1) +#define SWP_OFFSET_SHIFT (_PAGE_BIT_PROTNONE + 1) +#else +#define SWP_TYPE_BITS (_PAGE_BIT_PROTNONE - _PAGE_BIT_PRESENT - 1) +#define SWP_OFFSET_SHIFT (_PAGE_BIT_FILE + 1) +#endif + +#define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > SWP_TYPE_BITS) + +#define __swp_type(x) (((x).val >> (_PAGE_BIT_PRESENT + 1)) \ + & ((1U << SWP_TYPE_BITS) - 1)) +#define __swp_offset(x) ((x).val >> SWP_OFFSET_SHIFT) +#define __swp_entry(type, offset) ((swp_entry_t) { \ + ((type) << (_PAGE_BIT_PRESENT + 1)) \ + | ((offset) << SWP_OFFSET_SHIFT) }) #define __pte_to_swp_entry(pte) ((swp_entry_t) { pte_val((pte)) }) #define __swp_entry_to_pte(x) ((pte_t) { .pte = (x).val }) diff --git a/mm/swapfile.c b/mm/swapfile.c index 90cb67a5417c..54a9f87e5162 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -1462,6 +1462,15 @@ static int __init procswaps_init(void) __initcall(procswaps_init); #endif /* CONFIG_PROC_FS */ +#ifdef MAX_SWAPFILES_CHECK +static int __init max_swapfiles_check(void) +{ + MAX_SWAPFILES_CHECK(); + return 0; +} +late_initcall(max_swapfiles_check); +#endif + /* * Written 01/25/92 by Simmule Turner, heavily changed by Linus. * -- cgit v1.2.3-70-g09d2 From 3c8bb73ace6249bd089b70c941440441940e3365 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Thu, 18 Dec 2008 11:41:27 -0800 Subject: x86: PAT: store vm_pgoff for all linear_over_vma_region mappings - v3 Impact: Code transformation, new functions added should have no effect. Drivers use mmap followed by pgprot_* and remap_pfn_range or vm_insert_pfn, in order to export reserved memory to userspace. Currently, such mappings are not tracked and hence not kept consistent with other mappings (/dev/mem, pci resource, ioremap) for the sme memory, that may exist in the system. The following patchset adds x86 PAT attribute tracking and untracking for pfnmap related APIs. First three patches in the patchset are changing the generic mm code to fit in this tracking. Last four patches are x86 specific to make things work with x86 PAT code. The patchset aso introduces pgprot_writecombine interface, which gives writecombine mapping when enabled, falling back to pgprot_noncached otherwise. This patch: While working on x86 PAT, we faced some hurdles with trackking remap_pfn_range() regions, as we do not have any information to say whether that PFNMAP mapping is linear for the entire vma range or it is smaller granularity regions within the vma. A simple solution to this is to use vm_pgoff as an indicator for linear mapping over the vma region. Currently, remap_pfn_range only sets vm_pgoff for COW mappings. Below patch changes the logic and sets the vm_pgoff irrespective of COW. This will still not be enough for the case where pfn is zero (vma region mapped to physical address zero). But, for all the other cases, we can look at pfnmap VMAs and say whether the mappng is for the entire vma region or not. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 9 +++++++++ mm/memory.c | 7 +++---- 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'mm') diff --git a/include/linux/mm.h b/include/linux/mm.h index ffee2f743418..2be8d9b5e46f 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -145,6 +145,15 @@ extern pgprot_t protection_map[16]; #define FAULT_FLAG_WRITE 0x01 /* Fault was a write access */ #define FAULT_FLAG_NONLINEAR 0x02 /* Fault was via a nonlinear mapping */ +static inline int is_linear_pfn_mapping(struct vm_area_struct *vma) +{ + return ((vma->vm_flags & VM_PFNMAP) && vma->vm_pgoff); +} + +static inline int is_pfn_mapping(struct vm_area_struct *vma) +{ + return (vma->vm_flags & VM_PFNMAP); +} /* * vm_fault is filled by the the pagefault handler and passed to the vma's diff --git a/mm/memory.c b/mm/memory.c index 164951c47305..cef95c8c77fa 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1575,11 +1575,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, * behaviour that some programs depend on. We mark the "original" * un-COW'ed pages by matching them up with "vma->vm_pgoff". */ - if (is_cow_mapping(vma->vm_flags)) { - if (addr != vma->vm_start || end != vma->vm_end) - return -EINVAL; + if (addr == vma->vm_start && end == vma->vm_end) vma->vm_pgoff = pfn; - } + else if (is_cow_mapping(vma->vm_flags)) + return -EINVAL; vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; -- cgit v1.2.3-70-g09d2 From e121e418441525b5636321fe03d16f0193ad218e Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Thu, 18 Dec 2008 11:41:28 -0800 Subject: x86: PAT: add follow_pfnmp_pte routine to help tracking pfnmap pages - v3 Impact: New currently unused interface. Add a generic interface to follow pfn in a pfnmap vma range. This is used by one of the subsequent x86 PAT related patch to keep track of memory types for vma regions across vma copy and free. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 3 +++ mm/memory.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+) (limited to 'mm') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2be8d9b5e46f..a25024ff9c11 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1223,6 +1223,9 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_GET 0x04 /* do get_page on page */ #define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ +int follow_pfnmap_pte(struct vm_area_struct *vma, + unsigned long address, pte_t *ret_ptep); + typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, diff --git a/mm/memory.c b/mm/memory.c index cef95c8c77fa..8ca6bbf34ad6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1111,6 +1111,49 @@ no_page_table: return page; } +int follow_pfnmap_pte(struct vm_area_struct *vma, unsigned long address, + pte_t *ret_ptep) +{ + pgd_t *pgd; + pud_t *pud; + pmd_t *pmd; + pte_t *ptep, pte; + spinlock_t *ptl; + struct page *page; + struct mm_struct *mm = vma->vm_mm; + + if (!is_pfn_mapping(vma)) + goto err; + + page = NULL; + pgd = pgd_offset(mm, address); + if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + goto err; + + pud = pud_offset(pgd, address); + if (pud_none(*pud) || unlikely(pud_bad(*pud))) + goto err; + + pmd = pmd_offset(pud, address); + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) + goto err; + + ptep = pte_offset_map_lock(mm, pmd, address, &ptl); + + pte = *ptep; + if (!pte_present(pte)) + goto err_unlock; + + *ret_ptep = pte; + pte_unmap_unlock(ptep, ptl); + return 0; + +err_unlock: + pte_unmap_unlock(ptep, ptl); +err: + return -EINVAL; +} + /* Can we do the FOLL_ANON optimization? */ static inline int use_zero_page(struct vm_area_struct *vma) { -- cgit v1.2.3-70-g09d2 From 2ab640379a0ab4cef746ced1d7e04a0941774bcb Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Thu, 18 Dec 2008 11:41:29 -0800 Subject: x86: PAT: hooks in generic vm code to help archs to track pfnmap regions - v3 Impact: Introduces new hooks, which are currently null. Introduce generic hooks in remap_pfn_range and vm_insert_pfn and corresponding copy and free routines with reserve and free tracking. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 6 +++++ mm/memory.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 81 insertions(+), 1 deletion(-) (limited to 'mm') diff --git a/include/linux/mm.h b/include/linux/mm.h index a25024ff9c11..87ecb40e11a0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -155,6 +155,12 @@ static inline int is_pfn_mapping(struct vm_area_struct *vma) return (vma->vm_flags & VM_PFNMAP); } +extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, + unsigned long pfn, unsigned long size); +extern int track_pfn_vma_copy(struct vm_area_struct *vma); +extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, + unsigned long size); + /* * vm_fault is filled by the the pagefault handler and passed to the vma's * ->fault function. The vma's ->fault is responsible for returning a bitmask diff --git a/mm/memory.c b/mm/memory.c index 8ca6bbf34ad6..1e8f0d347c0e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -99,6 +99,50 @@ int randomize_va_space __read_mostly = 2; #endif +#ifndef track_pfn_vma_new +/* + * Interface that can be used by architecture code to keep track of + * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) + * + * track_pfn_vma_new is called when a _new_ pfn mapping is being established + * for physical range indicated by pfn and size. + */ +int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, + unsigned long pfn, unsigned long size) +{ + return 0; +} +#endif + +#ifndef track_pfn_vma_copy +/* + * Interface that can be used by architecture code to keep track of + * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) + * + * track_pfn_vma_copy is called when vma that is covering the pfnmap gets + * copied through copy_page_range(). + */ +int track_pfn_vma_copy(struct vm_area_struct *vma) +{ + return 0; +} +#endif + +#ifndef untrack_pfn_vma +/* + * Interface that can be used by architecture code to keep track of + * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) + * + * untrack_pfn_vma is called while unmapping a pfnmap for a region. + * untrack can be called for a specific region indicated by pfn and size or + * can be for the entire vma (in which case size can be zero). + */ +void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, + unsigned long size) +{ +} +#endif + static int __init disable_randmaps(char *s) { randomize_va_space = 0; @@ -669,6 +713,16 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst_mm, src_mm, vma); + if (is_pfn_mapping(vma)) { + /* + * We do not free on error cases below as remove_vma + * gets called on error from higher level routine + */ + ret = track_pfn_vma_copy(vma); + if (ret) + return ret; + } + /* * We need to invalidate the secondary MMU mappings only when * there could be a permission downgrade on the ptes of the @@ -915,6 +969,9 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, if (vma->vm_flags & VM_ACCOUNT) *nr_accounted += (end - start) >> PAGE_SHIFT; + if (is_pfn_mapping(vma)) + untrack_pfn_vma(vma, 0, 0); + while (start != end) { if (!tlb_start_valid) { tlb_start = start; @@ -1473,6 +1530,7 @@ out: int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, unsigned long pfn) { + int ret; /* * Technically, architectures with pte_special can avoid all these * restrictions (same for remap_pfn_range). However we would like @@ -1487,7 +1545,15 @@ int vm_insert_pfn(struct vm_area_struct *vma, unsigned long addr, if (addr < vma->vm_start || addr >= vma->vm_end) return -EFAULT; - return insert_pfn(vma, addr, pfn, vma->vm_page_prot); + if (track_pfn_vma_new(vma, vma->vm_page_prot, pfn, PAGE_SIZE)) + return -EINVAL; + + ret = insert_pfn(vma, addr, pfn, vma->vm_page_prot); + + if (ret) + untrack_pfn_vma(vma, pfn, PAGE_SIZE); + + return ret; } EXPORT_SYMBOL(vm_insert_pfn); @@ -1625,6 +1691,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, vma->vm_flags |= VM_IO | VM_RESERVED | VM_PFNMAP; + err = track_pfn_vma_new(vma, prot, pfn, PAGE_ALIGN(size)); + if (err) + return -EINVAL; + BUG_ON(addr >= end); pfn -= addr >> PAGE_SHIFT; pgd = pgd_offset(mm, addr); @@ -1636,6 +1706,10 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long addr, if (err) break; } while (pgd++, addr = next, addr != end); + + if (err) + untrack_pfn_vma(vma, pfn, PAGE_ALIGN(size)); + return err; } EXPORT_SYMBOL(remap_pfn_range); -- cgit v1.2.3-70-g09d2 From d87fe6607c31944f7572f965c1507ae77026c133 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 19 Dec 2008 13:47:27 -0800 Subject: x86: PAT: modify follow_phys to return phys_addr prot and return value Impact: Changes and globalizes an existing static interface. Follow_phys does similar things as follow_pfnmap_pte. Make a minor change to follow_phys so that it can be used in place of follow_pfnmap_pte. Physical address return value with 0 as error return does not work in follow_phys as the actual physical address 0 mapping may exist in pte. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- include/linux/mm.h | 2 ++ mm/memory.c | 31 ++++++++++++++----------------- 2 files changed, 16 insertions(+), 17 deletions(-) (limited to 'mm') diff --git a/include/linux/mm.h b/include/linux/mm.h index 35f811b0cd69..2f6e2f886d4b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -804,6 +804,8 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct vm_area_struct *vma); void unmap_mapping_range(struct address_space *mapping, loff_t const holebegin, loff_t const holelen, int even_cows); +int follow_phys(struct vm_area_struct *vma, unsigned long address, + unsigned int flags, unsigned long *prot, resource_size_t *phys); int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *buf, int len, int write); diff --git a/mm/memory.c b/mm/memory.c index 1e8f0d347c0e..79f28e35d4fc 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2981,9 +2981,9 @@ int in_gate_area_no_task(unsigned long addr) #endif /* __HAVE_ARCH_GATE_AREA */ #ifdef CONFIG_HAVE_IOREMAP_PROT -static resource_size_t follow_phys(struct vm_area_struct *vma, - unsigned long address, unsigned int flags, - unsigned long *prot) +int follow_phys(struct vm_area_struct *vma, + unsigned long address, unsigned int flags, + unsigned long *prot, resource_size_t *phys) { pgd_t *pgd; pud_t *pud; @@ -2992,24 +2992,26 @@ static resource_size_t follow_phys(struct vm_area_struct *vma, spinlock_t *ptl; resource_size_t phys_addr = 0; struct mm_struct *mm = vma->vm_mm; + int ret = -EINVAL; - VM_BUG_ON(!(vma->vm_flags & (VM_IO | VM_PFNMAP))); + if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) + goto out; pgd = pgd_offset(mm, address); if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) - goto no_page_table; + goto out; pud = pud_offset(pgd, address); if (pud_none(*pud) || unlikely(pud_bad(*pud))) - goto no_page_table; + goto out; pmd = pmd_offset(pud, address); if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - goto no_page_table; + goto out; /* We cannot handle huge page PFN maps. Luckily they don't exist. */ if (pmd_huge(*pmd)) - goto no_page_table; + goto out; ptep = pte_offset_map_lock(mm, pmd, address, &ptl); if (!ptep) @@ -3024,13 +3026,13 @@ static resource_size_t follow_phys(struct vm_area_struct *vma, phys_addr <<= PAGE_SHIFT; /* Shift here to avoid overflow on PAE */ *prot = pgprot_val(pte_pgprot(pte)); + *phys = phys_addr; + ret = 0; unlock: pte_unmap_unlock(ptep, ptl); out: - return phys_addr; -no_page_table: - return 0; + return ret; } int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, @@ -3041,12 +3043,7 @@ int generic_access_phys(struct vm_area_struct *vma, unsigned long addr, void *maddr; int offset = addr & (PAGE_SIZE-1); - if (!(vma->vm_flags & (VM_IO | VM_PFNMAP))) - return -EINVAL; - - phys_addr = follow_phys(vma, addr, write, &prot); - - if (!phys_addr) + if (follow_phys(vma, addr, write, &prot, &phys_addr)) return -EINVAL; maddr = ioremap_prot(phys_addr, PAGE_SIZE, prot); -- cgit v1.2.3-70-g09d2 From 982d789ab76c8a11426852fec2fdf2f412e21c0c Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 19 Dec 2008 13:47:28 -0800 Subject: x86: PAT: remove follow_pfnmap_pte in favor of follow_phys Impact: Cleanup - removes a new function in favor of a recently modified older one. Replace follow_pfnmap_pte in pat code with follow_phys. follow_phys lso returns protection eliminating the need of pte_pgprot call. Using follow_phys also eliminates the need for pte_pa. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pgtable.h | 5 ----- arch/x86/mm/pat.c | 30 +++++++++++------------------ include/linux/mm.h | 3 --- mm/memory.c | 43 ------------------------------------------ 4 files changed, 11 insertions(+), 70 deletions(-) (limited to 'mm') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 579f8ceee948..2aa792bbd7e0 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -230,11 +230,6 @@ static inline unsigned long pte_pfn(pte_t pte) return (pte_val(pte) & PTE_PFN_MASK) >> PAGE_SHIFT; } -static inline u64 pte_pa(pte_t pte) -{ - return pte_val(pte) & PTE_PFN_MASK; -} - #define pte_page(pte) pfn_to_page(pte_pfn(pte)) static inline int pmd_large(pmd_t pte) diff --git a/arch/x86/mm/pat.c b/arch/x86/mm/pat.c index d5254bae84f4..541bcc944a5b 100644 --- a/arch/x86/mm/pat.c +++ b/arch/x86/mm/pat.c @@ -685,8 +685,7 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) int retval = 0; unsigned long i, j; u64 paddr; - pgprot_t prot; - pte_t pte; + unsigned long prot; unsigned long vma_start = vma->vm_start; unsigned long vma_end = vma->vm_end; unsigned long vma_size = vma_end - vma_start; @@ -696,26 +695,22 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) if (is_linear_pfn_mapping(vma)) { /* - * reserve the whole chunk starting from vm_pgoff, - * But, we have to get the protection from pte. + * reserve the whole chunk covered by vma. We need the + * starting address and protection from pte. */ - if (follow_pfnmap_pte(vma, vma_start, &pte)) { + if (follow_phys(vma, vma_start, 0, &prot, &paddr)) { WARN_ON_ONCE(1); - return -1; + return -EINVAL; } - prot = pte_pgprot(pte); - paddr = (u64)vma->vm_pgoff << PAGE_SHIFT; - return reserve_pfn_range(paddr, vma_size, prot); + return reserve_pfn_range(paddr, vma_size, __pgprot(prot)); } /* reserve entire vma page by page, using pfn and prot from pte */ for (i = 0; i < vma_size; i += PAGE_SIZE) { - if (follow_pfnmap_pte(vma, vma_start + i, &pte)) + if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) continue; - paddr = pte_pa(pte); - prot = pte_pgprot(pte); - retval = reserve_pfn_range(paddr, PAGE_SIZE, prot); + retval = reserve_pfn_range(paddr, PAGE_SIZE, __pgprot(prot)); if (retval) goto cleanup_ret; } @@ -724,10 +719,9 @@ int track_pfn_vma_copy(struct vm_area_struct *vma) cleanup_ret: /* Reserve error: Cleanup partial reservation and return error */ for (j = 0; j < i; j += PAGE_SIZE) { - if (follow_pfnmap_pte(vma, vma_start + j, &pte)) + if (follow_phys(vma, vma_start + j, 0, &prot, &paddr)) continue; - paddr = pte_pa(pte); free_pfn_range(paddr, PAGE_SIZE); } @@ -797,6 +791,7 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, { unsigned long i; u64 paddr; + unsigned long prot; unsigned long vma_start = vma->vm_start; unsigned long vma_end = vma->vm_end; unsigned long vma_size = vma_end - vma_start; @@ -821,12 +816,9 @@ void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, } else { /* free entire vma, page by page, using the pfn from pte */ for (i = 0; i < vma_size; i += PAGE_SIZE) { - pte_t pte; - - if (follow_pfnmap_pte(vma, vma_start + i, &pte)) + if (follow_phys(vma, vma_start + i, 0, &prot, &paddr)) continue; - paddr = pte_pa(pte); free_pfn_range(paddr, PAGE_SIZE); } } diff --git a/include/linux/mm.h b/include/linux/mm.h index 2f6e2f886d4b..36f9b3fa5e15 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1239,9 +1239,6 @@ struct page *follow_page(struct vm_area_struct *, unsigned long address, #define FOLL_GET 0x04 /* do get_page on page */ #define FOLL_ANON 0x08 /* give ZERO_PAGE if no pgtable */ -int follow_pfnmap_pte(struct vm_area_struct *vma, - unsigned long address, pte_t *ret_ptep); - typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); extern int apply_to_page_range(struct mm_struct *mm, unsigned long address, diff --git a/mm/memory.c b/mm/memory.c index 79f28e35d4fc..6b29f39a5a3e 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1168,49 +1168,6 @@ no_page_table: return page; } -int follow_pfnmap_pte(struct vm_area_struct *vma, unsigned long address, - pte_t *ret_ptep) -{ - pgd_t *pgd; - pud_t *pud; - pmd_t *pmd; - pte_t *ptep, pte; - spinlock_t *ptl; - struct page *page; - struct mm_struct *mm = vma->vm_mm; - - if (!is_pfn_mapping(vma)) - goto err; - - page = NULL; - pgd = pgd_offset(mm, address); - if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) - goto err; - - pud = pud_offset(pgd, address); - if (pud_none(*pud) || unlikely(pud_bad(*pud))) - goto err; - - pmd = pmd_offset(pud, address); - if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) - goto err; - - ptep = pte_offset_map_lock(mm, pmd, address, &ptl); - - pte = *ptep; - if (!pte_present(pte)) - goto err_unlock; - - *ret_ptep = pte; - pte_unmap_unlock(ptep, ptl); - return 0; - -err_unlock: - pte_unmap_unlock(ptep, ptl); -err: - return -EINVAL; -} - /* Can we do the FOLL_ANON optimization? */ static inline int use_zero_page(struct vm_area_struct *vma) { -- cgit v1.2.3-70-g09d2 From 34801ba9bf0381fcf0e2b08179d2c07f2c6ede74 Mon Sep 17 00:00:00 2001 From: "venkatesh.pallipadi@intel.com" Date: Fri, 19 Dec 2008 13:47:29 -0800 Subject: x86: PAT: move track untrack pfnmap stubs to asm-generic Impact: Cleanup and branch hints only. Move the track and untrack pfn stub routines from memory.c to asm-generic. Also add unlikely to pfnmap related calls in fork and exit path. Signed-off-by: Venkatesh Pallipadi Signed-off-by: Suresh Siddha Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/pgtable.h | 6 ++---- include/asm-generic/pgtable.h | 46 ++++++++++++++++++++++++++++++++++++++++ include/linux/mm.h | 6 ------ mm/memory.c | 48 ++---------------------------------------- 4 files changed, 50 insertions(+), 56 deletions(-) (limited to 'mm') diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 2aa792bbd7e0..875192bf72cb 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -339,12 +339,10 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #define canon_pgprot(p) __pgprot(pgprot_val(p) & __supported_pte_mask) +#ifndef __ASSEMBLY__ /* Indicate that x86 has its own track and untrack pfn vma functions */ -#define track_pfn_vma_new track_pfn_vma_new -#define track_pfn_vma_copy track_pfn_vma_copy -#define untrack_pfn_vma untrack_pfn_vma +#define __HAVE_PFNMAP_TRACKING -#ifndef __ASSEMBLY__ #define __HAVE_PHYS_MEM_ACCESS_PROT struct file; pgprot_t phys_mem_access_prot(struct file *file, unsigned long pfn, diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index b84633801fb6..72ebe91005a8 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -293,6 +293,52 @@ static inline void ptep_modify_prot_commit(struct mm_struct *mm, #define arch_flush_lazy_cpu_mode() do {} while (0) #endif +#ifndef __HAVE_PFNMAP_TRACKING +/* + * Interface that can be used by architecture code to keep track of + * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) + * + * track_pfn_vma_new is called when a _new_ pfn mapping is being established + * for physical range indicated by pfn and size. + */ +static inline int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, + unsigned long pfn, unsigned long size) +{ + return 0; +} + +/* + * Interface that can be used by architecture code to keep track of + * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) + * + * track_pfn_vma_copy is called when vma that is covering the pfnmap gets + * copied through copy_page_range(). + */ +static inline int track_pfn_vma_copy(struct vm_area_struct *vma) +{ + return 0; +} + +/* + * Interface that can be used by architecture code to keep track of + * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) + * + * untrack_pfn_vma is called while unmapping a pfnmap for a region. + * untrack can be called for a specific region indicated by pfn and size or + * can be for the entire vma (in which case size can be zero). + */ +static inline void untrack_pfn_vma(struct vm_area_struct *vma, + unsigned long pfn, unsigned long size) +{ +} +#else +extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, + unsigned long pfn, unsigned long size); +extern int track_pfn_vma_copy(struct vm_area_struct *vma); +extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, + unsigned long size); +#endif + #endif /* !__ASSEMBLY__ */ #endif /* _ASM_GENERIC_PGTABLE_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 36f9b3fa5e15..d3ddd735e375 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -163,12 +163,6 @@ static inline int is_pfn_mapping(struct vm_area_struct *vma) return (vma->vm_flags & VM_PFNMAP); } -extern int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, - unsigned long pfn, unsigned long size); -extern int track_pfn_vma_copy(struct vm_area_struct *vma); -extern void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, - unsigned long size); - /* * vm_fault is filled by the the pagefault handler and passed to the vma's * ->fault function. The vma's ->fault is responsible for returning a bitmask diff --git a/mm/memory.c b/mm/memory.c index 6b29f39a5a3e..f01b7eed6e16 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -99,50 +99,6 @@ int randomize_va_space __read_mostly = 2; #endif -#ifndef track_pfn_vma_new -/* - * Interface that can be used by architecture code to keep track of - * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) - * - * track_pfn_vma_new is called when a _new_ pfn mapping is being established - * for physical range indicated by pfn and size. - */ -int track_pfn_vma_new(struct vm_area_struct *vma, pgprot_t prot, - unsigned long pfn, unsigned long size) -{ - return 0; -} -#endif - -#ifndef track_pfn_vma_copy -/* - * Interface that can be used by architecture code to keep track of - * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) - * - * track_pfn_vma_copy is called when vma that is covering the pfnmap gets - * copied through copy_page_range(). - */ -int track_pfn_vma_copy(struct vm_area_struct *vma) -{ - return 0; -} -#endif - -#ifndef untrack_pfn_vma -/* - * Interface that can be used by architecture code to keep track of - * memory type of pfn mappings (remap_pfn_range, vm_insert_pfn) - * - * untrack_pfn_vma is called while unmapping a pfnmap for a region. - * untrack can be called for a specific region indicated by pfn and size or - * can be for the entire vma (in which case size can be zero). - */ -void untrack_pfn_vma(struct vm_area_struct *vma, unsigned long pfn, - unsigned long size) -{ -} -#endif - static int __init disable_randmaps(char *s) { randomize_va_space = 0; @@ -713,7 +669,7 @@ int copy_page_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (is_vm_hugetlb_page(vma)) return copy_hugetlb_page_range(dst_mm, src_mm, vma); - if (is_pfn_mapping(vma)) { + if (unlikely(is_pfn_mapping(vma))) { /* * We do not free on error cases below as remove_vma * gets called on error from higher level routine @@ -969,7 +925,7 @@ unsigned long unmap_vmas(struct mmu_gather **tlbp, if (vma->vm_flags & VM_ACCOUNT) *nr_accounted += (end - start) >> PAGE_SHIFT; - if (is_pfn_mapping(vma)) + if (unlikely(is_pfn_mapping(vma))) untrack_pfn_vma(vma, 0, 0); while (start != end) { -- cgit v1.2.3-70-g09d2 From c5dee6177f4bd2095aab7d9be9f6ebdddd6deee9 Mon Sep 17 00:00:00 2001 From: Markus Metzger Date: Fri, 19 Dec 2008 15:17:02 +0100 Subject: x86, bts: memory accounting Impact: move the BTS buffer accounting to the mlock bucket Add alloc_locked_buffer() and free_locked_buffer() functions to mm/mlock.c to kalloc a buffer and account the locked memory to current. Account the memory for the BTS buffer to the tracer. Signed-off-by: Markus Metzger Signed-off-by: Ingo Molnar --- arch/x86/kernel/ptrace.c | 45 ++++++++++++++++++++++++++++++++++----------- include/linux/mm.h | 2 ++ mm/mlock.c | 45 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 81 insertions(+), 11 deletions(-) (limited to 'mm') diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 6ad2bb607650..0a5df5f82fb9 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -650,6 +650,24 @@ static int ptrace_bts_drain(struct task_struct *child, return drained; } +static int ptrace_bts_allocate_buffer(struct task_struct *child, size_t size) +{ + child->bts_buffer = alloc_locked_buffer(size); + if (!child->bts_buffer) + return -ENOMEM; + + child->bts_size = size; + + return 0; +} + +static void ptrace_bts_free_buffer(struct task_struct *child) +{ + free_locked_buffer(child->bts_buffer, child->bts_size); + child->bts_buffer = NULL; + child->bts_size = 0; +} + static int ptrace_bts_config(struct task_struct *child, long cfg_size, const struct ptrace_bts_config __user *ucfg) @@ -679,14 +697,13 @@ static int ptrace_bts_config(struct task_struct *child, if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != child->bts_size)) { - kfree(child->bts_buffer); + int error; - child->bts_size = cfg.size; - child->bts_buffer = kzalloc(cfg.size, GFP_KERNEL); - if (!child->bts_buffer) { - child->bts_size = 0; - return -ENOMEM; - } + ptrace_bts_free_buffer(child); + + error = ptrace_bts_allocate_buffer(child, cfg.size); + if (error < 0) + return error; } if (cfg.flags & PTRACE_BTS_O_TRACE) @@ -701,10 +718,8 @@ static int ptrace_bts_config(struct task_struct *child, if (IS_ERR(child->bts)) { int error = PTR_ERR(child->bts); - kfree(child->bts_buffer); + ptrace_bts_free_buffer(child); child->bts = NULL; - child->bts_buffer = NULL; - child->bts_size = 0; return error; } @@ -784,6 +799,9 @@ static void ptrace_bts_untrace(struct task_struct *child) ds_release_bts(child->bts); child->bts = NULL; + /* We cannot update total_vm and locked_vm since + child's mm is already gone. But we can reclaim the + memory. */ kfree(child->bts_buffer); child->bts_buffer = NULL; child->bts_size = 0; @@ -792,7 +810,12 @@ static void ptrace_bts_untrace(struct task_struct *child) static void ptrace_bts_detach(struct task_struct *child) { - ptrace_bts_untrace(child); + if (unlikely(child->bts)) { + ds_release_bts(child->bts); + child->bts = NULL; + + ptrace_bts_free_buffer(child); + } } #else static inline void ptrace_bts_fork(struct task_struct *tsk) {} diff --git a/include/linux/mm.h b/include/linux/mm.h index ffee2f743418..9979d3fab6e7 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1286,5 +1286,7 @@ int vmemmap_populate_basepages(struct page *start_page, int vmemmap_populate(struct page *start_page, unsigned long pages, int node); void vmemmap_populate_print_last(void); +extern void *alloc_locked_buffer(size_t size); +extern void free_locked_buffer(void *buffer, size_t size); #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/mm/mlock.c b/mm/mlock.c index 1ada366570cb..3035a56e7616 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -667,3 +667,48 @@ void user_shm_unlock(size_t size, struct user_struct *user) spin_unlock(&shmlock_user_lock); free_uid(user); } + +void *alloc_locked_buffer(size_t size) +{ + unsigned long rlim, vm, pgsz; + void *buffer = NULL; + + pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; + + down_write(¤t->mm->mmap_sem); + + rlim = current->signal->rlim[RLIMIT_AS].rlim_cur >> PAGE_SHIFT; + vm = current->mm->total_vm + pgsz; + if (rlim < vm) + goto out; + + rlim = current->signal->rlim[RLIMIT_MEMLOCK].rlim_cur >> PAGE_SHIFT; + vm = current->mm->locked_vm + pgsz; + if (rlim < vm) + goto out; + + buffer = kzalloc(size, GFP_KERNEL); + if (!buffer) + goto out; + + current->mm->total_vm += pgsz; + current->mm->locked_vm += pgsz; + + out: + up_write(¤t->mm->mmap_sem); + return buffer; +} + +void free_locked_buffer(void *buffer, size_t size) +{ + unsigned long pgsz = PAGE_ALIGN(size) >> PAGE_SHIFT; + + down_write(¤t->mm->mmap_sem); + + current->mm->total_vm -= pgsz; + current->mm->locked_vm -= pgsz; + + up_write(¤t->mm->mmap_sem); + + kfree(buffer); +} -- cgit v1.2.3-70-g09d2