From e3912ac37e07a13c70675cd75020694de4841c74 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:36:51 -0800 Subject: proc: use %u for pid printing and slightly less stack PROC_NUMBUF is 13 which is enough for "negative int + \n + \0". However PIDs and TGIDs are never negative and newline is not a concern, so use just 10 per integer. Link: http://lkml.kernel.org/r/20171120203005.GA27743@avx2 Signed-off-by: Alexey Dobriyan Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 16 ++++++++-------- fs/proc/fd.c | 2 +- fs/proc/self.c | 6 +++--- fs/proc/thread_self.c | 5 ++--- 4 files changed, 14 insertions(+), 15 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/base.c b/fs/proc/base.c index 60316b52d659..fe56f3c7002a 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -3018,11 +3018,11 @@ static const struct inode_operations proc_tgid_base_inode_operations = { static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) { struct dentry *dentry, *leader, *dir; - char buf[PROC_NUMBUF]; + char buf[10 + 1]; struct qstr name; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", pid); + name.len = snprintf(buf, sizeof(buf), "%u", pid); /* no ->d_hash() rejects on procfs */ dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { @@ -3034,7 +3034,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) return; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", tgid); + name.len = snprintf(buf, sizeof(buf), "%u", tgid); leader = d_hash_and_lookup(mnt->mnt_root, &name); if (!leader) goto out; @@ -3046,7 +3046,7 @@ static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) goto out_put_leader; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", pid); + name.len = snprintf(buf, sizeof(buf), "%u", pid); dentry = d_hash_and_lookup(dir, &name); if (dentry) { d_invalidate(dentry); @@ -3225,14 +3225,14 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx) for (iter = next_tgid(ns, iter); iter.task; iter.tgid += 1, iter = next_tgid(ns, iter)) { - char name[PROC_NUMBUF]; + char name[10 + 1]; int len; cond_resched(); if (!has_pid_permissions(ns, iter.task, HIDEPID_INVISIBLE)) continue; - len = snprintf(name, sizeof(name), "%d", iter.tgid); + len = snprintf(name, sizeof(name), "%u", iter.tgid); ctx->pos = iter.tgid + TGID_OFFSET; if (!proc_fill_cache(file, ctx, name, len, proc_pid_instantiate, iter.task, NULL)) { @@ -3560,10 +3560,10 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) for (task = first_tid(proc_pid(inode), tid, ctx->pos - 2, ns); task; task = next_tid(task), ctx->pos++) { - char name[PROC_NUMBUF]; + char name[10 + 1]; int len; tid = task_pid_nr_ns(task, ns); - len = snprintf(name, sizeof(name), "%d", tid); + len = snprintf(name, sizeof(name), "%u", tid); if (!proc_fill_cache(file, ctx, name, len, proc_task_instantiate, task, NULL)) { /* returning this tgid failed, save it as the first diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 96fc70225e54..6b80cd1e419a 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -236,7 +236,7 @@ static int proc_readfd_common(struct file *file, struct dir_context *ctx, for (fd = ctx->pos - 2; fd < files_fdtable(files)->max_fds; fd++, ctx->pos++) { - char name[PROC_NUMBUF]; + char name[10 + 1]; int len; if (!fcheck_files(files, fd)) diff --git a/fs/proc/self.c b/fs/proc/self.c index 31326bb23b8b..d30627aa440b 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -17,11 +17,11 @@ static const char *proc_self_get_link(struct dentry *dentry, if (!tgid) return ERR_PTR(-ENOENT); - /* 11 for max length of signed int in decimal + NULL term */ - name = kmalloc(12, dentry ? GFP_KERNEL : GFP_ATOMIC); + /* max length of unsigned int in decimal + NULL term */ + name = kmalloc(10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC); if (unlikely(!name)) return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD); - sprintf(name, "%d", tgid); + sprintf(name, "%u", tgid); set_delayed_call(done, kfree_link, name); return name; } diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index b813e3b529f2..6c1a54716337 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -18,11 +18,10 @@ static const char *proc_thread_self_get_link(struct dentry *dentry, if (!pid) return ERR_PTR(-ENOENT); - name = kmalloc(PROC_NUMBUF + 6 + PROC_NUMBUF, - dentry ? GFP_KERNEL : GFP_ATOMIC); + name = kmalloc(10 + 6 + 10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC); if (unlikely(!name)) return dentry ? ERR_PTR(-ENOMEM) : ERR_PTR(-ECHILD); - sprintf(name, "%d/task/%d", tgid, pid); + sprintf(name, "%u/task/%u", tgid, pid); set_delayed_call(done, kfree_link, name); return name; } -- cgit v1.2.3-70-g09d2 From 9f7118b2007d5e7c7a061550d2ca2ecb841537dc Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:36:55 -0800 Subject: proc: don't use READ_ONCE/WRITE_ONCE for /proc/*/fail-nth READ_ONCE and WRITE_ONCE are useless when there is only one read/write is being made. Link: http://lkml.kernel.org/r/20171120204033.GA9446@avx2 Signed-off-by: Alexey Dobriyan Cc: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/base.c b/fs/proc/base.c index fe56f3c7002a..373091249bdb 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1370,7 +1370,7 @@ static ssize_t proc_fail_nth_write(struct file *file, const char __user *buf, task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; - WRITE_ONCE(task->fail_nth, n); + task->fail_nth = n; put_task_struct(task); return count; @@ -1386,8 +1386,7 @@ static ssize_t proc_fail_nth_read(struct file *file, char __user *buf, task = get_proc_task(file_inode(file)); if (!task) return -ESRCH; - len = snprintf(numbuf, sizeof(numbuf), "%u\n", - READ_ONCE(task->fail_nth)); + len = snprintf(numbuf, sizeof(numbuf), "%u\n", task->fail_nth); len = simple_read_from_buffer(buf, count, ppos, numbuf, len); put_task_struct(task); -- cgit v1.2.3-70-g09d2 From ac7f1061c2c11bb8936b1b6a94cdb48de732f7a4 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:36:59 -0800 Subject: proc: fix /proc/*/map_files lookup Current code does: if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2) However sscanf() is broken garbage. It silently accepts whitespace between format specifiers (did you know that?). It silently accepts valid strings which result in integer overflow. Do not use sscanf() for any even remotely reliable parsing code. OK # readlink '/proc/1/map_files/55a23af39000-55a23b05b000' /lib/systemd/systemd broken # readlink '/proc/1/map_files/ 55a23af39000-55a23b05b000' /lib/systemd/systemd broken # readlink '/proc/1/map_files/55a23af39000-55a23b05b000 ' /lib/systemd/systemd very broken # readlink '/proc/1/map_files/1000000000000000055a23af39000-55a23b05b000' /lib/systemd/systemd Andrei said: : This patch breaks criu. It was a bug in criu. And this bug is on a minor : path, which works when memfd_create() isn't available. It is a reason why : I ask to not backport this patch to stable kernels. : : In CRIU this bug can be triggered, only if this patch will be backported : to a kernel which version is lower than v3.16. Link: http://lkml.kernel.org/r/20171120212706.GA14325@avx2 Signed-off-by: Alexey Dobriyan Cc: Pavel Emelyanov Cc: Andrei Vagin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'fs/proc') diff --git a/fs/proc/base.c b/fs/proc/base.c index 373091249bdb..4c12cb2cd704 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -100,6 +100,8 @@ #include "internal.h" #include "fd.h" +#include "../../lib/kstrtox.h" + /* NOTE: * Implementing inode permission operations in /proc is almost * certainly an error. Permission checks need to happen during @@ -1906,8 +1908,33 @@ end_instantiate: static int dname_to_vma_addr(struct dentry *dentry, unsigned long *start, unsigned long *end) { - if (sscanf(dentry->d_name.name, "%lx-%lx", start, end) != 2) + const char *str = dentry->d_name.name; + unsigned long long sval, eval; + unsigned int len; + + len = _parse_integer(str, 16, &sval); + if (len & KSTRTOX_OVERFLOW) + return -EINVAL; + if (sval != (unsigned long)sval) + return -EINVAL; + str += len; + + if (*str != '-') return -EINVAL; + str++; + + len = _parse_integer(str, 16, &eval); + if (len & KSTRTOX_OVERFLOW) + return -EINVAL; + if (eval != (unsigned long)eval) + return -EINVAL; + str += len; + + if (*str != '\0') + return -EINVAL; + + *start = sval; + *end = eval; return 0; } -- cgit v1.2.3-70-g09d2 From 593bc695a1102a540f1613c651e73693b17a7343 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:02 -0800 Subject: fs/proc/vmcore.c: simpler /proc/vmcore cleanup Iterators aren't necessary as you can just grab the first entry and delete it until no entries left. Link: http://lkml.kernel.org/r/20171121191121.GA20757@avx2 Signed-off-by: Alexey Dobriyan Cc: Mahesh Salgaonkar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/vmcore.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 885d445afa0d..a45f0af22a60 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -1178,18 +1178,16 @@ fs_initcall(vmcore_init); /* Cleanup function for vmcore module. */ void vmcore_cleanup(void) { - struct list_head *pos, *next; - if (proc_vmcore) { proc_remove(proc_vmcore); proc_vmcore = NULL; } /* clear the vmcore list. */ - list_for_each_safe(pos, next, &vmcore_list) { + while (!list_empty(&vmcore_list)) { struct vmcore *m; - m = list_entry(pos, struct vmcore, list); + m = list_first_entry(&vmcore_list, struct vmcore, list); list_del(&m->list); kfree(m); } -- cgit v1.2.3-70-g09d2 From 20d28cde5558a2a211620254ec7bc53a4334167f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:06 -0800 Subject: proc: less memory for /proc/*/map_files readdir dentry name can be evaluated later, right before calling into VFS. Also, spend less time under ->mmap_sem. Link: http://lkml.kernel.org/r/20171110163034.GA2534@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/base.c b/fs/proc/base.c index 4c12cb2cd704..a3efc2427c74 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2026,9 +2026,9 @@ out: } struct map_files_info { + unsigned long start; + unsigned long end; fmode_t mode; - unsigned int len; - unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */ }; /* @@ -2198,10 +2198,9 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) if (++pos <= ctx->pos) continue; + info.start = vma->vm_start; + info.end = vma->vm_end; info.mode = vma->vm_file->f_mode; - info.len = snprintf(info.name, - sizeof(info.name), "%lx-%lx", - vma->vm_start, vma->vm_end); if (flex_array_put(fa, i++, &info, GFP_KERNEL)) BUG(); } @@ -2209,9 +2208,13 @@ proc_map_files_readdir(struct file *file, struct dir_context *ctx) up_read(&mm->mmap_sem); for (i = 0; i < nr_files; i++) { + char buf[4 * sizeof(long) + 2]; /* max: %lx-%lx\0 */ + unsigned int len; + p = flex_array_get(fa, i); + len = snprintf(buf, sizeof(buf), "%lx-%lx", p->start, p->end); if (!proc_fill_cache(file, ctx, - p->name, p->len, + buf, len, proc_map_files_instantiate, task, (void *)(unsigned long)p->mode)) -- cgit v1.2.3-70-g09d2 From 171ef917dfe721b1437b0066f7bc5684d776bba8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:10 -0800 Subject: fs/proc/array.c: delete children_seq_release() It is 1:1 wrapper around seq_release(). Link: http://lkml.kernel.org/r/20171122171510.GA12161@avx2 Signed-off-by: Alexey Dobriyan Acked-by: Cyrill Gorcunov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/array.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/array.c b/fs/proc/array.c index d67a72dcb92c..598803576e4c 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -736,16 +736,10 @@ static int children_seq_open(struct inode *inode, struct file *file) return ret; } -int children_seq_release(struct inode *inode, struct file *file) -{ - seq_release(inode, file); - return 0; -} - const struct file_operations proc_tid_children_operations = { .open = children_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = children_seq_release, + .release = seq_release, }; #endif /* CONFIG_PROC_CHILDREN */ -- cgit v1.2.3-70-g09d2 From d0290bc20d4739b7a900ae37eb5d4cc3be2b393f Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 6 Feb 2018 15:37:13 -0800 Subject: fs/proc/kcore.c: use probe_kernel_read() instead of memcpy() Commit df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data") added a bounce buffer to avoid hardened usercopy checks. Copying to the bounce buffer was implemented with a simple memcpy() assuming that it is always valid to read from kernel memory iff the kern_addr_valid() check passed. A simple, but pointless, test case like "dd if=/proc/kcore of=/dev/null" now can easily crash the kernel, since the former execption handling on invalid kernel addresses now doesn't work anymore. Also adding a kern_addr_valid() implementation wouldn't help here. Most architectures simply return 1 here, while a couple implemented a page table walk to figure out if something is mapped at the address in question. With DEBUG_PAGEALLOC active mappings are established and removed all the time, so that relying on the result of kern_addr_valid() before executing the memcpy() also doesn't work. Therefore simply use probe_kernel_read() to copy to the bounce buffer. This also allows to simplify read_kcore(). At least on s390 this fixes the observed crashes and doesn't introduce warnings that were removed with df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data"), even though the generic probe_kernel_read() implementation uses uaccess functions. While looking into this I'm also wondering if kern_addr_valid() could be completely removed...(?) Link: http://lkml.kernel.org/r/20171202132739.99971-1-heiko.carstens@de.ibm.com Fixes: df04abfd181a ("fs/proc/kcore.c: Add bounce buffer for ktext data") Fixes: f5509cc18daa ("mm: Hardened usercopy") Signed-off-by: Heiko Carstens Acked-by: Kees Cook Cc: Jiri Olsa Cc: Al Viro Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/kcore.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 4bc85cb8be6a..e8a93bc8285d 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -512,23 +512,15 @@ read_kcore(struct file *file, char __user *buffer, size_t buflen, loff_t *fpos) return -EFAULT; } else { if (kern_addr_valid(start)) { - unsigned long n; - /* * Using bounce buffer to bypass the * hardened user copy kernel text checks. */ - memcpy(buf, (char *) start, tsz); - n = copy_to_user(buffer, buf, tsz); - /* - * We cannot distinguish between fault on source - * and fault on destination. When this happens - * we clear too and hope it will trigger the - * EFAULT again. - */ - if (n) { - if (clear_user(buffer + tsz - n, - n)) + if (probe_kernel_read(buf, (void *) start, tsz)) { + if (clear_user(buffer, tsz)) + return -EFAULT; + } else { + if (copy_to_user(buffer, buf, tsz)) return -EFAULT; } } else { -- cgit v1.2.3-70-g09d2 From 163cf548db888710695d5dbe907cda4262d45b52 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:18 -0800 Subject: fs/proc/internal.h: rearrange struct proc_dir_entry struct proc_dir_entry became bit messy over years: * move 16-bit ->mode_t before namelen to get rid of padding * make ->in_use first field: it seems to be most used resulting in smaller code on x86_64 (defconfig): add/remove: 0/0 grow/shrink: 7/13 up/down: 24/-67 (-43) Function old new delta proc_readdir_de 451 455 +4 proc_get_inode 282 286 +4 pde_put 65 69 +4 remove_proc_subtree 294 297 +3 remove_proc_entry 297 300 +3 proc_register 295 298 +3 proc_notify_change 94 97 +3 unuse_pde 27 26 -1 proc_reg_write 89 85 -4 proc_reg_unlocked_ioctl 85 81 -4 proc_reg_read 89 85 -4 proc_reg_llseek 87 83 -4 proc_reg_get_unmapped_area 123 119 -4 proc_entry_rundown 139 135 -4 proc_reg_poll 91 85 -6 proc_reg_mmap 79 73 -6 proc_get_link 55 49 -6 proc_reg_release 108 101 -7 proc_reg_open 298 291 -7 close_pdeo 228 218 -10 * move writeable fields together to a first cacheline (on x86_64), those include * ->in_use: reference count, taken every open/read/write/close etc * ->count: reference count, taken at readdir on every entry * ->pde_openers: tracks (nearly) every open, dirtied * ->pde_unload_lock: spinlock protecting ->pde_openers * ->proc_iops, ->proc_fops, ->data: writeonce fields, used right together with previous group. * other rarely written fields go into 1st/2nd and 2nd/3rd cacheline on 32-bit and 64-bit respectively. Additionally on 32-bit, ->subdir, ->subdir_node, ->namelen, ->name go fully into 2nd cacheline, separated from writeable fields. They are all used during lookup. Link: http://lkml.kernel.org/r/20171220215914.GA7877@avx2 Signed-off-by: Alexey Dobriyan Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/internal.h | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 4a67188c8d74..a290a1e921a5 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -31,24 +31,27 @@ struct mempolicy; * subdir_node is used to build the rb tree "subdir" of the parent. */ struct proc_dir_entry { + /* + * number of callers into module in progress; + * negative -> it's going away RSN + */ + atomic_t in_use; + atomic_t count; /* use count */ + struct list_head pde_openers; /* who did ->open, but not ->release */ + spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ + struct completion *pde_unload_completion; + const struct inode_operations *proc_iops; + const struct file_operations *proc_fops; + void *data; unsigned int low_ino; - umode_t mode; nlink_t nlink; kuid_t uid; kgid_t gid; loff_t size; - const struct inode_operations *proc_iops; - const struct file_operations *proc_fops; struct proc_dir_entry *parent; struct rb_root_cached subdir; struct rb_node subdir_node; - void *data; - atomic_t count; /* use count */ - atomic_t in_use; /* number of callers into module in progress; */ - /* negative -> it's going away RSN */ - struct completion *pde_unload_completion; - struct list_head pde_openers; /* who did ->open, but not ->release */ - spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ + umode_t mode; u8 namelen; char name[]; } __randomize_layout; -- cgit v1.2.3-70-g09d2 From 53f63345d893df36b58e81ddb3d11dcd2e9cc966 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:21 -0800 Subject: fs/proc/internal.h: fix up comment Document what ->pde_unload_lock actually does. Link: http://lkml.kernel.org/r/20180103185120.GB31849@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/internal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'fs/proc') diff --git a/fs/proc/internal.h b/fs/proc/internal.h index a290a1e921a5..5ba317874f0d 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -38,7 +38,8 @@ struct proc_dir_entry { atomic_t in_use; atomic_t count; /* use count */ struct list_head pde_openers; /* who did ->open, but not ->release */ - spinlock_t pde_unload_lock; /* proc_fops checks and pde_users bumps */ + /* protects ->pde_openers and all struct pde_opener instances */ + spinlock_t pde_unload_lock; struct completion *pde_unload_completion; const struct inode_operations *proc_iops; const struct file_operations *proc_fops; -- cgit v1.2.3-70-g09d2 From efb1a57d90cae6af1ddd32f1b920c924a711aba5 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:24 -0800 Subject: fs/proc: use __ro_after_init /proc/self inode numbers, value of proc_inode_cache and st_nlink of /proc/$TGID are fixed constants. Link: http://lkml.kernel.org/r/20180103184707.GA31849@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/base.c | 5 +++-- fs/proc/inode.c | 3 ++- fs/proc/self.c | 3 ++- fs/proc/thread_self.c | 3 ++- 4 files changed, 9 insertions(+), 5 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/base.c b/fs/proc/base.c index a3efc2427c74..9298324325ed 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include #include @@ -112,8 +113,8 @@ * in /proc for a task before it execs a suid executable. */ -static u8 nlink_tid; -static u8 nlink_tgid; +static u8 nlink_tid __ro_after_init; +static u8 nlink_tgid __ro_after_init; struct pid_entry { const char *name; diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 8dacaabb9f37..c5c8e7af5520 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -5,6 +5,7 @@ * Copyright (C) 1991, 1992 Linus Torvalds */ +#include #include #include #include @@ -52,7 +53,7 @@ static void proc_evict_inode(struct inode *inode) } } -static struct kmem_cache * proc_inode_cachep; +static struct kmem_cache *proc_inode_cachep __ro_after_init; static struct inode *proc_alloc_inode(struct super_block *sb) { diff --git a/fs/proc/self.c b/fs/proc/self.c index d30627aa440b..4d7d061696b3 100644 --- a/fs/proc/self.c +++ b/fs/proc/self.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -30,7 +31,7 @@ static const struct inode_operations proc_self_inode_operations = { .get_link = proc_self_get_link, }; -static unsigned self_inum; +static unsigned self_inum __ro_after_init; int proc_setup_self(struct super_block *s) { diff --git a/fs/proc/thread_self.c b/fs/proc/thread_self.c index 6c1a54716337..9d2efaca499f 100644 --- a/fs/proc/thread_self.c +++ b/fs/proc/thread_self.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include #include #include #include @@ -30,7 +31,7 @@ static const struct inode_operations proc_thread_self_inode_operations = { .get_link = proc_thread_self_get_link, }; -static unsigned thread_self_inum; +static unsigned thread_self_inum __ro_after_init; int proc_setup_thread_self(struct super_block *s) { -- cgit v1.2.3-70-g09d2 From 15b158b4e6274351fc3cf652cbabc57104efb547 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:28 -0800 Subject: proc: spread likely/unlikely a bit use_pde() is used at every open/read/write/... of every random /proc file. Negative refcount happens only if PDE is being deleted by module (read: never). So it gets "likely". unuse_pde() gets "unlikely" for the same reason. close_pdeo() gets unlikely as the completion is filled only if there is a race between PDE removal and close() (read: never ever). It even saves code on x86_64 defconfig: add/remove: 0/0 grow/shrink: 1/2 up/down: 2/-20 (-18) Function old new delta close_pdeo 183 185 +2 proc_reg_get_unmapped_area 119 111 -8 proc_reg_poll 85 73 -12 Link: http://lkml.kernel.org/r/20180104175657.GA5204@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/inode.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/inode.c b/fs/proc/inode.c index c5c8e7af5520..6e8724958116 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -129,12 +129,12 @@ enum {BIAS = -1U<<31}; static inline int use_pde(struct proc_dir_entry *pde) { - return atomic_inc_unless_negative(&pde->in_use); + return likely(atomic_inc_unless_negative(&pde->in_use)); } static void unuse_pde(struct proc_dir_entry *pde) { - if (atomic_dec_return(&pde->in_use) == BIAS) + if (unlikely(atomic_dec_return(&pde->in_use) == BIAS)) complete(pde->pde_unload_completion); } @@ -167,7 +167,7 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) spin_lock(&pde->pde_unload_lock); /* After ->release. */ list_del(&pdeo->lh); - if (pdeo->c) + if (unlikely(pdeo->c)) complete(pdeo->c); kfree(pdeo); } @@ -421,7 +421,7 @@ static const char *proc_get_link(struct dentry *dentry, struct delayed_call *done) { struct proc_dir_entry *pde = PDE(inode); - if (unlikely(!use_pde(pde))) + if (!use_pde(pde)) return ERR_PTR(-EINVAL); set_delayed_call(done, proc_put_link, pde); return pde->data; -- cgit v1.2.3-70-g09d2 From 93ad5bc6d4addb74e30d421cd3ba5249c961fb3e Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 6 Feb 2018 15:37:31 -0800 Subject: proc: rearrange args Rearrange args for smaller code. lookup revolves around memcmp() which gets len 3rd arg, so propagate length as 3rd arg. readdir and lookup add additional arg to VFS ->readdir and ->lookup, so better add it to the end. Space savings on x86_64: add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-18 (-18) Function old new delta proc_readdir 22 13 -9 proc_lookup 18 9 -9 proc_match() is smaller if not inlined, I promise! Link: http://lkml.kernel.org/r/20180104175958.GB5204@avx2 Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/generic.c | 18 +++++++++--------- fs/proc/internal.h | 5 ++--- fs/proc/proc_net.c | 4 ++-- 3 files changed, 13 insertions(+), 14 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 793a67574668..5d709fa8f3a2 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -28,7 +28,7 @@ static DEFINE_RWLOCK(proc_subdir_lock); -static int proc_match(unsigned int len, const char *name, struct proc_dir_entry *de) +static int proc_match(const char *name, struct proc_dir_entry *de, unsigned int len) { if (len < de->namelen) return -1; @@ -60,7 +60,7 @@ static struct proc_dir_entry *pde_subdir_find(struct proc_dir_entry *dir, struct proc_dir_entry *de = rb_entry(node, struct proc_dir_entry, subdir_node); - int result = proc_match(len, name, de); + int result = proc_match(name, de, len); if (result < 0) node = node->rb_left; @@ -84,7 +84,7 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir, struct proc_dir_entry *this = rb_entry(*new, struct proc_dir_entry, subdir_node); - int result = proc_match(de->namelen, de->name, this); + int result = proc_match(de->name, this, de->namelen); parent = *new; if (result < 0) @@ -211,8 +211,8 @@ void proc_free_inum(unsigned int inum) * Don't create negative dentries here, return -ENOENT by hand * instead. */ -struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, - struct dentry *dentry) +struct dentry *proc_lookup_de(struct inode *dir, struct dentry *dentry, + struct proc_dir_entry *de) { struct inode *inode; @@ -235,7 +235,7 @@ struct dentry *proc_lookup_de(struct proc_dir_entry *de, struct inode *dir, struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags) { - return proc_lookup_de(PDE(dir), dir, dentry); + return proc_lookup_de(dir, dentry, PDE(dir)); } /* @@ -247,8 +247,8 @@ struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry, * value of the readdir() call, as long as it's non-negative * for success.. */ -int proc_readdir_de(struct proc_dir_entry *de, struct file *file, - struct dir_context *ctx) +int proc_readdir_de(struct file *file, struct dir_context *ctx, + struct proc_dir_entry *de) { int i; @@ -292,7 +292,7 @@ int proc_readdir(struct file *file, struct dir_context *ctx) { struct inode *inode = file_inode(file); - return proc_readdir_de(PDE(inode), file, ctx); + return proc_readdir_de(file, ctx, PDE(inode)); } /* diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5ba317874f0d..d697c8ab0a14 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -153,10 +153,9 @@ extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, i * generic.c */ extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int); -extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *, - struct dentry *); +struct dentry *proc_lookup_de(struct inode *, struct dentry *, struct proc_dir_entry *); extern int proc_readdir(struct file *, struct dir_context *); -extern int proc_readdir_de(struct proc_dir_entry *, struct file *, struct dir_context *); +int proc_readdir_de(struct file *, struct dir_context *, struct proc_dir_entry *); static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde) { diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index a2bf369c923d..68c06ae7888c 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -135,7 +135,7 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir, de = ERR_PTR(-ENOENT); net = get_proc_task_net(dir); if (net != NULL) { - de = proc_lookup_de(net->proc_net, dir, dentry); + de = proc_lookup_de(dir, dentry, net->proc_net); put_net(net); } return de; @@ -172,7 +172,7 @@ static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx) ret = -EINVAL; net = get_proc_task_net(file_inode(file)); if (net != NULL) { - ret = proc_readdir_de(net->proc_net, file, ctx); + ret = proc_readdir_de(file, ctx, net->proc_net); put_net(net); } return ret; -- cgit v1.2.3-70-g09d2 From 4bf8ba811ac1102d7de6f73af3b9f323463e16c0 Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Tue, 6 Feb 2018 15:37:34 -0800 Subject: fs/proc/consoles.c: use seq_putc() in show_console_dev() A single character (line break) should be put into a sequence. Thus use the corresponding function "seq_putc". This issue was detected by using the Coccinelle software. Link: http://lkml.kernel.org/r/04fb69fe-d820-9141-820f-07e9a48f4635@users.sourceforge.net Signed-off-by: Markus Elfring Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/proc/consoles.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/consoles.c b/fs/proc/consoles.c index 290ba85cb900..a8ac48aebd59 100644 --- a/fs/proc/consoles.c +++ b/fs/proc/consoles.c @@ -55,8 +55,7 @@ static int show_console_dev(struct seq_file *m, void *v) if (dev) seq_printf(m, " %4d:%d", MAJOR(dev), MINOR(dev)); - seq_printf(m, "\n"); - + seq_putc(m, '\n'); return 0; } -- cgit v1.2.3-70-g09d2