From 376b0c266143a1dda162db6d5bc9b3a7f0ae97c9 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 15 Jun 2022 14:22:06 +0300 Subject: proc: delete unused includes Those aren't necessary after seq files won. Link: https://lkml.kernel.org/r/YqnA3mS7KBt8Z4If@localhost.localdomain Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton --- fs/proc/array.c | 1 - fs/proc/inode.c | 2 -- fs/proc/kmsg.c | 1 - fs/proc/nommu.c | 1 - fs/proc/proc_net.c | 3 --- fs/proc/proc_tty.c | 2 -- fs/proc/root.c | 3 --- fs/proc/vmcore.c | 1 - 8 files changed, 14 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/array.c b/fs/proc/array.c index eb815759842c..65fa603422e0 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -69,7 +69,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 73aeb4e6d32e..fd40d60169b5 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -26,8 +26,6 @@ #include #include -#include - #include "internal.h" static void proc_evict_inode(struct inode *inode) diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index b38ad552887f..592e6dc7c110 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -15,7 +15,6 @@ #include #include -#include #include extern wait_queue_head_t log_wait; diff --git a/fs/proc/nommu.c b/fs/proc/nommu.c index 13452b32e2bd..4d3493579458 100644 --- a/fs/proc/nommu.c +++ b/fs/proc/nommu.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include "internal.h" diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index 913e5acefbb6..bbce6fbe779c 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -8,9 +8,6 @@ * * proc net directory handling functions */ - -#include - #include #include #include diff --git a/fs/proc/proc_tty.c b/fs/proc/proc_tty.c index c69ff191e5d8..5c6a5ceab2f1 100644 --- a/fs/proc/proc_tty.c +++ b/fs/proc/proc_tty.c @@ -4,8 +4,6 @@ * * Copyright 1997, Theodore Ts'o */ - -#include #include #include #include diff --git a/fs/proc/root.c b/fs/proc/root.c index c7e3b1350ef8..5a7d15d197f8 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -6,9 +6,6 @@ * * proc root directory handling functions */ - -#include - #include #include #include diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index 4eaeb645e759..f2aa86c421f2 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -25,7 +25,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3-70-g09d2 From d919a1e79bac890421537cf02ae773007bf55e6b Mon Sep 17 00:00:00 2001 From: Zhihao Cheng Date: Wed, 13 Jul 2022 21:00:29 +0800 Subject: proc: fix a dentry lock race between release_task and lookup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 7bc3e6e55acf06 ("proc: Use a list of inodes to flush from proc") moved proc_flush_task() behind __exit_signal(). Then, process systemd can take long period high cpu usage during releasing task in following concurrent processes: systemd ps kernel_waitid stat(/proc/tgid) do_wait filename_lookup wait_consider_task lookup_fast release_task __exit_signal __unhash_process detach_pid __change_pid // remove task->pid_links d_revalidate -> pid_revalidate // 0 d_invalidate(/proc/tgid) shrink_dcache_parent(/proc/tgid) d_walk(/proc/tgid) spin_lock_nested(/proc/tgid/fd) // iterating opened fd proc_flush_pid | d_invalidate (/proc/tgid/fd) | shrink_dcache_parent(/proc/tgid/fd) | shrink_dentry_list(subdirs) ↓ shrink_lock_dentry(/proc/tgid/fd) --> race on dentry lock Function d_invalidate() will remove dentry from hash firstly, but why does proc_flush_pid() process dentry '/proc/tgid/fd' before dentry '/proc/tgid'? That's because proc_pid_make_inode() adds proc inode in reverse order by invoking hlist_add_head_rcu(). But proc should not add any inodes under '/proc/tgid' except '/proc/tgid/task/pid', fix it by adding inode into 'pid->inodes' only if the inode is /proc/tgid or /proc/tgid/task/pid. Performance regression: Create 200 tasks, each task open one file for 50,000 times. Kill all tasks when opened files exceed 10,000,000 (cat /proc/sys/fs/file-nr). Before fix: $ time killall -wq aa real 4m40.946s # During this period, we can see 'ps' and 'systemd' taking high cpu usage. After fix: $ time killall -wq aa real 1m20.732s # During this period, we can see 'systemd' taking high cpu usage. Link: https://lkml.kernel.org/r/20220713130029.4133533-1-chengzhihao1@huawei.com Fixes: 7bc3e6e55acf06 ("proc: Use a list of inodes to flush from proc") Link: https://bugzilla.kernel.org/show_bug.cgi?id=216054 Signed-off-by: Zhihao Cheng Signed-off-by: Zhang Yi Suggested-by: Brian Foster Reviewed-by: Brian Foster Cc: Al Viro Cc: Alexey Dobriyan Cc: Eric Biederman Cc: Matthew Wilcox Cc: Baoquan He Cc: Kalesh Singh Cc: Yu Kuai Signed-off-by: Andrew Morton --- fs/proc/base.c | 46 ++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 8 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/base.c b/fs/proc/base.c index 8dfa36a99c74..93f7e3d971e4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1885,7 +1885,7 @@ void proc_pid_evict_inode(struct proc_inode *ei) put_pid(pid); } -struct inode *proc_pid_make_inode(struct super_block * sb, +struct inode *proc_pid_make_inode(struct super_block *sb, struct task_struct *task, umode_t mode) { struct inode * inode; @@ -1914,11 +1914,6 @@ struct inode *proc_pid_make_inode(struct super_block * sb, /* Let the pid remember us for quick removal */ ei->pid = pid; - if (S_ISDIR(mode)) { - spin_lock(&pid->lock); - hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); - spin_unlock(&pid->lock); - } task_dump_owner(task, 0, &inode->i_uid, &inode->i_gid); security_task_to_inode(task, inode); @@ -1931,6 +1926,39 @@ out_unlock: return NULL; } +/* + * Generating an inode and adding it into @pid->inodes, so that task will + * invalidate inode's dentry before being released. + * + * This helper is used for creating dir-type entries under '/proc' and + * '/proc//task'. Other entries(eg. fd, stat) under '/proc/' + * can be released by invalidating '/proc/' dentry. + * In theory, dentries under '/proc//task' can also be released by + * invalidating '/proc/' dentry, we reserve it to handle single + * thread exiting situation: Any one of threads should invalidate its + * '/proc//task/' dentry before released. + */ +static struct inode *proc_pid_make_base_inode(struct super_block *sb, + struct task_struct *task, umode_t mode) +{ + struct inode *inode; + struct proc_inode *ei; + struct pid *pid; + + inode = proc_pid_make_inode(sb, task, mode); + if (!inode) + return NULL; + + /* Let proc_flush_pid find this directory inode */ + ei = PROC_I(inode); + pid = ei->pid; + spin_lock(&pid->lock); + hlist_add_head_rcu(&ei->sibling_inodes, &pid->inodes); + spin_unlock(&pid->lock); + + return inode; +} + int pid_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat, u32 request_mask, unsigned int query_flags) { @@ -3369,7 +3397,8 @@ static struct dentry *proc_pid_instantiate(struct dentry * dentry, { struct inode *inode; - inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); + inode = proc_pid_make_base_inode(dentry->d_sb, task, + S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) return ERR_PTR(-ENOENT); @@ -3671,7 +3700,8 @@ static struct dentry *proc_task_instantiate(struct dentry *dentry, struct task_struct *task, const void *ptr) { struct inode *inode; - inode = proc_pid_make_inode(dentry->d_sb, task, S_IFDIR | S_IRUGO | S_IXUGO); + inode = proc_pid_make_base_inode(dentry->d_sb, task, + S_IFDIR | S_IRUGO | S_IXUGO); if (!inode) return ERR_PTR(-ENOENT); -- cgit v1.2.3-70-g09d2 From ed8fb78d7ecdeb3e2e86df0027e2c2cc55f9908b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 23 Jul 2022 20:09:07 +0300 Subject: proc: add some (hopefully) insightful comments * /proc/${pid}/net status * removing PDE vs last close stuff (again!) * random small stuff Link: https://lkml.kernel.org/r/YtwrM6sDC0OQ53YB@localhost.localdomain Signed-off-by: Alexey Dobriyan Signed-off-by: Andrew Morton --- fs/proc/array.c | 4 ++++ fs/proc/inode.c | 17 ++++++++++++----- fs/proc/proc_net.c | 6 ++++++ fs/proc/root.c | 5 +++++ 4 files changed, 27 insertions(+), 5 deletions(-) (limited to 'fs/proc') diff --git a/fs/proc/array.c b/fs/proc/array.c index 65fa603422e0..99fcbfda8e25 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -99,6 +99,10 @@ void proc_task_name(struct seq_file *m, struct task_struct *p, bool escape) { char tcomm[64]; + /* + * Test before PF_KTHREAD because all workqueue worker threads are + * kernel threads. + */ if (p->flags & PF_WQ_WORKER) wq_worker_comm(tcomm, sizeof(tcomm), p); else if (p->flags & PF_KTHREAD) diff --git a/fs/proc/inode.c b/fs/proc/inode.c index fd40d60169b5..f130499ad843 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -212,7 +212,15 @@ static void unuse_pde(struct proc_dir_entry *pde) complete(pde->pde_unload_completion); } -/* pde is locked on entry, unlocked on exit */ +/* + * At most 2 contexts can enter this function: the one doing the last + * close on the descriptor and whoever is deleting PDE itself. + * + * First to enter calls ->proc_release hook and signals its completion + * to the second one which waits and then does nothing. + * + * PDE is locked on entry, unlocked on exit. + */ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) __releases(&pde->pde_unload_lock) { @@ -222,9 +230,6 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) * * rmmod (remove_proc_entry() et al) can't delete an entry and proceed: * "struct file" needs to be available at the right moment. - * - * Therefore, first process to enter this function does ->release() and - * signals its completion to the other process which does nothing. */ if (pdeo->closing) { /* somebody else is doing that, just wait */ @@ -238,10 +243,12 @@ static void close_pdeo(struct proc_dir_entry *pde, struct pde_opener *pdeo) pdeo->closing = true; spin_unlock(&pde->pde_unload_lock); + file = pdeo->file; pde->proc_ops->proc_release(file_inode(file), file); + spin_lock(&pde->pde_unload_lock); - /* After ->release. */ + /* Strictly after ->proc_release, see above. */ list_del(&pdeo->lh); c = pdeo->c; spin_unlock(&pde->pde_unload_lock); diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index bbce6fbe779c..856839b8ae8b 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -350,6 +350,12 @@ static __net_init int proc_net_ns_init(struct net *net) kgid_t gid; int err; + /* + * This PDE acts only as an anchor for /proc/${pid}/net hierarchy. + * Corresponding inode (PDE(inode) == net->proc_net) is never + * instantiated therefore blanket zeroing is fine. + * net->proc_net_stat inode is instantiated normally. + */ err = -ENOMEM; netd = kmem_cache_zalloc(proc_dir_entry_cache, GFP_KERNEL); if (!netd) diff --git a/fs/proc/root.c b/fs/proc/root.c index 5a7d15d197f8..3c2ee3eb1138 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -302,6 +302,11 @@ void __init proc_root_init(void) proc_mkdir("bus", NULL); proc_sys_init(); + /* + * Last things last. It is not like userspace processes eager + * to open /proc files exist at this point but register last + * anyway. + */ register_filesystem(&proc_fs_type); } -- cgit v1.2.3-70-g09d2