From 2896b0f09f2617f953b8038978106cc4cbb4c52b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 5 May 2017 13:31:23 -0500 Subject: pids: Initialize leader_pid in init_task This is cheap and no cost so we might as well. Signed-off-by: "Eric W. Biederman" --- init/init_task.c | 1 + 1 file changed, 1 insertion(+) (limited to 'init') diff --git a/init/init_task.c b/init/init_task.c index 74f60baa2799..7914ffb8dc73 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -33,6 +33,7 @@ static struct signal_struct init_signals = { }, #endif INIT_CPU_TIMERS(init_signals) + .leader_pid = &init_struct_pid, INIT_PREV_CPUTIME(init_signals) }; -- cgit v1.2.3-70-g09d2 From 2c4704756cab7cfa031ada4dab361562f0e357c0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 26 Sep 2017 13:06:43 -0500 Subject: pids: Move the pgrp and session pid pointers from task_struct to signal_struct To access these fields the code always has to go to group leader so going to signal struct is no loss and is actually a fundamental simplification. This saves a little bit of memory by only allocating the pid pointer array once instead of once for every thread, and even better this removes a few potential races caused by the fact that group_leader can be changed by de_thread, while signal_struct can not. Signed-off-by: "Eric W. Biederman" --- arch/ia64/kernel/asm-offsets.c | 2 +- arch/ia64/kernel/fsys.S | 4 ++-- fs/autofs/autofs_i.h | 1 + include/linux/init_task.h | 9 --------- include/linux/pid.h | 8 +------- include/linux/sched.h | 22 ++++----------------- include/linux/sched/signal.h | 26 +++++++++++++++++++++--- init/init_task.c | 11 ++++++----- kernel/fork.c | 23 ++++++++++++++++----- kernel/pid.c | 45 +++++++++++++++++++++--------------------- 10 files changed, 78 insertions(+), 73 deletions(-) (limited to 'init') diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index f5433bb7f04a..c1f8a57855af 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -50,7 +50,7 @@ void foo(void) DEFINE(IA64_TASK_BLOCKED_OFFSET,offsetof (struct task_struct, blocked)); DEFINE(IA64_TASK_CLEAR_CHILD_TID_OFFSET,offsetof (struct task_struct, clear_child_tid)); - DEFINE(IA64_TASK_TGIDLINK_OFFSET, offsetof (struct task_struct, pids[PIDTYPE_PID].pid)); + DEFINE(IA64_TASK_THREAD_PID_OFFSET,offsetof (struct task_struct, thread_pid)); DEFINE(IA64_PID_LEVEL_OFFSET, offsetof (struct pid, level)); DEFINE(IA64_PID_UPID_OFFSET, offsetof (struct pid, numbers[0])); DEFINE(IA64_TASK_PENDING_OFFSET,offsetof (struct task_struct, pending)); diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index eaf5a0d6f3e0..e85ebdac678b 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -96,11 +96,11 @@ ENTRY(fsys_set_tid_address) .altrp b6 .body add r9=TI_FLAGS+IA64_TASK_SIZE,r16 - add r17=IA64_TASK_TGIDLINK_OFFSET,r16 + add r17=IA64_TASK_THREAD_PID_OFFSET,r16 ;; ld4 r9=[r9] tnat.z p6,p7=r32 // check argument register for being NaT - ld8 r17=[r17] // r17 = current->pids[PIDTYPE_PID].pid + ld8 r17=[r17] // r17 = current->thread_pid ;; and r9=TIF_ALLWORK_MASK,r9 add r8=IA64_PID_LEVEL_OFFSET,r17 diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index 9400a9f6318a..502812289850 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/init_task.h b/include/linux/init_task.h index a454b8aeb938..a7083a45a26c 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -46,15 +46,6 @@ extern struct cred init_cred; #define INIT_CPU_TIMERS(s) #endif -#define INIT_PID_LINK(type) \ -{ \ - .node = { \ - .next = NULL, \ - .pprev = NULL, \ - }, \ - .pid = &init_struct_pid, \ -} - #define INIT_TASK_COMM "swapper" /* Attach to the init_task data structure for proper alignment */ diff --git a/include/linux/pid.h b/include/linux/pid.h index 7633d55d9a24..3d4c504dcc8c 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -67,12 +67,6 @@ struct pid extern struct pid init_struct_pid; -struct pid_link -{ - struct hlist_node node; - struct pid *pid; -}; - static inline struct pid *get_pid(struct pid *pid) { if (pid) @@ -177,7 +171,7 @@ pid_t pid_vnr(struct pid *pid); do { \ if ((pid) != NULL) \ hlist_for_each_entry_rcu((task), \ - &(pid)->tasks[type], pids[type].node) { + &(pid)->tasks[type], pid_links[type]) { /* * Both old and new leaders may be attached to diff --git a/include/linux/sched.h b/include/linux/sched.h index a461ff89a3af..445bdf5b1f64 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -775,7 +775,8 @@ struct task_struct { struct list_head ptrace_entry; /* PID/PID hash table linkage. */ - struct pid_link pids[PIDTYPE_MAX]; + struct pid *thread_pid; + struct hlist_node pid_links[PIDTYPE_MAX]; struct list_head thread_group; struct list_head thread_node; @@ -1199,22 +1200,7 @@ struct task_struct { static inline struct pid *task_pid(struct task_struct *task) { - return task->pids[PIDTYPE_PID].pid; -} - -/* - * Without tasklist or RCU lock it is not safe to dereference - * the result of task_pgrp/task_session even if task == current, - * we can race with another thread doing sys_setsid/sys_setpgid. - */ -static inline struct pid *task_pgrp(struct task_struct *task) -{ - return task->group_leader->pids[PIDTYPE_PGID].pid; -} - -static inline struct pid *task_session(struct task_struct *task) -{ - return task->group_leader->pids[PIDTYPE_SID].pid; + return task->thread_pid; } /* @@ -1263,7 +1249,7 @@ static inline pid_t task_tgid_nr(struct task_struct *tsk) */ static inline int pid_alive(const struct task_struct *p) { - return p->pids[PIDTYPE_PID].pid != NULL; + return p->thread_pid != NULL; } static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index b95a272c1ab5..2dcded16eb1e 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -146,7 +146,9 @@ struct signal_struct { #endif + /* PID/PID hash table linkage. */ struct pid *leader_pid; + struct pid *pids[PIDTYPE_MAX]; #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; @@ -559,9 +561,12 @@ void walk_process_tree(struct task_struct *top, proc_visitor, void *); static inline struct pid *task_pid_type(struct task_struct *task, enum pid_type type) { - if (type != PIDTYPE_PID) - task = task->group_leader; - return task->pids[type].pid; + struct pid *pid; + if (type == PIDTYPE_PID) + pid = task_pid(task); + else + pid = task->signal->pids[type]; + return pid; } static inline struct pid *task_tgid(struct task_struct *task) @@ -569,6 +574,21 @@ static inline struct pid *task_tgid(struct task_struct *task) return task->signal->leader_pid; } +/* + * Without tasklist or RCU lock it is not safe to dereference + * the result of task_pgrp/task_session even if task == current, + * we can race with another thread doing sys_setsid/sys_setpgid. + */ +static inline struct pid *task_pgrp(struct task_struct *task) +{ + return task->signal->pids[PIDTYPE_PGID]; +} + +static inline struct pid *task_session(struct task_struct *task) +{ + return task->signal->pids[PIDTYPE_SID]; +} + static inline int get_nr_threads(struct task_struct *tsk) { return tsk->signal->nr_threads; diff --git a/init/init_task.c b/init/init_task.c index 7914ffb8dc73..db12a61259f1 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -34,6 +34,11 @@ static struct signal_struct init_signals = { #endif INIT_CPU_TIMERS(init_signals) .leader_pid = &init_struct_pid, + .pids = { + [PIDTYPE_PID] = &init_struct_pid, + [PIDTYPE_PGID] = &init_struct_pid, + [PIDTYPE_SID] = &init_struct_pid, + }, INIT_PREV_CPUTIME(init_signals) }; @@ -112,11 +117,7 @@ struct task_struct init_task INIT_CPU_TIMERS(init_task) .pi_lock = __RAW_SPIN_LOCK_UNLOCKED(init_task.pi_lock), .timer_slack_ns = 50000, /* 50 usec default slack */ - .pids = { - [PIDTYPE_PID] = INIT_PID_LINK(PIDTYPE_PID), - [PIDTYPE_PGID] = INIT_PID_LINK(PIDTYPE_PGID), - [PIDTYPE_SID] = INIT_PID_LINK(PIDTYPE_SID), - }, + .thread_pid = &init_struct_pid, .thread_group = LIST_HEAD_INIT(init_task.thread_group), .thread_node = LIST_HEAD_INIT(init_signals.thread_head), #ifdef CONFIG_AUDITSYSCALL diff --git a/kernel/fork.c b/kernel/fork.c index 9440d61b925c..d2952162399b 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1549,10 +1549,22 @@ static void posix_cpu_timers_init(struct task_struct *tsk) static inline void posix_cpu_timers_init(struct task_struct *tsk) { } #endif +static inline void init_task_pid_links(struct task_struct *task) +{ + enum pid_type type; + + for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) { + INIT_HLIST_NODE(&task->pid_links[type]); + } +} + static inline void init_task_pid(struct task_struct *task, enum pid_type type, struct pid *pid) { - task->pids[type].pid = pid; + if (type == PIDTYPE_PID) + task->thread_pid = pid; + else + task->signal->pids[type] = pid; } static inline void rcu_copy_process(struct task_struct *p) @@ -1928,6 +1940,7 @@ static __latent_entropy struct task_struct *copy_process( goto bad_fork_cancel_cgroup; } + init_task_pid_links(p); if (likely(p->pid)) { ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace); @@ -2036,13 +2049,13 @@ fork_out: return ERR_PTR(retval); } -static inline void init_idle_pids(struct pid_link *links) +static inline void init_idle_pids(struct task_struct *idle) { enum pid_type type; for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) { - INIT_HLIST_NODE(&links[type].node); /* not really needed */ - links[type].pid = &init_struct_pid; + INIT_HLIST_NODE(&idle->pid_links[type]); /* not really needed */ + init_task_pid(idle, type, &init_struct_pid); } } @@ -2052,7 +2065,7 @@ struct task_struct *fork_idle(int cpu) task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0, 0, cpu_to_node(cpu)); if (!IS_ERR(task)) { - init_idle_pids(task->pids); + init_idle_pids(task); init_idle(task, cpu); } diff --git a/kernel/pid.c b/kernel/pid.c index d0de2b59f86f..f8486d2e2346 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -265,27 +265,35 @@ struct pid *find_vpid(int nr) } EXPORT_SYMBOL_GPL(find_vpid); +static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type) +{ + return (type == PIDTYPE_PID) ? + &task->thread_pid : + (type == __PIDTYPE_TGID) ? + &task->signal->leader_pid : + &task->signal->pids[type]; +} + /* * attach_pid() must be called with the tasklist_lock write-held. */ void attach_pid(struct task_struct *task, enum pid_type type) { - struct pid_link *link = &task->pids[type]; - hlist_add_head_rcu(&link->node, &link->pid->tasks[type]); + struct pid *pid = *task_pid_ptr(task, type); + hlist_add_head_rcu(&task->pid_links[type], &pid->tasks[type]); } static void __change_pid(struct task_struct *task, enum pid_type type, struct pid *new) { - struct pid_link *link; + struct pid **pid_ptr = task_pid_ptr(task, type); struct pid *pid; int tmp; - link = &task->pids[type]; - pid = link->pid; + pid = *pid_ptr; - hlist_del_rcu(&link->node); - link->pid = new; + hlist_del_rcu(&task->pid_links[type]); + *pid_ptr = new; for (tmp = PIDTYPE_MAX; --tmp >= 0; ) if (!hlist_empty(&pid->tasks[tmp])) @@ -310,8 +318,9 @@ void change_pid(struct task_struct *task, enum pid_type type, void transfer_pid(struct task_struct *old, struct task_struct *new, enum pid_type type) { - new->pids[type].pid = old->pids[type].pid; - hlist_replace_rcu(&old->pids[type].node, &new->pids[type].node); + if (type == PIDTYPE_PID) + new->thread_pid = old->thread_pid; + hlist_replace_rcu(&old->pid_links[type], &new->pid_links[type]); } struct task_struct *pid_task(struct pid *pid, enum pid_type type) @@ -322,7 +331,7 @@ struct task_struct *pid_task(struct pid *pid, enum pid_type type) first = rcu_dereference_check(hlist_first_rcu(&pid->tasks[type]), lockdep_tasklist_lock_is_held()); if (first) - result = hlist_entry(first, struct task_struct, pids[(type)].node); + result = hlist_entry(first, struct task_struct, pid_links[(type)]); } return result; } @@ -360,9 +369,7 @@ struct pid *get_task_pid(struct task_struct *task, enum pid_type type) { struct pid *pid; rcu_read_lock(); - if (type != PIDTYPE_PID) - task = task->group_leader; - pid = get_pid(rcu_dereference(task->pids[type].pid)); + pid = get_pid(rcu_dereference(*task_pid_ptr(task, type))); rcu_read_unlock(); return pid; } @@ -420,16 +427,8 @@ pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type, rcu_read_lock(); if (!ns) ns = task_active_pid_ns(current); - if (likely(pid_alive(task))) { - struct pid *pid; - if (type == PIDTYPE_PID) - pid = task_pid(task); - else if (type == __PIDTYPE_TGID) - pid = task_tgid(task); - else - pid = rcu_dereference(task->group_leader->pids[type].pid); - nr = pid_nr_ns(pid, ns); - } + if (likely(pid_alive(task))) + nr = pid_nr_ns(rcu_dereference(*task_pid_ptr(task, type)), ns); rcu_read_unlock(); return nr; -- cgit v1.2.3-70-g09d2 From 6883f81aac6f44e7df70a6af189b3689ff52cbfb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Sun, 4 Jun 2017 04:32:13 -0500 Subject: pid: Implement PIDTYPE_TGID Everywhere except in the pid array we distinguish between a tasks pid and a tasks tgid (thread group id). Even in the enumeration we want that distinction sometimes so we have added __PIDTYPE_TGID. With leader_pid we almost have an implementation of PIDTYPE_TGID in struct signal_struct. Add PIDTYPE_TGID as a first class member of the pid_type enumeration and into the pids array. Then remove the __PIDTYPE_TGID special case and the leader_pid in signal_struct. The net size increase is just an extra pointer added to struct pid and an extra pair of pointers of an hlist_node added to task_struct. The effect on code maintenance is the removal of a number of special cases today and the potential to remove many more special cases as PIDTYPE_TGID gets used to it's fullest. The long term potential is allowing zombie thread group leaders to exit, which will remove a lot more special cases in the code. Signed-off-by: "Eric W. Biederman" --- arch/ia64/kernel/asm-offsets.c | 2 +- arch/ia64/kernel/fsys.S | 4 ++-- arch/s390/kernel/perf_cpum_sf.c | 2 +- fs/exec.c | 1 + include/linux/pid.h | 3 +-- include/linux/sched.h | 4 ++-- include/linux/sched/signal.h | 5 ++--- init/init_task.c | 2 +- kernel/events/core.c | 2 +- kernel/exit.c | 1 + kernel/fork.c | 3 ++- kernel/pid.c | 2 -- kernel/time/itimer.c | 5 +++-- kernel/time/posix-cpu-timers.c | 2 +- 14 files changed, 19 insertions(+), 19 deletions(-) (limited to 'init') diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index c1f8a57855af..00e8e2a1eb19 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -67,7 +67,7 @@ void foo(void) DEFINE(IA64_SIGNAL_GROUP_STOP_COUNT_OFFSET,offsetof (struct signal_struct, group_stop_count)); DEFINE(IA64_SIGNAL_SHARED_PENDING_OFFSET,offsetof (struct signal_struct, shared_pending)); - DEFINE(IA64_SIGNAL_LEADER_PID_OFFSET, offsetof (struct signal_struct, leader_pid)); + DEFINE(IA64_SIGNAL_PIDS_TGID_OFFSET, offsetof (struct signal_struct, pids[PIDTYPE_TGID])); BLANK(); diff --git a/arch/ia64/kernel/fsys.S b/arch/ia64/kernel/fsys.S index e85ebdac678b..d80c99a5f55d 100644 --- a/arch/ia64/kernel/fsys.S +++ b/arch/ia64/kernel/fsys.S @@ -68,10 +68,10 @@ ENTRY(fsys_getpid) add r9=TI_FLAGS+IA64_TASK_SIZE,r16 ;; ld4 r9=[r9] - add r17=IA64_SIGNAL_LEADER_PID_OFFSET,r17 + add r17=IA64_SIGNAL_PIDS_TGID_OFFSET,r17 ;; and r9=TIF_ALLWORK_MASK,r9 - ld8 r17=[r17] // r17 = current->signal->leader_pid + ld8 r17=[r17] // r17 = current->signal->pids[PIDTYPE_TGID] ;; add r8=IA64_PID_LEVEL_OFFSET,r17 ;; diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c index 0292d68e7dde..ca0b7ae894bb 100644 --- a/arch/s390/kernel/perf_cpum_sf.c +++ b/arch/s390/kernel/perf_cpum_sf.c @@ -665,7 +665,7 @@ static void cpumsf_output_event_pid(struct perf_event *event, goto out; /* Update the process ID (see also kernel/events/core.c) */ - data->tid_entry.pid = cpumsf_pid_type(event, pid, __PIDTYPE_TGID); + data->tid_entry.pid = cpumsf_pid_type(event, pid, PIDTYPE_TGID); data->tid_entry.tid = cpumsf_pid_type(event, pid, PIDTYPE_PID); perf_output_sample(&handle, &header, data, event); diff --git a/fs/exec.c b/fs/exec.c index 2d4e0075bd24..79a11fbded7a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1146,6 +1146,7 @@ static int de_thread(struct task_struct *tsk) */ tsk->pid = leader->pid; change_pid(tsk, PIDTYPE_PID, task_pid(leader)); + transfer_pid(leader, tsk, PIDTYPE_TGID); transfer_pid(leader, tsk, PIDTYPE_PGID); transfer_pid(leader, tsk, PIDTYPE_SID); diff --git a/include/linux/pid.h b/include/linux/pid.h index 3d4c504dcc8c..14a9a39da9c7 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -7,11 +7,10 @@ enum pid_type { PIDTYPE_PID, + PIDTYPE_TGID, PIDTYPE_PGID, PIDTYPE_SID, PIDTYPE_MAX, - /* only valid to __task_pid_nr_ns() */ - __PIDTYPE_TGID }; /* diff --git a/include/linux/sched.h b/include/linux/sched.h index 445bdf5b1f64..06b4e3bda93a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1275,12 +1275,12 @@ static inline pid_t task_session_vnr(struct task_struct *tsk) static inline pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { - return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, ns); + return __task_pid_nr_ns(tsk, PIDTYPE_TGID, ns); } static inline pid_t task_tgid_vnr(struct task_struct *tsk) { - return __task_pid_nr_ns(tsk, __PIDTYPE_TGID, NULL); + return __task_pid_nr_ns(tsk, PIDTYPE_TGID, NULL); } static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 2dcded16eb1e..ee30a5ba475f 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -147,7 +147,6 @@ struct signal_struct { #endif /* PID/PID hash table linkage. */ - struct pid *leader_pid; struct pid *pids[PIDTYPE_MAX]; #ifdef CONFIG_NO_HZ_FULL @@ -571,7 +570,7 @@ struct pid *task_pid_type(struct task_struct *task, enum pid_type type) static inline struct pid *task_tgid(struct task_struct *task) { - return task->signal->leader_pid; + return task->signal->pids[PIDTYPE_TGID]; } /* @@ -607,7 +606,7 @@ static inline bool thread_group_leader(struct task_struct *p) */ static inline bool has_group_leader_pid(struct task_struct *p) { - return task_pid(p) == p->signal->leader_pid; + return task_pid(p) == task_tgid(p); } static inline diff --git a/init/init_task.c b/init/init_task.c index db12a61259f1..4f97846256d7 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -33,9 +33,9 @@ static struct signal_struct init_signals = { }, #endif INIT_CPU_TIMERS(init_signals) - .leader_pid = &init_struct_pid, .pids = { [PIDTYPE_PID] = &init_struct_pid, + [PIDTYPE_TGID] = &init_struct_pid, [PIDTYPE_PGID] = &init_struct_pid, [PIDTYPE_SID] = &init_struct_pid, }, diff --git a/kernel/events/core.c b/kernel/events/core.c index 80cca2b30c4f..9025b1796ca8 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -1334,7 +1334,7 @@ static u32 perf_event_pid_type(struct perf_event *event, struct task_struct *p, static u32 perf_event_pid(struct perf_event *event, struct task_struct *p) { - return perf_event_pid_type(event, p, __PIDTYPE_TGID); + return perf_event_pid_type(event, p, PIDTYPE_TGID); } static u32 perf_event_tid(struct perf_event *event, struct task_struct *p) diff --git a/kernel/exit.c b/kernel/exit.c index 16432428fc6c..25582b442955 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -73,6 +73,7 @@ static void __unhash_process(struct task_struct *p, bool group_dead) nr_threads--; detach_pid(p, PIDTYPE_PID); if (group_dead) { + detach_pid(p, PIDTYPE_TGID); detach_pid(p, PIDTYPE_PGID); detach_pid(p, PIDTYPE_SID); diff --git a/kernel/fork.c b/kernel/fork.c index d2952162399b..cc5be0d01ce6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1946,6 +1946,7 @@ static __latent_entropy struct task_struct *copy_process( init_task_pid(p, PIDTYPE_PID, pid); if (thread_group_leader(p)) { + init_task_pid(p, PIDTYPE_TGID, pid); init_task_pid(p, PIDTYPE_PGID, task_pgrp(current)); init_task_pid(p, PIDTYPE_SID, task_session(current)); @@ -1954,7 +1955,6 @@ static __latent_entropy struct task_struct *copy_process( p->signal->flags |= SIGNAL_UNKILLABLE; } - p->signal->leader_pid = pid; p->signal->tty = tty_kref_get(current->signal->tty); /* * Inherit has_child_subreaper flag under the same @@ -1965,6 +1965,7 @@ static __latent_entropy struct task_struct *copy_process( p->real_parent->signal->is_child_subreaper; list_add_tail(&p->sibling, &p->real_parent->children); list_add_tail_rcu(&p->tasks, &init_task.tasks); + attach_pid(p, PIDTYPE_TGID); attach_pid(p, PIDTYPE_PGID); attach_pid(p, PIDTYPE_SID); __this_cpu_inc(process_counts); diff --git a/kernel/pid.c b/kernel/pid.c index f8486d2e2346..de1cfc4f75a2 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -269,8 +269,6 @@ static struct pid **task_pid_ptr(struct task_struct *task, enum pid_type type) { return (type == PIDTYPE_PID) ? &task->thread_pid : - (type == __PIDTYPE_TGID) ? - &task->signal->leader_pid : &task->signal->pids[type]; } diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index f26acef5d7b4..9a65713c8309 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -139,9 +139,10 @@ enum hrtimer_restart it_real_fn(struct hrtimer *timer) { struct signal_struct *sig = container_of(timer, struct signal_struct, real_timer); + struct pid *leader_pid = sig->pids[PIDTYPE_TGID]; - trace_itimer_expire(ITIMER_REAL, sig->leader_pid, 0); - kill_pid_info(SIGALRM, SEND_SIG_PRIV, sig->leader_pid); + trace_itimer_expire(ITIMER_REAL, leader_pid, 0); + kill_pid_info(SIGALRM, SEND_SIG_PRIV, leader_pid); return HRTIMER_NORESTART; } diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 5a6251ac6f7a..40e6fae46cec 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -895,7 +895,7 @@ static void check_cpu_itimer(struct task_struct *tsk, struct cpu_itimer *it, trace_itimer_expire(signo == SIGPROF ? ITIMER_PROF : ITIMER_VIRTUAL, - tsk->signal->leader_pid, cur_time); + task_tgid(tsk), cur_time); __group_send_sig_info(signo, SEND_SIG_PRIV, tsk); } -- cgit v1.2.3-70-g09d2 From c3ad2c3b02e953ead2b8d52a0c9e70312930c3d0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 23 Jul 2018 15:20:37 -0500 Subject: signal: Don't restart fork when signals come in. Wen Yang and majiang report that a periodic signal received during fork can cause fork to continually restart preventing an application from making progress. The code was being overly pessimistic. Fork needs to guarantee that a signal sent to multiple processes is logically delivered before the fork and just to the forking process or logically delivered after the fork to both the forking process and it's newly spawned child. For signals like periodic timers that are always delivered to a single process fork can safely complete and let them appear to logically delivered after the fork(). While examining this issue I also discovered that fork today will miss signals delivered to multiple processes during the fork and handled by another thread. Similarly the current code will also miss blocked signals that are delivered to multiple process, as those signals will not appear pending during fork. Add a list of each thread that is currently forking, and keep on that list a signal set that records all of the signals sent to multiple processes. When fork completes initialize the new processes shared_pending signal set with it. The calculate_sigpending function will see those signals and set TIF_SIGPENDING causing the new task to take the slow path to userspace to handle those signals. Making it appear as if those signals were received immediately after the fork. It is not possible to send real time signals to multiple processes and exceptions don't go to multiple processes, which means that that are no signals sent to multiple processes that require siginfo. This means it is safe to not bother collecting siginfo on signals sent during fork. The sigaction of a child of fork is initially the same as the sigaction of the parent process. So a signal the parent ignores the child will also initially ignore. Therefore it is safe to ignore signals sent to multiple processes and ignored by the forking process. Signals sent to only a single process or only a single thread and delivered during fork are treated as if they are received after the fork, and generally not dealt with. They won't cause any problems. V2: Added removal from the multiprocess list on failure. V3: Use -ERESTARTNOINTR directly V4: - Don't queue both SIGCONT and SIGSTOP - Initialize signal_struct.multiprocess in init_task - Move setting of shared_pending to before the new task is visible to signals. This prevents signals from comming in before shared_pending.signal is set to delayed.signal and being lost. V5: - rework list add and delete to account for idle threads v6: - Use sigdelsetmask when removing stop signals Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=200447 Reported-by: Wen Yang and Reported-by: majiang Fixes: 4a2c7a7837da ("[PATCH] make fork() atomic wrt pgrp/session signals") Signed-off-by: "Eric W. Biederman" --- include/linux/sched/signal.h | 8 ++++++++ init/init_task.c | 1 + kernel/fork.c | 43 +++++++++++++++++++++++++------------------ kernel/signal.c | 15 +++++++++++++++ 4 files changed, 49 insertions(+), 18 deletions(-) (limited to 'init') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index ae2b0b81be25..4e9b77fb702d 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -69,6 +69,11 @@ struct thread_group_cputimer { bool checking_timer; }; +struct multiprocess_signals { + sigset_t signal; + struct hlist_node node; +}; + /* * NOTE! "signal_struct" does not have its own * locking, because a shared signal_struct always @@ -90,6 +95,9 @@ struct signal_struct { /* shared signal handling: */ struct sigpending shared_pending; + /* For collecting multiprocess signals during fork */ + struct hlist_head multiprocess; + /* thread group exit support */ int group_exit_code; /* overloaded: diff --git a/init/init_task.c b/init/init_task.c index 4f97846256d7..5aebe3be4d7c 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -22,6 +22,7 @@ static struct signal_struct init_signals = { .list = LIST_HEAD_INIT(init_signals.shared_pending.list), .signal = {{0}} }, + .multiprocess = HLIST_HEAD_INIT, .rlim = INIT_RLIMITS, .cred_guard_mutex = __MUTEX_INITIALIZER(init_signals.cred_guard_mutex), #ifdef CONFIG_POSIX_TIMERS diff --git a/kernel/fork.c b/kernel/fork.c index ab731e15a600..411e34acace7 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1456,6 +1456,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) init_waitqueue_head(&sig->wait_chldexit); sig->curr_target = tsk; init_sigpending(&sig->shared_pending); + INIT_HLIST_HEAD(&sig->multiprocess); seqlock_init(&sig->stats_lock); prev_cputime_init(&sig->prev_cputime); @@ -1602,6 +1603,7 @@ static __latent_entropy struct task_struct *copy_process( { int retval; struct task_struct *p; + struct multiprocess_signals delayed; /* * Don't allow sharing the root directory with processes in a different @@ -1649,6 +1651,24 @@ static __latent_entropy struct task_struct *copy_process( return ERR_PTR(-EINVAL); } + /* + * Force any signals received before this point to be delivered + * before the fork happens. Collect up signals sent to multiple + * processes that happen during the fork and delay them so that + * they appear to happen after the fork. + */ + sigemptyset(&delayed.signal); + INIT_HLIST_NODE(&delayed.node); + + spin_lock_irq(¤t->sighand->siglock); + if (!(clone_flags & CLONE_THREAD)) + hlist_add_head(&delayed.node, ¤t->signal->multiprocess); + recalc_sigpending(); + spin_unlock_irq(¤t->sighand->siglock); + retval = -ERESTARTNOINTR; + if (signal_pending(current)) + goto fork_out; + retval = -ENOMEM; p = dup_task_struct(current, node); if (!p) @@ -1934,22 +1954,6 @@ static __latent_entropy struct task_struct *copy_process( goto bad_fork_cancel_cgroup; } - if (!(clone_flags & CLONE_THREAD)) { - /* - * Process group and session signals need to be delivered to just the - * parent before the fork or both the parent and the child after the - * fork. Restart if a signal comes in before we add the new process to - * it's process group. - * A fatal signal pending means that current will exit, so the new - * thread can't slip out of an OOM kill (or normal SIGKILL). - */ - recalc_sigpending(); - if (signal_pending(current)) { - retval = -ERESTARTNOINTR; - goto bad_fork_cancel_cgroup; - } - } - init_task_pid_links(p); if (likely(p->pid)) { @@ -1965,7 +1969,7 @@ static __latent_entropy struct task_struct *copy_process( ns_of_pid(pid)->child_reaper = p; p->signal->flags |= SIGNAL_UNKILLABLE; } - + p->signal->shared_pending.signal = delayed.signal; p->signal->tty = tty_kref_get(current->signal->tty); /* * Inherit has_child_subreaper flag under the same @@ -1993,8 +1997,8 @@ static __latent_entropy struct task_struct *copy_process( attach_pid(p, PIDTYPE_PID); nr_threads++; } - total_forks++; + hlist_del_init(&delayed.node); spin_unlock(¤t->sighand->siglock); syscall_tracepoint_update(p); write_unlock_irq(&tasklist_lock); @@ -2059,6 +2063,9 @@ bad_fork_free: put_task_stack(p); free_task(p); fork_out: + spin_lock_irq(¤t->sighand->siglock); + hlist_del_init(&delayed.node); + spin_unlock_irq(¤t->sighand->siglock); return ERR_PTR(retval); } diff --git a/kernel/signal.c b/kernel/signal.c index 9f0eafb6d474..cfa9d10e731a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1121,6 +1121,21 @@ static int __send_signal(int sig, struct siginfo *info, struct task_struct *t, out_set: signalfd_notify(t, sig); sigaddset(&pending->signal, sig); + + /* Let multiprocess signals appear after on-going forks */ + if (type > PIDTYPE_TGID) { + struct multiprocess_signals *delayed; + hlist_for_each_entry(delayed, &t->signal->multiprocess, node) { + sigset_t *signal = &delayed->signal; + /* Can't queue both a stop and a continue signal */ + if (sig == SIGCONT) + sigdelsetmask(signal, SIG_KERNEL_STOP_MASK); + else if (sig_kernel_stop(sig)) + sigdelset(signal, SIGCONT); + sigaddset(signal, sig); + } + } + complete_signal(sig, t, type); ret: trace_signal_generate(sig, info, t, type != PIDTYPE_PID, result); -- cgit v1.2.3-70-g09d2