Diffstat (limited to 'kernel')
-rw-r--r--  kernel/Makefile              |   2
-rw-r--r--  kernel/auditsc.c             |   2
-rw-r--r--  kernel/bpf/btf.c             |   8
-rw-r--r--  kernel/cfi.c                 |  22
-rw-r--r--  kernel/dma/debug.c           |   2
-rw-r--r--  kernel/dma/direct.c          |   5
-rw-r--r--  kernel/dma/swiotlb.c         |  14
-rw-r--r--  kernel/entry/kvm.c           |   6
-rw-r--r--  kernel/hung_task.c           |  11
-rw-r--r--  kernel/irq/chip.c            |   5
-rw-r--r--  kernel/kthread.c             |  14
-rw-r--r--  kernel/locking/lockdep.c     |   2
-rw-r--r--  kernel/panic.c               |   4
-rw-r--r--  kernel/platform-feature.c    |  27
-rw-r--r--  kernel/power/hibernate.c     |   2
-rw-r--r--  kernel/printk/printk.c       | 587
-rw-r--r--  kernel/rcu/tree_stall.h      |   2
-rw-r--r--  kernel/reboot.c              | 101
-rw-r--r--  kernel/sched/core.c          |  36
-rw-r--r--  kernel/sched/sched.h         |   5
-rw-r--r--  kernel/trace/blktrace.c      |   3
-rw-r--r--  kernel/trace/bpf_trace.c     |  66
-rw-r--r--  kernel/trace/ftrace.c        |  13
-rw-r--r--  kernel/trace/rethook.c       |   9
-rw-r--r--  kernel/trace/trace.c         |   2
-rw-r--r--  kernel/trace/trace_kprobe.c  |  11
-rw-r--r--  kernel/trace/trace_uprobe.c  |   1
-rw-r--r--  kernel/watchdog.c            |   4
-rw-r--r--  kernel/watchdog_hld.c        |   4
-rw-r--r--  kernel/workqueue.c           |  16
30 files changed, 329 insertions(+), 657 deletions(-)
diff --git a/kernel/Makefile b/kernel/Makefile index 318789c728d3..a7e1f49ab2b3 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -7,7 +7,7 @@ obj-y     = fork.o exec_domain.o panic.o \  	    cpu.o exit.o softirq.o resource.o \  	    sysctl.o capability.o ptrace.o user.o \  	    signal.o sys.o umh.o workqueue.o pid.o task_work.o \ -	    extable.o params.o \ +	    extable.o params.o platform-feature.o \  	    kthread.o sys_ni.o nsproxy.o \  	    notifier.o ksysfs.o cred.o reboot.o \  	    async.o range.o smpboot.o ucount.o regset.o diff --git a/kernel/auditsc.c b/kernel/auditsc.c index f3a2abd6d1a1..3a8c9d744800 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1014,10 +1014,10 @@ static void audit_reset_context(struct audit_context *ctx)  	ctx->target_comm[0] = '\0';  	unroll_tree_refs(ctx, NULL, 0);  	WARN_ON(!list_empty(&ctx->killed_trees)); -	ctx->type = 0;  	audit_free_module(ctx);  	ctx->fds[0] = -1;  	audit_proctitle_free(ctx); +	ctx->type = 0; /* reset last for audit_free_*() */  }  static inline struct audit_context *audit_alloc_context(enum audit_state state) diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index 7bccaa4646e5..eb12d4f705cc 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -4815,6 +4815,7 @@ static int btf_check_type_tags(struct btf_verifier_env *env,  	n = btf_nr_types(btf);  	for (i = start_id; i < n; i++) {  		const struct btf_type *t; +		int chain_limit = 32;  		u32 cur_id = i;  		t = btf_type_by_id(btf, i); @@ -4827,6 +4828,10 @@ static int btf_check_type_tags(struct btf_verifier_env *env,  		in_tags = btf_type_is_type_tag(t);  		while (btf_type_is_modifier(t)) { +			if (!chain_limit--) { +				btf_verifier_log(env, "Max chain length or cycle detected"); +				return -ELOOP; +			}  			if (btf_type_is_type_tag(t)) {  				if (!in_tags) {  					btf_verifier_log(env, "Type tags don't precede modifiers"); @@ -6054,6 +6059,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,  				    struct bpf_reg_state *regs,  				    bool ptr_to_mem_ok)  { +	enum bpf_prog_type prog_type = resolve_prog_type(env->prog);  	struct bpf_verifier_log *log = &env->log;  	u32 i, nargs, ref_id, ref_obj_id = 0;  	bool is_kfunc = btf_is_kernel(btf); @@ -6171,7 +6177,7 @@ static int btf_check_func_arg_match(struct bpf_verifier_env *env,  				return -EINVAL;  			}  			/* rest of the arguments can be anything, like normal kfunc */ -		} else if (btf_get_prog_ctx_type(log, btf, t, env->prog->type, i)) { +		} else if (btf_get_prog_ctx_type(log, btf, t, prog_type, i)) {  			/* If function expects ctx type in BTF check that caller  			 * is passing PTR_TO_CTX.  			 */ diff --git a/kernel/cfi.c b/kernel/cfi.c index 9594cfd1cf2c..08102d19ec15 100644 --- a/kernel/cfi.c +++ b/kernel/cfi.c @@ -281,6 +281,8 @@ static inline cfi_check_fn find_module_check_fn(unsigned long ptr)  static inline cfi_check_fn find_check_fn(unsigned long ptr)  {  	cfi_check_fn fn = NULL; +	unsigned long flags; +	bool rcu_idle;  	if (is_kernel_text(ptr))  		return __cfi_check; @@ -290,13 +292,21 @@ static inline cfi_check_fn find_check_fn(unsigned long ptr)  	 * the shadow and __module_address use RCU, so we need to wake it  	 * up if necessary.  	 
*/ -	RCU_NONIDLE({ -		if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW)) -			fn = find_shadow_check_fn(ptr); +	rcu_idle = !rcu_is_watching(); +	if (rcu_idle) { +		local_irq_save(flags); +		rcu_irq_enter(); +	} + +	if (IS_ENABLED(CONFIG_CFI_CLANG_SHADOW)) +		fn = find_shadow_check_fn(ptr); +	if (!fn) +		fn = find_module_check_fn(ptr); -		if (!fn) -			fn = find_module_check_fn(ptr); -	}); +	if (rcu_idle) { +		rcu_irq_exit(); +		local_irq_restore(flags); +	}  	return fn;  } diff --git a/kernel/dma/debug.c b/kernel/dma/debug.c index ac740630c79c..2caafd13f8aa 100644 --- a/kernel/dma/debug.c +++ b/kernel/dma/debug.c @@ -564,7 +564,7 @@ static void add_dma_entry(struct dma_debug_entry *entry, unsigned long attrs)  	rc = active_cacheline_insert(entry);  	if (rc == -ENOMEM) { -		pr_err("cacheline tracking ENOMEM, dma-debug disabled\n"); +		pr_err_once("cacheline tracking ENOMEM, dma-debug disabled\n");  		global_disable = true;  	} else if (rc == -EEXIST && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) {  		err_printk(entry->dev, entry, diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index e978f36e6be8..8d0b68a17042 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -357,7 +357,7 @@ void dma_direct_free(struct device *dev, size_t size,  	} else {  		if (IS_ENABLED(CONFIG_ARCH_HAS_DMA_CLEAR_UNCACHED))  			arch_dma_clear_uncached(cpu_addr, size); -		if (dma_set_encrypted(dev, cpu_addr, 1 << page_order)) +		if (dma_set_encrypted(dev, cpu_addr, size))  			return;  	} @@ -392,7 +392,6 @@ void dma_direct_free_pages(struct device *dev, size_t size,  		struct page *page, dma_addr_t dma_addr,  		enum dma_data_direction dir)  { -	unsigned int page_order = get_order(size);  	void *vaddr = page_address(page);  	/* If cpu_addr is not from an atomic pool, dma_free_from_pool() fails */ @@ -400,7 +399,7 @@ void dma_direct_free_pages(struct device *dev, size_t size,  	    dma_free_from_pool(dev, vaddr, size))  		return; -	if (dma_set_encrypted(dev, vaddr, 1 << page_order)) +	if (dma_set_encrypted(dev, vaddr, size))  		return;  	__dma_direct_free_pages(dev, page, size);  } diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index dfa1de89dc94..cb50f8d38360 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -192,7 +192,7 @@ void __init swiotlb_update_mem_attributes(void)  }  static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start, -				    unsigned long nslabs, bool late_alloc) +		unsigned long nslabs, unsigned int flags, bool late_alloc)  {  	void *vaddr = phys_to_virt(start);  	unsigned long bytes = nslabs << IO_TLB_SHIFT, i; @@ -203,8 +203,7 @@ static void swiotlb_init_io_tlb_mem(struct io_tlb_mem *mem, phys_addr_t start,  	mem->index = 0;  	mem->late_alloc = late_alloc; -	if (swiotlb_force_bounce) -		mem->force_bounce = true; +	mem->force_bounce = swiotlb_force_bounce || (flags & SWIOTLB_FORCE);  	spin_lock_init(&mem->lock);  	for (i = 0; i < mem->nslabs; i++) { @@ -275,8 +274,7 @@ retry:  		panic("%s: Failed to allocate %zu bytes align=0x%lx\n",  		      __func__, alloc_size, PAGE_SIZE); -	swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, false); -	mem->force_bounce = flags & SWIOTLB_FORCE; +	swiotlb_init_io_tlb_mem(mem, __pa(tlb), nslabs, flags, false);  	if (flags & SWIOTLB_VERBOSE)  		swiotlb_print_info(); @@ -348,7 +346,7 @@ retry:  	set_memory_decrypted((unsigned long)vstart,  			     (nslabs << IO_TLB_SHIFT) >> PAGE_SHIFT); -	swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, true); +	swiotlb_init_io_tlb_mem(mem, virt_to_phys(vstart), nslabs, 0, true);  	
swiotlb_print_info();  	return 0; @@ -835,8 +833,8 @@ static int rmem_swiotlb_device_init(struct reserved_mem *rmem,  		set_memory_decrypted((unsigned long)phys_to_virt(rmem->base),  				     rmem->size >> PAGE_SHIFT); -		swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, false); -		mem->force_bounce = true; +		swiotlb_init_io_tlb_mem(mem, rmem->base, nslabs, SWIOTLB_FORCE, +				false);  		mem->for_alloc = true;  		rmem->priv = mem; diff --git a/kernel/entry/kvm.c b/kernel/entry/kvm.c index 9d09f489b60e..2e0f75bcb7fd 100644 --- a/kernel/entry/kvm.c +++ b/kernel/entry/kvm.c @@ -9,12 +9,6 @@ static int xfer_to_guest_mode_work(struct kvm_vcpu *vcpu, unsigned long ti_work)  		int ret;  		if (ti_work & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL)) { -			clear_notify_signal(); -			if (task_work_pending(current)) -				task_work_run(); -		} - -		if (ti_work & _TIF_SIGPENDING) {  			kvm_handle_signal_exit(vcpu);  			return -EINTR;  		} diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 80bfea5dd5c4..cff3ae8c818f 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -127,8 +127,6 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)  	 * complain:  	 */  	if (sysctl_hung_task_warnings) { -		printk_prefer_direct_enter(); -  		if (sysctl_hung_task_warnings > 0)  			sysctl_hung_task_warnings--;  		pr_err("INFO: task %s:%d blocked for more than %ld seconds.\n", @@ -144,8 +142,6 @@ static void check_hung_task(struct task_struct *t, unsigned long timeout)  		if (sysctl_hung_task_all_cpu_backtrace)  			hung_task_show_all_bt = true; - -		printk_prefer_direct_exit();  	}  	touch_nmi_watchdog(); @@ -208,17 +204,12 @@ static void check_hung_uninterruptible_tasks(unsigned long timeout)  	}   unlock:  	rcu_read_unlock(); -	if (hung_task_show_lock) { -		printk_prefer_direct_enter(); +	if (hung_task_show_lock)  		debug_show_all_locks(); -		printk_prefer_direct_exit(); -	}  	if (hung_task_show_all_bt) {  		hung_task_show_all_bt = false; -		printk_prefer_direct_enter();  		trigger_all_cpu_backtrace(); -		printk_prefer_direct_exit();  	}  	if (hung_task_call_panic) diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index e6b8e564b37f..886789dcee43 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -1006,8 +1006,10 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,  		if (desc->irq_data.chip != &no_irq_chip)  			mask_ack_irq(desc);  		irq_state_set_disabled(desc); -		if (is_chained) +		if (is_chained) {  			desc->action = NULL; +			WARN_ON(irq_chip_pm_put(irq_desc_get_irq_data(desc))); +		}  		desc->depth = 1;  	}  	desc->handle_irq = handle; @@ -1033,6 +1035,7 @@ __irq_do_set_handler(struct irq_desc *desc, irq_flow_handler_t handle,  		irq_settings_set_norequest(desc);  		irq_settings_set_nothread(desc);  		desc->action = &chained_action; +		WARN_ON(irq_chip_pm_get(irq_desc_get_irq_data(desc)));  		irq_activate_and_startup(desc, IRQ_RESEND);  	}  } diff --git a/kernel/kthread.c b/kernel/kthread.c index 544fd4097406..3c677918d8f2 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -340,7 +340,7 @@ static int kthread(void *_create)  	self = to_kthread(current); -	/* If user was SIGKILLed, I release the structure. */ +	/* Release the structure when caller killed by a fatal signal. */  	done = xchg(&create->done, NULL);  	if (!done) {  		kfree(create); @@ -398,7 +398,7 @@ static void create_kthread(struct kthread_create_info *create)  	/* We want our own signal handler (we take no signals by default). 
*/  	pid = kernel_thread(kthread, create, CLONE_FS | CLONE_FILES | SIGCHLD);  	if (pid < 0) { -		/* If user was SIGKILLed, I release the structure. */ +		/* Release the structure when caller killed by a fatal signal. */  		struct completion *done = xchg(&create->done, NULL);  		if (!done) { @@ -440,9 +440,9 @@ struct task_struct *__kthread_create_on_node(int (*threadfn)(void *data),  	 */  	if (unlikely(wait_for_completion_killable(&done))) {  		/* -		 * If I was SIGKILLed before kthreadd (or new kernel thread) -		 * calls complete(), leave the cleanup of this structure to -		 * that thread. +		 * If I was killed by a fatal signal before kthreadd (or new +		 * kernel thread) calls complete(), leave the cleanup of this +		 * structure to that thread.  		 */  		if (xchg(&create->done, NULL))  			return ERR_PTR(-EINTR); @@ -876,7 +876,7 @@ fail_task:   *   * Returns a pointer to the allocated worker on success, ERR_PTR(-ENOMEM)   * when the needed structures could not get allocated, and ERR_PTR(-EINTR) - * when the worker was SIGKILLed. + * when the caller was killed by a fatal signal.   */  struct kthread_worker *  kthread_create_worker(unsigned int flags, const char namefmt[], ...) @@ -925,7 +925,7 @@ EXPORT_SYMBOL(kthread_create_worker);   * Return:   * The pointer to the allocated worker on success, ERR_PTR(-ENOMEM)   * when the needed structures could not get allocated, and ERR_PTR(-EINTR) - * when the worker was SIGKILLed. + * when the caller was killed by a fatal signal.   */  struct kthread_worker *  kthread_create_worker_on_cpu(int cpu, unsigned int flags, diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 81e87280513e..f06b91ca6482 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -5432,7 +5432,7 @@ static struct pin_cookie __lock_pin_lock(struct lockdep_map *lock)  			 * be guessable and still allows some pin nesting in  			 * our u32 pin_count.  			 */ -			cookie.val = 1 + (prandom_u32() >> 16); +			cookie.val = 1 + (sched_clock() & 0xffff);  			hlock->pin_count += cookie.val;  			return cookie;  		} diff --git a/kernel/panic.c b/kernel/panic.c index a3c758dba15a..a3308af28a21 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -603,8 +603,6 @@ void __warn(const char *file, int line, void *caller, unsigned taint,  {  	disable_trace_on_warning(); -	printk_prefer_direct_enter(); -  	if (file)  		pr_warn("WARNING: CPU: %d PID: %d at %s:%d %pS\n",  			raw_smp_processor_id(), current->pid, file, line, @@ -634,8 +632,6 @@ void __warn(const char *file, int line, void *caller, unsigned taint,  	/* Just a warning, don't kill lockdep. 
*/  	add_taint(taint, LOCKDEP_STILL_OK); - -	printk_prefer_direct_exit();  }  #ifndef __WARN_FLAGS diff --git a/kernel/platform-feature.c b/kernel/platform-feature.c new file mode 100644 index 000000000000..cb6a6c3e4fed --- /dev/null +++ b/kernel/platform-feature.c @@ -0,0 +1,27 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bitops.h> +#include <linux/cache.h> +#include <linux/export.h> +#include <linux/platform-feature.h> + +#define PLATFORM_FEAT_ARRAY_SZ  BITS_TO_LONGS(PLATFORM_FEAT_N) +static unsigned long __read_mostly platform_features[PLATFORM_FEAT_ARRAY_SZ]; + +void platform_set(unsigned int feature) +{ +	set_bit(feature, platform_features); +} +EXPORT_SYMBOL_GPL(platform_set); + +void platform_clear(unsigned int feature) +{ +	clear_bit(feature, platform_features); +} +EXPORT_SYMBOL_GPL(platform_clear); + +bool platform_has(unsigned int feature) +{ +	return test_bit(feature, platform_features); +} +EXPORT_SYMBOL_GPL(platform_has); diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 20a66bf9f465..89c71fce225d 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -665,7 +665,7 @@ static void power_down(void)  		hibernation_platform_enter();  		fallthrough;  	case HIBERNATION_SHUTDOWN: -		if (pm_power_off) +		if (kernel_can_power_off())  			kernel_power_off();  		break;  	} diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index ea3dd55709e7..b49c6ff6dca0 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -224,33 +224,6 @@ int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,  static int nr_ext_console_drivers;  /* - * Used to synchronize printing kthreads against direct printing via - * console_trylock/console_unlock. - * - * Values: - * -1 = console kthreads atomically blocked (via global trylock) - *  0 = no kthread printing, console not locked (via trylock) - * >0 = kthread(s) actively printing - * - * Note: For synchronizing against direct printing via - *       console_lock/console_unlock, see the @lock variable in - *       struct console. - */ -static atomic_t console_kthreads_active = ATOMIC_INIT(0); - -#define console_kthreads_atomic_tryblock() \ -	(atomic_cmpxchg(&console_kthreads_active, 0, -1) == 0) -#define console_kthreads_atomic_unblock() \ -	atomic_cmpxchg(&console_kthreads_active, -1, 0) -#define console_kthreads_atomically_blocked() \ -	(atomic_read(&console_kthreads_active) == -1) - -#define console_kthread_printing_tryenter() \ -	atomic_inc_unless_negative(&console_kthreads_active) -#define console_kthread_printing_exit() \ -	atomic_dec(&console_kthreads_active) - -/*   * Helper macros to handle lockdep when locking/unlocking console_sem. We use   * macros instead of functions so that _RET_IP_ contains useful information.   */ @@ -298,49 +271,14 @@ static bool panic_in_progress(void)  }  /* - * Tracks whether kthread printers are all blocked. A value of true implies - * that the console is locked via console_lock() or the console is suspended. - * Writing to this variable requires holding @console_sem. + * This is used for debugging the mess that is the VT code by + * keeping track if we have the console semaphore held. It's + * definitely not the perfect debug tool (we don't know if _WE_ + * hold it and are racing, but it helps tracking those weird code + * paths in the console code where we end up in places I want + * locked without the console semaphore held).   */ -static bool console_kthreads_blocked; - -/* - * Block all kthread printers from a schedulable context. 
- * - * Requires holding @console_sem. - */ -static void console_kthreads_block(void) -{ -	struct console *con; - -	for_each_console(con) { -		mutex_lock(&con->lock); -		con->blocked = true; -		mutex_unlock(&con->lock); -	} - -	console_kthreads_blocked = true; -} - -/* - * Unblock all kthread printers from a schedulable context. - * - * Requires holding @console_sem. - */ -static void console_kthreads_unblock(void) -{ -	struct console *con; - -	for_each_console(con) { -		mutex_lock(&con->lock); -		con->blocked = false; -		mutex_unlock(&con->lock); -	} - -	console_kthreads_blocked = false; -} - -static int console_suspended; +static int console_locked, console_suspended;  /*   *	Array of consoles built from command line options (console=) @@ -423,75 +361,7 @@ static int console_msg_format = MSG_FORMAT_DEFAULT;  /* syslog_lock protects syslog_* variables and write access to clear_seq. */  static DEFINE_MUTEX(syslog_lock); -/* - * A flag to signify if printk_activate_kthreads() has already started the - * kthread printers. If true, any later registered consoles must start their - * own kthread directly. The flag is write protected by the console_lock. - */ -static bool printk_kthreads_available; -  #ifdef CONFIG_PRINTK -static atomic_t printk_prefer_direct = ATOMIC_INIT(0); - -/** - * printk_prefer_direct_enter - cause printk() calls to attempt direct - *                              printing to all enabled consoles - * - * Since it is not possible to call into the console printing code from any - * context, there is no guarantee that direct printing will occur. - * - * This globally effects all printk() callers. - * - * Context: Any context. - */ -void printk_prefer_direct_enter(void) -{ -	atomic_inc(&printk_prefer_direct); -} - -/** - * printk_prefer_direct_exit - restore printk() behavior - * - * Context: Any context. - */ -void printk_prefer_direct_exit(void) -{ -	WARN_ON(atomic_dec_if_positive(&printk_prefer_direct) < 0); -} - -/* - * Calling printk() always wakes kthread printers so that they can - * flush the new message to their respective consoles. Also, if direct - * printing is allowed, printk() tries to flush the messages directly. - * - * Direct printing is allowed in situations when the kthreads - * are not available or the system is in a problematic state. - * - * See the implementation about possible races. - */ -static inline bool allow_direct_printing(void) -{ -	/* -	 * Checking kthread availability is a possible race because the -	 * kthread printers can become permanently disabled during runtime. -	 * However, doing that requires holding the console_lock, so any -	 * pending messages will be direct printed by console_unlock(). -	 */ -	if (!printk_kthreads_available) -		return true; - -	/* -	 * Prefer direct printing when the system is in a problematic state. -	 * The context that sets this state will always see the updated value. -	 * The other contexts do not care. Anyway, direct printing is just a -	 * best effort. The direct output is only possible when console_lock -	 * is not already taken and no kthread printers are actively printing. -	 */ -	return (system_state > SYSTEM_RUNNING || -		oops_in_progress || -		atomic_read(&printk_prefer_direct)); -} -  DECLARE_WAIT_QUEUE_HEAD(log_wait);  /* All 3 protected by @syslog_lock. 
*/  /* the next printk record to read by syslog(READ) or /proc/kmsg */ @@ -2382,10 +2252,10 @@ asmlinkage int vprintk_emit(int facility, int level,  	printed_len = vprintk_store(facility, level, dev_info, fmt, args);  	/* If called from the scheduler, we can not call up(). */ -	if (!in_sched && allow_direct_printing()) { +	if (!in_sched) {  		/*  		 * The caller may be holding system-critical or -		 * timing-sensitive locks. Disable preemption during direct +		 * timing-sensitive locks. Disable preemption during  		 * printing of all remaining records to all consoles so that  		 * this context can return as soon as possible. Hopefully  		 * another printk() caller will take over the printing. @@ -2428,8 +2298,6 @@ EXPORT_SYMBOL(_printk);  static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress); -static void printk_start_kthread(struct console *con); -  #else /* CONFIG_PRINTK */  #define CONSOLE_LOG_MAX		0 @@ -2463,8 +2331,6 @@ static void call_console_driver(struct console *con, const char *text, size_t le  }  static bool suppress_message_printing(int level) { return false; }  static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) { return true; } -static void printk_start_kthread(struct console *con) { } -static bool allow_direct_printing(void) { return true; }  #endif /* CONFIG_PRINTK */ @@ -2683,14 +2549,6 @@ static int console_cpu_notify(unsigned int cpu)  		/* If trylock fails, someone else is doing the printing */  		if (console_trylock())  			console_unlock(); -		else { -			/* -			 * If a new CPU comes online, the conditions for -			 * printer_should_wake() may have changed for some -			 * kthread printer with !CON_ANYTIME. -			 */ -			wake_up_klogd(); -		}  	}  	return 0;  } @@ -2710,7 +2568,7 @@ void console_lock(void)  	down_console_sem();  	if (console_suspended)  		return; -	console_kthreads_block(); +	console_locked = 1;  	console_may_schedule = 1;  }  EXPORT_SYMBOL(console_lock); @@ -2731,30 +2589,15 @@ int console_trylock(void)  		up_console_sem();  		return 0;  	} -	if (!console_kthreads_atomic_tryblock()) { -		up_console_sem(); -		return 0; -	} +	console_locked = 1;  	console_may_schedule = 0;  	return 1;  }  EXPORT_SYMBOL(console_trylock); -/* - * This is used to help to make sure that certain paths within the VT code are - * running with the console lock held. It is definitely not the perfect debug - * tool (it is not known if the VT code is the task holding the console lock), - * but it helps tracking those weird code paths in the console code such as - * when the console is suspended: where the console is not locked but no - * console printing may occur. - * - * Note: This returns true when the console is suspended but is not locked. - *       This is intentional because the VT code must consider that situation - *       the same as if the console was locked. - */  int is_console_locked(void)  { -	return (console_kthreads_blocked || atomic_read(&console_kthreads_active)); +	return console_locked;  }  EXPORT_SYMBOL(is_console_locked); @@ -2777,9 +2620,18 @@ static bool abandon_console_lock_in_panic(void)  	return atomic_read(&panic_cpu) != raw_smp_processor_id();  } -static inline bool __console_is_usable(short flags) +/* + * Check if the given console is currently capable and allowed to print + * records. + * + * Requires the console_lock. 
+ */ +static inline bool console_is_usable(struct console *con)  { -	if (!(flags & CON_ENABLED)) +	if (!(con->flags & CON_ENABLED)) +		return false; + +	if (!con->write)  		return false;  	/* @@ -2788,43 +2640,15 @@ static inline bool __console_is_usable(short flags)  	 * cope (CON_ANYTIME) don't call them until this CPU is officially up.  	 */  	if (!cpu_online(raw_smp_processor_id()) && -	    !(flags & CON_ANYTIME)) +	    !(con->flags & CON_ANYTIME))  		return false;  	return true;  } -/* - * Check if the given console is currently capable and allowed to print - * records. - * - * Requires holding the console_lock. - */ -static inline bool console_is_usable(struct console *con) -{ -	if (!con->write) -		return false; - -	return __console_is_usable(con->flags); -} -  static void __console_unlock(void)  { -	/* -	 * Depending on whether console_lock() or console_trylock() was used, -	 * appropriately allow the kthread printers to continue. -	 */ -	if (console_kthreads_blocked) -		console_kthreads_unblock(); -	else -		console_kthreads_atomic_unblock(); - -	/* -	 * New records may have arrived while the console was locked. -	 * Wake the kthread printers to print them. -	 */ -	wake_up_klogd(); - +	console_locked = 0;  	up_console_sem();  } @@ -2842,19 +2666,17 @@ static void __console_unlock(void)   *   * @handover will be set to true if a printk waiter has taken over the   * console_lock, in which case the caller is no longer holding the - * console_lock. Otherwise it is set to false. A NULL pointer may be provided - * to disable allowing the console_lock to be taken over by a printk waiter. + * console_lock. Otherwise it is set to false.   *   * Returns false if the given console has no next record to print, otherwise   * true.   * - * Requires the console_lock if @handover is non-NULL. - * Requires con->lock otherwise. + * Requires the console_lock.   */ -static bool __console_emit_next_record(struct console *con, char *text, char *ext_text, -				       char *dropped_text, bool *handover) +static bool console_emit_next_record(struct console *con, char *text, char *ext_text, +				     char *dropped_text, bool *handover)  { -	static atomic_t panic_console_dropped = ATOMIC_INIT(0); +	static int panic_console_dropped;  	struct printk_info info;  	struct printk_record r;  	unsigned long flags; @@ -2863,8 +2685,7 @@ static bool __console_emit_next_record(struct console *con, char *text, char *ex  	prb_rec_init_rd(&r, &info, text, CONSOLE_LOG_MAX); -	if (handover) -		*handover = false; +	*handover = false;  	if (!prb_read_valid(prb, con->seq, &r))  		return false; @@ -2872,8 +2693,7 @@ static bool __console_emit_next_record(struct console *con, char *text, char *ex  	if (con->seq != r.info->seq) {  		con->dropped += r.info->seq - con->seq;  		con->seq = r.info->seq; -		if (panic_in_progress() && -		    atomic_fetch_inc_relaxed(&panic_console_dropped) > 10) { +		if (panic_in_progress() && panic_console_dropped++ > 10) {  			suppress_panic_printk = 1;  			pr_warn_once("Too many dropped messages. Suppress messages on non-panic CPUs to prevent livelock.\n");  		} @@ -2895,62 +2715,32 @@ static bool __console_emit_next_record(struct console *con, char *text, char *ex  		len = record_print_text(&r, console_msg_format & MSG_FORMAT_SYSLOG, printk_time);  	} -	if (handover) { -		/* -		 * While actively printing out messages, if another printk() -		 * were to occur on another CPU, it may wait for this one to -		 * finish. This task can not be preempted if there is a -		 * waiter waiting to take over. 
-		 * -		 * Interrupts are disabled because the hand over to a waiter -		 * must not be interrupted until the hand over is completed -		 * (@console_waiter is cleared). -		 */ -		printk_safe_enter_irqsave(flags); -		console_lock_spinning_enable(); - -		/* don't trace irqsoff print latency */ -		stop_critical_timings(); -	} +	/* +	 * While actively printing out messages, if another printk() +	 * were to occur on another CPU, it may wait for this one to +	 * finish. This task can not be preempted if there is a +	 * waiter waiting to take over. +	 * +	 * Interrupts are disabled because the hand over to a waiter +	 * must not be interrupted until the hand over is completed +	 * (@console_waiter is cleared). +	 */ +	printk_safe_enter_irqsave(flags); +	console_lock_spinning_enable(); +	stop_critical_timings();	/* don't trace print latency */  	call_console_driver(con, write_text, len, dropped_text); +	start_critical_timings();  	con->seq++; -	if (handover) { -		start_critical_timings(); -		*handover = console_lock_spinning_disable_and_check(); -		printk_safe_exit_irqrestore(flags); -	} +	*handover = console_lock_spinning_disable_and_check(); +	printk_safe_exit_irqrestore(flags);  skip:  	return true;  }  /* - * Print a record for a given console, but allow another printk() caller to - * take over the console_lock and continue printing. - * - * Requires the console_lock, but depending on @handover after the call, the - * caller may no longer have the console_lock. - * - * See __console_emit_next_record() for argument and return details. - */ -static bool console_emit_next_record_transferable(struct console *con, char *text, char *ext_text, -						  char *dropped_text, bool *handover) -{ -	/* -	 * Handovers are only supported if threaded printers are atomically -	 * blocked. The context taking over the console_lock may be atomic. -	 */ -	if (!console_kthreads_atomically_blocked()) { -		*handover = false; -		handover = NULL; -	} - -	return __console_emit_next_record(con, text, ext_text, dropped_text, handover); -} - -/*   * Print out all remaining records to all consoles.   *   * @do_cond_resched is set by the caller. It can be true only in schedulable @@ -2968,8 +2758,8 @@ static bool console_emit_next_record_transferable(struct console *con, char *tex   * were flushed to all usable consoles. A returned false informs the caller   * that everything was not flushed (either there were no usable consoles or   * another context has taken over printing or it is a panic situation and this - * is not the panic CPU or direct printing is not preferred). Regardless the - * reason, the caller should assume it is not useful to immediately try again. + * is not the panic CPU). Regardless the reason, the caller should assume it + * is not useful to immediately try again.   *   * Requires the console_lock.   */ @@ -2986,10 +2776,6 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove  	*handover = false;  	do { -		/* Let the kthread printers do the work if they can. */ -		if (!allow_direct_printing()) -			return false; -  		any_progress = false;  		for_each_console(con) { @@ -3001,11 +2787,13 @@ static bool console_flush_all(bool do_cond_resched, u64 *next_seq, bool *handove  			if (con->flags & CON_EXTENDED) {  				/* Extended consoles do not print "dropped messages". 
*/ -				progress = console_emit_next_record_transferable(con, &text[0], -								&ext_text[0], NULL, handover); +				progress = console_emit_next_record(con, &text[0], +								    &ext_text[0], NULL, +								    handover);  			} else { -				progress = console_emit_next_record_transferable(con, &text[0], -								NULL, &dropped_text[0], handover); +				progress = console_emit_next_record(con, &text[0], +								    NULL, &dropped_text[0], +								    handover);  			}  			if (*handover)  				return false; @@ -3120,13 +2908,10 @@ void console_unblank(void)  	if (oops_in_progress) {  		if (down_trylock_console_sem() != 0)  			return; -		if (!console_kthreads_atomic_tryblock()) { -			up_console_sem(); -			return; -		}  	} else  		console_lock(); +	console_locked = 1;  	console_may_schedule = 0;  	for_each_console(c)  		if ((c->flags & CON_ENABLED) && c->unblank) @@ -3405,10 +3190,6 @@ void register_console(struct console *newcon)  		nr_ext_console_drivers++;  	newcon->dropped = 0; -	newcon->thread = NULL; -	newcon->blocked = true; -	mutex_init(&newcon->lock); -  	if (newcon->flags & CON_PRINTBUFFER) {  		/* Get a consistent copy of @syslog_seq. */  		mutex_lock(&syslog_lock); @@ -3418,10 +3199,6 @@ void register_console(struct console *newcon)  		/* Begin with next message. */  		newcon->seq = prb_next_seq(prb);  	} - -	if (printk_kthreads_available) -		printk_start_kthread(newcon); -  	console_unlock();  	console_sysfs_notify(); @@ -3448,7 +3225,6 @@ EXPORT_SYMBOL(register_console);  int unregister_console(struct console *console)  { -	struct task_struct *thd;  	struct console *con;  	int res; @@ -3489,20 +3265,7 @@ int unregister_console(struct console *console)  		console_drivers->flags |= CON_CONSDEV;  	console->flags &= ~CON_ENABLED; - -	/* -	 * console->thread can only be cleared under the console lock. But -	 * stopping the thread must be done without the console lock. The -	 * task that clears @thread is the task that stops the kthread. -	 */ -	thd = console->thread; -	console->thread = NULL; -  	console_unlock(); - -	if (thd) -		kthread_stop(thd); -  	console_sysfs_notify();  	if (console->exit) @@ -3598,20 +3361,6 @@ static int __init printk_late_init(void)  }  late_initcall(printk_late_init); -static int __init printk_activate_kthreads(void) -{ -	struct console *con; - -	console_lock(); -	printk_kthreads_available = true; -	for_each_console(con) -		printk_start_kthread(con); -	console_unlock(); - -	return 0; -} -early_initcall(printk_activate_kthreads); -  #if defined CONFIG_PRINTK  /* If @con is specified, only wait for that console. Otherwise wait for all. */  static bool __pr_flush(struct console *con, int timeout_ms, bool reset_on_progress) @@ -3686,206 +3435,11 @@ bool pr_flush(int timeout_ms, bool reset_on_progress)  }  EXPORT_SYMBOL(pr_flush); -static void __printk_fallback_preferred_direct(void) -{ -	printk_prefer_direct_enter(); -	pr_err("falling back to preferred direct printing\n"); -	printk_kthreads_available = false; -} - -/* - * Enter preferred direct printing, but never exit. Mark console threads as - * unavailable. The system is then forever in preferred direct printing and - * any printing threads will exit. - * - * Must *not* be called under console_lock. Use - * __printk_fallback_preferred_direct() if already holding console_lock. 
- */ -static void printk_fallback_preferred_direct(void) -{ -	console_lock(); -	__printk_fallback_preferred_direct(); -	console_unlock(); -} - -/* - * Print a record for a given console, not allowing another printk() caller - * to take over. This is appropriate for contexts that do not have the - * console_lock. - * - * See __console_emit_next_record() for argument and return details. - */ -static bool console_emit_next_record(struct console *con, char *text, char *ext_text, -				     char *dropped_text) -{ -	return __console_emit_next_record(con, text, ext_text, dropped_text, NULL); -} - -static bool printer_should_wake(struct console *con, u64 seq) -{ -	short flags; - -	if (kthread_should_stop() || !printk_kthreads_available) -		return true; - -	if (con->blocked || -	    console_kthreads_atomically_blocked()) { -		return false; -	} - -	/* -	 * This is an unsafe read from con->flags, but a false positive is -	 * not a problem. Worst case it would allow the printer to wake up -	 * although it is disabled. But the printer will notice that when -	 * attempting to print and instead go back to sleep. -	 */ -	flags = data_race(READ_ONCE(con->flags)); - -	if (!__console_is_usable(flags)) -		return false; - -	return prb_read_valid(prb, seq, NULL); -} - -static int printk_kthread_func(void *data) -{ -	struct console *con = data; -	char *dropped_text = NULL; -	char *ext_text = NULL; -	u64 seq = 0; -	char *text; -	int error; - -	text = kmalloc(CONSOLE_LOG_MAX, GFP_KERNEL); -	if (!text) { -		con_printk(KERN_ERR, con, "failed to allocate text buffer\n"); -		printk_fallback_preferred_direct(); -		goto out; -	} - -	if (con->flags & CON_EXTENDED) { -		ext_text = kmalloc(CONSOLE_EXT_LOG_MAX, GFP_KERNEL); -		if (!ext_text) { -			con_printk(KERN_ERR, con, "failed to allocate ext_text buffer\n"); -			printk_fallback_preferred_direct(); -			goto out; -		} -	} else { -		dropped_text = kmalloc(DROPPED_TEXT_MAX, GFP_KERNEL); -		if (!dropped_text) { -			con_printk(KERN_ERR, con, "failed to allocate dropped_text buffer\n"); -			printk_fallback_preferred_direct(); -			goto out; -		} -	} - -	con_printk(KERN_INFO, con, "printing thread started\n"); - -	for (;;) { -		/* -		 * Guarantee this task is visible on the waitqueue before -		 * checking the wake condition. -		 * -		 * The full memory barrier within set_current_state() of -		 * prepare_to_wait_event() pairs with the full memory barrier -		 * within wq_has_sleeper(). -		 * -		 * This pairs with __wake_up_klogd:A. -		 */ -		error = wait_event_interruptible(log_wait, -				printer_should_wake(con, seq)); /* LMM(printk_kthread_func:A) */ - -		if (kthread_should_stop() || !printk_kthreads_available) -			break; - -		if (error) -			continue; - -		error = mutex_lock_interruptible(&con->lock); -		if (error) -			continue; - -		if (con->blocked || -		    !console_kthread_printing_tryenter()) { -			/* Another context has locked the console_lock. */ -			mutex_unlock(&con->lock); -			continue; -		} - -		/* -		 * Although this context has not locked the console_lock, it -		 * is known that the console_lock is not locked and it is not -		 * possible for any other context to lock the console_lock. -		 * Therefore it is safe to read con->flags. -		 */ - -		if (!__console_is_usable(con->flags)) { -			console_kthread_printing_exit(); -			mutex_unlock(&con->lock); -			continue; -		} - -		/* -		 * Even though the printk kthread is always preemptible, it is -		 * still not allowed to call cond_resched() from within -		 * console drivers. 
The task may become non-preemptible in the -		 * console driver call chain. For example, vt_console_print() -		 * takes a spinlock and then can call into fbcon_redraw(), -		 * which can conditionally invoke cond_resched(). -		 */ -		console_may_schedule = 0; -		console_emit_next_record(con, text, ext_text, dropped_text); - -		seq = con->seq; - -		console_kthread_printing_exit(); - -		mutex_unlock(&con->lock); -	} - -	con_printk(KERN_INFO, con, "printing thread stopped\n"); -out: -	kfree(dropped_text); -	kfree(ext_text); -	kfree(text); - -	console_lock(); -	/* -	 * If this kthread is being stopped by another task, con->thread will -	 * already be NULL. That is fine. The important thing is that it is -	 * NULL after the kthread exits. -	 */ -	con->thread = NULL; -	console_unlock(); - -	return 0; -} - -/* Must be called under console_lock. */ -static void printk_start_kthread(struct console *con) -{ -	/* -	 * Do not start a kthread if there is no write() callback. The -	 * kthreads assume the write() callback exists. -	 */ -	if (!con->write) -		return; - -	con->thread = kthread_run(printk_kthread_func, con, -				  "pr/%s%d", con->name, con->index); -	if (IS_ERR(con->thread)) { -		con->thread = NULL; -		con_printk(KERN_ERR, con, "unable to start printing thread\n"); -		__printk_fallback_preferred_direct(); -		return; -	} -} -  /*   * Delayed printk version, for scheduler-internal messages:   */ -#define PRINTK_PENDING_WAKEUP		0x01 -#define PRINTK_PENDING_DIRECT_OUTPUT	0x02 +#define PRINTK_PENDING_WAKEUP	0x01 +#define PRINTK_PENDING_OUTPUT	0x02  static DEFINE_PER_CPU(int, printk_pending); @@ -3893,14 +3447,10 @@ static void wake_up_klogd_work_func(struct irq_work *irq_work)  {  	int pending = this_cpu_xchg(printk_pending, 0); -	if (pending & PRINTK_PENDING_DIRECT_OUTPUT) { -		printk_prefer_direct_enter(); - +	if (pending & PRINTK_PENDING_OUTPUT) {  		/* If trylock fails, someone else is doing the printing */  		if (console_trylock())  			console_unlock(); - -		printk_prefer_direct_exit();  	}  	if (pending & PRINTK_PENDING_WAKEUP) @@ -3925,11 +3475,10 @@ static void __wake_up_klogd(int val)  	 * prepare_to_wait_event(), which is called after ___wait_event() adds  	 * the waiter but before it has checked the wait condition.  	 * -	 * This pairs with devkmsg_read:A, syslog_print:A, and -	 * printk_kthread_func:A. +	 * This pairs with devkmsg_read:A and syslog_print:A.  	 */  	if (wq_has_sleeper(&log_wait) || /* LMM(__wake_up_klogd:A) */ -	    (val & PRINTK_PENDING_DIRECT_OUTPUT)) { +	    (val & PRINTK_PENDING_OUTPUT)) {  		this_cpu_or(printk_pending, val);  		irq_work_queue(this_cpu_ptr(&wake_up_klogd_work));  	} @@ -3947,17 +3496,7 @@ void defer_console_output(void)  	 * New messages may have been added directly to the ringbuffer  	 * using vprintk_store(), so wake any waiters as well.  	 */ -	int val = PRINTK_PENDING_WAKEUP; - -	/* -	 * Make sure that some context will print the messages when direct -	 * printing is allowed. This happens in situations when the kthreads -	 * may not be as reliable or perhaps unusable. 
-	 */ -	if (allow_direct_printing()) -		val |= PRINTK_PENDING_DIRECT_OUTPUT; - -	__wake_up_klogd(val); +	__wake_up_klogd(PRINTK_PENDING_WAKEUP | PRINTK_PENDING_OUTPUT);  }  void printk_trigger_flush(void) diff --git a/kernel/rcu/tree_stall.h b/kernel/rcu/tree_stall.h index 4995c078cff9..a001e1e7a992 100644 --- a/kernel/rcu/tree_stall.h +++ b/kernel/rcu/tree_stall.h @@ -647,7 +647,6 @@ static void print_cpu_stall(unsigned long gps)  	 * See Documentation/RCU/stallwarn.rst for info on how to debug  	 * RCU CPU stall warnings.  	 */ -	printk_prefer_direct_enter();  	trace_rcu_stall_warning(rcu_state.name, TPS("SelfDetected"));  	pr_err("INFO: %s self-detected stall on CPU\n", rcu_state.name);  	raw_spin_lock_irqsave_rcu_node(rdp->mynode, flags); @@ -685,7 +684,6 @@ static void print_cpu_stall(unsigned long gps)  	 */  	set_tsk_need_resched(current);  	set_preempt_need_resched(); -	printk_prefer_direct_exit();  }  static void check_cpu_stall(struct rcu_data *rdp) diff --git a/kernel/reboot.c b/kernel/reboot.c index a091145ee710..3c35445bf5ad 100644 --- a/kernel/reboot.c +++ b/kernel/reboot.c @@ -315,6 +315,43 @@ static int sys_off_notify(struct notifier_block *nb,  	return handler->sys_off_cb(&data);  } +static struct sys_off_handler platform_sys_off_handler; + +static struct sys_off_handler *alloc_sys_off_handler(int priority) +{ +	struct sys_off_handler *handler; +	gfp_t flags; + +	/* +	 * Platforms like m68k can't allocate sys_off handler dynamically +	 * at the early boot time because memory allocator isn't available yet. +	 */ +	if (priority == SYS_OFF_PRIO_PLATFORM) { +		handler = &platform_sys_off_handler; +		if (handler->cb_data) +			return ERR_PTR(-EBUSY); +	} else { +		if (system_state > SYSTEM_RUNNING) +			flags = GFP_ATOMIC; +		else +			flags = GFP_KERNEL; + +		handler = kzalloc(sizeof(*handler), flags); +		if (!handler) +			return ERR_PTR(-ENOMEM); +	} + +	return handler; +} + +static void free_sys_off_handler(struct sys_off_handler *handler) +{ +	if (handler == &platform_sys_off_handler) +		memset(handler, 0, sizeof(*handler)); +	else +		kfree(handler); +} +  /**   *	register_sys_off_handler - Register sys-off handler   *	@mode: Sys-off mode @@ -345,9 +382,9 @@ register_sys_off_handler(enum sys_off_mode mode,  	struct sys_off_handler *handler;  	int err; -	handler = kzalloc(sizeof(*handler), GFP_KERNEL); -	if (!handler) -		return ERR_PTR(-ENOMEM); +	handler = alloc_sys_off_handler(priority); +	if (IS_ERR(handler)) +		return handler;  	switch (mode) {  	case SYS_OFF_MODE_POWER_OFF_PREPARE: @@ -364,7 +401,7 @@ register_sys_off_handler(enum sys_off_mode mode,  		break;  	default: -		kfree(handler); +		free_sys_off_handler(handler);  		return ERR_PTR(-EINVAL);  	} @@ -391,7 +428,7 @@ register_sys_off_handler(enum sys_off_mode mode,  	}  	if (err) { -		kfree(handler); +		free_sys_off_handler(handler);  		return ERR_PTR(err);  	} @@ -409,7 +446,7 @@ void unregister_sys_off_handler(struct sys_off_handler *handler)  {  	int err; -	if (!handler) +	if (IS_ERR_OR_NULL(handler))  		return;  	if (handler->blocking) @@ -422,7 +459,7 @@ void unregister_sys_off_handler(struct sys_off_handler *handler)  	/* sanity check, shall never happen */  	WARN_ON(err); -	kfree(handler); +	free_sys_off_handler(handler);  }  EXPORT_SYMBOL_GPL(unregister_sys_off_handler); @@ -584,7 +621,23 @@ static void do_kernel_power_off_prepare(void)   */  void do_kernel_power_off(void)  { +	struct sys_off_handler *sys_off = NULL; + +	/* +	 * Register sys-off handlers for legacy PM callback. 
This allows +	 * legacy PM callbacks temporary co-exist with the new sys-off API. +	 * +	 * TODO: Remove legacy handlers once all legacy PM users will be +	 *       switched to the sys-off based APIs. +	 */ +	if (pm_power_off) +		sys_off = register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, +						   SYS_OFF_PRIO_DEFAULT, +						   legacy_pm_power_off, NULL); +  	atomic_notifier_call_chain(&power_off_handler_list, 0, NULL); + +	unregister_sys_off_handler(sys_off);  }  /** @@ -595,7 +648,8 @@ void do_kernel_power_off(void)   */  bool kernel_can_power_off(void)  { -	return !atomic_notifier_call_chain_is_empty(&power_off_handler_list); +	return !atomic_notifier_call_chain_is_empty(&power_off_handler_list) || +		pm_power_off;  }  EXPORT_SYMBOL_GPL(kernel_can_power_off); @@ -630,7 +684,6 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,  		void __user *, arg)  {  	struct pid_namespace *pid_ns = task_active_pid_ns(current); -	struct sys_off_handler *sys_off = NULL;  	char buffer[256];  	int ret = 0; @@ -655,21 +708,6 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,  	if (ret)  		return ret; -	/* -	 * Register sys-off handlers for legacy PM callback. This allows -	 * legacy PM callbacks temporary co-exist with the new sys-off API. -	 * -	 * TODO: Remove legacy handlers once all legacy PM users will be -	 *       switched to the sys-off based APIs. -	 */ -	if (pm_power_off) { -		sys_off = register_sys_off_handler(SYS_OFF_MODE_POWER_OFF, -						   SYS_OFF_PRIO_DEFAULT, -						   legacy_pm_power_off, NULL); -		if (IS_ERR(sys_off)) -			return PTR_ERR(sys_off); -	} -  	/* Instead of trying to make the power_off code look like  	 * halt when pm_power_off is not set do it the easy way.  	 */ @@ -727,7 +765,6 @@ SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,  		break;  	}  	mutex_unlock(&system_transition_mutex); -	unregister_sys_off_handler(sys_off);  	return ret;  } @@ -782,11 +819,9 @@ static int __orderly_reboot(void)  	ret = run_cmd(reboot_cmd);  	if (ret) { -		printk_prefer_direct_enter();  		pr_warn("Failed to start orderly reboot: forcing the issue\n");  		emergency_sync();  		kernel_restart(NULL); -		printk_prefer_direct_exit();  	}  	return ret; @@ -799,7 +834,6 @@ static int __orderly_poweroff(bool force)  	ret = run_cmd(poweroff_cmd);  	if (ret && force) { -		printk_prefer_direct_enter();  		pr_warn("Failed to start orderly shutdown: forcing the issue\n");  		/* @@ -809,7 +843,6 @@ static int __orderly_poweroff(bool force)  		 */  		emergency_sync();  		kernel_power_off(); -		printk_prefer_direct_exit();  	}  	return ret; @@ -867,8 +900,6 @@ EXPORT_SYMBOL_GPL(orderly_reboot);   */  static void hw_failure_emergency_poweroff_func(struct work_struct *work)  { -	printk_prefer_direct_enter(); -  	/*  	 * We have reached here after the emergency shutdown waiting period has  	 * expired. This means orderly_poweroff has not been able to shut off @@ -885,8 +916,6 @@ static void hw_failure_emergency_poweroff_func(struct work_struct *work)  	 */  	pr_emerg("Hardware protection shutdown failed. Trying emergency restart\n");  	emergency_restart(); - -	printk_prefer_direct_exit();  }  static DECLARE_DELAYED_WORK(hw_failure_emergency_poweroff_work, @@ -925,13 +954,11 @@ void hw_protection_shutdown(const char *reason, int ms_until_forced)  {  	static atomic_t allow_proceed = ATOMIC_INIT(1); -	printk_prefer_direct_enter(); -  	pr_emerg("HARDWARE PROTECTION shutdown (%s)\n", reason);  	/* Shutdown should be initiated only once. 
*/  	if (!atomic_dec_and_test(&allow_proceed)) -		goto out; +		return;  	/*  	 * Queue a backup emergency shutdown in the event of @@ -939,8 +966,6 @@ void hw_protection_shutdown(const char *reason, int ms_until_forced)  	 */  	hw_failure_emergency_poweroff(ms_until_forced);  	orderly_poweroff(true); -out: -	printk_prefer_direct_exit();  }  EXPORT_SYMBOL_GPL(hw_protection_shutdown); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index bfa7452ca92e..da0bf6fe9ecd 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4798,25 +4798,55 @@ static void do_balance_callbacks(struct rq *rq, struct callback_head *head)  static void balance_push(struct rq *rq); +/* + * balance_push_callback is a right abuse of the callback interface and plays + * by significantly different rules. + * + * Where the normal balance_callback's purpose is to be ran in the same context + * that queued it (only later, when it's safe to drop rq->lock again), + * balance_push_callback is specifically targeted at __schedule(). + * + * This abuse is tolerated because it places all the unlikely/odd cases behind + * a single test, namely: rq->balance_callback == NULL. + */  struct callback_head balance_push_callback = {  	.next = NULL,  	.func = (void (*)(struct callback_head *))balance_push,  }; -static inline struct callback_head *splice_balance_callbacks(struct rq *rq) +static inline struct callback_head * +__splice_balance_callbacks(struct rq *rq, bool split)  {  	struct callback_head *head = rq->balance_callback; +	if (likely(!head)) +		return NULL; +  	lockdep_assert_rq_held(rq); -	if (head) +	/* +	 * Must not take balance_push_callback off the list when +	 * splice_balance_callbacks() and balance_callbacks() are not +	 * in the same rq->lock section. +	 * +	 * In that case it would be possible for __schedule() to interleave +	 * and observe the list empty. +	 */ +	if (split && head == &balance_push_callback) +		head = NULL; +	else  		rq->balance_callback = NULL;  	return head;  } +static inline struct callback_head *splice_balance_callbacks(struct rq *rq) +{ +	return __splice_balance_callbacks(rq, true); +} +  static void __balance_callbacks(struct rq *rq)  { -	do_balance_callbacks(rq, splice_balance_callbacks(rq)); +	do_balance_callbacks(rq, __splice_balance_callbacks(rq, false));  }  static inline void balance_callbacks(struct rq *rq, struct callback_head *head) diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 01259611beb9..47b89a0fc6e5 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1693,6 +1693,11 @@ queue_balance_callback(struct rq *rq,  {  	lockdep_assert_rq_held(rq); +	/* +	 * Don't (re)queue an already queued item; nor queue anything when +	 * balance_push() is active, see the comment with +	 * balance_push_callback. 
+	 */  	if (unlikely(head->next || rq->balance_callback == &balance_push_callback))  		return; diff --git a/kernel/trace/blktrace.c b/kernel/trace/blktrace.c index 10a32b0f2deb..fe04c6f96ca5 100644 --- a/kernel/trace/blktrace.c +++ b/kernel/trace/blktrace.c @@ -770,14 +770,11 @@ int blk_trace_ioctl(struct block_device *bdev, unsigned cmd, char __user *arg)   **/  void blk_trace_shutdown(struct request_queue *q)  { -	mutex_lock(&q->debugfs_mutex);  	if (rcu_dereference_protected(q->blk_trace,  				      lockdep_is_held(&q->debugfs_mutex))) {  		__blk_trace_startstop(q, 0);  		__blk_trace_remove(q);  	} - -	mutex_unlock(&q->debugfs_mutex);  }  #ifdef CONFIG_BLK_CGROUP diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 10b157a6d73e..88589d74a892 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -2263,11 +2263,11 @@ static int copy_user_syms(struct user_syms *us, unsigned long __user *usyms, u32  	int err = -ENOMEM;  	unsigned int i; -	syms = kvmalloc(cnt * sizeof(*syms), GFP_KERNEL); +	syms = kvmalloc_array(cnt, sizeof(*syms), GFP_KERNEL);  	if (!syms)  		goto error; -	buf = kvmalloc(cnt * KSYM_NAME_LEN, GFP_KERNEL); +	buf = kvmalloc_array(cnt, KSYM_NAME_LEN, GFP_KERNEL);  	if (!buf)  		goto error; @@ -2423,7 +2423,7 @@ kprobe_multi_link_handler(struct fprobe *fp, unsigned long entry_ip,  	kprobe_multi_link_prog_run(link, entry_ip, regs);  } -static int symbols_cmp(const void *a, const void *b) +static int symbols_cmp_r(const void *a, const void *b, const void *priv)  {  	const char **str_a = (const char **) a;  	const char **str_b = (const char **) b; @@ -2431,6 +2431,28 @@ static int symbols_cmp(const void *a, const void *b)  	return strcmp(*str_a, *str_b);  } +struct multi_symbols_sort { +	const char **funcs; +	u64 *cookies; +}; + +static void symbols_swap_r(void *a, void *b, int size, const void *priv) +{ +	const struct multi_symbols_sort *data = priv; +	const char **name_a = a, **name_b = b; + +	swap(*name_a, *name_b); + +	/* If defined, swap also related cookies. 
*/ +	if (data->cookies) { +		u64 *cookie_a, *cookie_b; + +		cookie_a = data->cookies + (name_a - data->funcs); +		cookie_b = data->cookies + (name_b - data->funcs); +		swap(*cookie_a, *cookie_b); +	} +} +  int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)  {  	struct bpf_kprobe_multi_link *link = NULL; @@ -2464,42 +2486,50 @@ int bpf_kprobe_multi_link_attach(const union bpf_attr *attr, struct bpf_prog *pr  		return -EINVAL;  	size = cnt * sizeof(*addrs); -	addrs = kvmalloc(size, GFP_KERNEL); +	addrs = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL);  	if (!addrs)  		return -ENOMEM; +	ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies); +	if (ucookies) { +		cookies = kvmalloc_array(cnt, sizeof(*addrs), GFP_KERNEL); +		if (!cookies) { +			err = -ENOMEM; +			goto error; +		} +		if (copy_from_user(cookies, ucookies, size)) { +			err = -EFAULT; +			goto error; +		} +	} +  	if (uaddrs) {  		if (copy_from_user(addrs, uaddrs, size)) {  			err = -EFAULT;  			goto error;  		}  	} else { +		struct multi_symbols_sort data = { +			.cookies = cookies, +		};  		struct user_syms us;  		err = copy_user_syms(&us, usyms, cnt);  		if (err)  			goto error; -		sort(us.syms, cnt, sizeof(*us.syms), symbols_cmp, NULL); +		if (cookies) +			data.funcs = us.syms; + +		sort_r(us.syms, cnt, sizeof(*us.syms), symbols_cmp_r, +		       symbols_swap_r, &data); +  		err = ftrace_lookup_symbols(us.syms, cnt, addrs);  		free_user_syms(&us);  		if (err)  			goto error;  	} -	ucookies = u64_to_user_ptr(attr->link_create.kprobe_multi.cookies); -	if (ucookies) { -		cookies = kvmalloc(size, GFP_KERNEL); -		if (!cookies) { -			err = -ENOMEM; -			goto error; -		} -		if (copy_from_user(cookies, ucookies, size)) { -			err = -EFAULT; -			goto error; -		} -	} -  	link = kzalloc(sizeof(*link), GFP_KERNEL);  	if (!link) {  		err = -ENOMEM; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e750fe141a60..601ccf1b2f09 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -8029,15 +8029,23 @@ static int kallsyms_callback(void *data, const char *name,  			     struct module *mod, unsigned long addr)  {  	struct kallsyms_data *args = data; +	const char **sym; +	int idx; -	if (!bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp)) +	sym = bsearch(&name, args->syms, args->cnt, sizeof(*args->syms), symbols_cmp); +	if (!sym) +		return 0; + +	idx = sym - args->syms; +	if (args->addrs[idx])  		return 0;  	addr = ftrace_location(addr);  	if (!addr)  		return 0; -	args->addrs[args->found++] = addr; +	args->addrs[idx] = addr; +	args->found++;  	return args->found == args->cnt ? 1 : 0;  } @@ -8062,6 +8070,7 @@ int ftrace_lookup_symbols(const char **sorted_syms, size_t cnt, unsigned long *a  	struct kallsyms_data args;  	int err; +	memset(addrs, 0, sizeof(*addrs) * cnt);  	args.addrs = addrs;  	args.syms = sorted_syms;  	args.cnt = cnt; diff --git a/kernel/trace/rethook.c b/kernel/trace/rethook.c index b56833700d23..c69d82273ce7 100644 --- a/kernel/trace/rethook.c +++ b/kernel/trace/rethook.c @@ -154,6 +154,15 @@ struct rethook_node *rethook_try_get(struct rethook *rh)  	if (unlikely(!handler))  		return NULL; +	/* +	 * This expects the caller will set up a rethook on a function entry. +	 * When the function returns, the rethook will eventually be reclaimed +	 * or released in the rethook_recycle() with call_rcu(). +	 * This means the caller must be run in the RCU-availabe context. 
+	 */ +	if (unlikely(!rcu_is_watching())) +		return NULL; +  	fn = freelist_try_get(&rh->pool);  	if (!fn)  		return NULL; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 2c95992e2c71..a8cfac0611bc 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -6424,9 +6424,7 @@ int tracing_set_tracer(struct trace_array *tr, const char *buf)  		synchronize_rcu();  		free_snapshot(tr);  	} -#endif -#ifdef CONFIG_TRACER_MAX_TRACE  	if (t->use_max_tr && !had_max_tr) {  		ret = tracing_alloc_snapshot_instance(tr);  		if (ret < 0) diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 93507330462c..a245ea673715 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1718,8 +1718,17 @@ static int  kretprobe_dispatcher(struct kretprobe_instance *ri, struct pt_regs *regs)  {  	struct kretprobe *rp = get_kretprobe(ri); -	struct trace_kprobe *tk = container_of(rp, struct trace_kprobe, rp); +	struct trace_kprobe *tk; + +	/* +	 * There is a small chance that get_kretprobe(ri) returns NULL when +	 * the kretprobe is unregister on another CPU between kretprobe's +	 * trampoline_handler and this function. +	 */ +	if (unlikely(!rp)) +		return 0; +	tk = container_of(rp, struct trace_kprobe, rp);  	raw_cpu_inc(*tk->nhit);  	if (trace_probe_test_flag(&tk->tp, TP_FLAG_TRACE)) diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index 9711589273cd..c3dc4f859a6b 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -546,7 +546,6 @@ static int __trace_uprobe_create(int argc, const char **argv)  	bool is_return = false;  	int i, ret; -	ret = 0;  	ref_ctr_offset = 0;  	switch (argv[0][0]) { diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 20a7a55e62b6..ecb0e8346e65 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -424,8 +424,6 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)  		/* Start period for the next softlockup warning. */  		update_report_ts(); -		printk_prefer_direct_enter(); -  		pr_emerg("BUG: soft lockup - CPU#%d stuck for %us! [%s:%d]\n",  			smp_processor_id(), duration,  			current->comm, task_pid_nr(current)); @@ -444,8 +442,6 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer)  		add_taint(TAINT_SOFTLOCKUP, LOCKDEP_STILL_OK);  		if (softlockup_panic)  			panic("softlockup: hung tasks"); - -		printk_prefer_direct_exit();  	}  	return HRTIMER_RESTART; diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index 701f35f0e2d4..247bf0b1582c 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -135,8 +135,6 @@ static void watchdog_overflow_callback(struct perf_event *event,  		if (__this_cpu_read(hard_watchdog_warn) == true)  			return; -		printk_prefer_direct_enter(); -  		pr_emerg("Watchdog detected hard LOCKUP on cpu %d\n",  			 this_cpu);  		print_modules(); @@ -157,8 +155,6 @@ static void watchdog_overflow_callback(struct perf_event *event,  		if (hardlockup_panic)  			nmi_panic(regs, "Hard LOCKUP"); -		printk_prefer_direct_exit(); -  		__this_cpu_write(hard_watchdog_warn, true);  		return;  	} diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 4056f2a3f9d5..1ea50f6be843 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -2788,13 +2788,13 @@ static bool flush_workqueue_prep_pwqs(struct workqueue_struct *wq,  }  /** - * flush_workqueue - ensure that any scheduled work has run to completion. + * __flush_workqueue - ensure that any scheduled work has run to completion.   
* @wq: workqueue to flush   *   * This function sleeps until all work items which were queued on entry   * have finished execution, but it is not livelocked by new incoming ones.   */ -void flush_workqueue(struct workqueue_struct *wq) +void __flush_workqueue(struct workqueue_struct *wq)  {  	struct wq_flusher this_flusher = {  		.list = LIST_HEAD_INIT(this_flusher.list), @@ -2943,7 +2943,7 @@ void flush_workqueue(struct workqueue_struct *wq)  out_unlock:  	mutex_unlock(&wq->mutex);  } -EXPORT_SYMBOL(flush_workqueue); +EXPORT_SYMBOL(__flush_workqueue);  /**   * drain_workqueue - drain a workqueue @@ -2971,7 +2971,7 @@ void drain_workqueue(struct workqueue_struct *wq)  		wq->flags |= __WQ_DRAINING;  	mutex_unlock(&wq->mutex);  reflush: -	flush_workqueue(wq); +	__flush_workqueue(wq);  	mutex_lock(&wq->mutex); @@ -6111,3 +6111,11 @@ void __init workqueue_init(void)  	wq_online = true;  	wq_watchdog_init();  } + +/* + * Despite the naming, this is a no-op function which is here only for avoiding + * link error. Since compile-time warning may fail to catch, we will need to + * emit run-time warning from __flush_workqueue(). + */ +void __warn_flushing_systemwide_wq(void) { } +EXPORT_SYMBOL(__warn_flushing_systemwide_wq);  | 
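
The diff above adds kernel/platform-feature.c, a small bitmap-backed feature registry exporting platform_set(), platform_clear() and platform_has(). A minimal caller sketch follows; PLATFORM_FEAT_EXAMPLE is a hypothetical enumerator standing in for a real feature index below PLATFORM_FEAT_N in <linux/platform-feature.h>:

	/*
	 * Sketch only, not part of the patch: PLATFORM_FEAT_EXAMPLE is a
	 * hypothetical feature bit; real callers use the enumerators
	 * declared in <linux/platform-feature.h>.
	 */
	#include <linux/platform-feature.h>

	static int example_platform_probe(void)
	{
		/* Producer side: advertise the capability once it is known. */
		platform_set(PLATFORM_FEAT_EXAMPLE);
		return 0;
	}

	static bool example_needs_bounce_buffers(void)
	{
		/* Consumer side: a plain test_bit() underneath, usable in any context. */
		return platform_has(PLATFORM_FEAT_EXAMPLE);
	}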