Diffstat (limited to 'kernel')
36 files changed, 294 insertions, 148 deletions
diff --git a/kernel/audit.c b/kernel/audit.c index 34c5a2310fbf..3392d3e0254a 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -182,7 +182,7 @@ struct audit_buffer {  struct audit_reply {  	__u32 portid; -	pid_t pid; +	struct net *net;	  	struct sk_buff *skb;  }; @@ -500,7 +500,7 @@ int audit_send_list(void *_dest)  {  	struct audit_netlink_list *dest = _dest;  	struct sk_buff *skb; -	struct net *net = get_net_ns_by_pid(dest->pid); +	struct net *net = dest->net;  	struct audit_net *aunet = net_generic(net, audit_net_id);  	/* wait for parent to finish and send an ACK */ @@ -510,6 +510,7 @@ int audit_send_list(void *_dest)  	while ((skb = __skb_dequeue(&dest->q)) != NULL)  		netlink_unicast(aunet->nlsk, skb, dest->portid, 0); +	put_net(net);  	kfree(dest);  	return 0; @@ -543,7 +544,7 @@ out_kfree_skb:  static int audit_send_reply_thread(void *arg)  {  	struct audit_reply *reply = (struct audit_reply *)arg; -	struct net *net = get_net_ns_by_pid(reply->pid); +	struct net *net = reply->net;  	struct audit_net *aunet = net_generic(net, audit_net_id);  	mutex_lock(&audit_cmd_mutex); @@ -552,12 +553,13 @@ static int audit_send_reply_thread(void *arg)  	/* Ignore failure. It'll only happen if the sender goes away,  	   because our timeout is set to infinite. */  	netlink_unicast(aunet->nlsk , reply->skb, reply->portid, 0); +	put_net(net);  	kfree(reply);  	return 0;  }  /**   * audit_send_reply - send an audit reply message via netlink - * @portid: netlink port to which to send reply + * @request_skb: skb of request we are replying to (used to target the reply)   * @seq: sequence number   * @type: audit message type   * @done: done (last) flag @@ -568,9 +570,11 @@ static int audit_send_reply_thread(void *arg)   * Allocates an skb, builds the netlink message, and sends it to the port id.   * No failure notifications.   
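The audit changes here replace the get_net_ns_by_pid() lookup in the reply kthread with a namespace reference captured from the request skb itself: the reply is targeted at NETLINK_CB(request_skb).portid, and the sender's namespace is pinned with get_net() until the deferred sender has used it. A minimal sketch of that idiom follows, with a hypothetical my_reply structure standing in for audit_reply; only NETLINK_CB(), sock_net(), get_net()/put_net() and netlink_unicast() are the real kernel APIs the patch relies on.

#include <linux/netlink.h>
#include <net/net_namespace.h>
#include <net/sock.h>

struct my_reply {			/* hypothetical, mirrors audit_reply */
	__u32 portid;
	struct net *net;		/* pinned with get_net() */
	struct sk_buff *skb;
};

/* capture the reply target from the request while still in the handler */
static void my_prepare_reply(struct sk_buff *request_skb, struct my_reply *r,
			     struct sk_buff *reply_skb)
{
	r->net    = get_net(sock_net(NETLINK_CB(request_skb).sk));
	r->portid = NETLINK_CB(request_skb).portid;
	r->skb    = reply_skb;
}

/* later, possibly from a kthread: send the reply and drop the pin */
static void my_send_reply(struct my_reply *r, struct sock *nlsk)
{
	netlink_unicast(nlsk, r->skb, r->portid, 0);
	put_net(r->net);		/* balances get_net() above */
}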
*/ -static void audit_send_reply(__u32 portid, int seq, int type, int done, +static void audit_send_reply(struct sk_buff *request_skb, int seq, int type, int done,  			     int multi, const void *payload, int size)  { +	u32 portid = NETLINK_CB(request_skb).portid; +	struct net *net = sock_net(NETLINK_CB(request_skb).sk);  	struct sk_buff *skb;  	struct task_struct *tsk;  	struct audit_reply *reply = kmalloc(sizeof(struct audit_reply), @@ -583,8 +587,8 @@ static void audit_send_reply(__u32 portid, int seq, int type, int done,  	if (!skb)  		goto out; +	reply->net = get_net(net);  	reply->portid = portid; -	reply->pid = task_pid_vnr(current);  	reply->skb = skb;  	tsk = kthread_run(audit_send_reply_thread, reply, "audit_send_reply"); @@ -673,8 +677,7 @@ static int audit_get_feature(struct sk_buff *skb)  	seq = nlmsg_hdr(skb)->nlmsg_seq; -	audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0, -			 &af, sizeof(af)); +	audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &af, sizeof(af));  	return 0;  } @@ -794,8 +797,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  		s.backlog		= skb_queue_len(&audit_skb_queue);  		s.version		= AUDIT_VERSION_LATEST;  		s.backlog_wait_time	= audit_backlog_wait_time; -		audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0, -				 &s, sizeof(s)); +		audit_send_reply(skb, seq, AUDIT_GET, 0, 0, &s, sizeof(s));  		break;  	}  	case AUDIT_SET: { @@ -905,7 +907,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  					   seq, data, nlmsg_len(nlh));  		break;  	case AUDIT_LIST_RULES: -		err = audit_list_rules_send(NETLINK_CB(skb).portid, seq); +		err = audit_list_rules_send(skb, seq);  		break;  	case AUDIT_TRIM:  		audit_trim_trees(); @@ -970,8 +972,8 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  			memcpy(sig_data->ctx, ctx, len);  			security_release_secctx(ctx, len);  		} -		audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_SIGNAL_INFO, -				0, 0, sig_data, sizeof(*sig_data) + len); +		audit_send_reply(skb, seq, AUDIT_SIGNAL_INFO, 0, 0, +				 sig_data, sizeof(*sig_data) + len);  		kfree(sig_data);  		break;  	case AUDIT_TTY_GET: { @@ -983,8 +985,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh)  		s.log_passwd = tsk->signal->audit_tty_log_passwd;  		spin_unlock(&tsk->sighand->siglock); -		audit_send_reply(NETLINK_CB(skb).portid, seq, -				 AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); +		audit_send_reply(skb, seq, AUDIT_TTY_GET, 0, 0, &s, sizeof(s));  		break;  	}  	case AUDIT_TTY_SET: { diff --git a/kernel/audit.h b/kernel/audit.h index 57cc64d67718..8df132214606 100644 --- a/kernel/audit.h +++ b/kernel/audit.h @@ -247,7 +247,7 @@ extern void		    audit_panic(const char *message);  struct audit_netlink_list {  	__u32 portid; -	pid_t pid; +	struct net *net;  	struct sk_buff_head q;  }; diff --git a/kernel/audit_tree.c b/kernel/audit_tree.c index 67ccf0e7cca9..135944a7b28a 100644 --- a/kernel/audit_tree.c +++ b/kernel/audit_tree.c @@ -916,7 +916,7 @@ static int audit_tree_handle_event(struct fsnotify_group *group,  				   struct fsnotify_mark *inode_mark,  				   struct fsnotify_mark *vfsmount_mark,  				   u32 mask, void *data, int data_type, -				   const unsigned char *file_name) +				   const unsigned char *file_name, u32 cookie)  {  	return 0;  } diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c index 2596fac5dcb4..70b4554d2fbe 100644 --- a/kernel/audit_watch.c +++ b/kernel/audit_watch.c @@ -471,7 +471,7 @@ static int 
audit_watch_handle_event(struct fsnotify_group *group,  				    struct fsnotify_mark *inode_mark,  				    struct fsnotify_mark *vfsmount_mark,  				    u32 mask, void *data, int data_type, -				    const unsigned char *dname) +				    const unsigned char *dname, u32 cookie)  {  	struct inode *inode;  	struct audit_parent *parent; diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c index 14a78cca384e..92062fd6cc8c 100644 --- a/kernel/auditfilter.c +++ b/kernel/auditfilter.c @@ -29,6 +29,8 @@  #include <linux/sched.h>  #include <linux/slab.h>  #include <linux/security.h> +#include <net/net_namespace.h> +#include <net/sock.h>  #include "audit.h"  /* @@ -1065,11 +1067,13 @@ int audit_rule_change(int type, __u32 portid, int seq, void *data,  /**   * audit_list_rules_send - list the audit rules - * @portid: target portid for netlink audit messages + * @request_skb: skb of request we are replying to (used to target the reply)   * @seq: netlink audit message sequence (serial) number   */ -int audit_list_rules_send(__u32 portid, int seq) +int audit_list_rules_send(struct sk_buff *request_skb, int seq)  { +	u32 portid = NETLINK_CB(request_skb).portid; +	struct net *net = sock_net(NETLINK_CB(request_skb).sk);  	struct task_struct *tsk;  	struct audit_netlink_list *dest;  	int err = 0; @@ -1083,8 +1087,8 @@ int audit_list_rules_send(__u32 portid, int seq)  	dest = kmalloc(sizeof(struct audit_netlink_list), GFP_KERNEL);  	if (!dest)  		return -ENOMEM; +	dest->net = get_net(net);  	dest->portid = portid; -	dest->pid = task_pid_vnr(current);  	skb_queue_head_init(&dest->q);  	mutex_lock(&audit_filter_mutex); diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 10176cd5956a..7aef2f4b6c64 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -1719,7 +1719,7 @@ void audit_putname(struct filename *name)  	struct audit_context *context = current->audit_context;  	BUG_ON(!context); -	if (!context->in_syscall) { +	if (!name->aname || !context->in_syscall) {  #if AUDIT_DEBUG == 2  		printk(KERN_ERR "%s:%d(:%d): final_putname(%p)\n",  		       __FILE__, __LINE__, context->serial, name); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index e2f46ba37f72..0c753ddd223b 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -886,7 +886,9 @@ static void cgroup_diput(struct dentry *dentry, struct inode *inode)  		 * per-subsystem and moved to css->id so that lookups are  		 * successful until the target css is released.  		 
*/ +		mutex_lock(&cgroup_mutex);  		idr_remove(&cgrp->root->cgroup_idr, cgrp->id); +		mutex_unlock(&cgroup_mutex);  		cgrp->id = -1;  		call_rcu(&cgrp->rcu_head, cgroup_free_rcu); @@ -1566,10 +1568,10 @@ static struct dentry *cgroup_mount(struct file_system_type *fs_type,  		mutex_lock(&cgroup_mutex);  		mutex_lock(&cgroup_root_mutex); -		root_cgrp->id = idr_alloc(&root->cgroup_idr, root_cgrp, -					   0, 1, GFP_KERNEL); -		if (root_cgrp->id < 0) +		ret = idr_alloc(&root->cgroup_idr, root_cgrp, 0, 1, GFP_KERNEL); +		if (ret < 0)  			goto unlock_drop; +		root_cgrp->id = ret;  		/* Check for name clashes with existing mounts */  		ret = -EBUSY; @@ -2763,10 +2765,7 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)  	 */  	update_before = cgroup_serial_nr_next; -	mutex_unlock(&cgroup_mutex); -  	/* add/rm files for all cgroups created before */ -	rcu_read_lock();  	css_for_each_descendant_pre(css, cgroup_css(root, ss)) {  		struct cgroup *cgrp = css->cgroup; @@ -2775,23 +2774,19 @@ static int cgroup_cfts_commit(struct cftype *cfts, bool is_add)  		inode = cgrp->dentry->d_inode;  		dget(cgrp->dentry); -		rcu_read_unlock(); -  		dput(prev);  		prev = cgrp->dentry; +		mutex_unlock(&cgroup_mutex);  		mutex_lock(&inode->i_mutex);  		mutex_lock(&cgroup_mutex);  		if (cgrp->serial_nr < update_before && !cgroup_is_dead(cgrp))  			ret = cgroup_addrm_files(cgrp, cfts, is_add); -		mutex_unlock(&cgroup_mutex);  		mutex_unlock(&inode->i_mutex); - -		rcu_read_lock();  		if (ret)  			break;  	} -	rcu_read_unlock(); +	mutex_unlock(&cgroup_mutex);  	dput(prev);  	deactivate_super(sb);  	return ret; @@ -2910,9 +2905,14 @@ static void cgroup_enable_task_cg_lists(void)  		 * We should check if the process is exiting, otherwise  		 * it will race with cgroup_exit() in that the list  		 * entry won't be deleted though the process has exited. +		 * Do it while holding siglock so that we don't end up +		 * racing against cgroup_exit().  		 
*/ +		spin_lock_irq(&p->sighand->siglock);  		if (!(p->flags & PF_EXITING) && list_empty(&p->cg_list))  			list_add(&p->cg_list, &task_css_set(p)->tasks); +		spin_unlock_irq(&p->sighand->siglock); +  		task_unlock(p);  	} while_each_thread(g, p);  	read_unlock(&tasklist_lock); @@ -4112,17 +4112,17 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)  	err = percpu_ref_init(&css->refcnt, css_release);  	if (err) -		goto err_free; +		goto err_free_css;  	init_css(css, ss, cgrp);  	err = cgroup_populate_dir(cgrp, 1 << ss->subsys_id);  	if (err) -		goto err_free; +		goto err_free_percpu_ref;  	err = online_css(css);  	if (err) -		goto err_free; +		goto err_clear_dir;  	dget(cgrp->dentry);  	css_get(css->parent); @@ -4138,8 +4138,11 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss)  	return 0; -err_free: +err_clear_dir: +	cgroup_clear_dir(css->cgroup, 1 << css->ss->subsys_id); +err_free_percpu_ref:  	percpu_ref_cancel_init(&css->refcnt); +err_free_css:  	ss->css_free(css);  	return err;  } @@ -4158,7 +4161,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,  	struct cgroup *cgrp;  	struct cgroup_name *name;  	struct cgroupfs_root *root = parent->root; -	int ssid, err = 0; +	int ssid, err;  	struct cgroup_subsys *ss;  	struct super_block *sb = root->sb; @@ -4168,19 +4171,13 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,  		return -ENOMEM;  	name = cgroup_alloc_name(dentry); -	if (!name) +	if (!name) { +		err = -ENOMEM;  		goto err_free_cgrp; +	}  	rcu_assign_pointer(cgrp->name, name);  	/* -	 * Temporarily set the pointer to NULL, so idr_find() won't return -	 * a half-baked cgroup. -	 */ -	cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL); -	if (cgrp->id < 0) -		goto err_free_name; - -	/*  	 * Only live parents can have children.  Note that the liveliness  	 * check isn't strictly necessary because cgroup_mkdir() and  	 * cgroup_rmdir() are fully synchronized by i_mutex; however, do it @@ -4189,7 +4186,17 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,  	 */  	if (!cgroup_lock_live_group(parent)) {  		err = -ENODEV; -		goto err_free_id; +		goto err_free_name; +	} + +	/* +	 * Temporarily set the pointer to NULL, so idr_find() won't return +	 * a half-baked cgroup. 
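The reordered cgroup_create() error paths keep the "reserve the ID first, publish the pointer later" idiom described in the comment above: idr_alloc() is called with a NULL pointer so idr_find() can never observe a partially initialized cgroup. A generic sketch of that pattern under an assumed my_mutex/my_idr pair; using idr_replace() as the publication step is the usual companion and is an assumption here, not something this hunk shows.

#include <linux/idr.h>
#include <linux/mutex.h>
#include <linux/slab.h>

struct my_obj {				/* placeholder object */
	int id;
	/* ... */
};

static DEFINE_MUTEX(my_mutex);
static DEFINE_IDR(my_idr);

static int my_create(struct my_obj *obj)
{
	int id;

	mutex_lock(&my_mutex);
	/* reserve an id >= 1, but keep the slot NULL for now */
	id = idr_alloc(&my_idr, NULL, 1, 0, GFP_KERNEL);
	if (id < 0) {
		mutex_unlock(&my_mutex);
		return id;		/* -ENOMEM or -ENOSPC */
	}
	obj->id = id;
	/* ... finish initializing obj ... */
	idr_replace(&my_idr, obj, id);	/* now idr_find() may see it */
	mutex_unlock(&my_mutex);
	return 0;
}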
+	 */ +	cgrp->id = idr_alloc(&root->cgroup_idr, NULL, 1, 0, GFP_KERNEL); +	if (cgrp->id < 0) { +		err = -ENOMEM; +		goto err_unlock;  	}  	/* Grab a reference on the superblock so the hierarchy doesn't @@ -4221,7 +4228,7 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,  	 */  	err = cgroup_create_file(dentry, S_IFDIR | mode, sb);  	if (err < 0) -		goto err_unlock; +		goto err_free_id;  	lockdep_assert_held(&dentry->d_inode->i_mutex);  	cgrp->serial_nr = cgroup_serial_nr_next++; @@ -4257,12 +4264,12 @@ static long cgroup_create(struct cgroup *parent, struct dentry *dentry,  	return 0; -err_unlock: -	mutex_unlock(&cgroup_mutex); -	/* Release the reference count that we took on the superblock */ -	deactivate_super(sb);  err_free_id:  	idr_remove(&root->cgroup_idr, cgrp->id); +	/* Release the reference count that we took on the superblock */ +	deactivate_super(sb); +err_unlock: +	mutex_unlock(&cgroup_mutex);  err_free_name:  	kfree(rcu_dereference_raw(cgrp->name));  err_free_cgrp: diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 4410ac6a55f1..e6b1b66afe52 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -974,12 +974,6 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,   *    Temporarilly set tasks mems_allowed to target nodes of migration,   *    so that the migration code can allocate pages on these nodes.   * - *    Call holding cpuset_mutex, so current's cpuset won't change - *    during this call, as manage_mutex holds off any cpuset_attach() - *    calls.  Therefore we don't need to take task_lock around the - *    call to guarantee_online_mems(), as we know no one is changing - *    our task's cpuset. - *   *    While the mm_struct we are migrating is typically from some   *    other task, the task_struct mems_allowed that we are hacking   *    is for our current task, which must allocate new pages for that @@ -996,8 +990,10 @@ static void cpuset_migrate_mm(struct mm_struct *mm, const nodemask_t *from,  	do_migrate_pages(mm, from, to, MPOL_MF_MOVE_ALL); +	rcu_read_lock();  	mems_cs = effective_nodemask_cpuset(task_cs(tsk));  	guarantee_online_mems(mems_cs, &tsk->mems_allowed); +	rcu_read_unlock();  }  /* @@ -2486,9 +2482,9 @@ int __cpuset_node_allowed_softwall(int node, gfp_t gfp_mask)  	task_lock(current);  	cs = nearest_hardwall_ancestor(task_cs(current)); +	allowed = node_isset(node, cs->mems_allowed);  	task_unlock(current); -	allowed = node_isset(node, cs->mems_allowed);  	mutex_unlock(&callback_mutex);  	return allowed;  } diff --git a/kernel/events/core.c b/kernel/events/core.c index 56003c6edfd3..fa0b2d4ad83c 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7856,14 +7856,14 @@ static void perf_pmu_rotate_stop(struct pmu *pmu)  static void __perf_event_exit_context(void *__info)  {  	struct perf_event_context *ctx = __info; -	struct perf_event *event, *tmp; +	struct perf_event *event;  	perf_pmu_rotate_stop(ctx->pmu); -	list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) -		__perf_remove_from_context(event); -	list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) +	rcu_read_lock(); +	list_for_each_entry_rcu(event, &ctx->event_list, event_entry)  		__perf_remove_from_context(event); +	rcu_read_unlock();  }  static void perf_event_exit_cpu_context(int cpu) @@ -7887,11 +7887,11 @@ static void perf_event_exit_cpu(int cpu)  {  	struct swevent_htable *swhash = &per_cpu(swevent_htable, cpu); +	perf_event_exit_cpu_context(cpu); +  	mutex_lock(&swhash->hlist_mutex);  	
swevent_hlist_release(swhash);  	mutex_unlock(&swhash->hlist_mutex); - -	perf_event_exit_cpu_context(cpu);  }  #else  static inline void perf_event_exit_cpu(int cpu) { } diff --git a/kernel/futex.c b/kernel/futex.c index 44a1261cb9ff..08ec814ad9d2 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -234,6 +234,7 @@ static const struct futex_q futex_q_init = {   * waiting on a futex.   */  struct futex_hash_bucket { +	atomic_t waiters;  	spinlock_t lock;  	struct plist_head chain;  } ____cacheline_aligned_in_smp; @@ -253,22 +254,37 @@ static inline void futex_get_mm(union futex_key *key)  	smp_mb__after_atomic_inc();  } -static inline bool hb_waiters_pending(struct futex_hash_bucket *hb) +/* + * Reflects a new waiter being added to the waitqueue. + */ +static inline void hb_waiters_inc(struct futex_hash_bucket *hb)  {  #ifdef CONFIG_SMP +	atomic_inc(&hb->waiters);  	/* -	 * Tasks trying to enter the critical region are most likely -	 * potential waiters that will be added to the plist. Ensure -	 * that wakers won't miss to-be-slept tasks in the window between -	 * the wait call and the actual plist_add. +	 * Full barrier (A), see the ordering comment above.  	 */ -	if (spin_is_locked(&hb->lock)) -		return true; -	smp_rmb(); /* Make sure we check the lock state first */ +	smp_mb__after_atomic_inc(); +#endif +} + +/* + * Reflects a waiter being removed from the waitqueue by wakeup + * paths. + */ +static inline void hb_waiters_dec(struct futex_hash_bucket *hb) +{ +#ifdef CONFIG_SMP +	atomic_dec(&hb->waiters); +#endif +} -	return !plist_head_empty(&hb->chain); +static inline int hb_waiters_pending(struct futex_hash_bucket *hb) +{ +#ifdef CONFIG_SMP +	return atomic_read(&hb->waiters);  #else -	return true; +	return 1;  #endif  } @@ -954,6 +970,7 @@ static void __unqueue_futex(struct futex_q *q)  	hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);  	plist_del(&q->list, &hb->chain); +	hb_waiters_dec(hb);  }  /* @@ -1257,7 +1274,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,  	 */  	if (likely(&hb1->chain != &hb2->chain)) {  		plist_del(&q->list, &hb1->chain); +		hb_waiters_dec(hb1);  		plist_add(&q->list, &hb2->chain); +		hb_waiters_inc(hb2);  		q->lock_ptr = &hb2->lock;  	}  	get_futex_key_refs(key2); @@ -1600,6 +1619,17 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)  	struct futex_hash_bucket *hb;  	hb = hash_futex(&q->key); + +	/* +	 * Increment the counter before taking the lock so that +	 * a potential waker won't miss a to-be-slept task that is +	 * waiting for the spinlock. This is safe as all queue_lock() +	 * users end up calling queue_me(). Similarly, for housekeeping, +	 * decrement the counter at queue_unlock() when some error has +	 * occurred and we don't end up adding the task to the list. +	 */ +	hb_waiters_inc(hb); +  	q->lock_ptr = &hb->lock;  	spin_lock(&hb->lock); /* implies MB (A) */ @@ -1611,6 +1641,7 @@ queue_unlock(struct futex_hash_bucket *hb)  	__releases(&hb->lock)  {  	spin_unlock(&hb->lock); +	hb_waiters_dec(hb);  }  /** @@ -2342,6 +2373,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,  		 * Unqueue the futex_q and determine which it was.  		 
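The futex hunks add a per-bucket atomic waiter count so the wake-up path can cheaply skip buckets that provably have no waiters, instead of relying on the spin_is_locked()/plist_head_empty() heuristic: the count is raised before the bucket lock is taken and dropped whenever an entry leaves the plist or the lock is released without queueing. A stripped-down sketch of the same ordering, with illustrative my_* names:

#include <linux/atomic.h>
#include <linux/plist.h>
#include <linux/spinlock.h>
#include <linux/types.h>

struct my_bucket {			/* shaped like futex_hash_bucket */
	atomic_t waiters;
	spinlock_t lock;
	struct plist_head chain;
};

static void my_queue_lock(struct my_bucket *b)
{
	atomic_inc(&b->waiters);
	smp_mb__after_atomic_inc();	/* count visible before we go to sleep */
	spin_lock(&b->lock);
}

static void my_unqueue(struct my_bucket *b, struct plist_node *node)
{
	plist_del(node, &b->chain);
	atomic_dec(&b->waiters);
}

static bool my_bucket_may_have_waiters(struct my_bucket *b)
{
	return atomic_read(&b->waiters) != 0;	/* cheap check for wakers */
}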
*/  		plist_del(&q->list, &hb->chain); +		hb_waiters_dec(hb);  		/* Handle spurious wakeups gracefully */  		ret = -EWOULDBLOCK; @@ -2875,6 +2907,7 @@ static int __init futex_init(void)  		futex_cmpxchg_enabled = 1;  	for (i = 0; i < futex_hashsize; i++) { +		atomic_set(&futex_queues[i].waiters, 0);  		plist_head_init(&futex_queues[i].chain);  		spin_lock_init(&futex_queues[i].lock);  	} diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index 4a1fef09f658..07cbdfea9ae2 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -40,6 +40,7 @@ config IRQ_EDGE_EOI_HANDLER  # Generic configurable interrupt chip implementation  config GENERIC_IRQ_CHIP         bool +       select IRQ_DOMAIN  # Generic irq_domain hw <--> linux irq number translation  config IRQ_DOMAIN diff --git a/kernel/irq/devres.c b/kernel/irq/devres.c index bd8e788d71e0..1ef0606797c9 100644 --- a/kernel/irq/devres.c +++ b/kernel/irq/devres.c @@ -73,6 +73,51 @@ int devm_request_threaded_irq(struct device *dev, unsigned int irq,  EXPORT_SYMBOL(devm_request_threaded_irq);  /** + *	devm_request_any_context_irq - allocate an interrupt line for a managed device + *	@dev: device to request interrupt for + *	@irq: Interrupt line to allocate + *	@handler: Function to be called when the IRQ occurs + *	@thread_fn: function to be called in a threaded interrupt context. NULL + *		    for devices which handle everything in @handler + *	@irqflags: Interrupt type flags + *	@devname: An ascii name for the claiming device + *	@dev_id: A cookie passed back to the handler function + * + *	Except for the extra @dev argument, this function takes the + *	same arguments and performs the same function as + *	request_any_context_irq().  IRQs requested with this function will be + *	automatically freed on driver detach. + * + *	If an IRQ allocated with this function needs to be freed + *	separately, devm_free_irq() must be used. + */ +int devm_request_any_context_irq(struct device *dev, unsigned int irq, +			      irq_handler_t handler, unsigned long irqflags, +			      const char *devname, void *dev_id) +{ +	struct irq_devres *dr; +	int rc; + +	dr = devres_alloc(devm_irq_release, sizeof(struct irq_devres), +			  GFP_KERNEL); +	if (!dr) +		return -ENOMEM; + +	rc = request_any_context_irq(irq, handler, irqflags, devname, dev_id); +	if (rc) { +		devres_free(dr); +		return rc; +	} + +	dr->irq = irq; +	dr->dev_id = dev_id; +	devres_add(dev, dr); + +	return 0; +} +EXPORT_SYMBOL(devm_request_any_context_irq); + +/**   *	devm_free_irq - free an interrupt   *	@dev: device to free interrupt for   *	@irq: Interrupt line to free diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 192a302d6cfd..8ab8e9390297 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -274,6 +274,7 @@ struct irq_desc *irq_to_desc(unsigned int irq)  {  	return (irq < NR_IRQS) ? 
irq_desc + irq : NULL;  } +EXPORT_SYMBOL(irq_to_desc);  static void free_desc(unsigned int irq)  { diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c index cf68bb36fe58..f14033700c25 100644 --- a/kernel/irq/irqdomain.c +++ b/kernel/irq/irqdomain.c @@ -10,6 +10,7 @@  #include <linux/mutex.h>  #include <linux/of.h>  #include <linux/of_address.h> +#include <linux/of_irq.h>  #include <linux/topology.h>  #include <linux/seq_file.h>  #include <linux/slab.h> diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 481a13c43b17..d3bf660cb57f 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -802,8 +802,7 @@ static irqreturn_t irq_thread_fn(struct irq_desc *desc,  static void wake_threads_waitq(struct irq_desc *desc)  { -	if (atomic_dec_and_test(&desc->threads_active) && -	    waitqueue_active(&desc->wait_for_threads)) +	if (atomic_dec_and_test(&desc->threads_active))  		wake_up(&desc->wait_for_threads);  } diff --git a/kernel/kmod.c b/kernel/kmod.c index b086006c59e7..6b375af4958d 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -239,7 +239,7 @@ static int ____call_usermodehelper(void *data)  	commit_creds(new); -	retval = do_execve(sub_info->path, +	retval = do_execve(getname_kernel(sub_info->path),  			   (const char __user *const __user *)sub_info->argv,  			   (const char __user *const __user *)sub_info->envp);  	if (!retval) diff --git a/kernel/power/console.c b/kernel/power/console.c index eacb8bd8cab4..aba9c545a0e3 100644 --- a/kernel/power/console.c +++ b/kernel/power/console.c @@ -9,6 +9,7 @@  #include <linux/kbd_kern.h>  #include <linux/vt.h>  #include <linux/module.h> +#include <linux/slab.h>  #include "power.h"  #define SUSPEND_CONSOLE	(MAX_NR_CONSOLES-1) diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index b1d255f04135..4dae9cbe9259 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -1076,7 +1076,6 @@ static int syslog_print_all(char __user *buf, int size, bool clear)  		next_seq = log_next_seq;  		len = 0; -		prev = 0;  		while (len >= 0 && seq < next_seq) {  			struct printk_log *msg = log_from_idx(idx);  			int textlen; @@ -2788,7 +2787,6 @@ bool kmsg_dump_get_buffer(struct kmsg_dumper *dumper, bool syslog,  	next_idx = idx;  	l = 0; -	prev = 0;  	while (seq < dumper->next_seq) {  		struct printk_log *msg = log_from_idx(idx); diff --git a/kernel/profile.c b/kernel/profile.c index 6631e1ef55ab..ebdd9c1a86b4 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -549,14 +549,14 @@ static int create_hash_tables(void)  		struct page *page;  		page = alloc_pages_exact_node(node, -				GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, +				GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,  				0);  		if (!page)  			goto out_cleanup;  		per_cpu(cpu_profile_hits, cpu)[1]  				= (struct profile_hit *)page_address(page);  		page = alloc_pages_exact_node(node, -				GFP_KERNEL | __GFP_ZERO | GFP_THISNODE, +				GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,  				0);  		if (!page)  			goto out_cleanup; diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index 43c2bcc35761..b30a2924ef14 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -301,14 +301,14 @@ u64 sched_clock_cpu(int cpu)  	if (unlikely(!sched_clock_running))  		return 0ull; -	preempt_disable(); +	preempt_disable_notrace();  	scd = cpu_sdc(cpu);  	if (cpu != smp_processor_id())  		clock = sched_clock_remote(scd);  	else  		clock = sched_clock_local(scd); -	preempt_enable(); +	preempt_enable_notrace();  	return clock;  } diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 
b46131ef6aab..f5c6635b806c 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1952,7 +1952,7 @@ static int dl_overflow(struct task_struct *p, int policy,  {  	struct dl_bw *dl_b = dl_bw_of(task_cpu(p)); -	u64 period = attr->sched_period; +	u64 period = attr->sched_period ?: attr->sched_deadline;  	u64 runtime = attr->sched_runtime;  	u64 new_bw = dl_policy(policy) ? to_ratio(period, runtime) : 0;  	int cpus, err = -1; @@ -3338,6 +3338,15 @@ recheck:  				return -EPERM;  		} +		 /* +		  * Can't set/change SCHED_DEADLINE policy at all for now +		  * (safest behavior); in the future we would like to allow +		  * unprivileged DL tasks to increase their relative deadline +		  * or reduce their runtime (both ways reducing utilization) +		  */ +		if (dl_policy(policy)) +			return -EPERM; +  		/*  		 * Treat SCHED_IDLE as nice 20. Only allow a switch to  		 * SCHED_NORMAL if the RLIMIT_NICE would normally permit it. @@ -3661,13 +3670,14 @@ SYSCALL_DEFINE2(sched_setparam, pid_t, pid, struct sched_param __user *, param)   * @pid: the pid in question.   * @uattr: structure containing the extended parameters.   */ -SYSCALL_DEFINE2(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr) +SYSCALL_DEFINE3(sched_setattr, pid_t, pid, struct sched_attr __user *, uattr, +			       unsigned int, flags)  {  	struct sched_attr attr;  	struct task_struct *p;  	int retval; -	if (!uattr || pid < 0) +	if (!uattr || pid < 0 || flags)  		return -EINVAL;  	if (sched_copy_attr(uattr, &attr)) @@ -3786,7 +3796,7 @@ static int sched_read_attr(struct sched_attr __user *uattr,  		attr->size = usize;  	} -	ret = copy_to_user(uattr, attr, usize); +	ret = copy_to_user(uattr, attr, attr->size);  	if (ret)  		return -EFAULT; @@ -3804,8 +3814,8 @@ err_size:   * @uattr: structure containing the extended parameters.   * @size: sizeof(attr) for fwd/bwd comp.   
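Both SCHED_DEADLINE syscalls grow a trailing flags argument that must currently be zero (anything else now returns -EINVAL), and setting a deadline policy is restricted to privileged tasks. From userspace the new prototypes look roughly like the sketch below; struct sched_attr was not exported in the uapi headers of the day, so it is declared locally here, and __NR_sched_setattr/__NR_sched_getattr are assumed to be provided by your kernel headers.

#define _GNU_SOURCE
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <unistd.h>
#include <sys/syscall.h>

#ifndef SCHED_DEADLINE
#define SCHED_DEADLINE 6		/* value from include/uapi/linux/sched.h */
#endif

/* local copy of the kernel's struct sched_attr layout */
struct sched_attr {
	uint32_t size;
	uint32_t sched_policy;
	uint64_t sched_flags;
	int32_t  sched_nice;
	uint32_t sched_priority;
	uint64_t sched_runtime;
	uint64_t sched_deadline;
	uint64_t sched_period;
};

int main(void)
{
	struct sched_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.sched_policy   = SCHED_DEADLINE;
	attr.sched_runtime  =  10 * 1000 * 1000;	/*  10 ms */
	attr.sched_deadline =  30 * 1000 * 1000;	/*  30 ms */
	attr.sched_period   = 100 * 1000 * 1000;	/* 100 ms */

	/* pid 0 means the calling task; the last argument is the new flags word */
	if (syscall(__NR_sched_setattr, 0, &attr, 0))
		perror("sched_setattr");

	if (syscall(__NR_sched_getattr, 0, &attr, sizeof(attr), 0))
		perror("sched_getattr");
	else
		printf("policy=%u runtime=%llu ns\n", attr.sched_policy,
		       (unsigned long long)attr.sched_runtime);
	return 0;
}

Note that with this patch applied the sched_setattr() call needs CAP_SYS_NICE (in practice, root), since unprivileged tasks can no longer switch to SCHED_DEADLINE.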
*/ -SYSCALL_DEFINE3(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, -		unsigned int, size) +SYSCALL_DEFINE4(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr, +		unsigned int, size, unsigned int, flags)  {  	struct sched_attr attr = {  		.size = sizeof(struct sched_attr), @@ -3814,7 +3824,7 @@ SYSCALL_DEFINE3(sched_getattr, pid_t, pid, struct sched_attr __user *, uattr,  	int retval;  	if (!uattr || pid < 0 || size > PAGE_SIZE || -	    size < SCHED_ATTR_SIZE_VER0) +	    size < SCHED_ATTR_SIZE_VER0 || flags)  		return -EINVAL;  	rcu_read_lock(); @@ -7422,6 +7432,7 @@ static int sched_dl_global_constraints(void)  	u64 period = global_rt_period();  	u64 new_bw = to_ratio(period, runtime);  	int cpu, ret = 0; +	unsigned long flags;  	/*  	 * Here we want to check the bandwidth not being set to some @@ -7435,10 +7446,10 @@ static int sched_dl_global_constraints(void)  	for_each_possible_cpu(cpu) {  		struct dl_bw *dl_b = dl_bw_of(cpu); -		raw_spin_lock(&dl_b->lock); +		raw_spin_lock_irqsave(&dl_b->lock, flags);  		if (new_bw < dl_b->total_bw)  			ret = -EBUSY; -		raw_spin_unlock(&dl_b->lock); +		raw_spin_unlock_irqrestore(&dl_b->lock, flags);  		if (ret)  			break; @@ -7451,6 +7462,7 @@ static void sched_dl_do_global(void)  {  	u64 new_bw = -1;  	int cpu; +	unsigned long flags;  	def_dl_bandwidth.dl_period = global_rt_period();  	def_dl_bandwidth.dl_runtime = global_rt_runtime(); @@ -7464,9 +7476,9 @@ static void sched_dl_do_global(void)  	for_each_possible_cpu(cpu) {  		struct dl_bw *dl_b = dl_bw_of(cpu); -		raw_spin_lock(&dl_b->lock); +		raw_spin_lock_irqsave(&dl_b->lock, flags);  		dl_b->bw = new_bw; -		raw_spin_unlock(&dl_b->lock); +		raw_spin_unlock_irqrestore(&dl_b->lock, flags);  	}  } @@ -7475,7 +7487,8 @@ static int sched_rt_global_validate(void)  	if (sysctl_sched_rt_period <= 0)  		return -EINVAL; -	if (sysctl_sched_rt_runtime > sysctl_sched_rt_period) +	if ((sysctl_sched_rt_runtime != RUNTIME_INF) && +		(sysctl_sched_rt_runtime > sysctl_sched_rt_period))  		return -EINVAL;  	return 0; diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c index 045fc74e3f09..5b9bb42b2d47 100644 --- a/kernel/sched/cpudeadline.c +++ b/kernel/sched/cpudeadline.c @@ -70,7 +70,7 @@ static void cpudl_heapify(struct cpudl *cp, int idx)  static void cpudl_change_key(struct cpudl *cp, int idx, u64 new_dl)  { -	WARN_ON(idx > num_present_cpus() || idx == IDX_INVALID); +	WARN_ON(idx == IDX_INVALID || !cpu_present(idx));  	if (dl_time_before(new_dl, cp->elements[idx].dl)) {  		cp->elements[idx].dl = new_dl; @@ -117,7 +117,7 @@ int cpudl_find(struct cpudl *cp, struct task_struct *p,  	}  out: -	WARN_ON(best_cpu > num_present_cpus() && best_cpu != -1); +	WARN_ON(best_cpu != -1 && !cpu_present(best_cpu));  	return best_cpu;  } @@ -137,7 +137,7 @@ void cpudl_set(struct cpudl *cp, int cpu, u64 dl, int is_valid)  	int old_idx, new_cpu;  	unsigned long flags; -	WARN_ON(cpu > num_present_cpus()); +	WARN_ON(!cpu_present(cpu));  	raw_spin_lock_irqsave(&cp->lock, flags);  	old_idx = cp->cpu_to_idx[cpu]; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 0dd5e0971a07..6e79b3faa4cd 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -121,7 +121,7 @@ static inline void dl_clear_overload(struct rq *rq)  static void update_dl_migration(struct dl_rq *dl_rq)  { -	if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_total > 1) { +	if (dl_rq->dl_nr_migratory && dl_rq->dl_nr_running > 1) {  		if (!dl_rq->overloaded) {  			dl_set_overload(rq_of_dl_rq(dl_rq));  			
dl_rq->overloaded = 1; @@ -135,9 +135,7 @@ static void update_dl_migration(struct dl_rq *dl_rq)  static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)  {  	struct task_struct *p = dl_task_of(dl_se); -	dl_rq = &rq_of_dl_rq(dl_rq)->dl; -	dl_rq->dl_nr_total++;  	if (p->nr_cpus_allowed > 1)  		dl_rq->dl_nr_migratory++; @@ -147,9 +145,7 @@ static void inc_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)  static void dec_dl_migration(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)  {  	struct task_struct *p = dl_task_of(dl_se); -	dl_rq = &rq_of_dl_rq(dl_rq)->dl; -	dl_rq->dl_nr_total--;  	if (p->nr_cpus_allowed > 1)  		dl_rq->dl_nr_migratory--; @@ -566,6 +562,8 @@ int dl_runtime_exceeded(struct rq *rq, struct sched_dl_entity *dl_se)  	return 1;  } +extern bool sched_rt_bandwidth_account(struct rt_rq *rt_rq); +  /*   * Update the current task's runtime statistics (provided it is still   * a -deadline task and has not been removed from the dl_rq). @@ -629,11 +627,13 @@ static void update_curr_dl(struct rq *rq)  		struct rt_rq *rt_rq = &rq->rt;  		raw_spin_lock(&rt_rq->rt_runtime_lock); -		rt_rq->rt_time += delta_exec;  		/*  		 * We'll let actual RT tasks worry about the overflow here, we -		 * have our own CBS to keep us inline -- see above. +		 * have our own CBS to keep us inline; only account when RT +		 * bandwidth is relevant.  		 */ +		if (sched_rt_bandwidth_account(rt_rq)) +			rt_rq->rt_time += delta_exec;  		raw_spin_unlock(&rt_rq->rt_runtime_lock);  	}  } @@ -717,6 +717,7 @@ void inc_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)  	WARN_ON(!dl_prio(prio));  	dl_rq->dl_nr_running++; +	inc_nr_running(rq_of_dl_rq(dl_rq));  	inc_dl_deadline(dl_rq, deadline);  	inc_dl_migration(dl_se, dl_rq); @@ -730,6 +731,7 @@ void dec_dl_tasks(struct sched_dl_entity *dl_se, struct dl_rq *dl_rq)  	WARN_ON(!dl_prio(prio));  	WARN_ON(!dl_rq->dl_nr_running);  	dl_rq->dl_nr_running--; +	dec_nr_running(rq_of_dl_rq(dl_rq));  	dec_dl_deadline(dl_rq, dl_se->deadline);  	dec_dl_migration(dl_se, dl_rq); @@ -836,8 +838,6 @@ static void enqueue_task_dl(struct rq *rq, struct task_struct *p, int flags)  	if (!task_current(rq, p) && p->nr_cpus_allowed > 1)  		enqueue_pushable_dl_task(rq, p); - -	inc_nr_running(rq);  }  static void __dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags) @@ -850,8 +850,6 @@ static void dequeue_task_dl(struct rq *rq, struct task_struct *p, int flags)  {  	update_curr_dl(rq);  	__dequeue_task_dl(rq, p, flags); - -	dec_nr_running(rq);  }  /* diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 966cc2bfcb77..9b4c4f320130 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -1757,6 +1757,8 @@ void task_numa_work(struct callback_head *work)  			start = end;  			if (pages <= 0)  				goto out; + +			cond_resched();  		} while (end != vma->vm_end);  	} @@ -6999,15 +7001,15 @@ static void switched_from_fair(struct rq *rq, struct task_struct *p)  	struct cfs_rq *cfs_rq = cfs_rq_of(se);  	/* -	 * Ensure the task's vruntime is normalized, so that when its +	 * Ensure the task's vruntime is normalized, so that when it's  	 * switched back to the fair class the enqueue_entity(.flags=0) will  	 * do the right thing.  	 
* -	 * If it was on_rq, then the dequeue_entity(.flags=0) will already -	 * have normalized the vruntime, if it was !on_rq, then only when +	 * If it's on_rq, then the dequeue_entity(.flags=0) will already +	 * have normalized the vruntime, if it's !on_rq, then only when  	 * the task is sleeping will it still have non-normalized vruntime.  	 */ -	if (!se->on_rq && p->state != TASK_RUNNING) { +	if (!p->on_rq && p->state != TASK_RUNNING) {  		/*  		 * Fix up our vruntime so that the current sleep doesn't  		 * cause 'unlimited' sleep bonus. diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index a2740b775b45..1999021042c7 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -538,6 +538,14 @@ static inline struct rt_bandwidth *sched_rt_bandwidth(struct rt_rq *rt_rq)  #endif /* CONFIG_RT_GROUP_SCHED */ +bool sched_rt_bandwidth_account(struct rt_rq *rt_rq) +{ +	struct rt_bandwidth *rt_b = sched_rt_bandwidth(rt_rq); + +	return (hrtimer_active(&rt_b->rt_period_timer) || +		rt_rq->rt_time < rt_b->rt_runtime); +} +  #ifdef CONFIG_SMP  /*   * We ran out of runtime, see if we can borrow some from our neighbours. diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index c2119fd20f8b..f964add50f38 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -462,7 +462,6 @@ struct dl_rq {  	} earliest_dl;  	unsigned long dl_nr_migratory; -	unsigned long dl_nr_total;  	int overloaded;  	/* diff --git a/kernel/stop_machine.c b/kernel/stop_machine.c index 84571e09c907..01fbae5b97b7 100644 --- a/kernel/stop_machine.c +++ b/kernel/stop_machine.c @@ -293,7 +293,7 @@ int stop_two_cpus(unsigned int cpu1, unsigned int cpu2, cpu_stop_fn_t fn, void *  	 */  	smp_call_function_single(min(cpu1, cpu2),  				 &irq_cpu_stop_queue_work, -				 &call_args, 0); +				 &call_args, 1);  	lg_local_unlock(&stop_cpus_lock);  	preempt_enable(); diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c index 7a925ba456fb..a6a5bf53e86d 100644 --- a/kernel/time/jiffies.c +++ b/kernel/time/jiffies.c @@ -51,7 +51,13 @@   * HZ shrinks, so values greater than 8 overflow 32bits when   * HZ=100.   */ +#if HZ < 34 +#define JIFFIES_SHIFT	6 +#elif HZ < 67 +#define JIFFIES_SHIFT	7 +#else  #define JIFFIES_SHIFT	8 +#endif  static cycle_t jiffies_read(struct clocksource *cs)  { diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index 0abb36464281..4d23dc4d8139 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -116,20 +116,42 @@ static enum hrtimer_restart sched_clock_poll(struct hrtimer *hrt)  void __init sched_clock_register(u64 (*read)(void), int bits,  				 unsigned long rate)  { +	u64 res, wrap, new_mask, new_epoch, cyc, ns; +	u32 new_mult, new_shift; +	ktime_t new_wrap_kt;  	unsigned long r; -	u64 res, wrap;  	char r_unit;  	if (cd.rate > rate)  		return;  	WARN_ON(!irqs_disabled()); -	read_sched_clock = read; -	sched_clock_mask = CLOCKSOURCE_MASK(bits); -	cd.rate = rate;  	/* calculate the mult/shift to convert counter ticks to ns. 
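sched_clock_register() now computes the new mult/shift pair, the wrap time and the new epoch up front and publishes them inside one raw_write_seqcount_begin()/end() section, so readers never see a half-updated set. The mult/shift pair itself is just a fixed-point scale, ns = (cycles * mult) >> shift. The toy program below shows the arithmetic for an assumed 24 MHz counter with a hand-picked shift of 26; the kernel derives these with clocks_calc_mult_shift(), which also bounds the range before the multiplication overflows, which is why a wrap time is computed as well.

#include <stdio.h>
#include <stdint.h>

#define NSEC_PER_SEC 1000000000ULL

static inline uint64_t cyc_to_ns(uint64_t cyc, uint32_t mult, uint32_t shift)
{
	return (cyc * mult) >> shift;	/* same form as the kernel helper */
}

int main(void)
{
	uint64_t rate  = 24000000;	/* assumed 24 MHz counter */
	uint32_t shift = 26;		/* hand-picked for this example */
	uint32_t mult  = (uint32_t)((NSEC_PER_SEC << shift) / rate);

	printf("mult=%u shift=%u\n", mult, shift);
	/* one tick is ~41.67 ns; a full second of ticks converts to ~1e9 ns */
	printf("1 tick         -> %llu ns\n",
	       (unsigned long long)cyc_to_ns(1, mult, shift));
	printf("24000000 ticks -> %llu ns\n",
	       (unsigned long long)cyc_to_ns(rate, mult, shift));
	return 0;
}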
*/ -	clocks_calc_mult_shift(&cd.mult, &cd.shift, rate, NSEC_PER_SEC, 3600); +	clocks_calc_mult_shift(&new_mult, &new_shift, rate, NSEC_PER_SEC, 3600); + +	new_mask = CLOCKSOURCE_MASK(bits); + +	/* calculate how many ns until we wrap */ +	wrap = clocks_calc_max_nsecs(new_mult, new_shift, 0, new_mask); +	new_wrap_kt = ns_to_ktime(wrap - (wrap >> 3)); + +	/* update epoch for new counter and update epoch_ns from old counter*/ +	new_epoch = read(); +	cyc = read_sched_clock(); +	ns = cd.epoch_ns + cyc_to_ns((cyc - cd.epoch_cyc) & sched_clock_mask, +			  cd.mult, cd.shift); + +	raw_write_seqcount_begin(&cd.seq); +	read_sched_clock = read; +	sched_clock_mask = new_mask; +	cd.rate = rate; +	cd.wrap_kt = new_wrap_kt; +	cd.mult = new_mult; +	cd.shift = new_shift; +	cd.epoch_cyc = new_epoch; +	cd.epoch_ns = ns; +	raw_write_seqcount_end(&cd.seq);  	r = rate;  	if (r >= 4000000) { @@ -141,22 +163,12 @@ void __init sched_clock_register(u64 (*read)(void), int bits,  	} else  		r_unit = ' '; -	/* calculate how many ns until we wrap */ -	wrap = clocks_calc_max_nsecs(cd.mult, cd.shift, 0, sched_clock_mask); -	cd.wrap_kt = ns_to_ktime(wrap - (wrap >> 3)); -  	/* calculate the ns resolution of this counter */ -	res = cyc_to_ns(1ULL, cd.mult, cd.shift); +	res = cyc_to_ns(1ULL, new_mult, new_shift); +  	pr_info("sched_clock: %u bits at %lu%cHz, resolution %lluns, wraps every %lluns\n",  		bits, r, r_unit, res, wrap); -	update_sched_clock(); - -	/* -	 * Ensure that sched_clock() starts off at 0ns -	 */ -	cd.epoch_ns = 0; -  	/* Enable IRQ time accounting if we have a fast enough sched_clock */  	if (irqtime > 0 || (irqtime == -1 && rate >= 1000000))  		enable_sched_clock_irqtime(); diff --git a/kernel/time/tick-broadcast.c b/kernel/time/tick-broadcast.c index 43780ab5e279..98977a57ac72 100644 --- a/kernel/time/tick-broadcast.c +++ b/kernel/time/tick-broadcast.c @@ -756,6 +756,7 @@ out:  static void tick_broadcast_clear_oneshot(int cpu)  {  	cpumask_clear_cpu(cpu, tick_broadcast_oneshot_mask); +	cpumask_clear_cpu(cpu, tick_broadcast_pending_mask);  }  static void tick_broadcast_init_next_event(struct cpumask *mask, diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index 294b8a271a04..fc4da2d97f9b 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -2397,6 +2397,13 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,  	write &= RB_WRITE_MASK;  	tail = write - length; +	/* +	 * If this is the first commit on the page, then it has the same +	 * timestamp as the page itself. 
+	 */ +	if (!tail) +		delta = 0; +  	/* See if we shot pass the end of this buffer page */  	if (unlikely(write > BUF_PAGE_SIZE))  		return rb_move_tail(cpu_buffer, length, tail, diff --git a/kernel/trace/trace_events.c b/kernel/trace/trace_events.c index e71ffd4eccb5..7b16d40bd64d 100644 --- a/kernel/trace/trace_events.c +++ b/kernel/trace/trace_events.c @@ -27,12 +27,6 @@  DEFINE_MUTEX(event_mutex); -DEFINE_MUTEX(event_storage_mutex); -EXPORT_SYMBOL_GPL(event_storage_mutex); - -char event_storage[EVENT_STORAGE_SIZE]; -EXPORT_SYMBOL_GPL(event_storage); -  LIST_HEAD(ftrace_events);  static LIST_HEAD(ftrace_common_fields); @@ -1777,6 +1771,16 @@ static void trace_module_add_events(struct module *mod)  {  	struct ftrace_event_call **call, **start, **end; +	if (!mod->num_trace_events) +		return; + +	/* Don't add infrastructure for mods without tracepoints */ +	if (trace_module_has_bad_taint(mod)) { +		pr_err("%s: module has bad taint, not creating trace events\n", +		       mod->name); +		return; +	} +  	start = mod->trace_events;  	end = mod->trace_events + mod->num_trace_events; diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 7c3e3e72e2b6..ee0a5098ac43 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -95,15 +95,12 @@ static void __always_unused ____ftrace_check_##name(void)		\  #undef __array  #define __array(type, item, len)					\  	do {								\ +		char *type_str = #type"["__stringify(len)"]";		\  		BUILD_BUG_ON(len > MAX_FILTER_STR_VAL);			\ -		mutex_lock(&event_storage_mutex);			\ -		snprintf(event_storage, sizeof(event_storage),		\ -			 "%s[%d]", #type, len);				\ -		ret = trace_define_field(event_call, event_storage, #item, \ +		ret = trace_define_field(event_call, type_str, #item,	\  				 offsetof(typeof(field), item),		\  				 sizeof(field.item),			\  				 is_signed_type(type), filter_type);	\ -		mutex_unlock(&event_storage_mutex);			\  		if (ret)						\  			return ret;					\  	} while (0); diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 29f26540e9c9..031cc5655a51 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -631,6 +631,11 @@ void tracepoint_iter_reset(struct tracepoint_iter *iter)  EXPORT_SYMBOL_GPL(tracepoint_iter_reset);  #ifdef CONFIG_MODULES +bool trace_module_has_bad_taint(struct module *mod) +{ +	return mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP)); +} +  static int tracepoint_module_coming(struct module *mod)  {  	struct tp_module *tp_mod, *iter; @@ -641,7 +646,7 @@ static int tracepoint_module_coming(struct module *mod)  	 * module headers (for forced load), to make sure we don't cause a crash.  	 * Staging and out-of-tree GPL modules are fine.  	 */ -	if (mod->taints & ~((1 << TAINT_OOT_MODULE) | (1 << TAINT_CRAP))) +	if (trace_module_has_bad_taint(mod))  		return 0;  	mutex_lock(&tracepoints_mutex);  	tp_mod = kmalloc(sizeof(struct tp_module), GFP_KERNEL); diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 240fb62cf394..dd06439b9c84 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -225,7 +225,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id)   *   *	When there is no mapping defined for the user-namespace uid   *	pair INVALID_UID is returned.  Callers are expected to test - *	for and handle handle INVALID_UID being returned.  INVALID_UID + *	for and handle INVALID_UID being returned.  INVALID_UID   *	may be tested for using uid_valid().   
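The __array() rewrite in trace_export.c above drops the shared event_storage buffer and its mutex by building the "type[len]" string at compile time with the usual two-level stringification trick from <linux/stringify.h>. The trick is plain C preprocessor and can be shown standalone:

#include <stdio.h>

/* same idea as linux/stringify.h: the extra level of indirection makes
 * the argument expand before it is stringified */
#define __stringify_1(x) #x
#define __stringify(x)   __stringify_1(x)

#define TYPE_STR(type, len) #type "[" __stringify(len) "]"

#define MY_LEN 16

int main(void)
{
	/* expands, at compile time, to the string literal "char[16]" */
	const char *s = TYPE_STR(char, MY_LEN);

	printf("%s\n", s);
	return 0;
}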
*/  kuid_t make_kuid(struct user_namespace *ns, uid_t uid) { diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 82ef9f3b7473..193e977a10ea 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -1851,6 +1851,12 @@ static void destroy_worker(struct worker *worker)  	if (worker->flags & WORKER_IDLE)  		pool->nr_idle--; +	/* +	 * Once WORKER_DIE is set, the kworker may destroy itself at any +	 * point.  Pin to ensure the task stays until we're done with it. +	 */ +	get_task_struct(worker->task); +  	list_del_init(&worker->entry);  	worker->flags |= WORKER_DIE; @@ -1859,6 +1865,7 @@ static void destroy_worker(struct worker *worker)  	spin_unlock_irq(&pool->lock);  	kthread_stop(worker->task); +	put_task_struct(worker->task);  	kfree(worker);  	spin_lock_irq(&pool->lock);
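The workqueue fix pins the worker's task_struct before WORKER_DIE is set and the pool lock is dropped, so the kworker cannot free itself underneath the subsequent kthread_stop(). The general shape of that reference-pinning pattern, with placeholder my_* names:

#include <linux/kthread.h>
#include <linux/sched.h>
#include <linux/spinlock.h>

#define MY_WORKER_DIE	(1 << 0)	/* placeholder flag */

struct my_worker {			/* placeholder, shaped like struct worker */
	struct task_struct *task;
	unsigned int flags;
};

/* called with *my_lock held; drops and retakes it around the stop */
static void my_destroy_worker(struct my_worker *w, spinlock_t *my_lock)
{
	struct task_struct *task = w->task;

	get_task_struct(task);		/* keep the task valid after unlock */
	w->flags |= MY_WORKER_DIE;
	spin_unlock_irq(my_lock);

	kthread_stop(task);		/* safe: we still hold a reference */
	put_task_struct(task);

	spin_lock_irq(my_lock);
}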
