summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r--kernel/acct.c6
-rw-r--r--kernel/audit.c2
-rw-r--r--kernel/audit.h7
-rw-r--r--kernel/audit_watch.c3
-rw-r--r--kernel/auditfilter.c65
-rw-r--r--kernel/auditsc.c217
-rw-r--r--kernel/debug/debug_core.c18
-rw-r--r--kernel/debug/kdb/kdb_bt.c2
-rw-r--r--kernel/debug/kdb/kdb_io.c33
-rw-r--r--kernel/debug/kdb/kdb_main.c2
-rw-r--r--kernel/events/core.c21
-rw-r--r--kernel/irq/irqdomain.c33
-rw-r--r--kernel/kmod.c7
-rw-r--r--kernel/kthread.c1
-rw-r--r--kernel/rcutree.c21
-rw-r--r--kernel/rcutree.h6
-rw-r--r--kernel/sched/core.c71
-rw-r--r--kernel/time.c2
-rw-r--r--kernel/time/Kconfig4
-rw-r--r--kernel/time/alarmtimer.c118
-rw-r--r--kernel/time/jiffies.c32
-rw-r--r--kernel/time/tick-sched.c2
-rw-r--r--kernel/time/timekeeping.c117
-rw-r--r--kernel/timer.c10
24 files changed, 503 insertions, 297 deletions
diff --git a/kernel/acct.c b/kernel/acct.c
index 6cd7529c9e6a..051e071a06e7 100644
--- a/kernel/acct.c
+++ b/kernel/acct.c
@@ -193,7 +193,7 @@ static void acct_file_reopen(struct bsd_acct_struct *acct, struct file *file,
}
}
-static int acct_on(char *name)
+static int acct_on(struct filename *pathname)
{
struct file *file;
struct vfsmount *mnt;
@@ -201,7 +201,7 @@ static int acct_on(char *name)
struct bsd_acct_struct *acct = NULL;
/* Difference from BSD - they don't do O_APPEND */
- file = filp_open(name, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
+ file = file_open_name(pathname, O_WRONLY|O_APPEND|O_LARGEFILE, 0);
if (IS_ERR(file))
return PTR_ERR(file);
@@ -260,7 +260,7 @@ SYSCALL_DEFINE1(acct, const char __user *, name)
return -EPERM;
if (name) {
- char *tmp = getname(name);
+ struct filename *tmp = getname(name);
if (IS_ERR(tmp))
return (PTR_ERR(tmp));
error = acct_on(tmp);
diff --git a/kernel/audit.c b/kernel/audit.c
index 4d0ceede3319..40414e9143db 100644
--- a/kernel/audit.c
+++ b/kernel/audit.c
@@ -1440,6 +1440,8 @@ void audit_log_link_denied(const char *operation, struct path *link)
ab = audit_log_start(current->audit_context, GFP_KERNEL,
AUDIT_ANOM_LINK);
+ if (!ab)
+ return;
audit_log_format(ab, "op=%s action=denied", operation);
audit_log_format(ab, " pid=%d comm=", current->pid);
audit_log_untrustedstring(ab, current->comm);
diff --git a/kernel/audit.h b/kernel/audit.h
index 9eb3d79482b6..d51cba868e1b 100644
--- a/kernel/audit.h
+++ b/kernel/audit.h
@@ -74,12 +74,15 @@ static inline int audit_hash_ino(u32 ino)
return (ino & (AUDIT_INODE_BUCKETS-1));
}
+/* Indicates that audit should log the full pathname. */
+#define AUDIT_NAME_FULL -1
+
extern int audit_match_class(int class, unsigned syscall);
extern int audit_comparator(const u32 left, const u32 op, const u32 right);
extern int audit_uid_comparator(kuid_t left, u32 op, kuid_t right);
extern int audit_gid_comparator(kgid_t left, u32 op, kgid_t right);
-extern int audit_compare_dname_path(const char *dname, const char *path,
- int *dirlen);
+extern int parent_len(const char *path);
+extern int audit_compare_dname_path(const char *dname, const char *path, int plen);
extern struct sk_buff * audit_make_reply(int pid, int seq, int type,
int done, int multi,
const void *payload, int size);
diff --git a/kernel/audit_watch.c b/kernel/audit_watch.c
index 1c22ec3d87bc..9a9ae6e3d290 100644
--- a/kernel/audit_watch.c
+++ b/kernel/audit_watch.c
@@ -265,7 +265,8 @@ static void audit_update_watch(struct audit_parent *parent,
/* Run all of the watches on this parent looking for the one that
* matches the given dname */
list_for_each_entry_safe(owatch, nextw, &parent->watches, wlist) {
- if (audit_compare_dname_path(dname, owatch->path, NULL))
+ if (audit_compare_dname_path(dname, owatch->path,
+ AUDIT_NAME_FULL))
continue;
/* If the update involves invalidating rules, do the inode-based
diff --git a/kernel/auditfilter.c b/kernel/auditfilter.c
index c4bcdbaf4d4d..7f19f23d38a3 100644
--- a/kernel/auditfilter.c
+++ b/kernel/auditfilter.c
@@ -1298,41 +1298,60 @@ int audit_gid_comparator(kgid_t left, u32 op, kgid_t right)
}
}
-/* Compare given dentry name with last component in given path,
- * return of 0 indicates a match. */
-int audit_compare_dname_path(const char *dname, const char *path,
- int *dirlen)
+/**
+ * parent_len - find the length of the parent portion of a pathname
+ * @path: pathname of which to determine length
+ */
+int parent_len(const char *path)
{
- int dlen, plen;
+ int plen;
const char *p;
- if (!dname || !path)
- return 1;
-
- dlen = strlen(dname);
plen = strlen(path);
- if (plen < dlen)
- return 1;
+
+ if (plen == 0)
+ return plen;
/* disregard trailing slashes */
p = path + plen - 1;
while ((*p == '/') && (p > path))
p--;
- /* find last path component */
- p = p - dlen + 1;
- if (p < path)
+ /* walk backward until we find the next slash or hit beginning */
+ while ((*p != '/') && (p > path))
+ p--;
+
+ /* did we find a slash? Then increment to include it in path */
+ if (*p == '/')
+ p++;
+
+ return p - path;
+}
+
+/**
+ * audit_compare_dname_path - compare given dentry name with last component in
+ * given path. Return of 0 indicates a match.
+ * @dname: dentry name that we're comparing
+ * @path: full pathname that we're comparing
+ * @parentlen: length of the parent if known. Passing in AUDIT_NAME_FULL
+ * here indicates that we must compute this value.
+ */
+int audit_compare_dname_path(const char *dname, const char *path, int parentlen)
+{
+ int dlen, pathlen;
+ const char *p;
+
+ dlen = strlen(dname);
+ pathlen = strlen(path);
+ if (pathlen < dlen)
return 1;
- else if (p > path) {
- if (*--p != '/')
- return 1;
- else
- p++;
- }
- /* return length of path's directory component */
- if (dirlen)
- *dirlen = p - path;
+ parentlen = parentlen == AUDIT_NAME_FULL ? parent_len(path) : parentlen;
+ if (pathlen - parentlen != dlen)
+ return 1;
+
+ p = path + parentlen;
+
return strncmp(p, dname, dlen);
}
diff --git a/kernel/auditsc.c b/kernel/auditsc.c
index f4a7756f999c..2f186ed80c40 100644
--- a/kernel/auditsc.c
+++ b/kernel/auditsc.c
@@ -81,9 +81,6 @@
* a name dynamically and also add those to the list anchored by names_list. */
#define AUDIT_NAMES 5
-/* Indicates that audit should log the full pathname. */
-#define AUDIT_NAME_FULL -1
-
/* no execve audit message should be longer than this (userspace limits) */
#define MAX_EXECVE_AUDIT_LEN 7500
@@ -106,27 +103,29 @@ struct audit_cap_data {
* we don't let putname() free it (instead we free all of the saved
* pointers at syscall exit time).
*
- * Further, in fs/namei.c:path_lookup() we store the inode and device. */
+ * Further, in fs/namei.c:path_lookup() we store the inode and device.
+ */
struct audit_names {
- struct list_head list; /* audit_context->names_list */
- const char *name;
- unsigned long ino;
- dev_t dev;
- umode_t mode;
- kuid_t uid;
- kgid_t gid;
- dev_t rdev;
- u32 osid;
- struct audit_cap_data fcap;
- unsigned int fcap_ver;
- int name_len; /* number of name's characters to log */
- bool name_put; /* call __putname() for this name */
+ struct list_head list; /* audit_context->names_list */
+ struct filename *name;
+ unsigned long ino;
+ dev_t dev;
+ umode_t mode;
+ kuid_t uid;
+ kgid_t gid;
+ dev_t rdev;
+ u32 osid;
+ struct audit_cap_data fcap;
+ unsigned int fcap_ver;
+ int name_len; /* number of name's characters to log */
+ unsigned char type; /* record type */
+ bool name_put; /* call __putname() for this name */
/*
* This was an allocated audit_names and not from the array of
* names allocated in the task audit context. Thus this name
* should be freed on syscall exit
*/
- bool should_free;
+ bool should_free;
};
struct audit_aux_data {
@@ -998,7 +997,7 @@ static inline void audit_free_names(struct audit_context *context)
context->ino_count);
list_for_each_entry(n, &context->names_list, list) {
printk(KERN_ERR "names[%d] = %p = %s\n", i,
- n->name, n->name ?: "(null)");
+ n->name, n->name->name ?: "(null)");
}
dump_stack();
return;
@@ -1555,7 +1554,7 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n,
case AUDIT_NAME_FULL:
/* log the full path */
audit_log_format(ab, " name=");
- audit_log_untrustedstring(ab, n->name);
+ audit_log_untrustedstring(ab, n->name->name);
break;
case 0:
/* name was specified as a relative path and the
@@ -1565,7 +1564,7 @@ static void audit_log_name(struct audit_context *context, struct audit_names *n,
default:
/* log the name's directory component */
audit_log_format(ab, " name=");
- audit_log_n_untrustedstring(ab, n->name,
+ audit_log_n_untrustedstring(ab, n->name->name,
n->name_len);
}
} else
@@ -1995,7 +1994,8 @@ retry:
#endif
}
-static struct audit_names *audit_alloc_name(struct audit_context *context)
+static struct audit_names *audit_alloc_name(struct audit_context *context,
+ unsigned char type)
{
struct audit_names *aname;
@@ -2010,6 +2010,7 @@ static struct audit_names *audit_alloc_name(struct audit_context *context)
}
aname->ino = (unsigned long)-1;
+ aname->type = type;
list_add_tail(&aname->list, &context->names_list);
context->name_count++;
@@ -2020,13 +2021,36 @@ static struct audit_names *audit_alloc_name(struct audit_context *context)
}
/**
+ * __audit_reusename - fill out filename with info from existing entry
+ * @uptr: userland ptr to pathname
+ *
+ * Search the audit_names list for the current audit context. If there is an
+ * existing entry with a matching "uptr" then return the filename
+ * associated with that audit_name. If not, return NULL.
+ */
+struct filename *
+__audit_reusename(const __user char *uptr)
+{
+ struct audit_context *context = current->audit_context;
+ struct audit_names *n;
+
+ list_for_each_entry(n, &context->names_list, list) {
+ if (!n->name)
+ continue;
+ if (n->name->uptr == uptr)
+ return n->name;
+ }
+ return NULL;
+}
+
+/**
* audit_getname - add a name to the list
* @name: name to add
*
* Add a name to the list of audit names for this context.
* Called from fs/namei.c:getname().
*/
-void __audit_getname(const char *name)
+void __audit_getname(struct filename *name)
{
struct audit_context *context = current->audit_context;
struct audit_names *n;
@@ -2040,13 +2064,19 @@ void __audit_getname(const char *name)
return;
}
- n = audit_alloc_name(context);
+#if AUDIT_DEBUG
+ /* The filename _must_ have a populated ->name */
+ BUG_ON(!name->name);
+#endif
+
+ n = audit_alloc_name(context, AUDIT_TYPE_UNKNOWN);
if (!n)
return;
n->name = name;
n->name_len = AUDIT_NAME_FULL;
n->name_put = true;
+ name->aname = n;
if (!context->pwd.dentry)
get_fs_pwd(current->fs, &context->pwd);
@@ -2059,7 +2089,7 @@ void __audit_getname(const char *name)
* then we delay the putname until syscall exit.
* Called from include/linux/fs.h:putname().
*/
-void audit_putname(const char *name)
+void audit_putname(struct filename *name)
{
struct audit_context *context = current->audit_context;
@@ -2074,7 +2104,7 @@ void audit_putname(const char *name)
list_for_each_entry(n, &context->names_list, list)
printk(KERN_ERR "name[%d] = %p = %s\n", i,
- n->name, n->name ?: "(null)");
+ n->name, n->name->name ?: "(null)");
}
#endif
__putname(name);
@@ -2088,8 +2118,8 @@ void audit_putname(const char *name)
" put_count=%d\n",
__FILE__, __LINE__,
context->serial, context->major,
- context->in_syscall, name, context->name_count,
- context->put_count);
+ context->in_syscall, name->name,
+ context->name_count, context->put_count);
dump_stack();
}
}
@@ -2132,13 +2162,13 @@ static void audit_copy_inode(struct audit_names *name, const struct dentry *dent
}
/**
- * audit_inode - store the inode and device from a lookup
+ * __audit_inode - store the inode and device from a lookup
* @name: name being audited
* @dentry: dentry being audited
- *
- * Called from fs/namei.c:path_lookup().
+ * @parent: does this dentry represent the parent?
*/
-void __audit_inode(const char *name, const struct dentry *dentry)
+void __audit_inode(struct filename *name, const struct dentry *dentry,
+ unsigned int parent)
{
struct audit_context *context = current->audit_context;
const struct inode *inode = dentry->d_inode;
@@ -2147,24 +2177,69 @@ void __audit_inode(const char *name, const struct dentry *dentry)
if (!context->in_syscall)
return;
+ if (!name)
+ goto out_alloc;
+
+#if AUDIT_DEBUG
+ /* The struct filename _must_ have a populated ->name */
+ BUG_ON(!name->name);
+#endif
+ /*
+ * If we have a pointer to an audit_names entry already, then we can
+ * just use it directly if the type is correct.
+ */
+ n = name->aname;
+ if (n) {
+ if (parent) {
+ if (n->type == AUDIT_TYPE_PARENT ||
+ n->type == AUDIT_TYPE_UNKNOWN)
+ goto out;
+ } else {
+ if (n->type != AUDIT_TYPE_PARENT)
+ goto out;
+ }
+ }
+
list_for_each_entry_reverse(n, &context->names_list, list) {
- if (n->name && (n->name == name))
- goto out;
+ /* does the name pointer match? */
+ if (!n->name || n->name->name != name->name)
+ continue;
+
+ /* match the correct record type */
+ if (parent) {
+ if (n->type == AUDIT_TYPE_PARENT ||
+ n->type == AUDIT_TYPE_UNKNOWN)
+ goto out;
+ } else {
+ if (n->type != AUDIT_TYPE_PARENT)
+ goto out;
+ }
}
- /* unable to find the name from a previous getname() */
- n = audit_alloc_name(context);
+out_alloc:
+ /* unable to find the name from a previous getname(). Allocate a new
+ * anonymous entry.
+ */
+ n = audit_alloc_name(context, AUDIT_TYPE_NORMAL);
if (!n)
return;
out:
+ if (parent) {
+ n->name_len = n->name ? parent_len(n->name->name) : AUDIT_NAME_FULL;
+ n->type = AUDIT_TYPE_PARENT;
+ } else {
+ n->name_len = AUDIT_NAME_FULL;
+ n->type = AUDIT_TYPE_NORMAL;
+ }
handle_path(dentry);
audit_copy_inode(n, dentry, inode);
}
/**
- * audit_inode_child - collect inode info for created/removed objects
- * @dentry: dentry being audited
+ * __audit_inode_child - collect inode info for created/removed objects
* @parent: inode of dentry parent
+ * @dentry: dentry being audited
+ * @type: AUDIT_TYPE_* value that we're looking for
*
* For syscalls that create or remove filesystem objects, audit_inode
* can only collect information for the filesystem object's parent.
@@ -2174,15 +2249,14 @@ out:
* must be hooked prior, in order to capture the target inode during
* unsuccessful attempts.
*/
-void __audit_inode_child(const struct dentry *dentry,
- const struct inode *parent)
+void __audit_inode_child(const struct inode *parent,
+ const struct dentry *dentry,
+ const unsigned char type)
{
struct audit_context *context = current->audit_context;
- const char *found_parent = NULL, *found_child = NULL;
const struct inode *inode = dentry->d_inode;
const char *dname = dentry->d_name.name;
- struct audit_names *n;
- int dirlen = 0;
+ struct audit_names *n, *found_parent = NULL, *found_child = NULL;
if (!context->in_syscall)
return;
@@ -2190,62 +2264,65 @@ void __audit_inode_child(const struct dentry *dentry,
if (inode)
handle_one(inode);
- /* parent is more likely, look for it first */
+ /* look for a parent entry first */
list_for_each_entry(n, &context->names_list, list) {
- if (!n->name)
+ if (!n->name || n->type != AUDIT_TYPE_PARENT)
continue;
if (n->ino == parent->i_ino &&
- !audit_compare_dname_path(dname, n->name, &dirlen)) {
- n->name_len = dirlen; /* update parent data in place */
- found_parent = n->name;
- goto add_names;
+ !audit_compare_dname_path(dname, n->name->name, n->name_len)) {
+ found_parent = n;
+ break;
}
}
- /* no matching parent, look for matching child */
+ /* is there a matching child entry? */
list_for_each_entry(n, &context->names_list, list) {
- if (!n->name)
+ /* can only match entries that have a name */
+ if (!n->name || n->type != type)
continue;
- /* strcmp() is the more likely scenario */
- if (!strcmp(dname, n->name) ||
- !audit_compare_dname_path(dname, n->name, &dirlen)) {
- if (inode)
- audit_copy_inode(n, NULL, inode);
- else
- n->ino = (unsigned long)-1;
- found_child = n->name;
- goto add_names;
+ /* if we found a parent, make sure this one is a child of it */
+ if (found_parent && (n->name != found_parent->name))
+ continue;
+
+ if (!strcmp(dname, n->name->name) ||
+ !audit_compare_dname_path(dname, n->name->name,
+ found_parent ?
+ found_parent->name_len :
+ AUDIT_NAME_FULL)) {
+ found_child = n;
+ break;
}
}
-add_names:
if (!found_parent) {
- n = audit_alloc_name(context);
+ /* create a new, "anonymous" parent record */
+ n = audit_alloc_name(context, AUDIT_TYPE_PARENT);
if (!n)
return;
audit_copy_inode(n, NULL, parent);
}
if (!found_child) {
- n = audit_alloc_name(context);
- if (!n)
+ found_child = audit_alloc_name(context, type);
+ if (!found_child)
return;
/* Re-use the name belonging to the slot for a matching parent
* directory. All names for this context are relinquished in
* audit_free_names() */
if (found_parent) {
- n->name = found_parent;
- n->name_len = AUDIT_NAME_FULL;
+ found_child->name = found_parent->name;
+ found_child->name_len = AUDIT_NAME_FULL;
/* don't call __putname() */
- n->name_put = false;
+ found_child->name_put = false;
}
-
- if (inode)
- audit_copy_inode(n, NULL, inode);
}
+ if (inode)
+ audit_copy_inode(found_child, dentry, inode);
+ else
+ found_child->ino = (unsigned long)-1;
}
EXPORT_SYMBOL_GPL(__audit_inode_child);
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 17e073c309e6..9a61738cefc8 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -696,6 +696,22 @@ out:
return ret;
}
+/*
+ * GDB places a breakpoint at this function to learn about dynamically
+ * loaded objects. NOTE(review): this text originally said it is "not defined
+ * static" so only one instance of the name exists, yet it is static below.
+ */
+
+static int module_event(struct notifier_block *self, unsigned long val,
+ void *data)
+{
+ return 0;
+}
+
+static struct notifier_block dbg_module_load_nb = {
+ .notifier_call = module_event,
+};
+
int kgdb_nmicallback(int cpu, void *regs)
{
#ifdef CONFIG_SMP
@@ -824,6 +840,7 @@ static void kgdb_register_callbacks(void)
kgdb_arch_init();
if (!dbg_is_early)
kgdb_arch_late();
+ register_module_notifier(&dbg_module_load_nb);
register_reboot_notifier(&dbg_reboot_notifier);
atomic_notifier_chain_register(&panic_notifier_list,
&kgdb_panic_event_nb);
@@ -847,6 +864,7 @@ static void kgdb_unregister_callbacks(void)
if (kgdb_io_module_registered) {
kgdb_io_module_registered = 0;
unregister_reboot_notifier(&dbg_reboot_notifier);
+ unregister_module_notifier(&dbg_module_load_nb);
atomic_notifier_chain_unregister(&panic_notifier_list,
&kgdb_panic_event_nb);
kgdb_arch_exit();
diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c
index 07c9bbb94a0b..b03e0e814e43 100644
--- a/kernel/debug/kdb/kdb_bt.c
+++ b/kernel/debug/kdb/kdb_bt.c
@@ -129,6 +129,8 @@ kdb_bt(int argc, const char **argv)
}
/* Now the inactive tasks */
kdb_do_each_thread(g, p) {
+ if (KDB_FLAG(CMD_INTERRUPT))
+ return 0;
if (task_curr(p))
continue;
if (kdb_bt1(p, mask, argcount, btaprompt))
diff --git a/kernel/debug/kdb/kdb_io.c b/kernel/debug/kdb/kdb_io.c
index 0a69d2adc4f3..14ff4849262c 100644
--- a/kernel/debug/kdb/kdb_io.c
+++ b/kernel/debug/kdb/kdb_io.c
@@ -552,6 +552,7 @@ int vkdb_printf(const char *fmt, va_list ap)
{
int diag;
int linecount;
+ int colcount;
int logging, saved_loglevel = 0;
int saved_trap_printk;
int got_printf_lock = 0;
@@ -584,6 +585,10 @@ int vkdb_printf(const char *fmt, va_list ap)
if (diag || linecount <= 1)
linecount = 24;
+ diag = kdbgetintenv("COLUMNS", &colcount);
+ if (diag || colcount <= 1)
+ colcount = 80;
+
diag = kdbgetintenv("LOGGING", &logging);
if (diag)
logging = 0;
@@ -690,7 +695,7 @@ kdb_printit:
gdbstub_msg_write(kdb_buffer, retlen);
} else {
if (dbg_io_ops && !dbg_io_ops->is_console) {
- len = strlen(kdb_buffer);
+ len = retlen;
cp = kdb_buffer;
while (len--) {
dbg_io_ops->write_char(*cp);
@@ -709,11 +714,29 @@ kdb_printit:
printk(KERN_INFO "%s", kdb_buffer);
}
- if (KDB_STATE(PAGER) && strchr(kdb_buffer, '\n'))
- kdb_nextline++;
+ if (KDB_STATE(PAGER)) {
+ /*
+ * Check printed string to decide how to bump the
+ * kdb_nextline to control when the more prompt should
+ * show up.
+ */
+ int got = 0;
+ len = retlen;
+ while (len--) {
+ if (kdb_buffer[len] == '\n') {
+ kdb_nextline++;
+ got = 0;
+ } else if (kdb_buffer[len] == '\r') {
+ got = 0;
+ } else {
+ got++;
+ }
+ }
+ kdb_nextline += got / (colcount + 1);
+ }
/* check for having reached the LINES number of printed lines */
- if (kdb_nextline == linecount) {
+ if (kdb_nextline >= linecount) {
char buf1[16] = "";
/* Watch out for recursion here. Any routine that calls
@@ -765,7 +788,7 @@ kdb_printit:
kdb_grepping_flag = 0;
kdb_printf("\n");
} else if (buf1[0] == ' ') {
- kdb_printf("\n");
+ kdb_printf("\r");
suspend_grep = 1; /* for this recursion */
} else if (buf1[0] == '\n') {
kdb_nextline = linecount - 1;
diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c
index 1261dc7eaeb9..4d5f8d5612f3 100644
--- a/kernel/debug/kdb/kdb_main.c
+++ b/kernel/debug/kdb/kdb_main.c
@@ -2101,6 +2101,8 @@ static int kdb_dmesg(int argc, const char **argv)
}
if (!lines--)
break;
+ if (KDB_FLAG(CMD_INTERRUPT))
+ return 0;
kdb_printf("%.*s\n", (int)len - 1, buf);
}
diff --git a/kernel/events/core.c b/kernel/events/core.c
index cda3ebd49e86..dbccf83c134d 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -372,6 +372,8 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
+ if (cpuctx->unique_pmu != pmu)
+ continue; /* ensure we process each cpuctx once */
/*
* perf_cgroup_events says at least one
@@ -395,9 +397,10 @@ void perf_cgroup_switch(struct task_struct *task, int mode)
if (mode & PERF_CGROUP_SWIN) {
WARN_ON_ONCE(cpuctx->cgrp);
- /* set cgrp before ctxsw in to
- * allow event_filter_match() to not
- * have to pass task around
+ /*
+ * set cgrp before ctxsw in to allow
+ * event_filter_match() to not have to pass
+ * task around
*/
cpuctx->cgrp = perf_cgroup_from_task(task);
cpu_ctx_sched_in(cpuctx, EVENT_ALL, task);
@@ -4412,7 +4415,7 @@ static void perf_event_task_event(struct perf_task_event *task_event)
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
- if (cpuctx->active_pmu != pmu)
+ if (cpuctx->unique_pmu != pmu)
goto next;
perf_event_task_ctx(&cpuctx->ctx, task_event);
@@ -4558,7 +4561,7 @@ static void perf_event_comm_event(struct perf_comm_event *comm_event)
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
- if (cpuctx->active_pmu != pmu)
+ if (cpuctx->unique_pmu != pmu)
goto next;
perf_event_comm_ctx(&cpuctx->ctx, comm_event);
@@ -4754,7 +4757,7 @@ got_name:
rcu_read_lock();
list_for_each_entry_rcu(pmu, &pmus, entry) {
cpuctx = get_cpu_ptr(pmu->pmu_cpu_context);
- if (cpuctx->active_pmu != pmu)
+ if (cpuctx->unique_pmu != pmu)
goto next;
perf_event_mmap_ctx(&cpuctx->ctx, mmap_event,
vma->vm_flags & VM_EXEC);
@@ -5855,8 +5858,8 @@ static void update_pmu_context(struct pmu *pmu, struct pmu *old_pmu)
cpuctx = per_cpu_ptr(pmu->pmu_cpu_context, cpu);
- if (cpuctx->active_pmu == old_pmu)
- cpuctx->active_pmu = pmu;
+ if (cpuctx->unique_pmu == old_pmu)
+ cpuctx->unique_pmu = pmu;
}
}
@@ -5991,7 +5994,7 @@ skip_type:
cpuctx->ctx.pmu = pmu;
cpuctx->jiffies_interval = 1;
INIT_LIST_HEAD(&cpuctx->rotation_list);
- cpuctx->active_pmu = pmu;
+ cpuctx->unique_pmu = pmu;
}
got_cpu_context:
diff --git a/kernel/irq/irqdomain.c b/kernel/irq/irqdomain.c
index 49a77727db42..4e69e24d3d7d 100644
--- a/kernel/irq/irqdomain.c
+++ b/kernel/irq/irqdomain.c
@@ -148,7 +148,8 @@ static unsigned int irq_domain_legacy_revmap(struct irq_domain *domain,
* @host_data: Controller private data pointer
*
* Allocates a legacy irq_domain if irq_base is positive or a linear
- * domain otherwise.
+ * domain otherwise. For the legacy domain, IRQ descriptors will also
+ * be allocated.
*
* This is intended to implement the expected behaviour for most
* interrupt controllers which is that a linear mapping should
@@ -162,11 +163,33 @@ struct irq_domain *irq_domain_add_simple(struct device_node *of_node,
const struct irq_domain_ops *ops,
void *host_data)
{
- if (first_irq > 0)
- return irq_domain_add_legacy(of_node, size, first_irq, 0,
+ if (first_irq > 0) {
+ int irq_base;
+
+ if (IS_ENABLED(CONFIG_SPARSE_IRQ)) {
+ /*
+ * Set the descriptor allocator to search for a
+ * 1-to-1 mapping, such as irq_alloc_desc_at().
+ * Use of_node_to_nid() which is defined to
+ * numa_node_id() on platforms that have no custom
+ * implementation.
+ */
+ irq_base = irq_alloc_descs(first_irq, first_irq, size,
+ of_node_to_nid(of_node));
+ if (irq_base < 0) {
+ WARN(1, "Cannot allocate irq_descs @ IRQ%d, assuming pre-allocated\n",
+ first_irq);
+ irq_base = first_irq;
+ }
+ } else
+ irq_base = first_irq;
+
+ return irq_domain_add_legacy(of_node, size, irq_base, 0,
ops, host_data);
- else
- return irq_domain_add_linear(of_node, size, ops, host_data);
+ }
+
+ /* A linear domain is the default */
+ return irq_domain_add_linear(of_node, size, ops, host_data);
}
/**
diff --git a/kernel/kmod.c b/kernel/kmod.c
index 6f99aead66c6..1c317e386831 100644
--- a/kernel/kmod.c
+++ b/kernel/kmod.c
@@ -37,6 +37,7 @@
#include <linux/notifier.h>
#include <linux/suspend.h>
#include <linux/rwsem.h>
+#include <linux/ptrace.h>
#include <asm/uaccess.h>
#include <trace/events/module.h>
@@ -221,11 +222,13 @@ static int ____call_usermodehelper(void *data)
retval = kernel_execve(sub_info->path,
(const char *const *)sub_info->argv,
(const char *const *)sub_info->envp);
+ if (!retval)
+ return 0;
/* Exec failed? */
fail:
sub_info->retval = retval;
- return 0;
+ do_exit(0);
}
static int call_helper(void *data)
@@ -292,7 +295,7 @@ static int wait_for_helper(void *data)
}
umh_complete(sub_info);
- return 0;
+ do_exit(0);
}
/* This is run by khelper thread */
diff --git a/kernel/kthread.c b/kernel/kthread.c
index 146a6fa96825..29fb60caecb5 100644
--- a/kernel/kthread.c
+++ b/kernel/kthread.c
@@ -16,6 +16,7 @@
#include <linux/mutex.h>
#include <linux/slab.h>
#include <linux/freezer.h>
+#include <linux/ptrace.h>
#include <trace/events/sched.h>
static DEFINE_SPINLOCK(kthread_create_lock);
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 4fb2376ddf06..74df86bd9204 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -74,6 +74,7 @@ static struct lock_class_key rcu_fqs_class[RCU_NUM_LVLS];
.orphan_nxttail = &sname##_state.orphan_nxtlist, \
.orphan_donetail = &sname##_state.orphan_donelist, \
.barrier_mutex = __MUTEX_INITIALIZER(sname##_state.barrier_mutex), \
+ .onoff_mutex = __MUTEX_INITIALIZER(sname##_state.onoff_mutex), \
.name = #sname, \
}
@@ -1197,7 +1198,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
raw_spin_unlock_irq(&rnp->lock);
/* Exclude any concurrent CPU-hotplug operations. */
- get_online_cpus();
+ mutex_lock(&rsp->onoff_mutex);
/*
* Set the quiescent-state-needed bits in all the rcu_node
@@ -1234,7 +1235,7 @@ static int rcu_gp_init(struct rcu_state *rsp)
cond_resched();
}
- put_online_cpus();
+ mutex_unlock(&rsp->onoff_mutex);
return 1;
}
@@ -1700,6 +1701,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
/* Remove the dead CPU from the bitmasks in the rcu_node hierarchy. */
/* Exclude any attempts to start a new grace period. */
+ mutex_lock(&rsp->onoff_mutex);
raw_spin_lock_irqsave(&rsp->onofflock, flags);
/* Orphan the dead CPU's callbacks, and adopt them if appropriate. */
@@ -1744,6 +1746,7 @@ static void rcu_cleanup_dead_cpu(int cpu, struct rcu_state *rsp)
init_callback_list(rdp);
/* Disallow further callbacks on this CPU. */
rdp->nxttail[RCU_NEXT_TAIL] = NULL;
+ mutex_unlock(&rsp->onoff_mutex);
}
#else /* #ifdef CONFIG_HOTPLUG_CPU */
@@ -2648,6 +2651,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
struct rcu_node *rnp = rcu_get_root(rsp);
+ /* Exclude new grace periods. */
+ mutex_lock(&rsp->onoff_mutex);
+
/* Set up local state, ensuring consistent view of global state. */
raw_spin_lock_irqsave(&rnp->lock, flags);
rdp->beenonline = 1; /* We have now been online. */
@@ -2662,14 +2668,6 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
rcu_prepare_for_idle_init(cpu);
raw_spin_unlock(&rnp->lock); /* irqs remain disabled. */
- /*
- * A new grace period might start here. If so, we won't be part
- * of it, but that is OK, as we are currently in a quiescent state.
- */
-
- /* Exclude any attempts to start a new GP on large systems. */
- raw_spin_lock(&rsp->onofflock); /* irqs already disabled. */
-
/* Add CPU to rcu_node bitmasks. */
rnp = rdp->mynode;
mask = rdp->grpmask;
@@ -2693,8 +2691,9 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
raw_spin_unlock(&rnp->lock); /* irqs already disabled. */
rnp = rnp->parent;
} while (rnp != NULL && !(rnp->qsmaskinit & mask));
+ local_irq_restore(flags);
- raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
+ mutex_unlock(&rsp->onoff_mutex);
}
static void __cpuinit rcu_prepare_cpu(int cpu)
diff --git a/kernel/rcutree.h b/kernel/rcutree.h
index 5faf05d68326..a240f032848e 100644
--- a/kernel/rcutree.h
+++ b/kernel/rcutree.h
@@ -394,11 +394,17 @@ struct rcu_state {
struct rcu_head **orphan_donetail; /* Tail of above. */
long qlen_lazy; /* Number of lazy callbacks. */
long qlen; /* Total number of callbacks. */
+ /* End of fields guarded by onofflock. */
+
+ struct mutex onoff_mutex; /* Coordinate hotplug & GPs. */
+
struct mutex barrier_mutex; /* Guards barrier fields. */
atomic_t barrier_cpu_count; /* # CPUs waiting on. */
struct completion barrier_completion; /* Wake at barrier end. */
unsigned long n_barrier_done; /* ++ at start and end of */
/* _rcu_barrier(). */
+ /* End of fields guarded by barrier_mutex. */
+
unsigned long jiffies_force_qs; /* Time at which to invoke */
/* force_quiescent_state(). */
unsigned long n_force_qs; /* Number of calls to */
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index c17747236438..2d8927fda712 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -505,7 +505,7 @@ static inline void init_hrtick(void)
#ifdef CONFIG_SMP
#ifndef tsk_is_polling
-#define tsk_is_polling(t) test_tsk_thread_flag(t, TIF_POLLING_NRFLAG)
+#define tsk_is_polling(t) 0
#endif
void resched_task(struct task_struct *p)
@@ -6122,6 +6122,17 @@ static void sched_init_numa(void)
* numbers.
*/
+ /*
+ * Here, we should temporarily reset sched_domains_numa_levels to 0.
+ * If it fails to allocate memory for array sched_domains_numa_masks[][],
+ * the array will contain fewer than 'level' members. This could be
+ * dangerous when we use it to iterate array sched_domains_numa_masks[][]
+ * in other functions.
+ *
+ * We reset it to 'level' at the end of this function.
+ */
+ sched_domains_numa_levels = 0;
+
sched_domains_numa_masks = kzalloc(sizeof(void *) * level, GFP_KERNEL);
if (!sched_domains_numa_masks)
return;
@@ -6176,11 +6187,68 @@ static void sched_init_numa(void)
}
sched_domain_topology = tl;
+
+ sched_domains_numa_levels = level;
+}
+
+static void sched_domains_numa_masks_set(int cpu)
+{
+ int i, j;
+ int node = cpu_to_node(cpu);
+
+ for (i = 0; i < sched_domains_numa_levels; i++) {
+ for (j = 0; j < nr_node_ids; j++) {
+ if (node_distance(j, node) <= sched_domains_numa_distance[i])
+ cpumask_set_cpu(cpu, sched_domains_numa_masks[i][j]);
+ }
+ }
+}
+
+static void sched_domains_numa_masks_clear(int cpu)
+{
+ int i, j;
+ for (i = 0; i < sched_domains_numa_levels; i++) {
+ for (j = 0; j < nr_node_ids; j++)
+ cpumask_clear_cpu(cpu, sched_domains_numa_masks[i][j]);
+ }
+}
+
+/*
+ * Update sched_domains_numa_masks[level][node] array when cpus come
+ * online (CPU_ONLINE) or are removed (CPU_DEAD).
+ */
+static int sched_domains_numa_masks_update(struct notifier_block *nfb,
+ unsigned long action,
+ void *hcpu)
+{
+ int cpu = (long)hcpu;
+
+ switch (action & ~CPU_TASKS_FROZEN) {
+ case CPU_ONLINE:
+ sched_domains_numa_masks_set(cpu);
+ break;
+
+ case CPU_DEAD:
+ sched_domains_numa_masks_clear(cpu);
+ break;
+
+ default:
+ return NOTIFY_DONE;
+ }
+
+ return NOTIFY_OK;
}
#else
static inline void sched_init_numa(void)
{
}
+
+static int sched_domains_numa_masks_update(struct notifier_block *nfb,
+ unsigned long action,
+ void *hcpu)
+{
+ return 0;
+}
#endif /* CONFIG_NUMA */
static int __sdt_alloc(const struct cpumask *cpu_map)
@@ -6629,6 +6697,7 @@ void __init sched_init_smp(void)
mutex_unlock(&sched_domains_mutex);
put_online_cpus();
+ hotcpu_notifier(sched_domains_numa_masks_update, CPU_PRI_SCHED_ACTIVE);
hotcpu_notifier(cpuset_cpu_active, CPU_PRI_CPUSET_ACTIVE);
hotcpu_notifier(cpuset_cpu_inactive, CPU_PRI_CPUSET_INACTIVE);
diff --git a/kernel/time.c b/kernel/time.c
index ba744cf80696..d226c6a3fd28 100644
--- a/kernel/time.c
+++ b/kernel/time.c
@@ -30,7 +30,7 @@
#include <linux/export.h>
#include <linux/timex.h>
#include <linux/capability.h>
-#include <linux/clocksource.h>
+#include <linux/timekeeper_internal.h>
#include <linux/errno.h>
#include <linux/syscalls.h>
#include <linux/security.h>
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index fd42bd452b75..8601f0db1261 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -16,6 +16,10 @@ config ARCH_CLOCKSOURCE_DATA
config GENERIC_TIME_VSYSCALL
bool
+# Timekeeping vsyscall support
+config GENERIC_TIME_VSYSCALL_OLD
+ bool
+
# ktime_t scalar 64bit nsec representation
config KTIME_SCALAR
bool
diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c
index aa27d391bfc8..f11d83b12949 100644
--- a/kernel/time/alarmtimer.c
+++ b/kernel/time/alarmtimer.c
@@ -37,7 +37,6 @@
static struct alarm_base {
spinlock_t lock;
struct timerqueue_head timerqueue;
- struct hrtimer timer;
ktime_t (*gettime)(void);
clockid_t base_clockid;
} alarm_bases[ALARM_NUMTYPE];
@@ -46,6 +45,8 @@ static struct alarm_base {
static ktime_t freezer_delta;
static DEFINE_SPINLOCK(freezer_delta_lock);
+static struct wakeup_source *ws;
+
#ifdef CONFIG_RTC_CLASS
/* rtc timer and device for setting alarm wakeups at suspend */
static struct rtc_timer rtctimer;
@@ -130,50 +131,35 @@ static inline void alarmtimer_rtc_timer_init(void) { }
* @base: pointer to the base where the timer is being run
* @alarm: pointer to alarm being enqueued.
*
- * Adds alarm to a alarm_base timerqueue and if necessary sets
- * an hrtimer to run.
+ * Adds alarm to an alarm_base timerqueue
*
* Must hold base->lock when calling.
*/
static void alarmtimer_enqueue(struct alarm_base *base, struct alarm *alarm)
{
+ if (alarm->state & ALARMTIMER_STATE_ENQUEUED)
+ timerqueue_del(&base->timerqueue, &alarm->node);
+
timerqueue_add(&base->timerqueue, &alarm->node);
alarm->state |= ALARMTIMER_STATE_ENQUEUED;
-
- if (&alarm->node == timerqueue_getnext(&base->timerqueue)) {
- hrtimer_try_to_cancel(&base->timer);
- hrtimer_start(&base->timer, alarm->node.expires,
- HRTIMER_MODE_ABS);
- }
}
/**
- * alarmtimer_remove - Removes an alarm timer from an alarm_base timerqueue
+ * alarmtimer_dequeue - Removes an alarm timer from an alarm_base timerqueue
* @base: pointer to the base where the timer is running
* @alarm: pointer to alarm being removed
*
- * Removes alarm to a alarm_base timerqueue and if necessary sets
- * a new timer to run.
+ * Removes alarm from an alarm_base timerqueue
*
* Must hold base->lock when calling.
*/
-static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm)
+static void alarmtimer_dequeue(struct alarm_base *base, struct alarm *alarm)
{
- struct timerqueue_node *next = timerqueue_getnext(&base->timerqueue);
-
if (!(alarm->state & ALARMTIMER_STATE_ENQUEUED))
return;
timerqueue_del(&base->timerqueue, &alarm->node);
alarm->state &= ~ALARMTIMER_STATE_ENQUEUED;
-
- if (next == &alarm->node) {
- hrtimer_try_to_cancel(&base->timer);
- next = timerqueue_getnext(&base->timerqueue);
- if (!next)
- return;
- hrtimer_start(&base->timer, next->expires, HRTIMER_MODE_ABS);
- }
}
@@ -188,42 +174,23 @@ static void alarmtimer_remove(struct alarm_base *base, struct alarm *alarm)
*/
static enum hrtimer_restart alarmtimer_fired(struct hrtimer *timer)
{
- struct alarm_base *base = container_of(timer, struct alarm_base, timer);
- struct timerqueue_node *next;
+ struct alarm *alarm = container_of(timer, struct alarm, timer);
+ struct alarm_base *base = &alarm_bases[alarm->type];
unsigned long flags;
- ktime_t now;
int ret = HRTIMER_NORESTART;
int restart = ALARMTIMER_NORESTART;
spin_lock_irqsave(&base->lock, flags);
- now = base->gettime();
- while ((next = timerqueue_getnext(&base->timerqueue))) {
- struct alarm *alarm;
- ktime_t expired = next->expires;
-
- if (expired.tv64 > now.tv64)
- break;
-
- alarm = container_of(next, struct alarm, node);
-
- timerqueue_del(&base->timerqueue, &alarm->node);
- alarm->state &= ~ALARMTIMER_STATE_ENQUEUED;
-
- alarm->state |= ALARMTIMER_STATE_CALLBACK;
- spin_unlock_irqrestore(&base->lock, flags);
- if (alarm->function)
- restart = alarm->function(alarm, now);
- spin_lock_irqsave(&base->lock, flags);
- alarm->state &= ~ALARMTIMER_STATE_CALLBACK;
+ alarmtimer_dequeue(base, alarm);
+ spin_unlock_irqrestore(&base->lock, flags);
- if (restart != ALARMTIMER_NORESTART) {
- timerqueue_add(&base->timerqueue, &alarm->node);
- alarm->state |= ALARMTIMER_STATE_ENQUEUED;
- }
- }
+ if (alarm->function)
+ restart = alarm->function(alarm, base->gettime());
- if (next) {
- hrtimer_set_expires(&base->timer, next->expires);
+ spin_lock_irqsave(&base->lock, flags);
+ if (restart != ALARMTIMER_NORESTART) {
+ hrtimer_set_expires(&alarm->timer, alarm->node.expires);
+ alarmtimer_enqueue(base, alarm);
ret = HRTIMER_RESTART;
}
spin_unlock_irqrestore(&base->lock, flags);
@@ -250,6 +217,7 @@ static int alarmtimer_suspend(struct device *dev)
unsigned long flags;
struct rtc_device *rtc;
int i;
+ int ret;
spin_lock_irqsave(&freezer_delta_lock, flags);
min = freezer_delta;
@@ -279,8 +247,10 @@ static int alarmtimer_suspend(struct device *dev)
if (min.tv64 == 0)
return 0;
- /* XXX - Should we enforce a minimum sleep time? */
- WARN_ON(min.tv64 < NSEC_PER_SEC);
+ if (ktime_to_ns(min) < 2 * NSEC_PER_SEC) {
+ __pm_wakeup_event(ws, 2 * MSEC_PER_SEC);
+ return -EBUSY;
+ }
/* Setup an rtc timer to fire that far in the future */
rtc_timer_cancel(rtc, &rtctimer);
@@ -288,9 +258,11 @@ static int alarmtimer_suspend(struct device *dev)
now = rtc_tm_to_ktime(tm);
now = ktime_add(now, min);
- rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0));
-
- return 0;
+ /* Set alarm, if in the past reject suspend briefly to handle */
+ ret = rtc_timer_start(rtc, &rtctimer, now, ktime_set(0, 0));
+ if (ret < 0)
+ __pm_wakeup_event(ws, MSEC_PER_SEC);
+ return ret;
}
#else
static int alarmtimer_suspend(struct device *dev)
@@ -324,6 +296,9 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
enum alarmtimer_restart (*function)(struct alarm *, ktime_t))
{
timerqueue_init(&alarm->node);
+ hrtimer_init(&alarm->timer, alarm_bases[type].base_clockid,
+ HRTIMER_MODE_ABS);
+ alarm->timer.function = alarmtimer_fired;
alarm->function = function;
alarm->type = type;
alarm->state = ALARMTIMER_STATE_INACTIVE;
@@ -334,17 +309,19 @@ void alarm_init(struct alarm *alarm, enum alarmtimer_type type,
* @alarm: ptr to alarm to set
* @start: time to run the alarm
*/
-void alarm_start(struct alarm *alarm, ktime_t start)
+int alarm_start(struct alarm *alarm, ktime_t start)
{
struct alarm_base *base = &alarm_bases[alarm->type];
unsigned long flags;
+ int ret;
spin_lock_irqsave(&base->lock, flags);
- if (alarmtimer_active(alarm))
- alarmtimer_remove(base, alarm);
alarm->node.expires = start;
alarmtimer_enqueue(base, alarm);
+ ret = hrtimer_start(&alarm->timer, alarm->node.expires,
+ HRTIMER_MODE_ABS);
spin_unlock_irqrestore(&base->lock, flags);
+ return ret;
}
/**
@@ -358,18 +335,12 @@ int alarm_try_to_cancel(struct alarm *alarm)
{
struct alarm_base *base = &alarm_bases[alarm->type];
unsigned long flags;
- int ret = -1;
- spin_lock_irqsave(&base->lock, flags);
-
- if (alarmtimer_callback_running(alarm))
- goto out;
+ int ret;
- if (alarmtimer_is_queued(alarm)) {
- alarmtimer_remove(base, alarm);
- ret = 1;
- } else
- ret = 0;
-out:
+ spin_lock_irqsave(&base->lock, flags);
+ ret = hrtimer_try_to_cancel(&alarm->timer);
+ if (ret >= 0)
+ alarmtimer_dequeue(base, alarm);
spin_unlock_irqrestore(&base->lock, flags);
return ret;
}
@@ -802,10 +773,6 @@ static int __init alarmtimer_init(void)
for (i = 0; i < ALARM_NUMTYPE; i++) {
timerqueue_init_head(&alarm_bases[i].timerqueue);
spin_lock_init(&alarm_bases[i].lock);
- hrtimer_init(&alarm_bases[i].timer,
- alarm_bases[i].base_clockid,
- HRTIMER_MODE_ABS);
- alarm_bases[i].timer.function = alarmtimer_fired;
}
error = alarmtimer_rtc_interface_setup();
@@ -821,6 +788,7 @@ static int __init alarmtimer_init(void)
error = PTR_ERR(pdev);
goto out_drv;
}
+ ws = wakeup_source_register("alarmtimer");
return 0;
out_drv:
diff --git a/kernel/time/jiffies.c b/kernel/time/jiffies.c
index 46da0537c10b..6629bf7b5285 100644
--- a/kernel/time/jiffies.c
+++ b/kernel/time/jiffies.c
@@ -37,7 +37,7 @@
* requested HZ value. It is also not recommended
* for "tick-less" systems.
*/
-#define NSEC_PER_JIFFY ((u32)((((u64)NSEC_PER_SEC)<<8)/SHIFTED_HZ))
+#define NSEC_PER_JIFFY ((NSEC_PER_SEC+HZ/2)/HZ)
/* Since jiffies uses a simple NSEC_PER_JIFFY multiplier
* conversion, the .shift value could be zero. However
@@ -95,3 +95,33 @@ struct clocksource * __init __weak clocksource_default_clock(void)
{
return &clocksource_jiffies;
}
+
+struct clocksource refined_jiffies;
+
+int register_refined_jiffies(long cycles_per_second)
+{
+ u64 nsec_per_tick, shift_hz;
+ long cycles_per_tick;
+
+
+
+ refined_jiffies = clocksource_jiffies;
+ refined_jiffies.name = "refined-jiffies";
+ refined_jiffies.rating++;
+
+ /* Calc cycles per tick */
+ cycles_per_tick = (cycles_per_second + HZ/2)/HZ;
+ /* shift_hz stores hz<<8 for extra accuracy */
+ shift_hz = (u64)cycles_per_second << 8;
+ shift_hz += cycles_per_tick/2;
+ do_div(shift_hz, cycles_per_tick);
+ /* Calculate nsec_per_tick using shift_hz */
+ nsec_per_tick = (u64)NSEC_PER_SEC << 8;
+ nsec_per_tick += (u32)shift_hz/2;
+ do_div(nsec_per_tick, (u32)shift_hz);
+
+ refined_jiffies.mult = ((u32)nsec_per_tick) << JIFFIES_SHIFT;
+
+ clocksource_register(&refined_jiffies);
+ return 0;
+}
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index f423bdd035c2..a40260885265 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -835,7 +835,7 @@ static enum hrtimer_restart tick_sched_timer(struct hrtimer *timer)
*/
if (ts->tick_stopped) {
touch_softlockup_watchdog();
- if (idle_cpu(cpu))
+ if (is_idle_task(current))
ts->idle_jiffies++;
}
update_process_times(user_mode(regs));
diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c
index 5ce06a3fa91e..e424970bb562 100644
--- a/kernel/time/timekeeping.c
+++ b/kernel/time/timekeeping.c
@@ -8,6 +8,7 @@
*
*/
+#include <linux/timekeeper_internal.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/percpu.h>
@@ -21,61 +22,6 @@
#include <linux/tick.h>
#include <linux/stop_machine.h>
-/* Structure holding internal timekeeping values. */
-struct timekeeper {
- /* Current clocksource used for timekeeping. */
- struct clocksource *clock;
- /* NTP adjusted clock multiplier */
- u32 mult;
- /* The shift value of the current clocksource. */
- u32 shift;
- /* Number of clock cycles in one NTP interval. */
- cycle_t cycle_interval;
- /* Number of clock shifted nano seconds in one NTP interval. */
- u64 xtime_interval;
- /* shifted nano seconds left over when rounding cycle_interval */
- s64 xtime_remainder;
- /* Raw nano seconds accumulated per NTP interval. */
- u32 raw_interval;
-
- /* Current CLOCK_REALTIME time in seconds */
- u64 xtime_sec;
- /* Clock shifted nano seconds */
- u64 xtime_nsec;
-
- /* Difference between accumulated time and NTP time in ntp
- * shifted nano seconds. */
- s64 ntp_error;
- /* Shift conversion between clock shifted nano seconds and
- * ntp shifted nano seconds. */
- u32 ntp_error_shift;
-
- /*
- * wall_to_monotonic is what we need to add to xtime (or xtime corrected
- * for sub jiffie times) to get to monotonic time. Monotonic is pegged
- * at zero at system boot time, so wall_to_monotonic will be negative,
- * however, we will ALWAYS keep the tv_nsec part positive so we can use
- * the usual normalization.
- *
- * wall_to_monotonic is moved after resume from suspend for the
- * monotonic time not to jump. We need to add total_sleep_time to
- * wall_to_monotonic to get the real boot based time offset.
- *
- * - wall_to_monotonic is no longer the boot time, getboottime must be
- * used instead.
- */
- struct timespec wall_to_monotonic;
- /* Offset clock monotonic -> clock realtime */
- ktime_t offs_real;
- /* time spent in suspend */
- struct timespec total_sleep_time;
- /* Offset clock monotonic -> clock boottime */
- ktime_t offs_boot;
- /* The raw monotonic time for the CLOCK_MONOTONIC_RAW posix clock. */
- struct timespec raw_time;
- /* Seqlock for all timekeeper values */
- seqlock_t lock;
-};
static struct timekeeper timekeeper;
@@ -96,15 +42,6 @@ static inline void tk_normalize_xtime(struct timekeeper *tk)
}
}
-static struct timespec tk_xtime(struct timekeeper *tk)
-{
- struct timespec ts;
-
- ts.tv_sec = tk->xtime_sec;
- ts.tv_nsec = (long)(tk->xtime_nsec >> tk->shift);
- return ts;
-}
-
static void tk_set_xtime(struct timekeeper *tk, const struct timespec *ts)
{
tk->xtime_sec = ts->tv_sec;
@@ -246,14 +183,11 @@ static inline s64 timekeeping_get_ns_raw(struct timekeeper *tk)
/* must hold write on timekeeper.lock */
static void timekeeping_update(struct timekeeper *tk, bool clearntp)
{
- struct timespec xt;
-
if (clearntp) {
tk->ntp_error = 0;
ntp_clear();
}
- xt = tk_xtime(tk);
- update_vsyscall(&xt, &tk->wall_to_monotonic, tk->clock, tk->mult);
+ update_vsyscall(tk);
}
/**
@@ -1113,7 +1047,7 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
accumulate_nsecs_to_secs(tk);
/* Accumulate raw time */
- raw_nsecs = tk->raw_interval << shift;
+ raw_nsecs = (u64)tk->raw_interval << shift;
raw_nsecs += tk->raw_time.tv_nsec;
if (raw_nsecs >= NSEC_PER_SEC) {
u64 raw_secs = raw_nsecs;
@@ -1130,6 +1064,33 @@ static cycle_t logarithmic_accumulation(struct timekeeper *tk, cycle_t offset,
return offset;
}
+#ifdef CONFIG_GENERIC_TIME_VSYSCALL_OLD
+static inline void old_vsyscall_fixup(struct timekeeper *tk)
+{
+ s64 remainder;
+
+ /*
+ * Store only full nanoseconds into xtime_nsec after rounding
+ * it up and add the remainder to the error difference.
+ * XXX - This is necessary to avoid small 1ns inconsistencies caused
+ * by truncating the remainder in vsyscalls. However, it causes
+ * additional work to be done in timekeeping_adjust(). Once
+ * the vsyscall implementations are converted to use xtime_nsec
+ * (shifted nanoseconds), and CONFIG_GENERIC_TIME_VSYSCALL_OLD
+ * users are removed, this can be killed.
+ */
+ remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
+ tk->xtime_nsec -= remainder;
+ tk->xtime_nsec += 1ULL << tk->shift;
+ tk->ntp_error += remainder << tk->ntp_error_shift;
+
+}
+#else
+#define old_vsyscall_fixup(tk)
+#endif
+
+
+
/**
* update_wall_time - Uses the current clocksource to increment the wall time
*
@@ -1141,7 +1102,6 @@ static void update_wall_time(void)
cycle_t offset;
int shift = 0, maxshift;
unsigned long flags;
- s64 remainder;
write_seqlock_irqsave(&tk->lock, flags);
@@ -1183,20 +1143,11 @@ static void update_wall_time(void)
/* correct the clock when NTP error is too big */
timekeeping_adjust(tk, offset);
-
/*
- * Store only full nanoseconds into xtime_nsec after rounding
- * it up and add the remainder to the error difference.
- * XXX - This is necessary to avoid small 1ns inconsistnecies caused
- * by truncating the remainder in vsyscalls. However, it causes
- * additional work to be done in timekeeping_adjust(). Once
- * the vsyscall implementations are converted to use xtime_nsec
- * (shifted nanoseconds), this can be killed.
- */
- remainder = tk->xtime_nsec & ((1ULL << tk->shift) - 1);
- tk->xtime_nsec -= remainder;
- tk->xtime_nsec += 1ULL << tk->shift;
- tk->ntp_error += remainder << tk->ntp_error_shift;
+ * XXX This can be killed once everyone converts
+ * to the new update_vsyscall.
+ */
+ old_vsyscall_fixup(tk);
/*
* Finally, make sure that after the rounding
diff --git a/kernel/timer.c b/kernel/timer.c
index d5de1b2292aa..367d00858482 100644
--- a/kernel/timer.c
+++ b/kernel/timer.c
@@ -63,6 +63,7 @@ EXPORT_SYMBOL(jiffies_64);
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)
+#define MAX_TVAL ((unsigned long)((1ULL << (TVR_BITS + 4*TVN_BITS)) - 1))
struct tvec {
struct list_head vec[TVN_SIZE];
@@ -359,11 +360,12 @@ __internal_add_timer(struct tvec_base *base, struct timer_list *timer)
vec = base->tv1.vec + (base->timer_jiffies & TVR_MASK);
} else {
int i;
- /* If the timeout is larger than 0xffffffff on 64-bit
- * architectures then we use the maximum timeout:
+ /* If the timeout is larger than MAX_TVAL (on 64-bit
+ * architectures or with CONFIG_BASE_SMALL=1) then we
+ * use the maximum timeout.
*/
- if (idx > 0xffffffffUL) {
- idx = 0xffffffffUL;
+ if (idx > MAX_TVAL) {
+ idx = MAX_TVAL;
expires = idx + base->timer_jiffies;
}
i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;