summary | refs | log | tree | commit | diff
path: root/fs/proc
diff options
context:
space:
mode:
Diffstat (limited to 'fs/proc')
-rw-r--r-- fs/proc/base.c | 458
-rw-r--r-- fs/proc/fd.c | 110
-rw-r--r-- fs/proc/generic.c | 98
-rw-r--r-- fs/proc/internal.h | 10
-rw-r--r-- fs/proc/kcore.c | 2
-rw-r--r-- fs/proc/namespaces.c | 87
-rw-r--r-- fs/proc/proc_net.c | 9
-rw-r--r-- fs/proc/proc_sysctl.c | 78
-rw-r--r-- fs/proc/root.c | 19
-rw-r--r-- fs/proc/task_mmu.c | 168
-rw-r--r-- fs/proc/uptime.c | 3
-rw-r--r-- fs/proc/vmcore.c | 694
12 files changed, 945 insertions, 791 deletions
diff --git a/fs/proc/base.c b/fs/proc/base.c
index c3834dad09b3..1485e38daaa3 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -1681,46 +1681,34 @@ const struct dentry_operations pid_dentry_operations =
* reported by readdir in sync with the inode numbers reported
* by stat.
*/
-int proc_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
+bool proc_fill_cache(struct file *file, struct dir_context *ctx,
const char *name, int len,
instantiate_t instantiate, struct task_struct *task, const void *ptr)
{
- struct dentry *child, *dir = filp->f_path.dentry;
+ struct dentry *child, *dir = file->f_path.dentry;
+ struct qstr qname = QSTR_INIT(name, len);
struct inode *inode;
- struct qstr qname;
- ino_t ino = 0;
- unsigned type = DT_UNKNOWN;
-
- qname.name = name;
- qname.len = len;
- qname.hash = full_name_hash(name, len);
+ unsigned type;
+ ino_t ino;
- child = d_lookup(dir, &qname);
+ child = d_hash_and_lookup(dir, &qname);
if (!child) {
- struct dentry *new;
- new = d_alloc(dir, &qname);
- if (new) {
- child = instantiate(dir->d_inode, new, task, ptr);
- if (child)
- dput(new);
- else
- child = new;
+ child = d_alloc(dir, &qname);
+ if (!child)
+ goto end_instantiate;
+ if (instantiate(dir->d_inode, child, task, ptr) < 0) {
+ dput(child);
+ goto end_instantiate;
}
}
- if (!child || IS_ERR(child) || !child->d_inode)
- goto end_instantiate;
inode = child->d_inode;
- if (inode) {
- ino = inode->i_ino;
- type = inode->i_mode >> 12;
- }
+ ino = inode->i_ino;
+ type = inode->i_mode >> 12;
dput(child);
+ return dir_emit(ctx, name, len, ino, type);
+
end_instantiate:
- if (!ino)
- ino = find_inode_number(dir, &qname);
- if (!ino)
- ino = 1;
- return filldir(dirent, name, len, filp->f_pos, ino, type);
+ return dir_emit(ctx, name, len, 1, DT_UNKNOWN);
}
#ifdef CONFIG_CHECKPOINT_RESTORE
@@ -1846,7 +1834,7 @@ struct map_files_info {
unsigned char name[4*sizeof(long)+2]; /* max: %lx-%lx\0 */
};
-static struct dentry *
+static int
proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
struct task_struct *task, const void *ptr)
{
@@ -1856,7 +1844,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
inode = proc_pid_make_inode(dir->i_sb, task);
if (!inode)
- return ERR_PTR(-ENOENT);
+ return -ENOENT;
ei = PROC_I(inode);
ei->op.proc_get_link = proc_map_files_get_link;
@@ -1873,7 +1861,7 @@ proc_map_files_instantiate(struct inode *dir, struct dentry *dentry,
d_set_d_op(dentry, &tid_map_files_dentry_operations);
d_add(dentry, inode);
- return NULL;
+ return 0;
}
static struct dentry *proc_map_files_lookup(struct inode *dir,
@@ -1882,23 +1870,23 @@ static struct dentry *proc_map_files_lookup(struct inode *dir,
unsigned long vm_start, vm_end;
struct vm_area_struct *vma;
struct task_struct *task;
- struct dentry *result;
+ int result;
struct mm_struct *mm;
- result = ERR_PTR(-EPERM);
+ result = -EPERM;
if (!capable(CAP_SYS_ADMIN))
goto out;
- result = ERR_PTR(-ENOENT);
+ result = -ENOENT;
task = get_proc_task(dir);
if (!task)
goto out;
- result = ERR_PTR(-EACCES);
+ result = -EACCES;
if (!ptrace_may_access(task, PTRACE_MODE_READ))
goto out_put_task;
- result = ERR_PTR(-ENOENT);
+ result = -ENOENT;
if (dname_to_vma_addr(dentry, &vm_start, &vm_end))
goto out_put_task;
@@ -1921,7 +1909,7 @@ out_no_vma:
out_put_task:
put_task_struct(task);
out:
- return result;
+ return ERR_PTR(result);
}
static const struct inode_operations proc_map_files_inode_operations = {
@@ -1931,14 +1919,15 @@ static const struct inode_operations proc_map_files_inode_operations = {
};
static int
-proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
+proc_map_files_readdir(struct file *file, struct dir_context *ctx)
{
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
struct vm_area_struct *vma;
struct task_struct *task;
struct mm_struct *mm;
- ino_t ino;
+ unsigned long nr_files, pos, i;
+ struct flex_array *fa = NULL;
+ struct map_files_info info;
+ struct map_files_info *p;
int ret;
ret = -EPERM;
@@ -1946,7 +1935,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
goto out;
ret = -ENOENT;
- task = get_proc_task(inode);
+ task = get_proc_task(file_inode(file));
if (!task)
goto out;
@@ -1955,91 +1944,73 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir)
goto out_put_task;
ret = 0;
- switch (filp->f_pos) {
- case 0:
- ino = inode->i_ino;
- if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0)
- goto out_put_task;
- filp->f_pos++;
- case 1:
- ino = parent_ino(dentry);
- if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
- goto out_put_task;
- filp->f_pos++;
- default:
- {
- unsigned long nr_files, pos, i;
- struct flex_array *fa = NULL;
- struct map_files_info info;
- struct map_files_info *p;
-
- mm = get_task_mm(task);
- if (!mm)
- goto out_put_task;
- down_read(&mm->mmap_sem);
+ if (!dir_emit_dots(file, ctx))
+ goto out_put_task;
- nr_files = 0;
+ mm = get_task_mm(task);
+ if (!mm)
+ goto out_put_task;
+ down_read(&mm->mmap_sem);
- /*
- * We need two passes here:
- *
- * 1) Collect vmas of mapped files with mmap_sem taken
- * 2) Release mmap_sem and instantiate entries
- *
- * otherwise we get lockdep complained, since filldir()
- * routine might require mmap_sem taken in might_fault().
- */
+ nr_files = 0;
- for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
- if (vma->vm_file && ++pos > filp->f_pos)
- nr_files++;
- }
+ /*
+ * We need two passes here:
+ *
+ * 1) Collect vmas of mapped files with mmap_sem taken
+ * 2) Release mmap_sem and instantiate entries
+ *
+ * otherwise we get lockdep complained, since filldir()
+ * routine might require mmap_sem taken in might_fault().
+ */
- if (nr_files) {
- fa = flex_array_alloc(sizeof(info), nr_files,
- GFP_KERNEL);
- if (!fa || flex_array_prealloc(fa, 0, nr_files,
- GFP_KERNEL)) {
- ret = -ENOMEM;
- if (fa)
- flex_array_free(fa);
- up_read(&mm->mmap_sem);
- mmput(mm);
- goto out_put_task;
- }
- for (i = 0, vma = mm->mmap, pos = 2; vma;
- vma = vma->vm_next) {
- if (!vma->vm_file)
- continue;
- if (++pos <= filp->f_pos)
- continue;
+ for (vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) {
+ if (vma->vm_file && ++pos > ctx->pos)
+ nr_files++;
+ }
- info.mode = vma->vm_file->f_mode;
- info.len = snprintf(info.name,
- sizeof(info.name), "%lx-%lx",
- vma->vm_start, vma->vm_end);
- if (flex_array_put(fa, i++, &info, GFP_KERNEL))
- BUG();
- }
+ if (nr_files) {
+ fa = flex_array_alloc(sizeof(info), nr_files,
+ GFP_KERNEL);
+ if (!fa || flex_array_prealloc(fa, 0, nr_files,
+ GFP_KERNEL)) {
+ ret = -ENOMEM;
+ if (fa)
+ flex_array_free(fa);
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ goto out_put_task;
}
- up_read(&mm->mmap_sem);
+ for (i = 0, vma = mm->mmap, pos = 2; vma;
+ vma = vma->vm_next) {
+ if (!vma->vm_file)
+ continue;
+ if (++pos <= ctx->pos)
+ continue;
- for (i = 0; i < nr_files; i++) {
- p = flex_array_get(fa, i);
- ret = proc_fill_cache(filp, dirent, filldir,
- p->name, p->len,
- proc_map_files_instantiate,
- task,
- (void *)(unsigned long)p->mode);
- if (ret)
- break;
- filp->f_pos++;
+ info.mode = vma->vm_file->f_mode;
+ info.len = snprintf(info.name,
+ sizeof(info.name), "%lx-%lx",
+ vma->vm_start, vma->vm_end);
+ if (flex_array_put(fa, i++, &info, GFP_KERNEL))
+ BUG();
}
- if (fa)
- flex_array_free(fa);
- mmput(mm);
}
+ up_read(&mm->mmap_sem);
+
+ for (i = 0; i < nr_files; i++) {
+ p = flex_array_get(fa, i);
+ if (!proc_fill_cache(file, ctx,
+ p->name, p->len,
+ proc_map_files_instantiate,
+ task,
+ (void *)(unsigned long)p->mode))
+ break;
+ ctx->pos++;
}
+ if (fa)
+ flex_array_free(fa);
+ mmput(mm);
out_put_task:
put_task_struct(task);
@@ -2049,7 +2020,7 @@ out:
static const struct file_operations proc_map_files_operations = {
.read = generic_read_dir,
- .readdir = proc_map_files_readdir,
+ .iterate = proc_map_files_readdir,
.llseek = default_llseek,
};
@@ -2152,13 +2123,12 @@ static const struct file_operations proc_timers_operations = {
};
#endif /* CONFIG_CHECKPOINT_RESTORE */
-static struct dentry *proc_pident_instantiate(struct inode *dir,
+static int proc_pident_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
const struct pid_entry *p = ptr;
struct inode *inode;
struct proc_inode *ei;
- struct dentry *error = ERR_PTR(-ENOENT);
inode = proc_pid_make_inode(dir->i_sb, task);
if (!inode)
@@ -2177,9 +2147,9 @@ static struct dentry *proc_pident_instantiate(struct inode *dir,
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (pid_revalidate(dentry, 0))
- error = NULL;
+ return 0;
out:
- return error;
+ return -ENOENT;
}
static struct dentry *proc_pident_lookup(struct inode *dir,
@@ -2187,11 +2157,11 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
const struct pid_entry *ents,
unsigned int nents)
{
- struct dentry *error;
+ int error;
struct task_struct *task = get_proc_task(dir);
const struct pid_entry *p, *last;
- error = ERR_PTR(-ENOENT);
+ error = -ENOENT;
if (!task)
goto out_no_task;
@@ -2214,70 +2184,33 @@ static struct dentry *proc_pident_lookup(struct inode *dir,
out:
put_task_struct(task);
out_no_task:
- return error;
-}
-
-static int proc_pident_fill_cache(struct file *filp, void *dirent,
- filldir_t filldir, struct task_struct *task, const struct pid_entry *p)
-{
- return proc_fill_cache(filp, dirent, filldir, p->name, p->len,
- proc_pident_instantiate, task, p);
+ return ERR_PTR(error);
}
-static int proc_pident_readdir(struct file *filp,
- void *dirent, filldir_t filldir,
+static int proc_pident_readdir(struct file *file, struct dir_context *ctx,
const struct pid_entry *ents, unsigned int nents)
{
- int i;
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
- struct task_struct *task = get_proc_task(inode);
- const struct pid_entry *p, *last;
- ino_t ino;
- int ret;
+ struct task_struct *task = get_proc_task(file_inode(file));
+ const struct pid_entry *p;
- ret = -ENOENT;
if (!task)
- goto out_no_task;
+ return -ENOENT;
- ret = 0;
- i = filp->f_pos;
- switch (i) {
- case 0:
- ino = inode->i_ino;
- if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
- goto out;
- i++;
- filp->f_pos++;
- /* fall through */
- case 1:
- ino = parent_ino(dentry);
- if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
- goto out;
- i++;
- filp->f_pos++;
- /* fall through */
- default:
- i -= 2;
- if (i >= nents) {
- ret = 1;
- goto out;
- }
- p = ents + i;
- last = &ents[nents - 1];
- while (p <= last) {
- if (proc_pident_fill_cache(filp, dirent, filldir, task, p) < 0)
- goto out;
- filp->f_pos++;
- p++;
- }
- }
+ if (!dir_emit_dots(file, ctx))
+ goto out;
+
+ if (ctx->pos >= nents + 2)
+ goto out;
- ret = 1;
+ for (p = ents + (ctx->pos - 2); p <= ents + nents - 1; p++) {
+ if (!proc_fill_cache(file, ctx, p->name, p->len,
+ proc_pident_instantiate, task, p))
+ break;
+ ctx->pos++;
+ }
out:
put_task_struct(task);
-out_no_task:
- return ret;
+ return 0;
}
#ifdef CONFIG_SECURITY
@@ -2362,16 +2295,15 @@ static const struct pid_entry attr_dir_stuff[] = {
REG("sockcreate", S_IRUGO|S_IWUGO, proc_pid_attr_operations),
};
-static int proc_attr_dir_readdir(struct file * filp,
- void * dirent, filldir_t filldir)
+static int proc_attr_dir_readdir(struct file *file, struct dir_context *ctx)
{
- return proc_pident_readdir(filp,dirent,filldir,
- attr_dir_stuff,ARRAY_SIZE(attr_dir_stuff));
+ return proc_pident_readdir(file, ctx,
+ attr_dir_stuff, ARRAY_SIZE(attr_dir_stuff));
}
static const struct file_operations proc_attr_dir_operations = {
.read = generic_read_dir,
- .readdir = proc_attr_dir_readdir,
+ .iterate = proc_attr_dir_readdir,
.llseek = default_llseek,
};
@@ -2725,16 +2657,15 @@ static const struct pid_entry tgid_base_stuff[] = {
#endif
};
-static int proc_tgid_base_readdir(struct file * filp,
- void * dirent, filldir_t filldir)
+static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
{
- return proc_pident_readdir(filp,dirent,filldir,
- tgid_base_stuff,ARRAY_SIZE(tgid_base_stuff));
+ return proc_pident_readdir(file, ctx,
+ tgid_base_stuff, ARRAY_SIZE(tgid_base_stuff));
}
static const struct file_operations proc_tgid_base_operations = {
.read = generic_read_dir,
- .readdir = proc_tgid_base_readdir,
+ .iterate = proc_tgid_base_readdir,
.llseek = default_llseek,
};
@@ -2836,11 +2767,10 @@ void proc_flush_task(struct task_struct *task)
}
}
-static struct dentry *proc_pid_instantiate(struct inode *dir,
- struct dentry * dentry,
- struct task_struct *task, const void *ptr)
+static int proc_pid_instantiate(struct inode *dir,
+ struct dentry * dentry,
+ struct task_struct *task, const void *ptr)
{
- struct dentry *error = ERR_PTR(-ENOENT);
struct inode *inode;
inode = proc_pid_make_inode(dir->i_sb, task);
@@ -2860,14 +2790,14 @@ static struct dentry *proc_pid_instantiate(struct inode *dir,
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (pid_revalidate(dentry, 0))
- error = NULL;
+ return 0;
out:
- return error;
+ return -ENOENT;
}
struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
- struct dentry *result = NULL;
+ int result = 0;
struct task_struct *task;
unsigned tgid;
struct pid_namespace *ns;
@@ -2888,7 +2818,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsign
result = proc_pid_instantiate(dir, dentry, task, NULL);
put_task_struct(task);
out:
- return result;
+ return ERR_PTR(result);
}
/*
@@ -2936,58 +2866,42 @@ retry:
#define TGID_OFFSET (FIRST_PROCESS_ENTRY + 1)
-static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
- struct tgid_iter iter)
-{
- char name[PROC_NUMBUF];
- int len = snprintf(name, sizeof(name), "%d", iter.tgid);
- return proc_fill_cache(filp, dirent, filldir, name, len,
- proc_pid_instantiate, iter.task, NULL);
-}
-
-static int fake_filldir(void *buf, const char *name, int namelen,
- loff_t offset, u64 ino, unsigned d_type)
-{
- return 0;
-}
-
/* for the /proc/ directory itself, after non-process stuff has been done */
-int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
+int proc_pid_readdir(struct file *file, struct dir_context *ctx)
{
struct tgid_iter iter;
- struct pid_namespace *ns;
- filldir_t __filldir;
- loff_t pos = filp->f_pos;
+ struct pid_namespace *ns = file->f_dentry->d_sb->s_fs_info;
+ loff_t pos = ctx->pos;
if (pos >= PID_MAX_LIMIT + TGID_OFFSET)
- goto out;
+ return 0;
if (pos == TGID_OFFSET - 1) {
- if (proc_fill_cache(filp, dirent, filldir, "self", 4,
- NULL, NULL, NULL) < 0)
- goto out;
+ struct inode *inode = ns->proc_self->d_inode;
+ if (!dir_emit(ctx, "self", 4, inode->i_ino, DT_LNK))
+ return 0;
iter.tgid = 0;
} else {
iter.tgid = pos - TGID_OFFSET;
}
iter.task = NULL;
- ns = filp->f_dentry->d_sb->s_fs_info;
for (iter = next_tgid(ns, iter);
iter.task;
iter.tgid += 1, iter = next_tgid(ns, iter)) {
- if (has_pid_permissions(ns, iter.task, 2))
- __filldir = filldir;
- else
- __filldir = fake_filldir;
+ char name[PROC_NUMBUF];
+ int len;
+ if (!has_pid_permissions(ns, iter.task, 2))
+ continue;
- filp->f_pos = iter.tgid + TGID_OFFSET;
- if (proc_pid_fill_cache(filp, dirent, __filldir, iter) < 0) {
+ len = snprintf(name, sizeof(name), "%d", iter.tgid);
+ ctx->pos = iter.tgid + TGID_OFFSET;
+ if (!proc_fill_cache(file, ctx, name, len,
+ proc_pid_instantiate, iter.task, NULL)) {
put_task_struct(iter.task);
- goto out;
+ return 0;
}
}
- filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET;
-out:
+ ctx->pos = PID_MAX_LIMIT + TGID_OFFSET;
return 0;
}
@@ -3075,11 +2989,10 @@ static const struct pid_entry tid_base_stuff[] = {
#endif
};
-static int proc_tid_base_readdir(struct file * filp,
- void * dirent, filldir_t filldir)
+static int proc_tid_base_readdir(struct file *file, struct dir_context *ctx)
{
- return proc_pident_readdir(filp,dirent,filldir,
- tid_base_stuff,ARRAY_SIZE(tid_base_stuff));
+ return proc_pident_readdir(file, ctx,
+ tid_base_stuff, ARRAY_SIZE(tid_base_stuff));
}
static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
@@ -3090,7 +3003,7 @@ static struct dentry *proc_tid_base_lookup(struct inode *dir, struct dentry *den
static const struct file_operations proc_tid_base_operations = {
.read = generic_read_dir,
- .readdir = proc_tid_base_readdir,
+ .iterate = proc_tid_base_readdir,
.llseek = default_llseek,
};
@@ -3100,10 +3013,9 @@ static const struct inode_operations proc_tid_base_inode_operations = {
.setattr = proc_setattr,
};
-static struct dentry *proc_task_instantiate(struct inode *dir,
+static int proc_task_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
- struct dentry *error = ERR_PTR(-ENOENT);
struct inode *inode;
inode = proc_pid_make_inode(dir->i_sb, task);
@@ -3122,14 +3034,14 @@ static struct dentry *proc_task_instantiate(struct inode *dir,
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (pid_revalidate(dentry, 0))
- error = NULL;
+ return 0;
out:
- return error;
+ return -ENOENT;
}
static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
{
- struct dentry *result = ERR_PTR(-ENOENT);
+ int result = -ENOENT;
struct task_struct *task;
struct task_struct *leader = get_proc_task(dir);
unsigned tid;
@@ -3159,7 +3071,7 @@ out_drop_task:
out:
put_task_struct(leader);
out_no_task:
- return result;
+ return ERR_PTR(result);
}
/*
@@ -3231,30 +3143,16 @@ static struct task_struct *next_tid(struct task_struct *start)
return pos;
}
-static int proc_task_fill_cache(struct file *filp, void *dirent, filldir_t filldir,
- struct task_struct *task, int tid)
-{
- char name[PROC_NUMBUF];
- int len = snprintf(name, sizeof(name), "%d", tid);
- return proc_fill_cache(filp, dirent, filldir, name, len,
- proc_task_instantiate, task, NULL);
-}
-
/* for the /proc/TGID/task/ directories */
-static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldir)
+static int proc_task_readdir(struct file *file, struct dir_context *ctx)
{
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
struct task_struct *leader = NULL;
- struct task_struct *task;
- int retval = -ENOENT;
- ino_t ino;
- int tid;
+ struct task_struct *task = get_proc_task(file_inode(file));
struct pid_namespace *ns;
+ int tid;
- task = get_proc_task(inode);
if (!task)
- goto out_no_task;
+ return -ENOENT;
rcu_read_lock();
if (pid_alive(task)) {
leader = task->group_leader;
@@ -3263,46 +3161,36 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi
rcu_read_unlock();
put_task_struct(task);
if (!leader)
- goto out_no_task;
- retval = 0;
+ return -ENOENT;
- switch ((unsigned long)filp->f_pos) {
- case 0:
- ino = inode->i_ino;
- if (filldir(dirent, ".", 1, filp->f_pos, ino, DT_DIR) < 0)
- goto out;
- filp->f_pos++;
- /* fall through */
- case 1:
- ino = parent_ino(dentry);
- if (filldir(dirent, "..", 2, filp->f_pos, ino, DT_DIR) < 0)
- goto out;
- filp->f_pos++;
- /* fall through */
- }
+ if (!dir_emit_dots(file, ctx))
+ goto out;
/* f_version caches the tgid value that the last readdir call couldn't
* return. lseek aka telldir automagically resets f_version to 0.
*/
- ns = filp->f_dentry->d_sb->s_fs_info;
- tid = (int)filp->f_version;
- filp->f_version = 0;
- for (task = first_tid(leader, tid, filp->f_pos - 2, ns);
+ ns = file->f_dentry->d_sb->s_fs_info;
+ tid = (int)file->f_version;
+ file->f_version = 0;
+ for (task = first_tid(leader, tid, ctx->pos - 2, ns);
task;
- task = next_tid(task), filp->f_pos++) {
+ task = next_tid(task), ctx->pos++) {
+ char name[PROC_NUMBUF];
+ int len;
tid = task_pid_nr_ns(task, ns);
- if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) {
+ len = snprintf(name, sizeof(name), "%d", tid);
+ if (!proc_fill_cache(file, ctx, name, len,
+ proc_task_instantiate, task, NULL)) {
/* returning this tgid failed, save it as the first
* pid for the next readir call */
- filp->f_version = (u64)tid;
+ file->f_version = (u64)tid;
put_task_struct(task);
break;
}
}
out:
put_task_struct(leader);
-out_no_task:
- return retval;
+ return 0;
}
static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat)
@@ -3328,6 +3216,6 @@ static const struct inode_operations proc_task_inode_operations = {
static const struct file_operations proc_task_operations = {
.read = generic_read_dir,
- .readdir = proc_task_readdir,
+ .iterate = proc_task_readdir,
.llseek = default_llseek,
};
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index d7a4a28ef630..75f2890abbd8 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -167,11 +167,10 @@ static int proc_fd_link(struct dentry *dentry, struct path *path)
return ret;
}
-static struct dentry *
+static int
proc_fd_instantiate(struct inode *dir, struct dentry *dentry,
struct task_struct *task, const void *ptr)
{
- struct dentry *error = ERR_PTR(-ENOENT);
unsigned fd = (unsigned long)ptr;
struct proc_inode *ei;
struct inode *inode;
@@ -194,9 +193,9 @@ proc_fd_instantiate(struct inode *dir, struct dentry *dentry,
/* Close the race of the process dying before we return the dentry */
if (tid_fd_revalidate(dentry, 0))
- error = NULL;
+ return 0;
out:
- return error;
+ return -ENOENT;
}
static struct dentry *proc_lookupfd_common(struct inode *dir,
@@ -204,7 +203,7 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
instantiate_t instantiate)
{
struct task_struct *task = get_proc_task(dir);
- struct dentry *result = ERR_PTR(-ENOENT);
+ int result = -ENOENT;
unsigned fd = name_to_int(dentry);
if (!task)
@@ -216,77 +215,61 @@ static struct dentry *proc_lookupfd_common(struct inode *dir,
out:
put_task_struct(task);
out_no_task:
- return result;
+ return ERR_PTR(result);
}
-static int proc_readfd_common(struct file * filp, void * dirent,
- filldir_t filldir, instantiate_t instantiate)
+static int proc_readfd_common(struct file *file, struct dir_context *ctx,
+ instantiate_t instantiate)
{
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
- struct task_struct *p = get_proc_task(inode);
+ struct task_struct *p = get_proc_task(file_inode(file));
struct files_struct *files;
- unsigned int fd, ino;
- int retval;
+ unsigned int fd;
- retval = -ENOENT;
if (!p)
- goto out_no_task;
- retval = 0;
+ return -ENOENT;
- fd = filp->f_pos;
- switch (fd) {
- case 0:
- if (filldir(dirent, ".", 1, 0, inode->i_ino, DT_DIR) < 0)
- goto out;
- filp->f_pos++;
- case 1:
- ino = parent_ino(dentry);
- if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0)
- goto out;
- filp->f_pos++;
- default:
- files = get_files_struct(p);
- if (!files)
- goto out;
- rcu_read_lock();
- for (fd = filp->f_pos - 2;
- fd < files_fdtable(files)->max_fds;
- fd++, filp->f_pos++) {
- char name[PROC_NUMBUF];
- int len;
- int rv;
+ if (!dir_emit_dots(file, ctx))
+ goto out;
+ if (!dir_emit_dots(file, ctx))
+ goto out;
+ files = get_files_struct(p);
+ if (!files)
+ goto out;
- if (!fcheck_files(files, fd))
- continue;
- rcu_read_unlock();
+ rcu_read_lock();
+ for (fd = ctx->pos - 2;
+ fd < files_fdtable(files)->max_fds;
+ fd++, ctx->pos++) {
+ char name[PROC_NUMBUF];
+ int len;
- len = snprintf(name, sizeof(name), "%d", fd);
- rv = proc_fill_cache(filp, dirent, filldir,
- name, len, instantiate, p,
- (void *)(unsigned long)fd);
- if (rv < 0)
- goto out_fd_loop;
- rcu_read_lock();
- }
- rcu_read_unlock();
-out_fd_loop:
- put_files_struct(files);
+ if (!fcheck_files(files, fd))
+ continue;
+ rcu_read_unlock();
+
+ len = snprintf(name, sizeof(name), "%d", fd);
+ if (!proc_fill_cache(file, ctx,
+ name, len, instantiate, p,
+ (void *)(unsigned long)fd))
+ goto out_fd_loop;
+ rcu_read_lock();
}
+ rcu_read_unlock();
+out_fd_loop:
+ put_files_struct(files);
out:
put_task_struct(p);
-out_no_task:
- return retval;
+ return 0;
}
-static int proc_readfd(struct file *filp, void *dirent, filldir_t filldir)
+static int proc_readfd(struct file *file, struct dir_context *ctx)
{
- return proc_readfd_common(filp, dirent, filldir, proc_fd_instantiate);
+ return proc_readfd_common(file, ctx, proc_fd_instantiate);
}
const struct file_operations proc_fd_operations = {
.read = generic_read_dir,
- .readdir = proc_readfd,
+ .iterate = proc_readfd,
.llseek = default_llseek,
};
@@ -316,11 +299,10 @@ const struct inode_operations proc_fd_inode_operations = {
.setattr = proc_setattr,
};
-static struct dentry *
+static int
proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry,
struct task_struct *task, const void *ptr)
{
- struct dentry *error = ERR_PTR(-ENOENT);
unsigned fd = (unsigned long)ptr;
struct proc_inode *ei;
struct inode *inode;
@@ -340,9 +322,9 @@ proc_fdinfo_instantiate(struct inode *dir, struct dentry *dentry,
/* Close the race of the process dying before we return the dentry */
if (tid_fd_revalidate(dentry, 0))
- error = NULL;
+ return 0;
out:
- return error;
+ return -ENOENT;
}
static struct dentry *
@@ -351,9 +333,9 @@ proc_lookupfdinfo(struct inode *dir, struct dentry *dentry, unsigned int flags)
return proc_lookupfd_common(dir, dentry, proc_fdinfo_instantiate);
}
-static int proc_readfdinfo(struct file *filp, void *dirent, filldir_t filldir)
+static int proc_readfdinfo(struct file *file, struct dir_context *ctx)
{
- return proc_readfd_common(filp, dirent, filldir,
+ return proc_readfd_common(file, ctx,
proc_fdinfo_instantiate);
}
@@ -364,6 +346,6 @@ const struct inode_operations proc_fdinfo_inode_operations = {
const struct file_operations proc_fdinfo_operations = {
.read = generic_read_dir,
- .readdir = proc_readfdinfo,
+ .iterate = proc_readfdinfo,
.llseek = default_llseek,
};
diff --git a/fs/proc/generic.c b/fs/proc/generic.c
index a2596afffae6..94441a407337 100644
--- a/fs/proc/generic.c
+++ b/fs/proc/generic.c
@@ -233,76 +233,52 @@ struct dentry *proc_lookup(struct inode *dir, struct dentry *dentry,
* value of the readdir() call, as long as it's non-negative
* for success..
*/
-int proc_readdir_de(struct proc_dir_entry *de, struct file *filp, void *dirent,
- filldir_t filldir)
+int proc_readdir_de(struct proc_dir_entry *de, struct file *file,
+ struct dir_context *ctx)
{
- unsigned int ino;
int i;
- struct inode *inode = file_inode(filp);
- int ret = 0;
- ino = inode->i_ino;
- i = filp->f_pos;
- switch (i) {
- case 0:
- if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
- goto out;
- i++;
- filp->f_pos++;
- /* fall through */
- case 1:
- if (filldir(dirent, "..", 2, i,
- parent_ino(filp->f_path.dentry),
- DT_DIR) < 0)
- goto out;
- i++;
- filp->f_pos++;
- /* fall through */
- default:
- spin_lock(&proc_subdir_lock);
- de = de->subdir;
- i -= 2;
- for (;;) {
- if (!de) {
- ret = 1;
- spin_unlock(&proc_subdir_lock);
- goto out;
- }
- if (!i)
- break;
- de = de->next;
- i--;
- }
-
- do {
- struct proc_dir_entry *next;
+ if (!dir_emit_dots(file, ctx))
+ return 0;
- /* filldir passes info to user space */
- pde_get(de);
- spin_unlock(&proc_subdir_lock);
- if (filldir(dirent, de->name, de->namelen, filp->f_pos,
- de->low_ino, de->mode >> 12) < 0) {
- pde_put(de);
- goto out;
- }
- spin_lock(&proc_subdir_lock);
- filp->f_pos++;
- next = de->next;
- pde_put(de);
- de = next;
- } while (de);
+ spin_lock(&proc_subdir_lock);
+ de = de->subdir;
+ i = ctx->pos - 2;
+ for (;;) {
+ if (!de) {
spin_unlock(&proc_subdir_lock);
+ return 0;
+ }
+ if (!i)
+ break;
+ de = de->next;
+ i--;
}
- ret = 1;
-out:
- return ret;
+
+ do {
+ struct proc_dir_entry *next;
+ pde_get(de);
+ spin_unlock(&proc_subdir_lock);
+ if (!dir_emit(ctx, de->name, de->namelen,
+ de->low_ino, de->mode >> 12)) {
+ pde_put(de);
+ return 0;
+ }
+ spin_lock(&proc_subdir_lock);
+ ctx->pos++;
+ next = de->next;
+ pde_put(de);
+ de = next;
+ } while (de);
+ spin_unlock(&proc_subdir_lock);
+ return 0;
}
-int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
+int proc_readdir(struct file *file, struct dir_context *ctx)
{
- struct inode *inode = file_inode(filp);
+ struct inode *inode = file_inode(file);
- return proc_readdir_de(PDE(inode), filp, dirent, filldir);
+ return proc_readdir_de(PDE(inode), file, ctx);
}
/*
@@ -313,7 +289,7 @@ int proc_readdir(struct file *filp, void *dirent, filldir_t filldir)
static const struct file_operations proc_dir_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
- .readdir = proc_readdir,
+ .iterate = proc_readdir,
};
/*
diff --git a/fs/proc/internal.h b/fs/proc/internal.h
index d600fb098b6a..651d09a11dde 100644
--- a/fs/proc/internal.h
+++ b/fs/proc/internal.h
@@ -165,14 +165,14 @@ extern int proc_setattr(struct dentry *, struct iattr *);
extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *);
extern int pid_revalidate(struct dentry *, unsigned int);
extern int pid_delete_dentry(const struct dentry *);
-extern int proc_pid_readdir(struct file *, void *, filldir_t);
+extern int proc_pid_readdir(struct file *, struct dir_context *);
extern struct dentry *proc_pid_lookup(struct inode *, struct dentry *, unsigned int);
extern loff_t mem_lseek(struct file *, loff_t, int);
/* Lookups */
-typedef struct dentry *instantiate_t(struct inode *, struct dentry *,
+typedef int instantiate_t(struct inode *, struct dentry *,
struct task_struct *, const void *);
-extern int proc_fill_cache(struct file *, void *, filldir_t, const char *, int,
+extern bool proc_fill_cache(struct file *, struct dir_context *, const char *, int,
instantiate_t, struct task_struct *, const void *);
/*
@@ -183,8 +183,8 @@ extern spinlock_t proc_subdir_lock;
extern struct dentry *proc_lookup(struct inode *, struct dentry *, unsigned int);
extern struct dentry *proc_lookup_de(struct proc_dir_entry *, struct inode *,
struct dentry *);
-extern int proc_readdir(struct file *, void *, filldir_t);
-extern int proc_readdir_de(struct proc_dir_entry *, struct file *, void *, filldir_t);
+extern int proc_readdir(struct file *, struct dir_context *);
+extern int proc_readdir_de(struct proc_dir_entry *, struct file *, struct dir_context *);
static inline struct proc_dir_entry *pde_get(struct proc_dir_entry *pde)
{
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 0a22194e5d58..06ea155e1a59 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -408,7 +408,7 @@ static void elf_kcore_store_hdr(char *bufp, int nphdr, int dataoff)
prpsinfo.pr_zomb = 0;
strcpy(prpsinfo.pr_fname, "vmlinux");
- strncpy(prpsinfo.pr_psargs, saved_command_line, ELF_PRARGSZ);
+ strlcpy(prpsinfo.pr_psargs, saved_command_line, sizeof(prpsinfo.pr_psargs));
nhdr->p_filesz += notesize(&notes[1]);
bufp = storenote(&notes[1], bufp);
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 54bdc6701e9f..49a7fff2e83a 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -187,13 +187,12 @@ static const struct inode_operations proc_ns_link_inode_operations = {
.setattr = proc_setattr,
};
-static struct dentry *proc_ns_instantiate(struct inode *dir,
+static int proc_ns_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
{
const struct proc_ns_operations *ns_ops = ptr;
struct inode *inode;
struct proc_inode *ei;
- struct dentry *error = ERR_PTR(-ENOENT);
inode = proc_pid_make_inode(dir->i_sb, task);
if (!inode)
@@ -208,90 +207,52 @@ static struct dentry *proc_ns_instantiate(struct inode *dir,
d_add(dentry, inode);
/* Close the race of the process dying before we return the dentry */
if (pid_revalidate(dentry, 0))
- error = NULL;
+ return 0;
out:
- return error;
-}
-
-static int proc_ns_fill_cache(struct file *filp, void *dirent,
- filldir_t filldir, struct task_struct *task,
- const struct proc_ns_operations *ops)
-{
- return proc_fill_cache(filp, dirent, filldir,
- ops->name, strlen(ops->name),
- proc_ns_instantiate, task, ops);
+ return -ENOENT;
}
-static int proc_ns_dir_readdir(struct file *filp, void *dirent,
- filldir_t filldir)
+static int proc_ns_dir_readdir(struct file *file, struct dir_context *ctx)
{
- int i;
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
- struct task_struct *task = get_proc_task(inode);
+ struct task_struct *task = get_proc_task(file_inode(file));
const struct proc_ns_operations **entry, **last;
- ino_t ino;
- int ret;
- ret = -ENOENT;
if (!task)
- goto out_no_task;
+ return -ENOENT;
- ret = 0;
- i = filp->f_pos;
- switch (i) {
- case 0:
- ino = inode->i_ino;
- if (filldir(dirent, ".", 1, i, ino, DT_DIR) < 0)
- goto out;
- i++;
- filp->f_pos++;
- /* fall through */
- case 1:
- ino = parent_ino(dentry);
- if (filldir(dirent, "..", 2, i, ino, DT_DIR) < 0)
- goto out;
- i++;
- filp->f_pos++;
- /* fall through */
- default:
- i -= 2;
- if (i >= ARRAY_SIZE(ns_entries)) {
- ret = 1;
- goto out;
- }
- entry = ns_entries + i;
- last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
- while (entry <= last) {
- if (proc_ns_fill_cache(filp, dirent, filldir,
- task, *entry) < 0)
- goto out;
- filp->f_pos++;
- entry++;
- }
+ if (!dir_emit_dots(file, ctx))
+ goto out;
+ if (ctx->pos >= 2 + ARRAY_SIZE(ns_entries))
+ goto out;
+ entry = ns_entries + (ctx->pos - 2);
+ last = &ns_entries[ARRAY_SIZE(ns_entries) - 1];
+ while (entry <= last) {
+ const struct proc_ns_operations *ops = *entry;
+ if (!proc_fill_cache(file, ctx, ops->name, strlen(ops->name),
+ proc_ns_instantiate, task, ops))
+ break;
+ ctx->pos++;
+ entry++;
}
-
- ret = 1;
out:
put_task_struct(task);
-out_no_task:
- return ret;
+ return 0;
}
const struct file_operations proc_ns_dir_operations = {
.read = generic_read_dir,
- .readdir = proc_ns_dir_readdir,
+ .iterate = proc_ns_dir_readdir,
};
static struct dentry *proc_ns_dir_lookup(struct inode *dir,
struct dentry *dentry, unsigned int flags)
{
- struct dentry *error;
+ int error;
struct task_struct *task = get_proc_task(dir);
const struct proc_ns_operations **entry, **last;
unsigned int len = dentry->d_name.len;
- error = ERR_PTR(-ENOENT);
+ error = -ENOENT;
if (!task)
goto out_no_task;
@@ -310,7 +271,7 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir,
out:
put_task_struct(task);
out_no_task:
- return error;
+ return ERR_PTR(error);
}
const struct inode_operations proc_ns_dir_inode_operations = {
diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 986e83220d56..4677bb7dc7c2 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -160,16 +160,15 @@ const struct inode_operations proc_net_inode_operations = {
.getattr = proc_tgid_net_getattr,
};
-static int proc_tgid_net_readdir(struct file *filp, void *dirent,
- filldir_t filldir)
+static int proc_tgid_net_readdir(struct file *file, struct dir_context *ctx)
{
int ret;
struct net *net;
ret = -EINVAL;
- net = get_proc_task_net(file_inode(filp));
+ net = get_proc_task_net(file_inode(file));
if (net != NULL) {
- ret = proc_readdir_de(net->proc_net, filp, dirent, filldir);
+ ret = proc_readdir_de(net->proc_net, file, ctx);
put_net(net);
}
return ret;
@@ -178,7 +177,7 @@ static int proc_tgid_net_readdir(struct file *filp, void *dirent,
const struct file_operations proc_net_operations = {
.llseek = generic_file_llseek,
.read = generic_read_dir,
- .readdir = proc_tgid_net_readdir,
+ .iterate = proc_tgid_net_readdir,
};
static __net_init int proc_net_ns_init(struct net *net)
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index ac05f33a0dde..71290463a1d3 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -573,12 +573,12 @@ out:
return ret;
}
-static int proc_sys_fill_cache(struct file *filp, void *dirent,
- filldir_t filldir,
+static bool proc_sys_fill_cache(struct file *file,
+ struct dir_context *ctx,
struct ctl_table_header *head,
struct ctl_table *table)
{
- struct dentry *child, *dir = filp->f_path.dentry;
+ struct dentry *child, *dir = file->f_path.dentry;
struct inode *inode;
struct qstr qname;
ino_t ino = 0;
@@ -595,38 +595,38 @@ static int proc_sys_fill_cache(struct file *filp, void *dirent,
inode = proc_sys_make_inode(dir->d_sb, head, table);
if (!inode) {
dput(child);
- return -ENOMEM;
+ return false;
} else {
d_set_d_op(child, &proc_sys_dentry_operations);
d_add(child, inode);
}
} else {
- return -ENOMEM;
+ return false;
}
}
inode = child->d_inode;
ino = inode->i_ino;
type = inode->i_mode >> 12;
dput(child);
- return !!filldir(dirent, qname.name, qname.len, filp->f_pos, ino, type);
+ return dir_emit(ctx, qname.name, qname.len, ino, type);
}
-static int proc_sys_link_fill_cache(struct file *filp, void *dirent,
- filldir_t filldir,
+static bool proc_sys_link_fill_cache(struct file *file,
+ struct dir_context *ctx,
struct ctl_table_header *head,
struct ctl_table *table)
{
- int err, ret = 0;
+ bool ret = true;
head = sysctl_head_grab(head);
if (S_ISLNK(table->mode)) {
/* It is not an error if we can not follow the link ignore it */
- err = sysctl_follow_link(&head, &table, current->nsproxy);
+ int err = sysctl_follow_link(&head, &table, current->nsproxy);
if (err)
goto out;
}
- ret = proc_sys_fill_cache(filp, dirent, filldir, head, table);
+ ret = proc_sys_fill_cache(file, ctx, head, table);
out:
sysctl_head_finish(head);
return ret;
@@ -634,67 +634,50 @@ out:
static int scan(struct ctl_table_header *head, ctl_table *table,
unsigned long *pos, struct file *file,
- void *dirent, filldir_t filldir)
+ struct dir_context *ctx)
{
- int res;
+ bool res;
- if ((*pos)++ < file->f_pos)
- return 0;
+ if ((*pos)++ < ctx->pos)
+ return true;
if (unlikely(S_ISLNK(table->mode)))
- res = proc_sys_link_fill_cache(file, dirent, filldir, head, table);
+ res = proc_sys_link_fill_cache(file, ctx, head, table);
else
- res = proc_sys_fill_cache(file, dirent, filldir, head, table);
+ res = proc_sys_fill_cache(file, ctx, head, table);
- if (res == 0)
- file->f_pos = *pos;
+ if (res)
+ ctx->pos = *pos;
return res;
}
-static int proc_sys_readdir(struct file *filp, void *dirent, filldir_t filldir)
+static int proc_sys_readdir(struct file *file, struct dir_context *ctx)
{
- struct dentry *dentry = filp->f_path.dentry;
- struct inode *inode = dentry->d_inode;
- struct ctl_table_header *head = grab_header(inode);
+ struct ctl_table_header *head = grab_header(file_inode(file));
struct ctl_table_header *h = NULL;
struct ctl_table *entry;
struct ctl_dir *ctl_dir;
unsigned long pos;
- int ret = -EINVAL;
if (IS_ERR(head))
return PTR_ERR(head);
ctl_dir = container_of(head, struct ctl_dir, header);
- ret = 0;
- /* Avoid a switch here: arm builds fail with missing __cmpdi2 */
- if (filp->f_pos == 0) {
- if (filldir(dirent, ".", 1, filp->f_pos,
- inode->i_ino, DT_DIR) < 0)
- goto out;
- filp->f_pos++;
- }
- if (filp->f_pos == 1) {
- if (filldir(dirent, "..", 2, filp->f_pos,
- parent_ino(dentry), DT_DIR) < 0)
- goto out;
- filp->f_pos++;
- }
+ if (!dir_emit_dots(file, ctx))
+ goto out; /* release head, as every other exit after grab_header() does */
pos = 2;
for (first_entry(ctl_dir, &h, &entry); h; next_entry(&h, &entry)) {
- ret = scan(h, entry, &pos, filp, dirent, filldir);
- if (ret) {
+ if (!scan(h, entry, &pos, file, ctx)) {
sysctl_head_finish(h);
break;
}
}
- ret = 1;
out:
sysctl_head_finish(head);
- return ret;
+ return 0;
}
static int proc_sys_permission(struct inode *inode, int mask)
@@ -769,7 +752,7 @@ static const struct file_operations proc_sys_file_operations = {
static const struct file_operations proc_sys_dir_file_operations = {
.read = generic_read_dir,
- .readdir = proc_sys_readdir,
+ .iterate = proc_sys_readdir,
.llseek = generic_file_llseek,
};
@@ -813,15 +796,16 @@ static int sysctl_is_seen(struct ctl_table_header *p)
return res;
}
-static int proc_sys_compare(const struct dentry *parent,
- const struct inode *pinode,
- const struct dentry *dentry, const struct inode *inode,
+static int proc_sys_compare(const struct dentry *parent, const struct dentry *dentry,
unsigned int len, const char *str, const struct qstr *name)
{
struct ctl_table_header *head;
+ struct inode *inode;
+
/* Although proc doesn't have negative dentries, rcu-walk means
* that inode here can be NULL */
/* AV: can it, indeed? */
+ inode = ACCESS_ONCE(dentry->d_inode);
if (!inode)
return 1;
if (name->len != len)
diff --git a/fs/proc/root.c b/fs/proc/root.c
index 41a6ea93f486..229e366598da 100644
--- a/fs/proc/root.c
+++ b/fs/proc/root.c
@@ -202,21 +202,18 @@ static struct dentry *proc_root_lookup(struct inode * dir, struct dentry * dentr
return proc_pid_lookup(dir, dentry, flags);
}
-static int proc_root_readdir(struct file * filp,
- void * dirent, filldir_t filldir)
+static int proc_root_readdir(struct file *file, struct dir_context *ctx)
{
- unsigned int nr = filp->f_pos;
- int ret;
-
- if (nr < FIRST_PROCESS_ENTRY) {
- int error = proc_readdir(filp, dirent, filldir);
+ if (ctx->pos < FIRST_PROCESS_ENTRY) {
+ /* Only move on to the pid entries once proc_readdir() returns > 0, as the
+ * old code did; otherwise the unread static entries would be skipped. */
+ int error = proc_readdir(file, ctx);
if (error <= 0)
return error;
- filp->f_pos = FIRST_PROCESS_ENTRY;
+ ctx->pos = FIRST_PROCESS_ENTRY;
}
- ret = proc_pid_readdir(filp, dirent, filldir);
- return ret;
+ return proc_pid_readdir(file, ctx);
}
/*
@@ -226,7 +223,7 @@ static int proc_root_readdir(struct file * filp,
*/
static const struct file_operations proc_root_operations = {
.read = generic_read_dir,
- .readdir = proc_root_readdir,
+ .iterate = proc_root_readdir,
.llseek = default_llseek,
};
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 3e636d864d56..107d026f5d6e 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -11,6 +11,7 @@
#include <linux/rmap.h>
#include <linux/swap.h>
#include <linux/swapops.h>
+#include <linux/mmu_notifier.h>
#include <asm/elf.h>
#include <asm/uaccess.h>
@@ -688,10 +689,66 @@ const struct file_operations proc_tid_smaps_operations = {
.release = seq_release_private,
};
+/*
+ * We do not want to have constant page-shift bits sitting in
+ * pagemap entries and are about to reuse them some time soon.
+ *
+ * Here's the "migration strategy":
+ * 1. when the system boots these bits remain what they are,
+ * but a warning about future change is printed in log;
+ * 2. once anyone clears soft-dirty bits via clear_refs file,
+ * this flag is set to denote that the user is aware of the
+ * new API and those page-shift bits change their meaning.
+ * The respective warning is printed in dmesg;
+ * 3. In a couple of releases we will remove all the mentions
+ * of page-shift in pagemap entries.
+ */
+
+static bool soft_dirty_cleared __read_mostly;
+
+enum clear_refs_types {
+ CLEAR_REFS_ALL = 1,
+ CLEAR_REFS_ANON,
+ CLEAR_REFS_MAPPED,
+ CLEAR_REFS_SOFT_DIRTY,
+ CLEAR_REFS_LAST,
+};
+
+struct clear_refs_private {
+ struct vm_area_struct *vma;
+ enum clear_refs_types type;
+};
+
+static inline void clear_soft_dirty(struct vm_area_struct *vma,
+ unsigned long addr, pte_t *pte)
+{
+#ifdef CONFIG_MEM_SOFT_DIRTY
+ /*
+ * The soft-dirty tracker uses #PF-s to catch writes
+ * to pages, so write-protect the pte as well. See the
+ * Documentation/vm/soft-dirty.txt for full description
+ * of how soft-dirty works.
+ */
+ pte_t ptent = *pte;
+
+ if (pte_present(ptent)) {
+ ptent = pte_wrprotect(ptent);
+ ptent = pte_clear_flags(ptent, _PAGE_SOFT_DIRTY);
+ } else if (is_swap_pte(ptent)) {
+ ptent = pte_swp_clear_soft_dirty(ptent);
+ } else if (pte_file(ptent)) {
+ ptent = pte_file_clear_soft_dirty(ptent);
+ }
+
+ set_pte_at(vma->vm_mm, addr, pte, ptent);
+#endif
+}
+
static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
unsigned long end, struct mm_walk *walk)
{
- struct vm_area_struct *vma = walk->private;
+ struct clear_refs_private *cp = walk->private;
+ struct vm_area_struct *vma = cp->vma;
pte_t *pte, ptent;
spinlock_t *ptl;
struct page *page;
@@ -703,6 +760,12 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
pte = pte_offset_map_lock(vma->vm_mm, pmd, addr, &ptl);
for (; addr != end; pte++, addr += PAGE_SIZE) {
ptent = *pte;
+
+ if (cp->type == CLEAR_REFS_SOFT_DIRTY) {
+ clear_soft_dirty(vma, addr, pte);
+ continue;
+ }
+
if (!pte_present(ptent))
continue;
@@ -719,10 +782,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr,
return 0;
}
-#define CLEAR_REFS_ALL 1
-#define CLEAR_REFS_ANON 2
-#define CLEAR_REFS_MAPPED 3
-
static ssize_t clear_refs_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
@@ -730,7 +789,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
char buffer[PROC_NUMBUF];
struct mm_struct *mm;
struct vm_area_struct *vma;
- int type;
+ enum clear_refs_types type;
+ int itype;
int rv;
memset(buffer, 0, sizeof(buffer));
@@ -738,23 +798,37 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
count = sizeof(buffer) - 1;
if (copy_from_user(buffer, buf, count))
return -EFAULT;
- rv = kstrtoint(strstrip(buffer), 10, &type);
+ rv = kstrtoint(strstrip(buffer), 10, &itype);
if (rv < 0)
return rv;
- if (type < CLEAR_REFS_ALL || type > CLEAR_REFS_MAPPED)
+ type = (enum clear_refs_types)itype;
+ if (type < CLEAR_REFS_ALL || type >= CLEAR_REFS_LAST)
return -EINVAL;
+
+ if (type == CLEAR_REFS_SOFT_DIRTY) {
+ soft_dirty_cleared = true;
+ pr_warn_once("The pagemap bits 55-60 has changed their meaning! "
+ "See the linux/Documentation/vm/pagemap.txt for details.\n");
+ }
+
task = get_proc_task(file_inode(file));
if (!task)
return -ESRCH;
mm = get_task_mm(task);
if (mm) {
+ struct clear_refs_private cp = {
+ .type = type,
+ };
struct mm_walk clear_refs_walk = {
.pmd_entry = clear_refs_pte_range,
.mm = mm,
+ .private = &cp,
};
down_read(&mm->mmap_sem);
+ if (type == CLEAR_REFS_SOFT_DIRTY)
+ mmu_notifier_invalidate_range_start(mm, 0, -1);
for (vma = mm->mmap; vma; vma = vma->vm_next) {
- clear_refs_walk.private = vma;
+ cp.vma = vma;
if (is_vm_hugetlb_page(vma))
continue;
/*
@@ -773,6 +847,8 @@ static ssize_t clear_refs_write(struct file *file, const char __user *buf,
walk_page_range(vma->vm_start, vma->vm_end,
&clear_refs_walk);
}
+ if (type == CLEAR_REFS_SOFT_DIRTY)
+ mmu_notifier_invalidate_range_end(mm, 0, -1);
flush_tlb_mm(mm);
up_read(&mm->mmap_sem);
mmput(mm);
@@ -792,14 +868,15 @@ typedef struct {
} pagemap_entry_t;
struct pagemapread {
- int pos, len;
+ int pos, len; /* units: PM_ENTRY_BYTES, not bytes */
pagemap_entry_t *buffer;
+ bool v2;
};
#define PAGEMAP_WALK_SIZE (PMD_SIZE)
#define PAGEMAP_WALK_MASK (PMD_MASK)
-#define PM_ENTRY_BYTES sizeof(u64)
+#define PM_ENTRY_BYTES sizeof(pagemap_entry_t)
#define PM_STATUS_BITS 3
#define PM_STATUS_OFFSET (64 - PM_STATUS_BITS)
#define PM_STATUS_MASK (((1LL << PM_STATUS_BITS) - 1) << PM_STATUS_OFFSET)
@@ -807,14 +884,17 @@ struct pagemapread {
#define PM_PSHIFT_BITS 6
#define PM_PSHIFT_OFFSET (PM_STATUS_OFFSET - PM_PSHIFT_BITS)
#define PM_PSHIFT_MASK (((1LL << PM_PSHIFT_BITS) - 1) << PM_PSHIFT_OFFSET)
-#define PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
+#define __PM_PSHIFT(x) (((u64) (x) << PM_PSHIFT_OFFSET) & PM_PSHIFT_MASK)
#define PM_PFRAME_MASK ((1LL << PM_PSHIFT_OFFSET) - 1)
#define PM_PFRAME(x) ((x) & PM_PFRAME_MASK)
+/* in "new" pagemap pshift bits are occupied with more status bits */
+#define PM_STATUS2(v2, x) (__PM_PSHIFT(v2 ? x : PAGE_SHIFT))
+#define __PM_SOFT_DIRTY (1LL)
#define PM_PRESENT PM_STATUS(4LL)
#define PM_SWAP PM_STATUS(2LL)
#define PM_FILE PM_STATUS(1LL)
-#define PM_NOT_PRESENT PM_PSHIFT(PAGE_SHIFT)
+#define PM_NOT_PRESENT(v2) PM_STATUS2(v2, 0)
#define PM_END_OF_BUFFER 1
static inline pagemap_entry_t make_pme(u64 val)
@@ -837,7 +917,7 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
struct pagemapread *pm = walk->private;
unsigned long addr;
int err = 0;
- pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
+ pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
for (addr = start; addr < end; addr += PAGE_SIZE) {
err = add_to_pagemap(addr, &pme, pm);
@@ -847,38 +927,43 @@ static int pagemap_pte_hole(unsigned long start, unsigned long end,
return err;
}
-static void pte_to_pagemap_entry(pagemap_entry_t *pme,
+static void pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
struct vm_area_struct *vma, unsigned long addr, pte_t pte)
{
u64 frame, flags;
struct page *page = NULL;
+ int flags2 = 0;
if (pte_present(pte)) {
frame = pte_pfn(pte);
flags = PM_PRESENT;
page = vm_normal_page(vma, addr, pte);
} else if (is_swap_pte(pte)) {
- swp_entry_t entry = pte_to_swp_entry(pte);
-
+ swp_entry_t entry;
+ if (pte_swp_soft_dirty(pte))
+ flags2 |= __PM_SOFT_DIRTY;
+ entry = pte_to_swp_entry(pte);
frame = swp_type(entry) |
(swp_offset(entry) << MAX_SWAPFILES_SHIFT);
flags = PM_SWAP;
if (is_migration_entry(entry))
page = migration_entry_to_page(entry);
} else {
- *pme = make_pme(PM_NOT_PRESENT);
+ *pme = make_pme(PM_NOT_PRESENT(pm->v2));
return;
}
if (page && !PageAnon(page))
flags |= PM_FILE;
+ if (pte_soft_dirty(pte))
+ flags2 |= __PM_SOFT_DIRTY;
- *pme = make_pme(PM_PFRAME(frame) | PM_PSHIFT(PAGE_SHIFT) | flags);
+ *pme = make_pme(PM_PFRAME(frame) | PM_STATUS2(pm->v2, flags2) | flags);
}
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
- pmd_t pmd, int offset)
+static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
+ pmd_t pmd, int offset, int pmd_flags2)
{
/*
* Currently pmd for thp is always present because thp can not be
@@ -887,13 +972,13 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
*/
if (pmd_present(pmd))
*pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset)
- | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
+ | PM_STATUS2(pm->v2, pmd_flags2) | PM_PRESENT);
else
- *pme = make_pme(PM_NOT_PRESENT);
+ *pme = make_pme(PM_NOT_PRESENT(pm->v2));
}
#else
-static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme,
- pmd_t pmd, int offset)
+static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
+ pmd_t pmd, int offset, int pmd_flags2)
{
}
#endif
@@ -905,17 +990,20 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
struct pagemapread *pm = walk->private;
pte_t *pte;
int err = 0;
- pagemap_entry_t pme = make_pme(PM_NOT_PRESENT);
+ pagemap_entry_t pme = make_pme(PM_NOT_PRESENT(pm->v2));
/* find the first VMA at or above 'addr' */
vma = find_vma(walk->mm, addr);
if (vma && pmd_trans_huge_lock(pmd, vma) == 1) {
+ int pmd_flags2;
+
+ pmd_flags2 = (pmd_soft_dirty(*pmd) ? __PM_SOFT_DIRTY : 0);
for (; addr != end; addr += PAGE_SIZE) {
unsigned long offset;
offset = (addr & ~PAGEMAP_WALK_MASK) >>
PAGE_SHIFT;
- thp_pmd_to_pagemap_entry(&pme, *pmd, offset);
+ thp_pmd_to_pagemap_entry(&pme, pm, *pmd, offset, pmd_flags2);
err = add_to_pagemap(addr, &pme, pm);
if (err)
break;
@@ -932,7 +1020,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
* and need a new, higher one */
if (vma && (addr >= vma->vm_end)) {
vma = find_vma(walk->mm, addr);
- pme = make_pme(PM_NOT_PRESENT);
+ pme = make_pme(PM_NOT_PRESENT(pm->v2));
}
/* check that 'vma' actually covers this address,
@@ -940,7 +1028,7 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
if (vma && (vma->vm_start <= addr) &&
!is_vm_hugetlb_page(vma)) {
pte = pte_offset_map(pmd, addr);
- pte_to_pagemap_entry(&pme, vma, addr, *pte);
+ pte_to_pagemap_entry(&pme, pm, vma, addr, *pte);
/* unmap before userspace copy */
pte_unmap(pte);
}
@@ -955,14 +1043,14 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
}
#ifdef CONFIG_HUGETLB_PAGE
-static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme,
+static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, struct pagemapread *pm,
pte_t pte, int offset)
{
if (pte_present(pte))
*pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset)
- | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT);
+ | PM_STATUS2(pm->v2, 0) | PM_PRESENT);
else
- *pme = make_pme(PM_NOT_PRESENT);
+ *pme = make_pme(PM_NOT_PRESENT(pm->v2));
}
/* This function walks within one hugetlb entry in the single call */
@@ -976,7 +1064,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask,
for (; addr != end; addr += PAGE_SIZE) {
int offset = (addr & ~hmask) >> PAGE_SHIFT;
- huge_pte_to_pagemap_entry(&pme, *pte, offset);
+ huge_pte_to_pagemap_entry(&pme, pm, *pte, offset);
err = add_to_pagemap(addr, &pme, pm);
if (err)
return err;
@@ -1038,8 +1126,9 @@ static ssize_t pagemap_read(struct file *file, char __user *buf,
if (!count)
goto out_task;
- pm.len = PM_ENTRY_BYTES * (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
- pm.buffer = kmalloc(pm.len, GFP_TEMPORARY);
+ pm.v2 = soft_dirty_cleared;
+ pm.len = (PAGEMAP_WALK_SIZE >> PAGE_SHIFT);
+ pm.buffer = kmalloc(pm.len * PM_ENTRY_BYTES, GFP_TEMPORARY);
ret = -ENOMEM;
if (!pm.buffer)
goto out_task;
@@ -1110,9 +1199,18 @@ out:
return ret;
}
+static int pagemap_open(struct inode *inode, struct file *file)
+{
+ pr_warn_once("Bits 55-60 of /proc/PID/pagemap entries are about "
+ "to stop being page-shift some time soon. See the "
+ "linux/Documentation/vm/pagemap.txt for details.\n");
+ return 0;
+}
+
const struct file_operations proc_pagemap_operations = {
.llseek = mem_lseek, /* borrow this */
.read = pagemap_read,
+ .open = pagemap_open,
};
#endif /* CONFIG_PROC_PAGE_MONITOR */
diff --git a/fs/proc/uptime.c b/fs/proc/uptime.c
index 9610ac772d7e..061894625903 100644
--- a/fs/proc/uptime.c
+++ b/fs/proc/uptime.c
@@ -20,8 +20,7 @@ static int uptime_proc_show(struct seq_file *m, void *v)
for_each_possible_cpu(i)
idletime += (__force u64) kcpustat_cpu(i).cpustat[CPUTIME_IDLE];
- do_posix_clock_monotonic_gettime(&uptime);
- monotonic_to_bootbased(&uptime);
+ get_monotonic_boottime(&uptime);
nsec = cputime64_to_jiffies64(idletime) * TICK_NSEC;
idle.tv_sec = div_u64_rem(nsec, NSEC_PER_SEC, &rem);
idle.tv_nsec = rem;
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 17f7e080d7ff..a1a16eb97c7b 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -20,6 +20,7 @@
#include <linux/init.h>
#include <linux/crash_dump.h>
#include <linux/list.h>
+#include <linux/vmalloc.h>
#include <asm/uaccess.h>
#include <asm/io.h>
#include "internal.h"
@@ -32,6 +33,10 @@ static LIST_HEAD(vmcore_list);
/* Stores the pointer to the buffer containing kernel elf core headers. */
static char *elfcorebuf;
static size_t elfcorebuf_sz;
+static size_t elfcorebuf_sz_orig;
+
+static char *elfnotes_buf;
+static size_t elfnotes_sz;
/* Total size of vmcore file. */
static u64 vmcore_size;
@@ -118,27 +123,6 @@ static ssize_t read_from_oldmem(char *buf, size_t count,
return read;
}
-/* Maps vmcore file offset to respective physical address in memroy. */
-static u64 map_offset_to_paddr(loff_t offset, struct list_head *vc_list,
- struct vmcore **m_ptr)
-{
- struct vmcore *m;
- u64 paddr;
-
- list_for_each_entry(m, vc_list, list) {
- u64 start, end;
- start = m->offset;
- end = m->offset + m->size - 1;
- if (offset >= start && offset <= end) {
- paddr = m->paddr + offset - start;
- *m_ptr = m;
- return paddr;
- }
- }
- *m_ptr = NULL;
- return 0;
-}
-
/* Read from the ELF header and then the crash dump. On error, negative value is
* returned otherwise number of bytes read are returned.
*/
@@ -147,8 +131,8 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
{
ssize_t acc = 0, tmp;
size_t tsz;
- u64 start, nr_bytes;
- struct vmcore *curr_m = NULL;
+ u64 start;
+ struct vmcore *m = NULL;
if (buflen == 0 || *fpos >= vmcore_size)
return 0;
@@ -159,9 +143,7 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
/* Read ELF core header */
if (*fpos < elfcorebuf_sz) {
- tsz = elfcorebuf_sz - *fpos;
- if (buflen < tsz)
- tsz = buflen;
+ tsz = min(elfcorebuf_sz - (size_t)*fpos, buflen);
if (copy_to_user(buffer, elfcorebuf + *fpos, tsz))
return -EFAULT;
buflen -= tsz;
@@ -174,39 +156,161 @@ static ssize_t read_vmcore(struct file *file, char __user *buffer,
return acc;
}
- start = map_offset_to_paddr(*fpos, &vmcore_list, &curr_m);
- if (!curr_m)
- return -EINVAL;
-
- while (buflen) {
- tsz = min_t(size_t, buflen, PAGE_SIZE - (start & ~PAGE_MASK));
-
- /* Calculate left bytes in current memory segment. */
- nr_bytes = (curr_m->size - (start - curr_m->paddr));
- if (tsz > nr_bytes)
- tsz = nr_bytes;
+ /* Read Elf note segment */
+ if (*fpos < elfcorebuf_sz + elfnotes_sz) {
+ void *kaddr;
- tmp = read_from_oldmem(buffer, tsz, &start, 1);
- if (tmp < 0)
- return tmp;
+ tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)*fpos, buflen);
+ kaddr = elfnotes_buf + *fpos - elfcorebuf_sz;
+ if (copy_to_user(buffer, kaddr, tsz))
+ return -EFAULT;
buflen -= tsz;
*fpos += tsz;
buffer += tsz;
acc += tsz;
- if (start >= (curr_m->paddr + curr_m->size)) {
- if (curr_m->list.next == &vmcore_list)
- return acc; /*EOF*/
- curr_m = list_entry(curr_m->list.next,
- struct vmcore, list);
- start = curr_m->paddr;
+
+ /* leave now if filled buffer already */
+ if (buflen == 0)
+ return acc;
+ }
+
+ list_for_each_entry(m, &vmcore_list, list) {
+ if (*fpos < m->offset + m->size) {
+ tsz = min_t(size_t, m->offset + m->size - *fpos, buflen);
+ start = m->paddr + *fpos - m->offset;
+ tmp = read_from_oldmem(buffer, tsz, &start, 1);
+ if (tmp < 0)
+ return tmp;
+ buflen -= tsz;
+ *fpos += tsz;
+ buffer += tsz;
+ acc += tsz;
+
+ /* leave now if filled buffer already */
+ if (buflen == 0)
+ return acc;
}
}
+
return acc;
}
+/**
+ * alloc_elfnotes_buf - allocate buffer for ELF note segment in
+ * vmalloc memory
+ *
+ * @notes_sz: size of buffer
+ *
+ * If CONFIG_MMU is defined, use vmalloc_user() to allow users to mmap
+ * the buffer to user-space by means of remap_vmalloc_range().
+ *
+ * If CONFIG_MMU is not defined, use vzalloc() since mmap_vmcore() is
+ * disabled and there's no need to allow users to mmap the buffer.
+ */
+static inline char *alloc_elfnotes_buf(size_t notes_sz)
+{
+#ifdef CONFIG_MMU
+ return vmalloc_user(notes_sz);
+#else
+ return vzalloc(notes_sz);
+#endif
+}
+
+/*
+ * Disable mmap_vmcore() if CONFIG_MMU is not defined. MMU is
+ * essential for mmap_vmcore() in order to map physically
+ * non-contiguous objects (ELF header, ELF note segment and memory
+ * regions in the 1st kernel pointed to by PT_LOAD entries) into
+ * virtually contiguous user-space in ELF layout.
+ */
+#if defined(CONFIG_MMU) && !defined(CONFIG_S390)
+static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
+{
+ size_t size = vma->vm_end - vma->vm_start;
+ u64 start, end, len, tsz;
+ struct vmcore *m;
+
+ start = (u64)vma->vm_pgoff << PAGE_SHIFT;
+ end = start + size;
+
+ if (size > vmcore_size || end > vmcore_size)
+ return -EINVAL;
+
+ if (vma->vm_flags & (VM_WRITE | VM_EXEC))
+ return -EPERM;
+
+ vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
+ vma->vm_flags |= VM_MIXEDMAP;
+
+ len = 0;
+
+ if (start < elfcorebuf_sz) {
+ u64 pfn;
+
+ tsz = min(elfcorebuf_sz - (size_t)start, size);
+ pfn = __pa(elfcorebuf + start) >> PAGE_SHIFT;
+ if (remap_pfn_range(vma, vma->vm_start, pfn, tsz,
+ vma->vm_page_prot))
+ return -EAGAIN;
+ size -= tsz;
+ start += tsz;
+ len += tsz;
+
+ if (size == 0)
+ return 0;
+ }
+
+ if (start < elfcorebuf_sz + elfnotes_sz) {
+ void *kaddr;
+
+ tsz = min(elfcorebuf_sz + elfnotes_sz - (size_t)start, size);
+ kaddr = elfnotes_buf + start - elfcorebuf_sz;
+ if (remap_vmalloc_range_partial(vma, vma->vm_start + len,
+ kaddr, tsz))
+ goto fail;
+ size -= tsz;
+ start += tsz;
+ len += tsz;
+
+ if (size == 0)
+ return 0;
+ }
+
+ list_for_each_entry(m, &vmcore_list, list) {
+ if (start < m->offset + m->size) {
+ u64 paddr = 0;
+
+ tsz = min_t(size_t, m->offset + m->size - start, size);
+ paddr = m->paddr + start - m->offset;
+ if (remap_pfn_range(vma, vma->vm_start + len,
+ paddr >> PAGE_SHIFT, tsz,
+ vma->vm_page_prot))
+ goto fail;
+ size -= tsz;
+ start += tsz;
+ len += tsz;
+
+ if (size == 0)
+ return 0;
+ }
+ }
+
+ return 0;
+fail:
+ do_munmap(vma->vm_mm, vma->vm_start, len);
+ return -EAGAIN;
+}
+#else
+static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
+{
+ return -ENOSYS;
+}
+#endif
+
static const struct file_operations proc_vmcore_operations = {
.read = read_vmcore,
.llseek = default_llseek,
+ .mmap = mmap_vmcore,
};
static struct vmcore* __init get_new_element(void)
@@ -214,61 +318,40 @@ static struct vmcore* __init get_new_element(void)
return kzalloc(sizeof(struct vmcore), GFP_KERNEL);
}
-static u64 __init get_vmcore_size_elf64(char *elfptr)
-{
- int i;
- u64 size;
- Elf64_Ehdr *ehdr_ptr;
- Elf64_Phdr *phdr_ptr;
-
- ehdr_ptr = (Elf64_Ehdr *)elfptr;
- phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr));
- size = sizeof(Elf64_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr));
- for (i = 0; i < ehdr_ptr->e_phnum; i++) {
- size += phdr_ptr->p_memsz;
- phdr_ptr++;
- }
- return size;
-}
-
-static u64 __init get_vmcore_size_elf32(char *elfptr)
+static u64 __init get_vmcore_size(size_t elfsz, size_t elfnotesegsz,
+ struct list_head *vc_list)
{
- int i;
u64 size;
- Elf32_Ehdr *ehdr_ptr;
- Elf32_Phdr *phdr_ptr;
+ struct vmcore *m;
- ehdr_ptr = (Elf32_Ehdr *)elfptr;
- phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr));
- size = sizeof(Elf32_Ehdr) + ((ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr));
- for (i = 0; i < ehdr_ptr->e_phnum; i++) {
- size += phdr_ptr->p_memsz;
- phdr_ptr++;
+ size = elfsz + elfnotesegsz;
+ list_for_each_entry(m, vc_list, list) {
+ size += m->size;
}
return size;
}
-/* Merges all the PT_NOTE headers into one. */
-static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
- struct list_head *vc_list)
+/**
+ * update_note_header_size_elf64 - update p_memsz member of each PT_NOTE entry
+ *
+ * @ehdr_ptr: ELF header
+ *
+ * This function updates p_memsz member of each PT_NOTE entry in the
+ * program header table pointed to by @ehdr_ptr to real size of ELF
+ * note segment.
+ */
+static int __init update_note_header_size_elf64(const Elf64_Ehdr *ehdr_ptr)
{
- int i, nr_ptnote=0, rc=0;
- char *tmp;
- Elf64_Ehdr *ehdr_ptr;
- Elf64_Phdr phdr, *phdr_ptr;
+ int i, rc=0;
+ Elf64_Phdr *phdr_ptr;
Elf64_Nhdr *nhdr_ptr;
- u64 phdr_sz = 0, note_off;
- ehdr_ptr = (Elf64_Ehdr *)elfptr;
- phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr));
+ phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1);
for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
- int j;
void *notes_section;
- struct vmcore *new;
u64 offset, max_sz, sz, real_sz = 0;
if (phdr_ptr->p_type != PT_NOTE)
continue;
- nr_ptnote++;
max_sz = phdr_ptr->p_memsz;
offset = phdr_ptr->p_offset;
notes_section = kmalloc(max_sz, GFP_KERNEL);
@@ -280,7 +363,7 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
return rc;
}
nhdr_ptr = notes_section;
- for (j = 0; j < max_sz; j += sz) {
+ while (real_sz < max_sz) {
if (nhdr_ptr->n_namesz == 0)
break;
sz = sizeof(Elf64_Nhdr) +
@@ -289,26 +372,122 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
real_sz += sz;
nhdr_ptr = (Elf64_Nhdr*)((char*)nhdr_ptr + sz);
}
-
- /* Add this contiguous chunk of notes section to vmcore list.*/
- new = get_new_element();
- if (!new) {
- kfree(notes_section);
- return -ENOMEM;
- }
- new->paddr = phdr_ptr->p_offset;
- new->size = real_sz;
- list_add_tail(&new->list, vc_list);
- phdr_sz += real_sz;
kfree(notes_section);
+ phdr_ptr->p_memsz = real_sz;
}
+ return 0;
+}
+
+/**
+ * get_note_number_and_size_elf64 - get the number of PT_NOTE program
+ * headers and sum of real size of their ELF note segment headers and
+ * data.
+ *
+ * @ehdr_ptr: ELF header
+ * @nr_ptnote: buffer for the number of PT_NOTE program headers
+ * @sz_ptnote: buffer for size of unique PT_NOTE program header
+ *
+ * This function is used to merge multiple PT_NOTE program headers
+ * into a unique single one. The resulting unique entry will have
+ * @sz_ptnote in its phdr->p_memsz.
+ *
+ * It is assumed that program headers with PT_NOTE type pointed to by
+ * @ehdr_ptr have already been updated by update_note_header_size_elf64
+ * and each of PT_NOTE program headers has actual ELF note segment
+ * size in its p_memsz member.
+ */
+static int __init get_note_number_and_size_elf64(const Elf64_Ehdr *ehdr_ptr,
+ int *nr_ptnote, u64 *sz_ptnote)
+{
+ int i;
+ Elf64_Phdr *phdr_ptr;
+
+ *nr_ptnote = *sz_ptnote = 0;
+
+ phdr_ptr = (Elf64_Phdr *)(ehdr_ptr + 1);
+ for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+ if (phdr_ptr->p_type != PT_NOTE)
+ continue;
+ *nr_ptnote += 1;
+ *sz_ptnote += phdr_ptr->p_memsz;
+ }
+
+ return 0;
+}
+
+/**
+ * copy_notes_elf64 - copy ELF note segments in a given buffer
+ *
+ * @ehdr_ptr: ELF header
+ * @notes_buf: buffer into which ELF note segments are copied
+ *
+ * This function is used to copy ELF note segment in the 1st kernel
+ * into the buffer @notes_buf in the 2nd kernel. It is assumed that
+ * size of the buffer @notes_buf is equal to or larger than sum of the
+ * real ELF note segment headers and data.
+ *
+ * It is assumed that program headers with PT_NOTE type pointed to by
+ * @ehdr_ptr have already been updated by update_note_header_size_elf64
+ * and each of PT_NOTE program headers has actual ELF note segment
+ * size in its p_memsz member.
+ */
+static int __init copy_notes_elf64(const Elf64_Ehdr *ehdr_ptr, char *notes_buf)
+{
+ int i, rc=0;
+ Elf64_Phdr *phdr_ptr;
+
+ phdr_ptr = (Elf64_Phdr*)(ehdr_ptr + 1);
+
+ for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+ u64 offset;
+ if (phdr_ptr->p_type != PT_NOTE)
+ continue;
+ offset = phdr_ptr->p_offset;
+ rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0);
+ if (rc < 0)
+ return rc;
+ notes_buf += phdr_ptr->p_memsz;
+ }
+
+ return 0;
+}
+
+/* Merges all the PT_NOTE headers into one. */
+static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
+ char **notes_buf, size_t *notes_sz)
+{
+ int i, nr_ptnote=0, rc=0;
+ char *tmp;
+ Elf64_Ehdr *ehdr_ptr;
+ Elf64_Phdr phdr;
+ u64 phdr_sz = 0, note_off;
+
+ ehdr_ptr = (Elf64_Ehdr *)elfptr;
+
+ rc = update_note_header_size_elf64(ehdr_ptr);
+ if (rc < 0)
+ return rc;
+
+ rc = get_note_number_and_size_elf64(ehdr_ptr, &nr_ptnote, &phdr_sz);
+ if (rc < 0)
+ return rc;
+
+ *notes_sz = roundup(phdr_sz, PAGE_SIZE);
+ *notes_buf = alloc_elfnotes_buf(*notes_sz);
+ if (!*notes_buf)
+ return -ENOMEM;
+
+ rc = copy_notes_elf64(ehdr_ptr, *notes_buf);
+ if (rc < 0)
+ return rc;
+
/* Prepare merged PT_NOTE program header. */
phdr.p_type = PT_NOTE;
phdr.p_flags = 0;
note_off = sizeof(Elf64_Ehdr) +
(ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf64_Phdr);
- phdr.p_offset = note_off;
+ phdr.p_offset = roundup(note_off, PAGE_SIZE);
phdr.p_vaddr = phdr.p_paddr = 0;
phdr.p_filesz = phdr.p_memsz = phdr_sz;
phdr.p_align = 0;
@@ -322,6 +501,8 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
i = (nr_ptnote - 1) * sizeof(Elf64_Phdr);
*elfsz = *elfsz - i;
memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf64_Ehdr)-sizeof(Elf64_Phdr)));
+ memset(elfptr + *elfsz, 0, i);
+ *elfsz = roundup(*elfsz, PAGE_SIZE);
/* Modify e_phnum to reflect merged headers. */
ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
@@ -329,27 +510,27 @@ static int __init merge_note_headers_elf64(char *elfptr, size_t *elfsz,
return 0;
}
-/* Merges all the PT_NOTE headers into one. */
-static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
- struct list_head *vc_list)
+/**
+ * update_note_header_size_elf32 - update p_memsz member of each PT_NOTE entry
+ *
+ * @ehdr_ptr: ELF header
+ *
+ * This function updates p_memsz member of each PT_NOTE entry in the
+ * program header table pointed to by @ehdr_ptr to real size of ELF
+ * note segment.
+ */
+static int __init update_note_header_size_elf32(const Elf32_Ehdr *ehdr_ptr)
{
- int i, nr_ptnote=0, rc=0;
- char *tmp;
- Elf32_Ehdr *ehdr_ptr;
- Elf32_Phdr phdr, *phdr_ptr;
+ int i, rc=0;
+ Elf32_Phdr *phdr_ptr;
Elf32_Nhdr *nhdr_ptr;
- u64 phdr_sz = 0, note_off;
- ehdr_ptr = (Elf32_Ehdr *)elfptr;
- phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr));
+ phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1);
for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
- int j;
void *notes_section;
- struct vmcore *new;
u64 offset, max_sz, sz, real_sz = 0;
if (phdr_ptr->p_type != PT_NOTE)
continue;
- nr_ptnote++;
max_sz = phdr_ptr->p_memsz;
offset = phdr_ptr->p_offset;
notes_section = kmalloc(max_sz, GFP_KERNEL);
@@ -361,7 +542,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
return rc;
}
nhdr_ptr = notes_section;
- for (j = 0; j < max_sz; j += sz) {
+ while (real_sz < max_sz) {
if (nhdr_ptr->n_namesz == 0)
break;
sz = sizeof(Elf32_Nhdr) +
@@ -370,26 +551,122 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
real_sz += sz;
nhdr_ptr = (Elf32_Nhdr*)((char*)nhdr_ptr + sz);
}
-
- /* Add this contiguous chunk of notes section to vmcore list.*/
- new = get_new_element();
- if (!new) {
- kfree(notes_section);
- return -ENOMEM;
- }
- new->paddr = phdr_ptr->p_offset;
- new->size = real_sz;
- list_add_tail(&new->list, vc_list);
- phdr_sz += real_sz;
kfree(notes_section);
+ phdr_ptr->p_memsz = real_sz;
}
+ return 0;
+}
+
+/**
+ * get_note_number_and_size_elf32 - get the number of PT_NOTE program
+ * headers and sum of real size of their ELF note segment headers and
+ * data.
+ *
+ * @ehdr_ptr: ELF header
+ * @nr_ptnote: buffer for the number of PT_NOTE program headers
+ * @sz_ptnote: buffer for size of unique PT_NOTE program header
+ *
+ * This function is used to merge multiple PT_NOTE program headers
+ * into a unique single one. The resulting unique entry will have
+ * @sz_ptnote in its phdr->p_memsz.
+ *
+ * It is assumed that program headers with PT_NOTE type pointed to by
+ * @ehdr_ptr have already been updated by update_note_header_size_elf32
+ * and each of PT_NOTE program headers has actual ELF note segment
+ * size in its p_memsz member.
+ */
+static int __init get_note_number_and_size_elf32(const Elf32_Ehdr *ehdr_ptr,
+ int *nr_ptnote, u64 *sz_ptnote)
+{
+ int i;
+ Elf32_Phdr *phdr_ptr;
+
+ *nr_ptnote = *sz_ptnote = 0;
+
+ phdr_ptr = (Elf32_Phdr *)(ehdr_ptr + 1);
+ for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+ if (phdr_ptr->p_type != PT_NOTE)
+ continue;
+ *nr_ptnote += 1;
+ *sz_ptnote += phdr_ptr->p_memsz;
+ }
+
+ return 0;
+}
+
+/**
+ * copy_notes_elf32 - copy ELF note segments in a given buffer
+ *
+ * @ehdr_ptr: ELF header
+ * @notes_buf: buffer into which ELF note segments are copied
+ *
+ * This function is used to copy ELF note segment in the 1st kernel
+ * into the buffer @notes_buf in the 2nd kernel. It is assumed that
+ * size of the buffer @notes_buf is equal to or larger than sum of the
+ * real ELF note segment headers and data.
+ *
+ * It is assumed that program headers with PT_NOTE type pointed to by
+ * @ehdr_ptr have already been updated by update_note_header_size_elf32
+ * and each of PT_NOTE program headers has actual ELF note segment
+ * size in its p_memsz member.
+ */
+static int __init copy_notes_elf32(const Elf32_Ehdr *ehdr_ptr, char *notes_buf)
+{
+ int i, rc=0;
+ Elf32_Phdr *phdr_ptr;
+
+ phdr_ptr = (Elf32_Phdr*)(ehdr_ptr + 1);
+
+ for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+ u64 offset;
+ if (phdr_ptr->p_type != PT_NOTE)
+ continue;
+ offset = phdr_ptr->p_offset;
+ rc = read_from_oldmem(notes_buf, phdr_ptr->p_memsz, &offset, 0);
+ if (rc < 0)
+ return rc;
+ notes_buf += phdr_ptr->p_memsz;
+ }
+
+ return 0;
+}
+
+/* Merges all the PT_NOTE headers into one. */
+static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
+ char **notes_buf, size_t *notes_sz)
+{
+ int i, nr_ptnote=0, rc=0;
+ char *tmp;
+ Elf32_Ehdr *ehdr_ptr;
+ Elf32_Phdr phdr;
+ u64 phdr_sz = 0, note_off;
+
+ ehdr_ptr = (Elf32_Ehdr *)elfptr;
+
+ rc = update_note_header_size_elf32(ehdr_ptr);
+ if (rc < 0)
+ return rc;
+
+ rc = get_note_number_and_size_elf32(ehdr_ptr, &nr_ptnote, &phdr_sz);
+ if (rc < 0)
+ return rc;
+
+ *notes_sz = roundup(phdr_sz, PAGE_SIZE);
+ *notes_buf = alloc_elfnotes_buf(*notes_sz);
+ if (!*notes_buf)
+ return -ENOMEM;
+
+ rc = copy_notes_elf32(ehdr_ptr, *notes_buf);
+ if (rc < 0)
+ return rc;
+
/* Prepare merged PT_NOTE program header. */
phdr.p_type = PT_NOTE;
phdr.p_flags = 0;
note_off = sizeof(Elf32_Ehdr) +
(ehdr_ptr->e_phnum - nr_ptnote +1) * sizeof(Elf32_Phdr);
- phdr.p_offset = note_off;
+ phdr.p_offset = roundup(note_off, PAGE_SIZE);
phdr.p_vaddr = phdr.p_paddr = 0;
phdr.p_filesz = phdr.p_memsz = phdr_sz;
phdr.p_align = 0;
@@ -403,6 +680,8 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
i = (nr_ptnote - 1) * sizeof(Elf32_Phdr);
*elfsz = *elfsz - i;
memmove(tmp, tmp+i, ((*elfsz)-sizeof(Elf32_Ehdr)-sizeof(Elf32_Phdr)));
+ memset(elfptr + *elfsz, 0, i);
+ *elfsz = roundup(*elfsz, PAGE_SIZE);
/* Modify e_phnum to reflect merged headers. */
ehdr_ptr->e_phnum = ehdr_ptr->e_phnum - nr_ptnote + 1;
@@ -414,6 +693,7 @@ static int __init merge_note_headers_elf32(char *elfptr, size_t *elfsz,
* the new offset fields of exported program headers. */
static int __init process_ptload_program_headers_elf64(char *elfptr,
size_t elfsz,
+ size_t elfnotes_sz,
struct list_head *vc_list)
{
int i;
@@ -425,32 +705,38 @@ static int __init process_ptload_program_headers_elf64(char *elfptr,
ehdr_ptr = (Elf64_Ehdr *)elfptr;
phdr_ptr = (Elf64_Phdr*)(elfptr + sizeof(Elf64_Ehdr)); /* PT_NOTE hdr */
- /* First program header is PT_NOTE header. */
- vmcore_off = sizeof(Elf64_Ehdr) +
- (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr) +
- phdr_ptr->p_memsz; /* Note sections */
+ /* Skip Elf header, program headers and Elf note segment. */
+ vmcore_off = elfsz + elfnotes_sz;
for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+ u64 paddr, start, end, size;
+
if (phdr_ptr->p_type != PT_LOAD)
continue;
+ paddr = phdr_ptr->p_offset;
+ start = rounddown(paddr, PAGE_SIZE);
+ end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE);
+ size = end - start;
+
/* Add this contiguous chunk of memory to vmcore list.*/
new = get_new_element();
if (!new)
return -ENOMEM;
- new->paddr = phdr_ptr->p_offset;
- new->size = phdr_ptr->p_memsz;
+ new->paddr = start;
+ new->size = size;
list_add_tail(&new->list, vc_list);
/* Update the program header offset. */
- phdr_ptr->p_offset = vmcore_off;
- vmcore_off = vmcore_off + phdr_ptr->p_memsz;
+ phdr_ptr->p_offset = vmcore_off + (paddr - start);
+ vmcore_off = vmcore_off + size;
}
return 0;
}
static int __init process_ptload_program_headers_elf32(char *elfptr,
size_t elfsz,
+ size_t elfnotes_sz,
struct list_head *vc_list)
{
int i;
@@ -462,43 +748,44 @@ static int __init process_ptload_program_headers_elf32(char *elfptr,
ehdr_ptr = (Elf32_Ehdr *)elfptr;
phdr_ptr = (Elf32_Phdr*)(elfptr + sizeof(Elf32_Ehdr)); /* PT_NOTE hdr */
- /* First program header is PT_NOTE header. */
- vmcore_off = sizeof(Elf32_Ehdr) +
- (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr) +
- phdr_ptr->p_memsz; /* Note sections */
+ /* Skip Elf header, program headers and Elf note segment. */
+ vmcore_off = elfsz + elfnotes_sz;
for (i = 0; i < ehdr_ptr->e_phnum; i++, phdr_ptr++) {
+ u64 paddr, start, end, size;
+
if (phdr_ptr->p_type != PT_LOAD)
continue;
+ paddr = phdr_ptr->p_offset;
+ start = rounddown(paddr, PAGE_SIZE);
+ end = roundup(paddr + phdr_ptr->p_memsz, PAGE_SIZE);
+ size = end - start;
+
/* Add this contiguous chunk of memory to vmcore list.*/
new = get_new_element();
if (!new)
return -ENOMEM;
- new->paddr = phdr_ptr->p_offset;
- new->size = phdr_ptr->p_memsz;
+ new->paddr = start;
+ new->size = size;
list_add_tail(&new->list, vc_list);
/* Update the program header offset */
- phdr_ptr->p_offset = vmcore_off;
- vmcore_off = vmcore_off + phdr_ptr->p_memsz;
+ phdr_ptr->p_offset = vmcore_off + (paddr - start);
+ vmcore_off = vmcore_off + size;
}
return 0;
}
/* Sets offset fields of vmcore elements. */
-static void __init set_vmcore_list_offsets_elf64(char *elfptr,
- struct list_head *vc_list)
+static void __init set_vmcore_list_offsets(size_t elfsz, size_t elfnotes_sz,
+ struct list_head *vc_list)
{
loff_t vmcore_off;
- Elf64_Ehdr *ehdr_ptr;
struct vmcore *m;
- ehdr_ptr = (Elf64_Ehdr *)elfptr;
-
- /* Skip Elf header and program headers. */
- vmcore_off = sizeof(Elf64_Ehdr) +
- (ehdr_ptr->e_phnum) * sizeof(Elf64_Phdr);
+ /* Skip Elf header, program headers and Elf note segment. */
+ vmcore_off = elfsz + elfnotes_sz;
list_for_each_entry(m, vc_list, list) {
m->offset = vmcore_off;
@@ -506,24 +793,12 @@ static void __init set_vmcore_list_offsets_elf64(char *elfptr,
}
}
-/* Sets offset fields of vmcore elements. */
-static void __init set_vmcore_list_offsets_elf32(char *elfptr,
- struct list_head *vc_list)
+static void free_elfcorebuf(void)
{
- loff_t vmcore_off;
- Elf32_Ehdr *ehdr_ptr;
- struct vmcore *m;
-
- ehdr_ptr = (Elf32_Ehdr *)elfptr;
-
- /* Skip Elf header and program headers. */
- vmcore_off = sizeof(Elf32_Ehdr) +
- (ehdr_ptr->e_phnum) * sizeof(Elf32_Phdr);
-
- list_for_each_entry(m, vc_list, list) {
- m->offset = vmcore_off;
- vmcore_off += m->size;
- }
+ free_pages((unsigned long)elfcorebuf, get_order(elfcorebuf_sz_orig));
+ elfcorebuf = NULL;
+ vfree(elfnotes_buf);
+ elfnotes_buf = NULL;
}
static int __init parse_crash_elf64_headers(void)
@@ -554,31 +829,32 @@ static int __init parse_crash_elf64_headers(void)
}
/* Read in all elf headers. */
- elfcorebuf_sz = sizeof(Elf64_Ehdr) + ehdr.e_phnum * sizeof(Elf64_Phdr);
- elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL);
+ elfcorebuf_sz_orig = sizeof(Elf64_Ehdr) +
+ ehdr.e_phnum * sizeof(Elf64_Phdr);
+ elfcorebuf_sz = elfcorebuf_sz_orig;
+ elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(elfcorebuf_sz_orig));
if (!elfcorebuf)
return -ENOMEM;
addr = elfcorehdr_addr;
- rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0);
- if (rc < 0) {
- kfree(elfcorebuf);
- return rc;
- }
+ rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0);
+ if (rc < 0)
+ goto fail;
/* Merge all PT_NOTE headers into one. */
- rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz, &vmcore_list);
- if (rc) {
- kfree(elfcorebuf);
- return rc;
- }
+ rc = merge_note_headers_elf64(elfcorebuf, &elfcorebuf_sz,
+ &elfnotes_buf, &elfnotes_sz);
+ if (rc)
+ goto fail;
rc = process_ptload_program_headers_elf64(elfcorebuf, elfcorebuf_sz,
- &vmcore_list);
- if (rc) {
- kfree(elfcorebuf);
- return rc;
- }
- set_vmcore_list_offsets_elf64(elfcorebuf, &vmcore_list);
+ elfnotes_sz, &vmcore_list);
+ if (rc)
+ goto fail;
+ set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list);
return 0;
+fail:
+ free_elfcorebuf();
+ return rc;
}
static int __init parse_crash_elf32_headers(void)
@@ -609,31 +885,31 @@ static int __init parse_crash_elf32_headers(void)
}
/* Read in all elf headers. */
- elfcorebuf_sz = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr);
- elfcorebuf = kmalloc(elfcorebuf_sz, GFP_KERNEL);
+ elfcorebuf_sz_orig = sizeof(Elf32_Ehdr) + ehdr.e_phnum * sizeof(Elf32_Phdr);
+ elfcorebuf_sz = elfcorebuf_sz_orig;
+ elfcorebuf = (void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO,
+ get_order(elfcorebuf_sz_orig));
if (!elfcorebuf)
return -ENOMEM;
addr = elfcorehdr_addr;
- rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz, &addr, 0);
- if (rc < 0) {
- kfree(elfcorebuf);
- return rc;
- }
+ rc = read_from_oldmem(elfcorebuf, elfcorebuf_sz_orig, &addr, 0);
+ if (rc < 0)
+ goto fail;
/* Merge all PT_NOTE headers into one. */
- rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz, &vmcore_list);
- if (rc) {
- kfree(elfcorebuf);
- return rc;
- }
+ rc = merge_note_headers_elf32(elfcorebuf, &elfcorebuf_sz,
+ &elfnotes_buf, &elfnotes_sz);
+ if (rc)
+ goto fail;
rc = process_ptload_program_headers_elf32(elfcorebuf, elfcorebuf_sz,
- &vmcore_list);
- if (rc) {
- kfree(elfcorebuf);
- return rc;
- }
- set_vmcore_list_offsets_elf32(elfcorebuf, &vmcore_list);
+ elfnotes_sz, &vmcore_list);
+ if (rc)
+ goto fail;
+ set_vmcore_list_offsets(elfcorebuf_sz, elfnotes_sz, &vmcore_list);
return 0;
+fail:
+ free_elfcorebuf();
+ return rc;
}
static int __init parse_crash_elf_headers(void)
@@ -655,20 +931,19 @@ static int __init parse_crash_elf_headers(void)
rc = parse_crash_elf64_headers();
if (rc)
return rc;
-
- /* Determine vmcore size. */
- vmcore_size = get_vmcore_size_elf64(elfcorebuf);
} else if (e_ident[EI_CLASS] == ELFCLASS32) {
rc = parse_crash_elf32_headers();
if (rc)
return rc;
-
- /* Determine vmcore size. */
- vmcore_size = get_vmcore_size_elf32(elfcorebuf);
} else {
pr_warn("Warning: Core image elf header is not sane\n");
return -EINVAL;
}
+
+ /* Determine vmcore size. */
+ vmcore_size = get_vmcore_size(elfcorebuf_sz, elfnotes_sz,
+ &vmcore_list);
+
return 0;
}
@@ -711,7 +986,6 @@ void vmcore_cleanup(void)
list_del(&m->list);
kfree(m);
}
- kfree(elfcorebuf);
- elfcorebuf = NULL;
+ free_elfcorebuf();
}
EXPORT_SYMBOL_GPL(vmcore_cleanup);