summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--fs/mount.h2
-rw-r--r--fs/namespace.c450
-rw-r--r--fs/nsfs.c14
-rw-r--r--fs/proc_namespace.c6
-rw-r--r--include/linux/path.h9
-rw-r--r--include/uapi/linux/mount.h10
-rw-r--r--include/uapi/linux/nsfs.h2
-rw-r--r--tools/testing/selftests/filesystems/statmount/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount.h46
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test.c144
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test_ns.c364
11 files changed, 926 insertions, 123 deletions
diff --git a/fs/mount.h b/fs/mount.h
index 4adce73211ae..ad4b1ddebb54 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -16,6 +16,8 @@ struct mnt_namespace {
u64 event;
unsigned int nr_mounts; /* # of mounts in the namespace */
unsigned int pending_mounts;
+ struct rb_node mnt_ns_tree_node; /* node in the mnt_ns_tree */
+ refcount_t passive; /* number references not pinning @mounts */
} __randomize_layout;
struct mnt_pcp {
diff --git a/fs/namespace.c b/fs/namespace.c
index 3aadd5361a18..221db9de4729 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -70,7 +70,8 @@ static DEFINE_IDA(mnt_id_ida);
static DEFINE_IDA(mnt_group_ida);
/* Don't allow confusion with old 32bit mount ID */
-static atomic64_t mnt_id_ctr = ATOMIC64_INIT(1ULL << 32);
+#define MNT_UNIQUE_ID_OFFSET (1ULL << 32)
+static atomic64_t mnt_id_ctr = ATOMIC64_INIT(MNT_UNIQUE_ID_OFFSET);
static struct hlist_head *mount_hashtable __ro_after_init;
static struct hlist_head *mountpoint_hashtable __ro_after_init;
@@ -78,6 +79,8 @@ static struct kmem_cache *mnt_cache __ro_after_init;
static DECLARE_RWSEM(namespace_sem);
static HLIST_HEAD(unmounted); /* protected by namespace_sem */
static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
+static DEFINE_RWLOCK(mnt_ns_tree_lock);
+static struct rb_root mnt_ns_tree = RB_ROOT; /* protected by mnt_ns_tree_lock */
struct mount_kattr {
unsigned int attr_set;
@@ -103,6 +106,109 @@ EXPORT_SYMBOL_GPL(fs_kobj);
*/
__cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
+static int mnt_ns_cmp(u64 seq, const struct mnt_namespace *ns)
+{
+ u64 seq_b = ns->seq;
+
+ if (seq < seq_b)
+ return -1;
+ if (seq > seq_b)
+ return 1;
+ return 0;
+}
+
+static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node)
+{
+ if (!node)
+ return NULL;
+ return rb_entry(node, struct mnt_namespace, mnt_ns_tree_node);
+}
+
+static bool mnt_ns_less(struct rb_node *a, const struct rb_node *b)
+{
+ struct mnt_namespace *ns_a = node_to_mnt_ns(a);
+ struct mnt_namespace *ns_b = node_to_mnt_ns(b);
+ u64 seq_a = ns_a->seq;
+
+ return mnt_ns_cmp(seq_a, ns_b) < 0;
+}
+
+static void mnt_ns_tree_add(struct mnt_namespace *ns)
+{
+ guard(write_lock)(&mnt_ns_tree_lock);
+ rb_add(&ns->mnt_ns_tree_node, &mnt_ns_tree, mnt_ns_less);
+}
+
+static void mnt_ns_release(struct mnt_namespace *ns)
+{
+ lockdep_assert_not_held(&mnt_ns_tree_lock);
+
+ /* keep alive for {list,stat}mount() */
+ if (refcount_dec_and_test(&ns->passive)) {
+ put_user_ns(ns->user_ns);
+ kfree(ns);
+ }
+}
+DEFINE_FREE(mnt_ns_release, struct mnt_namespace *, if (_T) mnt_ns_release(_T))
+
+static void mnt_ns_tree_remove(struct mnt_namespace *ns)
+{
+ /* remove from global mount namespace list */
+ if (!is_anon_ns(ns)) {
+ guard(write_lock)(&mnt_ns_tree_lock);
+ rb_erase(&ns->mnt_ns_tree_node, &mnt_ns_tree);
+ }
+
+ mnt_ns_release(ns);
+}
+
+/*
+ * Returns the mount namespace which either has the specified id, or has the
+ * next smallest id afer the specified one.
+ */
+static struct mnt_namespace *mnt_ns_find_id_at(u64 mnt_ns_id)
+{
+ struct rb_node *node = mnt_ns_tree.rb_node;
+ struct mnt_namespace *ret = NULL;
+
+ lockdep_assert_held(&mnt_ns_tree_lock);
+
+ while (node) {
+ struct mnt_namespace *n = node_to_mnt_ns(node);
+
+ if (mnt_ns_id <= n->seq) {
+ ret = node_to_mnt_ns(node);
+ if (mnt_ns_id == n->seq)
+ break;
+ node = node->rb_left;
+ } else {
+ node = node->rb_right;
+ }
+ }
+ return ret;
+}
+
+/*
+ * Lookup a mount namespace by id and take a passive reference count. Taking a
+ * passive reference means the mount namespace can be emptied if e.g., the last
+ * task holding an active reference exits. To access the mounts of the
+ * namespace the @namespace_sem must first be acquired. If the namespace has
+ * already shut down before acquiring @namespace_sem, {list,stat}mount() will
+ * see that the mount rbtree of the namespace is empty.
+ */
+static struct mnt_namespace *lookup_mnt_ns(u64 mnt_ns_id)
+{
+ struct mnt_namespace *ns;
+
+ guard(read_lock)(&mnt_ns_tree_lock);
+ ns = mnt_ns_find_id_at(mnt_ns_id);
+ if (!ns || ns->seq != mnt_ns_id)
+ return NULL;
+
+ refcount_inc(&ns->passive);
+ return ns;
+}
+
static inline void lock_mount_hash(void)
{
write_seqlock(&mount_lock);
@@ -1448,6 +1554,30 @@ static struct mount *mnt_find_id_at(struct mnt_namespace *ns, u64 mnt_id)
return ret;
}
+/*
+ * Returns the mount which either has the specified mnt_id, or has the next
+ * greater id before the specified one.
+ */
+static struct mount *mnt_find_id_at_reverse(struct mnt_namespace *ns, u64 mnt_id)
+{
+ struct rb_node *node = ns->mounts.rb_node;
+ struct mount *ret = NULL;
+
+ while (node) {
+ struct mount *m = node_to_mount(node);
+
+ if (mnt_id >= m->mnt_id_unique) {
+ ret = node_to_mount(node);
+ if (mnt_id == m->mnt_id_unique)
+ break;
+ node = node->rb_right;
+ } else {
+ node = node->rb_left;
+ }
+ }
+ return ret;
+}
+
#ifdef CONFIG_PROC_FS
/* iterator; we want it to have access to namespace_sem, thus here... */
@@ -3699,8 +3829,7 @@ static void free_mnt_ns(struct mnt_namespace *ns)
if (!is_anon_ns(ns))
ns_free_inum(&ns->ns);
dec_mnt_namespaces(ns->ucounts);
- put_user_ns(ns->user_ns);
- kfree(ns);
+ mnt_ns_tree_remove(ns);
}
/*
@@ -3739,7 +3868,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a
if (!anon)
new_ns->seq = atomic64_add_return(1, &mnt_ns_seq);
refcount_set(&new_ns->ns.count, 1);
+ refcount_set(&new_ns->passive, 1);
new_ns->mounts = RB_ROOT;
+ RB_CLEAR_NODE(&new_ns->mnt_ns_tree_node);
init_waitqueue_head(&new_ns->poll);
new_ns->user_ns = get_user_ns(user_ns);
new_ns->ucounts = ucounts;
@@ -3816,6 +3947,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns,
while (p->mnt.mnt_root != q->mnt.mnt_root)
p = next_mnt(skip_mnt_tree(p), old);
}
+ mnt_ns_tree_add(new_ns);
namespace_unlock();
if (rootmnt)
@@ -4833,6 +4965,40 @@ static int statmount_fs_type(struct kstatmount *s, struct seq_file *seq)
return 0;
}
+static void statmount_mnt_ns_id(struct kstatmount *s, struct mnt_namespace *ns)
+{
+ s->sm.mask |= STATMOUNT_MNT_NS_ID;
+ s->sm.mnt_ns_id = ns->seq;
+}
+
+static int statmount_mnt_opts(struct kstatmount *s, struct seq_file *seq)
+{
+ struct vfsmount *mnt = s->mnt;
+ struct super_block *sb = mnt->mnt_sb;
+ int err;
+
+ if (sb->s_op->show_options) {
+ size_t start = seq->count;
+
+ err = sb->s_op->show_options(seq, mnt->mnt_root);
+ if (err)
+ return err;
+
+ if (unlikely(seq_has_overflowed(seq)))
+ return -EAGAIN;
+
+ if (seq->count == start)
+ return 0;
+
+ /* skip leading comma */
+ memmove(seq->buf + start, seq->buf + start + 1,
+ seq->count - start - 1);
+ seq->count--;
+ }
+
+ return 0;
+}
+
static int statmount_string(struct kstatmount *s, u64 flag)
{
int ret;
@@ -4853,6 +5019,10 @@ static int statmount_string(struct kstatmount *s, u64 flag)
sm->mnt_point = seq->count;
ret = statmount_mnt_point(s, seq);
break;
+ case STATMOUNT_MNT_OPTS:
+ sm->mnt_opts = seq->count;
+ ret = statmount_mnt_opts(s, seq);
+ break;
default:
WARN_ON_ONCE(true);
return -EINVAL;
@@ -4893,23 +5063,84 @@ static int copy_statmount_to_user(struct kstatmount *s)
return 0;
}
-static int do_statmount(struct kstatmount *s)
+static struct mount *listmnt_next(struct mount *curr, bool reverse)
{
- struct mount *m = real_mount(s->mnt);
+ struct rb_node *node;
+
+ if (reverse)
+ node = rb_prev(&curr->mnt_node);
+ else
+ node = rb_next(&curr->mnt_node);
+
+ return node_to_mount(node);
+}
+
+static int grab_requested_root(struct mnt_namespace *ns, struct path *root)
+{
+ struct mount *first, *child;
+
+ rwsem_assert_held(&namespace_sem);
+
+ /* We're looking at our own ns, just use get_fs_root. */
+ if (ns == current->nsproxy->mnt_ns) {
+ get_fs_root(current->fs, root);
+ return 0;
+ }
+
+ /*
+ * We have to find the first mount in our ns and use that, however it
+ * may not exist, so handle that properly.
+ */
+ if (RB_EMPTY_ROOT(&ns->mounts))
+ return -ENOENT;
+
+ first = child = ns->root;
+ for (;;) {
+ child = listmnt_next(child, false);
+ if (!child)
+ return -ENOENT;
+ if (child->mnt_parent == first)
+ break;
+ }
+
+ root->mnt = mntget(&child->mnt);
+ root->dentry = dget(root->mnt->mnt_root);
+ return 0;
+}
+
+static int do_statmount(struct kstatmount *s, u64 mnt_id, u64 mnt_ns_id,
+ struct mnt_namespace *ns)
+{
+ struct path root __free(path_put) = {};
+ struct mount *m;
int err;
+ /* Has the namespace already been emptied? */
+ if (mnt_ns_id && RB_EMPTY_ROOT(&ns->mounts))
+ return -ENOENT;
+
+ s->mnt = lookup_mnt_in_ns(mnt_id, ns);
+ if (!s->mnt)
+ return -ENOENT;
+
+ err = grab_requested_root(ns, &root);
+ if (err)
+ return err;
+
/*
* Don't trigger audit denials. We just want to determine what
* mounts to show users.
*/
- if (!is_path_reachable(m, m->mnt.mnt_root, &s->root) &&
- !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
+ m = real_mount(s->mnt);
+ if (!is_path_reachable(m, m->mnt.mnt_root, &root) &&
+ !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
err = security_sb_statfs(s->mnt->mnt_root);
if (err)
return err;
+ s->root = root;
if (s->mask & STATMOUNT_SB_BASIC)
statmount_sb_basic(s);
@@ -4928,6 +5159,12 @@ static int do_statmount(struct kstatmount *s)
if (!err && s->mask & STATMOUNT_MNT_POINT)
err = statmount_string(s, STATMOUNT_MNT_POINT);
+ if (!err && s->mask & STATMOUNT_MNT_OPTS)
+ err = statmount_string(s, STATMOUNT_MNT_OPTS);
+
+ if (!err && s->mask & STATMOUNT_MNT_NS_ID)
+ statmount_mnt_ns_id(s, ns);
+
if (err)
return err;
@@ -4945,6 +5182,9 @@ static inline bool retry_statmount(const long ret, size_t *seq_size)
return true;
}
+#define STATMOUNT_STRING_REQ (STATMOUNT_MNT_ROOT | STATMOUNT_MNT_POINT | \
+ STATMOUNT_FS_TYPE | STATMOUNT_MNT_OPTS)
+
static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq,
struct statmount __user *buf, size_t bufsize,
size_t seq_size)
@@ -4956,10 +5196,18 @@ static int prepare_kstatmount(struct kstatmount *ks, struct mnt_id_req *kreq,
ks->mask = kreq->param;
ks->buf = buf;
ks->bufsize = bufsize;
- ks->seq.size = seq_size;
- ks->seq.buf = kvmalloc(seq_size, GFP_KERNEL_ACCOUNT);
- if (!ks->seq.buf)
- return -ENOMEM;
+
+ if (ks->mask & STATMOUNT_STRING_REQ) {
+ if (bufsize == sizeof(ks->sm))
+ return -EOVERFLOW;
+
+ ks->seq.buf = kvmalloc(seq_size, GFP_KERNEL_ACCOUNT);
+ if (!ks->seq.buf)
+ return -ENOMEM;
+
+ ks->seq.size = seq_size;
+ }
+
return 0;
}
@@ -4969,7 +5217,7 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
int ret;
size_t usize;
- BUILD_BUG_ON(sizeof(struct mnt_id_req) != MNT_ID_REQ_SIZE_VER0);
+ BUILD_BUG_ON(sizeof(struct mnt_id_req) != MNT_ID_REQ_SIZE_VER1);
ret = get_user(usize, &req->size);
if (ret)
@@ -4984,16 +5232,32 @@ static int copy_mnt_id_req(const struct mnt_id_req __user *req,
return ret;
if (kreq->spare != 0)
return -EINVAL;
+ /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
+ if (kreq->mnt_id <= MNT_UNIQUE_ID_OFFSET)
+ return -EINVAL;
return 0;
}
+/*
+ * If the user requested a specific mount namespace id, look that up and return
+ * that, or if not simply grab a passive reference on our mount namespace and
+ * return that.
+ */
+static struct mnt_namespace *grab_requested_mnt_ns(u64 mnt_ns_id)
+{
+ if (mnt_ns_id)
+ return lookup_mnt_ns(mnt_ns_id);
+ refcount_inc(&current->nsproxy->mnt_ns->passive);
+ return current->nsproxy->mnt_ns;
+}
+
SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
struct statmount __user *, buf, size_t, bufsize,
unsigned int, flags)
{
- struct vfsmount *mnt;
+ struct mnt_namespace *ns __free(mnt_ns_release) = NULL;
+ struct kstatmount *ks __free(kfree) = NULL;
struct mnt_id_req kreq;
- struct kstatmount ks;
/* We currently support retrieval of 3 strings. */
size_t seq_size = 3 * PATH_MAX;
int ret;
@@ -5005,64 +5269,88 @@ SYSCALL_DEFINE4(statmount, const struct mnt_id_req __user *, req,
if (ret)
return ret;
+ ns = grab_requested_mnt_ns(kreq.mnt_ns_id);
+ if (!ns)
+ return -ENOENT;
+
+ if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
+ !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
+ return -ENOENT;
+
+ ks = kmalloc(sizeof(*ks), GFP_KERNEL_ACCOUNT);
+ if (!ks)
+ return -ENOMEM;
+
retry:
- ret = prepare_kstatmount(&ks, &kreq, buf, bufsize, seq_size);
+ ret = prepare_kstatmount(ks, &kreq, buf, bufsize, seq_size);
if (ret)
return ret;
- down_read(&namespace_sem);
- mnt = lookup_mnt_in_ns(kreq.mnt_id, current->nsproxy->mnt_ns);
- if (!mnt) {
- up_read(&namespace_sem);
- kvfree(ks.seq.buf);
- return -ENOENT;
- }
-
- ks.mnt = mnt;
- get_fs_root(current->fs, &ks.root);
- ret = do_statmount(&ks);
- path_put(&ks.root);
- up_read(&namespace_sem);
+ scoped_guard(rwsem_read, &namespace_sem)
+ ret = do_statmount(ks, kreq.mnt_id, kreq.mnt_ns_id, ns);
if (!ret)
- ret = copy_statmount_to_user(&ks);
- kvfree(ks.seq.buf);
+ ret = copy_statmount_to_user(ks);
+ kvfree(ks->seq.buf);
if (retry_statmount(ret, &seq_size))
goto retry;
return ret;
}
-static struct mount *listmnt_next(struct mount *curr)
+static ssize_t do_listmount(struct mnt_namespace *ns, u64 mnt_parent_id,
+ u64 last_mnt_id, u64 *mnt_ids, size_t nr_mnt_ids,
+ bool reverse)
{
- return node_to_mount(rb_next(&curr->mnt_node));
-}
-
-static ssize_t do_listmount(struct mount *first, struct path *orig,
- u64 mnt_parent_id, u64 __user *mnt_ids,
- size_t nr_mnt_ids, const struct path *root)
-{
- struct mount *r;
+ struct path root __free(path_put) = {};
+ struct path orig;
+ struct mount *r, *first;
ssize_t ret;
+ rwsem_assert_held(&namespace_sem);
+
+ ret = grab_requested_root(ns, &root);
+ if (ret)
+ return ret;
+
+ if (mnt_parent_id == LSMT_ROOT) {
+ orig = root;
+ } else {
+ orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns);
+ if (!orig.mnt)
+ return -ENOENT;
+ orig.dentry = orig.mnt->mnt_root;
+ }
+
/*
* Don't trigger audit denials. We just want to determine what
* mounts to show users.
*/
- if (!is_path_reachable(real_mount(orig->mnt), orig->dentry, root) &&
- !ns_capable_noaudit(&init_user_ns, CAP_SYS_ADMIN))
+ if (!is_path_reachable(real_mount(orig.mnt), orig.dentry, &root) &&
+ !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
return -EPERM;
- ret = security_sb_statfs(orig->dentry);
+ ret = security_sb_statfs(orig.dentry);
if (ret)
return ret;
- for (ret = 0, r = first; r && nr_mnt_ids; r = listmnt_next(r)) {
+ if (!last_mnt_id) {
+ if (reverse)
+ first = node_to_mount(rb_last(&ns->mounts));
+ else
+ first = node_to_mount(rb_first(&ns->mounts));
+ } else {
+ if (reverse)
+ first = mnt_find_id_at_reverse(ns, last_mnt_id - 1);
+ else
+ first = mnt_find_id_at(ns, last_mnt_id + 1);
+ }
+
+ for (ret = 0, r = first; r && nr_mnt_ids; r = listmnt_next(r, reverse)) {
if (r->mnt_id_unique == mnt_parent_id)
continue;
- if (!is_path_reachable(r, r->mnt.mnt_root, orig))
+ if (!is_path_reachable(r, r->mnt.mnt_root, &orig))
continue;
- if (put_user(r->mnt_id_unique, mnt_ids))
- return -EFAULT;
+ *mnt_ids = r->mnt_id_unique;
mnt_ids++;
nr_mnt_ids--;
ret++;
@@ -5070,22 +5358,26 @@ static ssize_t do_listmount(struct mount *first, struct path *orig,
return ret;
}
-SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, u64 __user *,
- mnt_ids, size_t, nr_mnt_ids, unsigned int, flags)
+SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req,
+ u64 __user *, mnt_ids, size_t, nr_mnt_ids, unsigned int, flags)
{
- struct mnt_namespace *ns = current->nsproxy->mnt_ns;
+ u64 *kmnt_ids __free(kvfree) = NULL;
+ const size_t maxcount = 1000000;
+ struct mnt_namespace *ns __free(mnt_ns_release) = NULL;
struct mnt_id_req kreq;
- struct mount *first;
- struct path root, orig;
- u64 mnt_parent_id, last_mnt_id;
- const size_t maxcount = (size_t)-1 >> 3;
+ u64 last_mnt_id;
ssize_t ret;
- if (flags)
+ if (flags & ~LISTMOUNT_REVERSE)
return -EINVAL;
+ /*
+ * If the mount namespace really has more than 1 million mounts the
+ * caller must iterate over the mount namespace (and reconsider their
+ * system design...).
+ */
if (unlikely(nr_mnt_ids > maxcount))
- return -EFAULT;
+ return -EOVERFLOW;
if (!access_ok(mnt_ids, nr_mnt_ids * sizeof(*mnt_ids)))
return -EFAULT;
@@ -5093,33 +5385,37 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, u64 __user *,
ret = copy_mnt_id_req(req, &kreq);
if (ret)
return ret;
- mnt_parent_id = kreq.mnt_id;
+
last_mnt_id = kreq.param;
+ /* The first valid unique mount id is MNT_UNIQUE_ID_OFFSET + 1. */
+ if (last_mnt_id != 0 && last_mnt_id <= MNT_UNIQUE_ID_OFFSET)
+ return -EINVAL;
- down_read(&namespace_sem);
- get_fs_root(current->fs, &root);
- if (mnt_parent_id == LSMT_ROOT) {
- orig = root;
- } else {
- ret = -ENOENT;
- orig.mnt = lookup_mnt_in_ns(mnt_parent_id, ns);
- if (!orig.mnt)
- goto err;
- orig.dentry = orig.mnt->mnt_root;
- }
- if (!last_mnt_id)
- first = node_to_mount(rb_first(&ns->mounts));
- else
- first = mnt_find_id_at(ns, last_mnt_id + 1);
+ kmnt_ids = kvmalloc_array(nr_mnt_ids, sizeof(*kmnt_ids),
+ GFP_KERNEL_ACCOUNT);
+ if (!kmnt_ids)
+ return -ENOMEM;
+
+ ns = grab_requested_mnt_ns(kreq.mnt_ns_id);
+ if (!ns)
+ return -ENOENT;
+
+ if (kreq.mnt_ns_id && (ns != current->nsproxy->mnt_ns) &&
+ !ns_capable_noaudit(ns->user_ns, CAP_SYS_ADMIN))
+ return -ENOENT;
+
+ scoped_guard(rwsem_read, &namespace_sem)
+ ret = do_listmount(ns, kreq.mnt_id, last_mnt_id, kmnt_ids,
+ nr_mnt_ids, (flags & LISTMOUNT_REVERSE));
+ if (ret <= 0)
+ return ret;
+
+ if (copy_to_user(mnt_ids, kmnt_ids, ret * sizeof(*mnt_ids)))
+ return -EFAULT;
- ret = do_listmount(first, &orig, mnt_parent_id, mnt_ids, nr_mnt_ids, &root);
-err:
- path_put(&root);
- up_read(&namespace_sem);
return ret;
}
-
static void __init init_mount_tree(void)
{
struct vfsmount *mnt;
@@ -5147,6 +5443,8 @@ static void __init init_mount_tree(void)
set_fs_pwd(current->fs, &root);
set_fs_root(current->fs, &root);
+
+ mnt_ns_tree_add(ns);
}
void __init mnt_init(void)
diff --git a/fs/nsfs.c b/fs/nsfs.c
index 07e22a15ef02..af352dadffe1 100644
--- a/fs/nsfs.c
+++ b/fs/nsfs.c
@@ -12,6 +12,7 @@
#include <linux/nsfs.h>
#include <linux/uaccess.h>
+#include "mount.h"
#include "internal.h"
static struct vfsmount *nsfs_mnt;
@@ -143,6 +144,19 @@ static long ns_ioctl(struct file *filp, unsigned int ioctl,
argp = (uid_t __user *) arg;
uid = from_kuid_munged(current_user_ns(), user_ns->owner);
return put_user(uid, argp);
+ case NS_GET_MNTNS_ID: {
+ struct mnt_namespace *mnt_ns;
+ __u64 __user *idp;
+ __u64 id;
+
+ if (ns->ops->type != CLONE_NEWNS)
+ return -EINVAL;
+
+ mnt_ns = container_of(ns, struct mnt_namespace, ns);
+ idp = (__u64 __user *)arg;
+ id = mnt_ns->seq;
+ return put_user(id, idp);
+ }
default:
return -ENOTTY;
}
diff --git a/fs/proc_namespace.c b/fs/proc_namespace.c
index 0a808951b7d3..e133b507ddf3 100644
--- a/fs/proc_namespace.c
+++ b/fs/proc_namespace.c
@@ -61,7 +61,7 @@ static int show_sb_opts(struct seq_file *m, struct super_block *sb)
return security_sb_show_options(m, sb);
}
-static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
+static void show_vfsmnt_opts(struct seq_file *m, struct vfsmount *mnt)
{
static const struct proc_fs_opts mnt_opts[] = {
{ MNT_NOSUID, ",nosuid" },
@@ -124,7 +124,7 @@ static int show_vfsmnt(struct seq_file *m, struct vfsmount *mnt)
err = show_sb_opts(m, sb);
if (err)
goto out;
- show_mnt_opts(m, mnt);
+ show_vfsmnt_opts(m, mnt);
if (sb->s_op->show_options)
err = sb->s_op->show_options(m, mnt_path.dentry);
seq_puts(m, " 0 0\n");
@@ -153,7 +153,7 @@ static int show_mountinfo(struct seq_file *m, struct vfsmount *mnt)
goto out;
seq_puts(m, mnt->mnt_flags & MNT_READONLY ? " ro" : " rw");
- show_mnt_opts(m, mnt);
+ show_vfsmnt_opts(m, mnt);
/* Tagged fields ("foo:X" or "bar") */
if (IS_MNT_SHARED(r))
diff --git a/include/linux/path.h b/include/linux/path.h
index 475225a03d0d..ca073e70decd 100644
--- a/include/linux/path.h
+++ b/include/linux/path.h
@@ -24,4 +24,13 @@ static inline void path_put_init(struct path *path)
*path = (struct path) { };
}
+/*
+ * Cleanup macro for use with __free(path_put). Avoids dereference and
+ * copying @path unlike DEFINE_FREE(). path_put() will handle the empty
+ * path correctly just ensure @path is initialized:
+ *
+ * struct path path __free(path_put) = {};
+ */
+#define __free_path_put path_put
+
#endif /* _LINUX_PATH_H */
diff --git a/include/uapi/linux/mount.h b/include/uapi/linux/mount.h
index ad5478dbad00..225bc366ffcb 100644
--- a/include/uapi/linux/mount.h
+++ b/include/uapi/linux/mount.h
@@ -154,7 +154,7 @@ struct mount_attr {
*/
struct statmount {
__u32 size; /* Total size, including strings */
- __u32 __spare1;
+ __u32 mnt_opts; /* [str] Mount options of the mount */
__u64 mask; /* What results were written */
__u32 sb_dev_major; /* Device ID */
__u32 sb_dev_minor;
@@ -172,7 +172,8 @@ struct statmount {
__u64 propagate_from; /* Propagation from in current namespace */
__u32 mnt_root; /* [str] Root of mount relative to root of fs */
__u32 mnt_point; /* [str] Mountpoint relative to current root */
- __u64 __spare2[50];
+ __u64 mnt_ns_id; /* ID of the mount namespace */
+ __u64 __spare2[49];
char str[]; /* Variable size part containing strings */
};
@@ -188,10 +189,12 @@ struct mnt_id_req {
__u32 spare;
__u64 mnt_id;
__u64 param;
+ __u64 mnt_ns_id;
};
/* List of all mnt_id_req versions. */
#define MNT_ID_REQ_SIZE_VER0 24 /* sizeof first published struct */
+#define MNT_ID_REQ_SIZE_VER1 32 /* sizeof second published struct */
/*
* @mask bits for statmount(2)
@@ -202,10 +205,13 @@ struct mnt_id_req {
#define STATMOUNT_MNT_ROOT 0x00000008U /* Want/got mnt_root */
#define STATMOUNT_MNT_POINT 0x00000010U /* Want/got mnt_point */
#define STATMOUNT_FS_TYPE 0x00000020U /* Want/got fs_type */
+#define STATMOUNT_MNT_NS_ID 0x00000040U /* Want/got mnt_ns_id */
+#define STATMOUNT_MNT_OPTS 0x00000080U /* Want/got mnt_opts */
/*
* Special @mnt_id values that can be passed to listmount
*/
#define LSMT_ROOT 0xffffffffffffffff /* root mount */
+#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
#endif /* _UAPI_LINUX_MOUNT_H */
diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h
index a0c8552b64ee..56e8b1639b98 100644
--- a/include/uapi/linux/nsfs.h
+++ b/include/uapi/linux/nsfs.h
@@ -15,5 +15,7 @@
#define NS_GET_NSTYPE _IO(NSIO, 0x3)
/* Get owner UID (in the caller's user namespace) for a user namespace */
#define NS_GET_OWNER_UID _IO(NSIO, 0x4)
+/* Get the id for a mount namespace */
+#define NS_GET_MNTNS_ID _IO(NSIO, 0x5)
#endif /* __LINUX_NSFS_H */
diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile
index 07a0d5b545ca..3af3136e35a4 100644
--- a/tools/testing/selftests/filesystems/statmount/Makefile
+++ b/tools/testing/selftests/filesystems/statmount/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-or-later
CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
-TEST_GEN_PROGS := statmount_test
+TEST_GEN_PROGS := statmount_test statmount_test_ns
include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/statmount/statmount.h b/tools/testing/selftests/filesystems/statmount/statmount.h
new file mode 100644
index 000000000000..f4294bab9d73
--- /dev/null
+++ b/tools/testing/selftests/filesystems/statmount/statmount.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __STATMOUNT_H
+#define __STATMOUNT_H
+
+#include <stdint.h>
+#include <linux/mount.h>
+#include <asm/unistd.h>
+
+static inline int statmount(uint64_t mnt_id, uint64_t mnt_ns_id, uint64_t mask,
+ struct statmount *buf, size_t bufsize,
+ unsigned int flags)
+{
+ struct mnt_id_req req = {
+ .size = MNT_ID_REQ_SIZE_VER0,
+ .mnt_id = mnt_id,
+ .param = mask,
+ };
+
+ if (mnt_ns_id) {
+ req.size = MNT_ID_REQ_SIZE_VER1;
+ req.mnt_ns_id = mnt_ns_id;
+ }
+
+ return syscall(__NR_statmount, &req, buf, bufsize, flags);
+}
+
+static ssize_t listmount(uint64_t mnt_id, uint64_t mnt_ns_id,
+ uint64_t last_mnt_id, uint64_t list[], size_t num,
+ unsigned int flags)
+{
+ struct mnt_id_req req = {
+ .size = MNT_ID_REQ_SIZE_VER0,
+ .mnt_id = mnt_id,
+ .param = last_mnt_id,
+ };
+
+ if (mnt_ns_id) {
+ req.size = MNT_ID_REQ_SIZE_VER1;
+ req.mnt_ns_id = mnt_ns_id;
+ }
+
+ return syscall(__NR_listmount, &req, list, num, flags);
+}
+
+#endif /* __STATMOUNT_H */
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c
index e8c019d72cbf..c773334bbcc9 100644
--- a/tools/testing/selftests/filesystems/statmount/statmount_test.c
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c
@@ -4,17 +4,15 @@
#include <assert.h>
#include <stddef.h>
-#include <stdint.h>
#include <sched.h>
#include <fcntl.h>
#include <sys/param.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/statfs.h>
-#include <linux/mount.h>
#include <linux/stat.h>
-#include <asm/unistd.h>
+#include "statmount.h"
#include "../../kselftest.h"
static const char *const known_fs[] = {
@@ -36,18 +34,6 @@ static const char *const known_fs[] = {
"ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs",
"zonefs", NULL };
-static int statmount(uint64_t mnt_id, uint64_t mask, struct statmount *buf,
- size_t bufsize, unsigned int flags)
-{
- struct mnt_id_req req = {
- .size = MNT_ID_REQ_SIZE_VER0,
- .mnt_id = mnt_id,
- .param = mask,
- };
-
- return syscall(__NR_statmount, &req, buf, bufsize, flags);
-}
-
static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags)
{
size_t bufsize = 1 << 15;
@@ -56,7 +42,7 @@ static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigne
int ret;
for (;;) {
- ret = statmount(mnt_id, mask, tmp, bufsize, flags);
+ ret = statmount(mnt_id, 0, mask, tmp, bufsize, flags);
if (ret != -1)
break;
if (tofree)
@@ -121,7 +107,7 @@ static char root_mntpoint[] = "/tmp/statmount_test_root.XXXXXX";
static int orig_root;
static uint64_t root_id, parent_id;
static uint32_t old_root_id, old_parent_id;
-
+static FILE *f_mountinfo;
static void cleanup_namespace(void)
{
@@ -146,7 +132,7 @@ static void setup_namespace(void)
uid_t uid = getuid();
gid_t gid = getgid();
- ret = unshare(CLONE_NEWNS|CLONE_NEWUSER);
+ ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
if (ret == -1)
ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
strerror(errno));
@@ -157,6 +143,11 @@ static void setup_namespace(void)
sprintf(buf, "0 %d 1", gid);
write_file("/proc/self/gid_map", buf);
+ f_mountinfo = fopen("/proc/self/mountinfo", "re");
+ if (!f_mountinfo)
+ ksft_exit_fail_msg("failed to open mountinfo: %s\n",
+ strerror(errno));
+
ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
if (ret == -1)
ksft_exit_fail_msg("making mount tree private: %s\n",
@@ -216,25 +207,13 @@ static int setup_mount_tree(int log2_num)
return 0;
}
-static ssize_t listmount(uint64_t mnt_id, uint64_t last_mnt_id,
- uint64_t list[], size_t num, unsigned int flags)
-{
- struct mnt_id_req req = {
- .size = MNT_ID_REQ_SIZE_VER0,
- .mnt_id = mnt_id,
- .param = last_mnt_id,
- };
-
- return syscall(__NR_listmount, &req, list, num, flags);
-}
-
static void test_listmount_empty_root(void)
{
ssize_t res;
const unsigned int size = 32;
uint64_t list[size];
- res = listmount(LSMT_ROOT, 0, list, size, 0);
+ res = listmount(LSMT_ROOT, 0, 0, list, size, 0);
if (res == -1) {
ksft_test_result_fail("listmount: %s\n", strerror(errno));
return;
@@ -259,7 +238,7 @@ static void test_statmount_zero_mask(void)
struct statmount sm;
int ret;
- ret = statmount(root_id, 0, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, 0, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount zero mask: %s\n",
strerror(errno));
@@ -285,7 +264,7 @@ static void test_statmount_mnt_basic(void)
int ret;
uint64_t mask = STATMOUNT_MNT_BASIC;
- ret = statmount(root_id, mask, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount mnt basic: %s\n",
strerror(errno));
@@ -345,7 +324,7 @@ static void test_statmount_sb_basic(void)
struct statx sx;
struct statfs sf;
- ret = statmount(root_id, mask, &sm, sizeof(sm), 0);
+ ret = statmount(root_id, 0, mask, &sm, sizeof(sm), 0);
if (ret == -1) {
ksft_test_result_fail("statmount sb basic: %s\n",
strerror(errno));
@@ -470,6 +449,88 @@ static void test_statmount_fs_type(void)
free(sm);
}
+static void test_statmount_mnt_opts(void)
+{
+ struct statmount *sm;
+ const char *statmount_opts;
+ char *line = NULL;
+ size_t len = 0;
+
+ sm = statmount_alloc(root_id, STATMOUNT_MNT_BASIC | STATMOUNT_MNT_OPTS,
+ 0);
+ if (!sm) {
+ ksft_test_result_fail("statmount mnt opts: %s\n",
+ strerror(errno));
+ return;
+ }
+
+ while (getline(&line, &len, f_mountinfo) != -1) {
+ int i;
+ char *p, *p2;
+ unsigned int old_mnt_id;
+
+ old_mnt_id = atoi(line);
+ if (old_mnt_id != sm->mnt_id_old)
+ continue;
+
+ for (p = line, i = 0; p && i < 5; i++)
+ p = strchr(p + 1, ' ');
+ if (!p)
+ continue;
+
+ p2 = strchr(p + 1, ' ');
+ if (!p2)
+ continue;
+ *p2 = '\0';
+ p = strchr(p2 + 1, '-');
+ if (!p)
+ continue;
+ for (p++, i = 0; p && i < 2; i++)
+ p = strchr(p + 1, ' ');
+ if (!p)
+ continue;
+ p++;
+
+ /* skip generic superblock options */
+ if (strncmp(p, "ro", 2) == 0)
+ p += 2;
+ else if (strncmp(p, "rw", 2) == 0)
+ p += 2;
+ if (*p == ',')
+ p++;
+ if (strncmp(p, "sync", 4) == 0)
+ p += 4;
+ if (*p == ',')
+ p++;
+ if (strncmp(p, "dirsync", 7) == 0)
+ p += 7;
+ if (*p == ',')
+ p++;
+ if (strncmp(p, "lazytime", 8) == 0)
+ p += 8;
+ if (*p == ',')
+ p++;
+ p2 = strrchr(p, '\n');
+ if (p2)
+ *p2 = '\0';
+
+ statmount_opts = sm->str + sm->mnt_opts;
+ if (strcmp(statmount_opts, p) != 0)
+ ksft_test_result_fail(
+ "unexpected mount options: '%s' != '%s'\n",
+ statmount_opts, p);
+ else
+ ksft_test_result_pass("statmount mount options\n");
+ free(sm);
+ free(line);
+ return;
+ }
+
+ ksft_test_result_fail("didnt't find mount entry\n");
+ free(sm);
+ free(line);
+}
+
static void test_statmount_string(uint64_t mask, size_t off, const char *name)
{
struct statmount *sm;
@@ -506,14 +567,14 @@ static void test_statmount_string(uint64_t mask, size_t off, const char *name)
exactsize = sm->size;
shortsize = sizeof(*sm) + i;
- ret = statmount(root_id, mask, sm, exactsize, 0);
+ ret = statmount(root_id, 0, mask, sm, exactsize, 0);
if (ret == -1) {
ksft_test_result_fail("statmount exact size: %s\n",
strerror(errno));
goto out;
}
errno = 0;
- ret = statmount(root_id, mask, sm, shortsize, 0);
+ ret = statmount(root_id, 0, mask, sm, shortsize, 0);
if (ret != -1 || errno != EOVERFLOW) {
ksft_test_result_fail("should have failed with EOVERFLOW: %s\n",
strerror(errno));
@@ -541,7 +602,7 @@ static void test_listmount_tree(void)
if (res == -1)
return;
- num = res = listmount(LSMT_ROOT, 0, list, size, 0);
+ num = res = listmount(LSMT_ROOT, 0, 0, list, size, 0);
if (res == -1) {
ksft_test_result_fail("listmount: %s\n", strerror(errno));
return;
@@ -553,7 +614,7 @@ static void test_listmount_tree(void)
}
for (i = 0; i < size - step;) {
- res = listmount(LSMT_ROOT, i ? list2[i - 1] : 0, list2 + i, step, 0);
+ res = listmount(LSMT_ROOT, 0, i ? list2[i - 1] : 0, list2 + i, step, 0);
if (res == -1)
ksft_test_result_fail("short listmount: %s\n",
strerror(errno));
@@ -585,18 +646,18 @@ int main(void)
int ret;
uint64_t all_mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC |
STATMOUNT_PROPAGATE_FROM | STATMOUNT_MNT_ROOT |
- STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE;
+ STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE | STATMOUNT_MNT_NS_ID;
ksft_print_header();
- ret = statmount(0, 0, NULL, 0, 0);
+ ret = statmount(0, 0, 0, NULL, 0, 0);
assert(ret == -1);
if (errno == ENOSYS)
ksft_exit_skip("statmount() syscall not supported\n");
setup_namespace();
- ksft_set_plan(14);
+ ksft_set_plan(15);
test_listmount_empty_root();
test_statmount_zero_mask();
test_statmount_mnt_basic();
@@ -604,6 +665,7 @@ int main(void)
test_statmount_mnt_root();
test_statmount_mnt_point();
test_statmount_fs_type();
+ test_statmount_mnt_opts();
test_statmount_string(STATMOUNT_MNT_ROOT, str_off(mnt_root), "mount root");
test_statmount_string(STATMOUNT_MNT_POINT, str_off(mnt_point), "mount point");
test_statmount_string(STATMOUNT_FS_TYPE, str_off(fs_type), "fs type");
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
new file mode 100644
index 000000000000..e044f5fc57fd
--- /dev/null
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test_ns.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <linux/nsfs.h>
+#include <linux/stat.h>
+
+#include "statmount.h"
+#include "../../kselftest.h"
+
+#define NSID_PASS 0
+#define NSID_FAIL 1
+#define NSID_SKIP 2
+#define NSID_ERROR 3
+
+static void handle_result(int ret, const char *testname)
+{
+ if (ret == NSID_PASS)
+ ksft_test_result_pass("%s\n", testname);
+ else if (ret == NSID_FAIL)
+ ksft_test_result_fail("%s\n", testname);
+ else if (ret == NSID_ERROR)
+ ksft_exit_fail_msg("%s\n", testname);
+ else
+ ksft_test_result_skip("%s\n", testname);
+}
+
+static inline int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ ksft_print_msg("waitpid returned -1, errno=%d\n", errno);
+ return -1;
+ }
+
+ if (!WIFEXITED(status)) {
+ ksft_print_msg(
+ "waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n",
+ WIFSIGNALED(status), WTERMSIG(status));
+ return -1;
+ }
+
+ ret = WEXITSTATUS(status);
+ return ret;
+}
+
+static int get_mnt_ns_id(const char *mnt_ns, uint64_t *mnt_ns_id)
+{
+ int fd = open(mnt_ns, O_RDONLY);
+
+ if (fd < 0) {
+ ksft_print_msg("failed to open for ns %s: %s\n",
+ mnt_ns, strerror(errno));
+ sleep(60);
+ return NSID_ERROR;
+ }
+
+ if (ioctl(fd, NS_GET_MNTNS_ID, mnt_ns_id) < 0) {
+ ksft_print_msg("failed to get the nsid for ns %s: %s\n",
+ mnt_ns, strerror(errno));
+ return NSID_ERROR;
+ }
+ close(fd);
+ return NSID_PASS;
+}
+
+static int get_mnt_id(const char *path, uint64_t *mnt_id)
+{
+ struct statx sx;
+ int ret;
+
+ ret = statx(AT_FDCWD, path, 0, STATX_MNT_ID_UNIQUE, &sx);
+ if (ret == -1) {
+ ksft_print_msg("retrieving unique mount ID for %s: %s\n", path,
+ strerror(errno));
+ return NSID_ERROR;
+ }
+
+ if (!(sx.stx_mask & STATX_MNT_ID_UNIQUE)) {
+ ksft_print_msg("no unique mount ID available for %s\n", path);
+ return NSID_ERROR;
+ }
+
+ *mnt_id = sx.stx_mnt_id;
+ return NSID_PASS;
+}
+
+static int write_file(const char *path, const char *val)
+{
+ int fd = open(path, O_WRONLY);
+ size_t len = strlen(val);
+ int ret;
+
+ if (fd == -1) {
+ ksft_print_msg("opening %s for write: %s\n", path, strerror(errno));
+ return NSID_ERROR;
+ }
+
+ ret = write(fd, val, len);
+ if (ret == -1) {
+ ksft_print_msg("writing to %s: %s\n", path, strerror(errno));
+ return NSID_ERROR;
+ }
+ if (ret != len) {
+ ksft_print_msg("short write to %s\n", path);
+ return NSID_ERROR;
+ }
+
+ ret = close(fd);
+ if (ret == -1) {
+ ksft_print_msg("closing %s\n", path);
+ return NSID_ERROR;
+ }
+
+ return NSID_PASS;
+}
+
+static int setup_namespace(void)
+{
+ int ret;
+ char buf[32];
+ uid_t uid = getuid();
+ gid_t gid = getgid();
+
+ ret = unshare(CLONE_NEWNS|CLONE_NEWUSER|CLONE_NEWPID);
+ if (ret == -1)
+ ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
+ strerror(errno));
+
+ sprintf(buf, "0 %d 1", uid);
+ ret = write_file("/proc/self/uid_map", buf);
+ if (ret != NSID_PASS)
+ return ret;
+ ret = write_file("/proc/self/setgroups", "deny");
+ if (ret != NSID_PASS)
+ return ret;
+ sprintf(buf, "0 %d 1", gid);
+ ret = write_file("/proc/self/gid_map", buf);
+ if (ret != NSID_PASS)
+ return ret;
+
+ ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ if (ret == -1) {
+ ksft_print_msg("making mount tree private: %s\n",
+ strerror(errno));
+ return NSID_ERROR;
+ }
+
+ return NSID_PASS;
+}
+
+static int _test_statmount_mnt_ns_id(void)
+{
+ struct statmount sm;
+ uint64_t mnt_ns_id;
+ uint64_t root_id;
+ int ret;
+
+ ret = get_mnt_ns_id("/proc/self/ns/mnt", &mnt_ns_id);
+ if (ret != NSID_PASS)
+ return ret;
+
+ ret = get_mnt_id("/", &root_id);
+ if (ret != NSID_PASS)
+ return ret;
+
+ ret = statmount(root_id, 0, STATMOUNT_MNT_NS_ID, &sm, sizeof(sm), 0);
+ if (ret == -1) {
+ ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
+ return NSID_ERROR;
+ }
+
+ if (sm.size != sizeof(sm)) {
+ ksft_print_msg("unexpected size: %u != %u\n", sm.size,
+ (uint32_t)sizeof(sm));
+ return NSID_FAIL;
+ }
+ if (sm.mask != STATMOUNT_MNT_NS_ID) {
+ ksft_print_msg("statmount mnt ns id unavailable\n");
+ return NSID_SKIP;
+ }
+
+ if (sm.mnt_ns_id != mnt_ns_id) {
+ ksft_print_msg("unexpected mnt ns ID: 0x%llx != 0x%llx\n",
+ (unsigned long long)sm.mnt_ns_id,
+ (unsigned long long)mnt_ns_id);
+ return NSID_FAIL;
+ }
+
+ return NSID_PASS;
+}
+
+static void test_statmount_mnt_ns_id(void)
+{
+ pid_t pid;
+ int ret;
+
+ pid = fork();
+ if (pid < 0)
+ ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno));
+
+ /* We're the original pid, wait for the result. */
+ if (pid != 0) {
+ ret = wait_for_pid(pid);
+ handle_result(ret, "test statmount ns id");
+ return;
+ }
+
+ ret = setup_namespace();
+ if (ret != NSID_PASS)
+ exit(ret);
+ ret = _test_statmount_mnt_ns_id();
+ exit(ret);
+}
+
+static int validate_external_listmount(pid_t pid, uint64_t child_nr_mounts)
+{
+ uint64_t list[256];
+ uint64_t mnt_ns_id;
+ uint64_t nr_mounts;
+ char buf[256];
+ int ret;
+
+ /* Get the mount ns id for our child. */
+ snprintf(buf, sizeof(buf), "/proc/%lu/ns/mnt", (unsigned long)pid);
+ ret = get_mnt_ns_id(buf, &mnt_ns_id);
+
+ nr_mounts = listmount(LSMT_ROOT, mnt_ns_id, 0, list, 256, 0);
+ if (nr_mounts == (uint64_t)-1) {
+ ksft_print_msg("listmount: %s\n", strerror(errno));
+ return NSID_ERROR;
+ }
+
+ if (nr_mounts != child_nr_mounts) {
+ ksft_print_msg("listmount results is %zi != %zi\n", nr_mounts,
+ child_nr_mounts);
+ return NSID_FAIL;
+ }
+
+ /* Validate that all of our entries match our mnt_ns_id. */
+ for (int i = 0; i < nr_mounts; i++) {
+ struct statmount sm;
+
+ ret = statmount(list[i], mnt_ns_id, STATMOUNT_MNT_NS_ID, &sm,
+ sizeof(sm), 0);
+ if (ret < 0) {
+ ksft_print_msg("statmount mnt ns id: %s\n", strerror(errno));
+ return NSID_ERROR;
+ }
+
+ if (sm.mask != STATMOUNT_MNT_NS_ID) {
+ ksft_print_msg("statmount mnt ns id unavailable\n");
+ return NSID_SKIP;
+ }
+
+ if (sm.mnt_ns_id != mnt_ns_id) {
+ ksft_print_msg("listmount gave us the wrong ns id: 0x%llx != 0x%llx\n",
+ (unsigned long long)sm.mnt_ns_id,
+ (unsigned long long)mnt_ns_id);
+ return NSID_FAIL;
+ }
+ }
+
+ return NSID_PASS;
+}
+
+static void test_listmount_ns(void)
+{
+ uint64_t nr_mounts;
+ char pval;
+ int child_ready_pipe[2];
+ int parent_ready_pipe[2];
+ pid_t pid;
+ int ret, child_ret;
+
+ if (pipe(child_ready_pipe) < 0)
+ ksft_exit_fail_msg("failed to create the child pipe: %s\n",
+ strerror(errno));
+ if (pipe(parent_ready_pipe) < 0)
+ ksft_exit_fail_msg("failed to create the parent pipe: %s\n",
+ strerror(errno));
+
+ pid = fork();
+ if (pid < 0)
+ ksft_exit_fail_msg("failed to fork: %s\n", strerror(errno));
+
+ if (pid == 0) {
+ char cval;
+ uint64_t list[256];
+
+ close(child_ready_pipe[0]);
+ close(parent_ready_pipe[1]);
+
+ ret = setup_namespace();
+ if (ret != NSID_PASS)
+ exit(ret);
+
+ nr_mounts = listmount(LSMT_ROOT, 0, 0, list, 256, 0);
+ if (nr_mounts == (uint64_t)-1) {
+ ksft_print_msg("listmount: %s\n", strerror(errno));
+ exit(NSID_FAIL);
+ }
+
+ /*
+ * Tell our parent how many mounts we have, and then wait for it
+ * to tell us we're done.
+ */
+ write(child_ready_pipe[1], &nr_mounts, sizeof(nr_mounts));
+ read(parent_ready_pipe[0], &cval, sizeof(cval));
+ exit(NSID_PASS);
+ }
+
+ close(child_ready_pipe[1]);
+ close(parent_ready_pipe[0]);
+
+ /* Wait until the child has created everything. */
+ if (read(child_ready_pipe[0], &nr_mounts, sizeof(nr_mounts)) !=
+ sizeof(nr_mounts))
+ ret = NSID_ERROR;
+
+ ret = validate_external_listmount(pid, nr_mounts);
+
+ if (write(parent_ready_pipe[1], &pval, sizeof(pval)) != sizeof(pval))
+ ret = NSID_ERROR;
+
+ child_ret = wait_for_pid(pid);
+ if (child_ret != NSID_PASS)
+ ret = child_ret;
+ handle_result(ret, "test listmount ns id");
+}
+
+int main(void)
+{
+ int ret;
+
+ ksft_print_header();
+ ret = statmount(0, 0, 0, NULL, 0, 0);
+ assert(ret == -1);
+ if (errno == ENOSYS)
+ ksft_exit_skip("statmount() syscall not supported\n");
+
+ ksft_set_plan(2);
+ test_statmount_mnt_ns_id();
+ test_listmount_ns();
+
+ if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+}