diff options
Diffstat (limited to 'security/landlock/fs.c')
-rw-r--r-- | security/landlock/fs.c | 815 |
1 files changed, 663 insertions, 152 deletions
diff --git a/security/landlock/fs.c b/security/landlock/fs.c index 97b8e421f617..ec5a6247cd3e 100644 --- a/security/landlock/fs.c +++ b/security/landlock/fs.c @@ -4,6 +4,7 @@ * * Copyright © 2016-2020 Mickaël Salaün <mic@digikod.net> * Copyright © 2018-2020 ANSSI + * Copyright © 2021-2022 Microsoft Corporation */ #include <linux/atomic.h> @@ -141,23 +142,26 @@ retry: } /* All access rights that can be tied to files. */ +/* clang-format off */ #define ACCESS_FILE ( \ LANDLOCK_ACCESS_FS_EXECUTE | \ LANDLOCK_ACCESS_FS_WRITE_FILE | \ LANDLOCK_ACCESS_FS_READ_FILE) +/* clang-format on */ /* * @path: Should have been checked by get_path_from_fd(). */ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset, - const struct path *const path, u32 access_rights) + const struct path *const path, + access_mask_t access_rights) { int err; struct landlock_object *object; /* Files only get access rights that make sense. */ - if (!d_is_dir(path->dentry) && (access_rights | ACCESS_FILE) != - ACCESS_FILE) + if (!d_is_dir(path->dentry) && + (access_rights | ACCESS_FILE) != ACCESS_FILE) return -EINVAL; if (WARN_ON_ONCE(ruleset->num_layers != 1)) return -EINVAL; @@ -180,84 +184,352 @@ int landlock_append_fs_rule(struct landlock_ruleset *const ruleset, /* Access-control management */ -static inline u64 unmask_layers( - const struct landlock_ruleset *const domain, - const struct path *const path, const u32 access_request, - u64 layer_mask) +/* + * The lifetime of the returned rule is tied to @domain. + * + * Returns NULL if no rule is found or if @dentry is negative. + */ +static inline const struct landlock_rule * +find_rule(const struct landlock_ruleset *const domain, + const struct dentry *const dentry) { const struct landlock_rule *rule; const struct inode *inode; - size_t i; - if (d_is_negative(path->dentry)) - /* Ignore nonexistent leafs. */ - return layer_mask; - inode = d_backing_inode(path->dentry); + /* Ignores nonexistent leafs. */ + if (d_is_negative(dentry)) + return NULL; + + inode = d_backing_inode(dentry); rcu_read_lock(); - rule = landlock_find_rule(domain, - rcu_dereference(landlock_inode(inode)->object)); + rule = landlock_find_rule( + domain, rcu_dereference(landlock_inode(inode)->object)); rcu_read_unlock(); + return rule; +} + +/* + * @layer_masks is read and may be updated according to the access request and + * the matching rule. + * + * Returns true if the request is allowed (i.e. relevant layer masks for the + * request are empty). + */ +static inline bool +unmask_layers(const struct landlock_rule *const rule, + const access_mask_t access_request, + layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS]) +{ + size_t layer_level; + + if (!access_request || !layer_masks) + return true; if (!rule) - return layer_mask; + return false; /* * An access is granted if, for each policy layer, at least one rule - * encountered on the pathwalk grants the requested accesses, - * regardless of their position in the layer stack. We must then check + * encountered on the pathwalk grants the requested access, + * regardless of its position in the layer stack. We must then check * the remaining layers for each inode, from the first added layer to - * the last one. + * the last one. When there is multiple requested accesses, for each + * policy layer, the full set of requested accesses may not be granted + * by only one rule, but by the union (binary OR) of multiple rules. + * E.g. /a/b <execute> + /a <read> => /a/b <execute + read> */ - for (i = 0; i < rule->num_layers; i++) { - const struct landlock_layer *const layer = &rule->layers[i]; - const u64 layer_level = BIT_ULL(layer->level - 1); + for (layer_level = 0; layer_level < rule->num_layers; layer_level++) { + const struct landlock_layer *const layer = + &rule->layers[layer_level]; + const layer_mask_t layer_bit = BIT_ULL(layer->level - 1); + const unsigned long access_req = access_request; + unsigned long access_bit; + bool is_empty; - /* Checks that the layer grants access to the full request. */ - if ((layer->access & access_request) == access_request) { - layer_mask &= ~layer_level; - - if (layer_mask == 0) - return layer_mask; + /* + * Records in @layer_masks which layer grants access to each + * requested access. + */ + is_empty = true; + for_each_set_bit(access_bit, &access_req, + ARRAY_SIZE(*layer_masks)) { + if (layer->access & BIT_ULL(access_bit)) + (*layer_masks)[access_bit] &= ~layer_bit; + is_empty = is_empty && !(*layer_masks)[access_bit]; } + if (is_empty) + return true; } - return layer_mask; + return false; } -static int check_access_path(const struct landlock_ruleset *const domain, - const struct path *const path, u32 access_request) +/* + * Allows access to pseudo filesystems that will never be mountable (e.g. + * sockfs, pipefs), but can still be reachable through + * /proc/<pid>/fd/<file-descriptor> + */ +static inline bool is_nouser_or_private(const struct dentry *dentry) { - bool allowed = false; - struct path walker_path; - u64 layer_mask; - size_t i; + return (dentry->d_sb->s_flags & SB_NOUSER) || + (d_is_positive(dentry) && + unlikely(IS_PRIVATE(d_backing_inode(dentry)))); +} - /* Make sure all layers can be checked. */ - BUILD_BUG_ON(BITS_PER_TYPE(layer_mask) < LANDLOCK_MAX_NUM_LAYERS); +static inline access_mask_t +get_handled_accesses(const struct landlock_ruleset *const domain) +{ + access_mask_t access_dom = 0; + unsigned long access_bit; + + for (access_bit = 0; access_bit < LANDLOCK_NUM_ACCESS_FS; + access_bit++) { + size_t layer_level; + + for (layer_level = 0; layer_level < domain->num_layers; + layer_level++) { + if (domain->fs_access_masks[layer_level] & + BIT_ULL(access_bit)) { + access_dom |= BIT_ULL(access_bit); + break; + } + } + } + return access_dom; +} + +static inline access_mask_t +init_layer_masks(const struct landlock_ruleset *const domain, + const access_mask_t access_request, + layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS]) +{ + access_mask_t handled_accesses = 0; + size_t layer_level; + memset(layer_masks, 0, sizeof(*layer_masks)); + /* An empty access request can happen because of O_WRONLY | O_RDWR. */ if (!access_request) return 0; + + /* Saves all handled accesses per layer. */ + for (layer_level = 0; layer_level < domain->num_layers; layer_level++) { + const unsigned long access_req = access_request; + unsigned long access_bit; + + for_each_set_bit(access_bit, &access_req, + ARRAY_SIZE(*layer_masks)) { + if (domain->fs_access_masks[layer_level] & + BIT_ULL(access_bit)) { + (*layer_masks)[access_bit] |= + BIT_ULL(layer_level); + handled_accesses |= BIT_ULL(access_bit); + } + } + } + return handled_accesses; +} + +/* + * Check that a destination file hierarchy has more restrictions than a source + * file hierarchy. This is only used for link and rename actions. + * + * @layer_masks_child2: Optional child masks. + */ +static inline bool no_more_access( + const layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], + const layer_mask_t (*const layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS], + const bool child1_is_directory, + const layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], + const layer_mask_t (*const layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS], + const bool child2_is_directory) +{ + unsigned long access_bit; + + for (access_bit = 0; access_bit < ARRAY_SIZE(*layer_masks_parent2); + access_bit++) { + /* Ignores accesses that only make sense for directories. */ + const bool is_file_access = + !!(BIT_ULL(access_bit) & ACCESS_FILE); + + if (child1_is_directory || is_file_access) { + /* + * Checks if the destination restrictions are a + * superset of the source ones (i.e. inherited access + * rights without child exceptions): + * restrictions(parent2) >= restrictions(child1) + */ + if ((((*layer_masks_parent1)[access_bit] & + (*layer_masks_child1)[access_bit]) | + (*layer_masks_parent2)[access_bit]) != + (*layer_masks_parent2)[access_bit]) + return false; + } + + if (!layer_masks_child2) + continue; + if (child2_is_directory || is_file_access) { + /* + * Checks inverted restrictions for RENAME_EXCHANGE: + * restrictions(parent1) >= restrictions(child2) + */ + if ((((*layer_masks_parent2)[access_bit] & + (*layer_masks_child2)[access_bit]) | + (*layer_masks_parent1)[access_bit]) != + (*layer_masks_parent1)[access_bit]) + return false; + } + } + return true; +} + +/* + * Removes @layer_masks accesses that are not requested. + * + * Returns true if the request is allowed, false otherwise. + */ +static inline bool +scope_to_request(const access_mask_t access_request, + layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS]) +{ + const unsigned long access_req = access_request; + unsigned long access_bit; + + if (WARN_ON_ONCE(!layer_masks)) + return true; + + for_each_clear_bit(access_bit, &access_req, ARRAY_SIZE(*layer_masks)) + (*layer_masks)[access_bit] = 0; + return !memchr_inv(layer_masks, 0, sizeof(*layer_masks)); +} + +/* + * Returns true if there is at least one access right different than + * LANDLOCK_ACCESS_FS_REFER. + */ +static inline bool +is_eacces(const layer_mask_t (*const layer_masks)[LANDLOCK_NUM_ACCESS_FS], + const access_mask_t access_request) +{ + unsigned long access_bit; + /* LANDLOCK_ACCESS_FS_REFER alone must return -EXDEV. */ + const unsigned long access_check = access_request & + ~LANDLOCK_ACCESS_FS_REFER; + + if (!layer_masks) + return false; + + for_each_set_bit(access_bit, &access_check, ARRAY_SIZE(*layer_masks)) { + if ((*layer_masks)[access_bit]) + return true; + } + return false; +} + +/** + * check_access_path_dual - Check accesses for requests with a common path + * + * @domain: Domain to check against. + * @path: File hierarchy to walk through. + * @access_request_parent1: Accesses to check, once @layer_masks_parent1 is + * equal to @layer_masks_parent2 (if any). This is tied to the unique + * requested path for most actions, or the source in case of a refer action + * (i.e. rename or link), or the source and destination in case of + * RENAME_EXCHANGE. + * @layer_masks_parent1: Pointer to a matrix of layer masks per access + * masks, identifying the layers that forbid a specific access. Bits from + * this matrix can be unset according to the @path walk. An empty matrix + * means that @domain allows all possible Landlock accesses (i.e. not only + * those identified by @access_request_parent1). This matrix can + * initially refer to domain layer masks and, when the accesses for the + * destination and source are the same, to requested layer masks. + * @dentry_child1: Dentry to the initial child of the parent1 path. This + * pointer must be NULL for non-refer actions (i.e. not link nor rename). + * @access_request_parent2: Similar to @access_request_parent1 but for a + * request involving a source and a destination. This refers to the + * destination, except in case of RENAME_EXCHANGE where it also refers to + * the source. Must be set to 0 when using a simple path request. + * @layer_masks_parent2: Similar to @layer_masks_parent1 but for a refer + * action. This must be NULL otherwise. + * @dentry_child2: Dentry to the initial child of the parent2 path. This + * pointer is only set for RENAME_EXCHANGE actions and must be NULL + * otherwise. + * + * This helper first checks that the destination has a superset of restrictions + * compared to the source (if any) for a common path. Because of + * RENAME_EXCHANGE actions, source and destinations may be swapped. It then + * checks that the collected accesses and the remaining ones are enough to + * allow the request. + * + * Returns: + * - 0 if the access request is granted; + * - -EACCES if it is denied because of access right other than + * LANDLOCK_ACCESS_FS_REFER; + * - -EXDEV if the renaming or linking would be a privileged escalation + * (according to each layered policies), or if LANDLOCK_ACCESS_FS_REFER is + * not allowed by the source or the destination. + */ +static int check_access_path_dual( + const struct landlock_ruleset *const domain, + const struct path *const path, + const access_mask_t access_request_parent1, + layer_mask_t (*const layer_masks_parent1)[LANDLOCK_NUM_ACCESS_FS], + const struct dentry *const dentry_child1, + const access_mask_t access_request_parent2, + layer_mask_t (*const layer_masks_parent2)[LANDLOCK_NUM_ACCESS_FS], + const struct dentry *const dentry_child2) +{ + bool allowed_parent1 = false, allowed_parent2 = false, is_dom_check, + child1_is_directory = true, child2_is_directory = true; + struct path walker_path; + access_mask_t access_masked_parent1, access_masked_parent2; + layer_mask_t _layer_masks_child1[LANDLOCK_NUM_ACCESS_FS], + _layer_masks_child2[LANDLOCK_NUM_ACCESS_FS]; + layer_mask_t(*layer_masks_child1)[LANDLOCK_NUM_ACCESS_FS] = NULL, + (*layer_masks_child2)[LANDLOCK_NUM_ACCESS_FS] = NULL; + + if (!access_request_parent1 && !access_request_parent2) + return 0; if (WARN_ON_ONCE(!domain || !path)) return 0; - /* - * Allows access to pseudo filesystems that will never be mountable - * (e.g. sockfs, pipefs), but can still be reachable through - * /proc/<pid>/fd/<file-descriptor> . - */ - if ((path->dentry->d_sb->s_flags & SB_NOUSER) || - (d_is_positive(path->dentry) && - unlikely(IS_PRIVATE(d_backing_inode(path->dentry))))) + if (is_nouser_or_private(path->dentry)) return 0; - if (WARN_ON_ONCE(domain->num_layers < 1)) + if (WARN_ON_ONCE(domain->num_layers < 1 || !layer_masks_parent1)) return -EACCES; - /* Saves all layers handling a subset of requested accesses. */ - layer_mask = 0; - for (i = 0; i < domain->num_layers; i++) { - if (domain->fs_access_masks[i] & access_request) - layer_mask |= BIT_ULL(i); + if (unlikely(layer_masks_parent2)) { + if (WARN_ON_ONCE(!dentry_child1)) + return -EACCES; + /* + * For a double request, first check for potential privilege + * escalation by looking at domain handled accesses (which are + * a superset of the meaningful requested accesses). + */ + access_masked_parent1 = access_masked_parent2 = + get_handled_accesses(domain); + is_dom_check = true; + } else { + if (WARN_ON_ONCE(dentry_child1 || dentry_child2)) + return -EACCES; + /* For a simple request, only check for requested accesses. */ + access_masked_parent1 = access_request_parent1; + access_masked_parent2 = access_request_parent2; + is_dom_check = false; + } + + if (unlikely(dentry_child1)) { + unmask_layers(find_rule(domain, dentry_child1), + init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, + &_layer_masks_child1), + &_layer_masks_child1); + layer_masks_child1 = &_layer_masks_child1; + child1_is_directory = d_is_dir(dentry_child1); + } + if (unlikely(dentry_child2)) { + unmask_layers(find_rule(domain, dentry_child2), + init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, + &_layer_masks_child2), + &_layer_masks_child2); + layer_masks_child2 = &_layer_masks_child2; + child2_is_directory = d_is_dir(dentry_child2); } - /* An access request not handled by the domain is allowed. */ - if (layer_mask == 0) - return 0; walker_path = *path; path_get(&walker_path); @@ -267,15 +539,54 @@ static int check_access_path(const struct landlock_ruleset *const domain, */ while (true) { struct dentry *parent_dentry; + const struct landlock_rule *rule; - layer_mask = unmask_layers(domain, &walker_path, - access_request, layer_mask); - if (layer_mask == 0) { - /* Stops when a rule from each layer grants access. */ - allowed = true; - break; + /* + * If at least all accesses allowed on the destination are + * already allowed on the source, respectively if there is at + * least as much as restrictions on the destination than on the + * source, then we can safely refer files from the source to + * the destination without risking a privilege escalation. + * This also applies in the case of RENAME_EXCHANGE, which + * implies checks on both direction. This is crucial for + * standalone multilayered security policies. Furthermore, + * this helps avoid policy writers to shoot themselves in the + * foot. + */ + if (unlikely(is_dom_check && + no_more_access( + layer_masks_parent1, layer_masks_child1, + child1_is_directory, layer_masks_parent2, + layer_masks_child2, + child2_is_directory))) { + allowed_parent1 = scope_to_request( + access_request_parent1, layer_masks_parent1); + allowed_parent2 = scope_to_request( + access_request_parent2, layer_masks_parent2); + + /* Stops when all accesses are granted. */ + if (allowed_parent1 && allowed_parent2) + break; + + /* + * Now, downgrades the remaining checks from domain + * handled accesses to requested accesses. + */ + is_dom_check = false; + access_masked_parent1 = access_request_parent1; + access_masked_parent2 = access_request_parent2; } + rule = find_rule(domain, walker_path.dentry); + allowed_parent1 = unmask_layers(rule, access_masked_parent1, + layer_masks_parent1); + allowed_parent2 = unmask_layers(rule, access_masked_parent2, + layer_masks_parent2); + + /* Stops when a rule from each layer grants access. */ + if (allowed_parent1 && allowed_parent2) + break; + jump_up: if (walker_path.dentry == walker_path.mnt->mnt_root) { if (follow_up(&walker_path)) { @@ -286,7 +597,6 @@ jump_up: * Stops at the real root. Denies access * because not all layers have granted access. */ - allowed = false; break; } } @@ -296,7 +606,8 @@ jump_up: * access to internal filesystems (e.g. nsfs, which is * reachable through /proc/<pid>/ns/<namespace>). */ - allowed = !!(walker_path.mnt->mnt_flags & MNT_INTERNAL); + allowed_parent1 = allowed_parent2 = + !!(walker_path.mnt->mnt_flags & MNT_INTERNAL); break; } parent_dentry = dget_parent(walker_path.dentry); @@ -304,11 +615,40 @@ jump_up: walker_path.dentry = parent_dentry; } path_put(&walker_path); - return allowed ? 0 : -EACCES; + + if (allowed_parent1 && allowed_parent2) + return 0; + + /* + * This prioritizes EACCES over EXDEV for all actions, including + * renames with RENAME_EXCHANGE. + */ + if (likely(is_eacces(layer_masks_parent1, access_request_parent1) || + is_eacces(layer_masks_parent2, access_request_parent2))) + return -EACCES; + + /* + * Gracefully forbids reparenting if the destination directory + * hierarchy is not a superset of restrictions of the source directory + * hierarchy, or if LANDLOCK_ACCESS_FS_REFER is not allowed by the + * source or the destination. + */ + return -EXDEV; +} + +static inline int check_access_path(const struct landlock_ruleset *const domain, + const struct path *const path, + access_mask_t access_request) +{ + layer_mask_t layer_masks[LANDLOCK_NUM_ACCESS_FS] = {}; + + access_request = init_layer_masks(domain, access_request, &layer_masks); + return check_access_path_dual(domain, path, access_request, + &layer_masks, NULL, 0, NULL, NULL); } static inline int current_check_access_path(const struct path *const path, - const u32 access_request) + const access_mask_t access_request) { const struct landlock_ruleset *const dom = landlock_get_current_domain(); @@ -318,6 +658,239 @@ static inline int current_check_access_path(const struct path *const path, return check_access_path(dom, path, access_request); } +static inline access_mask_t get_mode_access(const umode_t mode) +{ + switch (mode & S_IFMT) { + case S_IFLNK: + return LANDLOCK_ACCESS_FS_MAKE_SYM; + case 0: + /* A zero mode translates to S_IFREG. */ + case S_IFREG: + return LANDLOCK_ACCESS_FS_MAKE_REG; + case S_IFDIR: + return LANDLOCK_ACCESS_FS_MAKE_DIR; + case S_IFCHR: + return LANDLOCK_ACCESS_FS_MAKE_CHAR; + case S_IFBLK: + return LANDLOCK_ACCESS_FS_MAKE_BLOCK; + case S_IFIFO: + return LANDLOCK_ACCESS_FS_MAKE_FIFO; + case S_IFSOCK: + return LANDLOCK_ACCESS_FS_MAKE_SOCK; + default: + WARN_ON_ONCE(1); + return 0; + } +} + +static inline access_mask_t maybe_remove(const struct dentry *const dentry) +{ + if (d_is_negative(dentry)) + return 0; + return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR : + LANDLOCK_ACCESS_FS_REMOVE_FILE; +} + +/** + * collect_domain_accesses - Walk through a file path and collect accesses + * + * @domain: Domain to check against. + * @mnt_root: Last directory to check. + * @dir: Directory to start the walk from. + * @layer_masks_dom: Where to store the collected accesses. + * + * This helper is useful to begin a path walk from the @dir directory to a + * @mnt_root directory used as a mount point. This mount point is the common + * ancestor between the source and the destination of a renamed and linked + * file. While walking from @dir to @mnt_root, we record all the domain's + * allowed accesses in @layer_masks_dom. + * + * This is similar to check_access_path_dual() but much simpler because it only + * handles walking on the same mount point and only check one set of accesses. + * + * Returns: + * - true if all the domain access rights are allowed for @dir; + * - false if the walk reached @mnt_root. + */ +static bool collect_domain_accesses( + const struct landlock_ruleset *const domain, + const struct dentry *const mnt_root, struct dentry *dir, + layer_mask_t (*const layer_masks_dom)[LANDLOCK_NUM_ACCESS_FS]) +{ + unsigned long access_dom; + bool ret = false; + + if (WARN_ON_ONCE(!domain || !mnt_root || !dir || !layer_masks_dom)) + return true; + if (is_nouser_or_private(dir)) + return true; + + access_dom = init_layer_masks(domain, LANDLOCK_MASK_ACCESS_FS, + layer_masks_dom); + + dget(dir); + while (true) { + struct dentry *parent_dentry; + + /* Gets all layers allowing all domain accesses. */ + if (unmask_layers(find_rule(domain, dir), access_dom, + layer_masks_dom)) { + /* + * Stops when all handled accesses are allowed by at + * least one rule in each layer. + */ + ret = true; + break; + } + + /* We should not reach a root other than @mnt_root. */ + if (dir == mnt_root || WARN_ON_ONCE(IS_ROOT(dir))) + break; + + parent_dentry = dget_parent(dir); + dput(dir); + dir = parent_dentry; + } + dput(dir); + return ret; +} + +/** + * current_check_refer_path - Check if a rename or link action is allowed + * + * @old_dentry: File or directory requested to be moved or linked. + * @new_dir: Destination parent directory. + * @new_dentry: Destination file or directory. + * @removable: Sets to true if it is a rename operation. + * @exchange: Sets to true if it is a rename operation with RENAME_EXCHANGE. + * + * Because of its unprivileged constraints, Landlock relies on file hierarchies + * (and not only inodes) to tie access rights to files. Being able to link or + * rename a file hierarchy brings some challenges. Indeed, moving or linking a + * file (i.e. creating a new reference to an inode) can have an impact on the + * actions allowed for a set of files if it would change its parent directory + * (i.e. reparenting). + * + * To avoid trivial access right bypasses, Landlock first checks if the file or + * directory requested to be moved would gain new access rights inherited from + * its new hierarchy. Before returning any error, Landlock then checks that + * the parent source hierarchy and the destination hierarchy would allow the + * link or rename action. If it is not the case, an error with EACCES is + * returned to inform user space that there is no way to remove or create the + * requested source file type. If it should be allowed but the new inherited + * access rights would be greater than the source access rights, then the + * kernel returns an error with EXDEV. Prioritizing EACCES over EXDEV enables + * user space to abort the whole operation if there is no way to do it, or to + * manually copy the source to the destination if this remains allowed, e.g. + * because file creation is allowed on the destination directory but not direct + * linking. + * + * To achieve this goal, the kernel needs to compare two file hierarchies: the + * one identifying the source file or directory (including itself), and the + * destination one. This can be seen as a multilayer partial ordering problem. + * The kernel walks through these paths and collects in a matrix the access + * rights that are denied per layer. These matrices are then compared to see + * if the destination one has more (or the same) restrictions as the source + * one. If this is the case, the requested action will not return EXDEV, which + * doesn't mean the action is allowed. The parent hierarchy of the source + * (i.e. parent directory), and the destination hierarchy must also be checked + * to verify that they explicitly allow such action (i.e. referencing, + * creation and potentially removal rights). The kernel implementation is then + * required to rely on potentially four matrices of access rights: one for the + * source file or directory (i.e. the child), a potentially other one for the + * other source/destination (in case of RENAME_EXCHANGE), one for the source + * parent hierarchy and a last one for the destination hierarchy. These + * ephemeral matrices take some space on the stack, which limits the number of + * layers to a deemed reasonable number: 16. + * + * Returns: + * - 0 if access is allowed; + * - -EXDEV if @old_dentry would inherit new access rights from @new_dir; + * - -EACCES if file removal or creation is denied. + */ +static int current_check_refer_path(struct dentry *const old_dentry, + const struct path *const new_dir, + struct dentry *const new_dentry, + const bool removable, const bool exchange) +{ + const struct landlock_ruleset *const dom = + landlock_get_current_domain(); + bool allow_parent1, allow_parent2; + access_mask_t access_request_parent1, access_request_parent2; + struct path mnt_dir; + layer_mask_t layer_masks_parent1[LANDLOCK_NUM_ACCESS_FS], + layer_masks_parent2[LANDLOCK_NUM_ACCESS_FS]; + + if (!dom) + return 0; + if (WARN_ON_ONCE(dom->num_layers < 1)) + return -EACCES; + if (unlikely(d_is_negative(old_dentry))) + return -ENOENT; + if (exchange) { + if (unlikely(d_is_negative(new_dentry))) + return -ENOENT; + access_request_parent1 = + get_mode_access(d_backing_inode(new_dentry)->i_mode); + } else { + access_request_parent1 = 0; + } + access_request_parent2 = + get_mode_access(d_backing_inode(old_dentry)->i_mode); + if (removable) { + access_request_parent1 |= maybe_remove(old_dentry); + access_request_parent2 |= maybe_remove(new_dentry); + } + + /* The mount points are the same for old and new paths, cf. EXDEV. */ + if (old_dentry->d_parent == new_dir->dentry) { + /* + * The LANDLOCK_ACCESS_FS_REFER access right is not required + * for same-directory referer (i.e. no reparenting). + */ + access_request_parent1 = init_layer_masks( + dom, access_request_parent1 | access_request_parent2, + &layer_masks_parent1); + return check_access_path_dual(dom, new_dir, + access_request_parent1, + &layer_masks_parent1, NULL, 0, + NULL, NULL); + } + + /* Backward compatibility: no reparenting support. */ + if (!(get_handled_accesses(dom) & LANDLOCK_ACCESS_FS_REFER)) + return -EXDEV; + + access_request_parent1 |= LANDLOCK_ACCESS_FS_REFER; + access_request_parent2 |= LANDLOCK_ACCESS_FS_REFER; + + /* Saves the common mount point. */ + mnt_dir.mnt = new_dir->mnt; + mnt_dir.dentry = new_dir->mnt->mnt_root; + + /* new_dir->dentry is equal to new_dentry->d_parent */ + allow_parent1 = collect_domain_accesses(dom, mnt_dir.dentry, + old_dentry->d_parent, + &layer_masks_parent1); + allow_parent2 = collect_domain_accesses( + dom, mnt_dir.dentry, new_dir->dentry, &layer_masks_parent2); + + if (allow_parent1 && allow_parent2) + return 0; + + /* + * To be able to compare source and destination domain access rights, + * take into account the @old_dentry access rights aggregated with its + * parent access rights. This will be useful to compare with the + * destination parent access rights. + */ + return check_access_path_dual(dom, &mnt_dir, access_request_parent1, + &layer_masks_parent1, old_dentry, + access_request_parent2, + &layer_masks_parent2, + exchange ? new_dentry : NULL); +} + /* Inode hooks */ static void hook_inode_free_security(struct inode *const inode) @@ -436,8 +1009,8 @@ static void hook_sb_delete(struct super_block *const sb) if (prev_inode) iput(prev_inode); /* Waits for pending iput() in release_inode(). */ - wait_var_event(&landlock_superblock(sb)->inode_refs, !atomic_long_read( - &landlock_superblock(sb)->inode_refs)); + wait_var_event(&landlock_superblock(sb)->inode_refs, + !atomic_long_read(&landlock_superblock(sb)->inode_refs)); } /* @@ -459,8 +1032,8 @@ static void hook_sb_delete(struct super_block *const sb) * a dedicated user space option would be required (e.g. as a ruleset flag). */ static int hook_sb_mount(const char *const dev_name, - const struct path *const path, const char *const type, - const unsigned long flags, void *const data) + const struct path *const path, const char *const type, + const unsigned long flags, void *const data) { if (!landlock_get_current_domain()) return 0; @@ -468,7 +1041,7 @@ static int hook_sb_mount(const char *const dev_name, } static int hook_move_mount(const struct path *const from_path, - const struct path *const to_path) + const struct path *const to_path) { if (!landlock_get_current_domain()) return 0; @@ -502,7 +1075,7 @@ static int hook_sb_remount(struct super_block *const sb, void *const mnt_opts) * view of the filesystem. */ static int hook_sb_pivotroot(const struct path *const old_path, - const struct path *const new_path) + const struct path *const new_path) { if (!landlock_get_current_domain()) return 0; @@ -511,97 +1084,34 @@ static int hook_sb_pivotroot(const struct path *const old_path, /* Path hooks */ -static inline u32 get_mode_access(const umode_t mode) -{ - switch (mode & S_IFMT) { - case S_IFLNK: - return LANDLOCK_ACCESS_FS_MAKE_SYM; - case 0: - /* A zero mode translates to S_IFREG. */ - case S_IFREG: - return LANDLOCK_ACCESS_FS_MAKE_REG; - case S_IFDIR: - return LANDLOCK_ACCESS_FS_MAKE_DIR; - case S_IFCHR: - return LANDLOCK_ACCESS_FS_MAKE_CHAR; - case S_IFBLK: - return LANDLOCK_ACCESS_FS_MAKE_BLOCK; - case S_IFIFO: - return LANDLOCK_ACCESS_FS_MAKE_FIFO; - case S_IFSOCK: - return LANDLOCK_ACCESS_FS_MAKE_SOCK; - default: - WARN_ON_ONCE(1); - return 0; - } -} - -/* - * Creating multiple links or renaming may lead to privilege escalations if not - * handled properly. Indeed, we must be sure that the source doesn't gain more - * privileges by being accessible from the destination. This is getting more - * complex when dealing with multiple layers. The whole picture can be seen as - * a multilayer partial ordering problem. A future version of Landlock will - * deal with that. - */ static int hook_path_link(struct dentry *const old_dentry, - const struct path *const new_dir, - struct dentry *const new_dentry) -{ - const struct landlock_ruleset *const dom = - landlock_get_current_domain(); - - if (!dom) - return 0; - /* The mount points are the same for old and new paths, cf. EXDEV. */ - if (old_dentry->d_parent != new_dir->dentry) - /* Gracefully forbids reparenting. */ - return -EXDEV; - if (unlikely(d_is_negative(old_dentry))) - return -ENOENT; - return check_access_path(dom, new_dir, - get_mode_access(d_backing_inode(old_dentry)->i_mode)); -} - -static inline u32 maybe_remove(const struct dentry *const dentry) + const struct path *const new_dir, + struct dentry *const new_dentry) { - if (d_is_negative(dentry)) - return 0; - return d_is_dir(dentry) ? LANDLOCK_ACCESS_FS_REMOVE_DIR : - LANDLOCK_ACCESS_FS_REMOVE_FILE; + return current_check_refer_path(old_dentry, new_dir, new_dentry, false, + false); } static int hook_path_rename(const struct path *const old_dir, - struct dentry *const old_dentry, - const struct path *const new_dir, - struct dentry *const new_dentry) + struct dentry *const old_dentry, + const struct path *const new_dir, + struct dentry *const new_dentry, + const unsigned int flags) { - const struct landlock_ruleset *const dom = - landlock_get_current_domain(); - - if (!dom) - return 0; - /* The mount points are the same for old and new paths, cf. EXDEV. */ - if (old_dir->dentry != new_dir->dentry) - /* Gracefully forbids reparenting. */ - return -EXDEV; - if (unlikely(d_is_negative(old_dentry))) - return -ENOENT; - /* RENAME_EXCHANGE is handled because directories are the same. */ - return check_access_path(dom, old_dir, maybe_remove(old_dentry) | - maybe_remove(new_dentry) | - get_mode_access(d_backing_inode(old_dentry)->i_mode)); + /* old_dir refers to old_dentry->d_parent and new_dir->mnt */ + return current_check_refer_path(old_dentry, new_dir, new_dentry, true, + !!(flags & RENAME_EXCHANGE)); } static int hook_path_mkdir(const struct path *const dir, - struct dentry *const dentry, const umode_t mode) + struct dentry *const dentry, const umode_t mode) { return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_DIR); } static int hook_path_mknod(const struct path *const dir, - struct dentry *const dentry, const umode_t mode, - const unsigned int dev) + struct dentry *const dentry, const umode_t mode, + const unsigned int dev) { const struct landlock_ruleset *const dom = landlock_get_current_domain(); @@ -612,28 +1122,29 @@ static int hook_path_mknod(const struct path *const dir, } static int hook_path_symlink(const struct path *const dir, - struct dentry *const dentry, const char *const old_name) + struct dentry *const dentry, + const char *const old_name) { return current_check_access_path(dir, LANDLOCK_ACCESS_FS_MAKE_SYM); } static int hook_path_unlink(const struct path *const dir, - struct dentry *const dentry) + struct dentry *const dentry) { return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_FILE); } static int hook_path_rmdir(const struct path *const dir, - struct dentry *const dentry) + struct dentry *const dentry) { return current_check_access_path(dir, LANDLOCK_ACCESS_FS_REMOVE_DIR); } /* File hooks */ -static inline u32 get_file_access(const struct file *const file) +static inline access_mask_t get_file_access(const struct file *const file) { - u32 access = 0; + access_mask_t access = 0; if (file->f_mode & FMODE_READ) { /* A directory can only be opened in read mode. */ @@ -688,5 +1199,5 @@ static struct security_hook_list landlock_hooks[] __lsm_ro_after_init = { __init void landlock_add_fs_hooks(void) { security_add_hooks(landlock_hooks, ARRAY_SIZE(landlock_hooks), - LANDLOCK_NAME); + LANDLOCK_NAME); } |