// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright (c) 2021-2024 Oracle.  All Rights Reserved.
 * Author: Darrick J. Wong
 */
#include "xfs.h"
#include "xfs_fs.h"
#include "xfs_shared.h"
#include "xfs_format.h"
#include "xfs_trans_resv.h"
#include "xfs_mount.h"
#include "xfs_log_format.h"
#include "xfs_trans.h"
#include "xfs_inode.h"
#include "xfs_ialloc.h"
#include "xfs_quota.h"
#include "xfs_trans_space.h"
#include "xfs_dir2.h"
#include "xfs_icache.h"
#include "xfs_bmap.h"
#include "xfs_bmap_btree.h"
#include "xfs_parent.h"
#include "xfs_attr_sf.h"
#include "scrub/scrub.h"
#include "scrub/common.h"
#include "scrub/repair.h"
#include "scrub/trace.h"
#include "scrub/orphanage.h"
#include "scrub/readdir.h"
#include <linux/namei.h>

/*
 * The Orphanage
 * =============
 *
 * If the directory tree is damaged, children of that directory become
 * inaccessible via that file path.  If a child has no other parents, the file
 * is said to be orphaned.  xfs_repair fixes this situation by creating an
 * orphanage directory (specifically, /lost+found) and creating a directory
 * entry pointing to the orphaned file.
 *
 * Online repair follows this tactic by creating a root-owned /lost+found
 * directory if one does not exist.  If an orphan is found, it will move that
 * file into the orphanage.
 */

/* Make the orphanage owned by root.
*/ STATIC int xrep_chown_orphanage( struct xfs_scrub *sc, struct xfs_inode *dp) { struct xfs_trans *tp; struct xfs_mount *mp = sc->mp; struct xfs_dquot *udqp = NULL, *gdqp = NULL, *pdqp = NULL; struct xfs_dquot *oldu = NULL, *oldg = NULL, *oldp = NULL; struct inode *inode = VFS_I(dp); int error; error = xfs_qm_vop_dqalloc(dp, GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, 0, XFS_QMOPT_QUOTALL, &udqp, &gdqp, &pdqp); if (error) return error; error = xfs_trans_alloc_ichange(dp, udqp, gdqp, pdqp, true, &tp); if (error) goto out_dqrele; /* * Always clear setuid/setgid/sticky on the orphanage since we don't * normally want that functionality on this directory and xfs_repair * doesn't create it this way either. Leave the other access bits * unchanged. */ inode->i_mode &= ~(S_ISUID | S_ISGID | S_ISVTX); /* * Change the ownerships and register quota modifications * in the transaction. */ if (!uid_eq(inode->i_uid, GLOBAL_ROOT_UID)) { if (XFS_IS_UQUOTA_ON(mp)) oldu = xfs_qm_vop_chown(tp, dp, &dp->i_udquot, udqp); inode->i_uid = GLOBAL_ROOT_UID; } if (!gid_eq(inode->i_gid, GLOBAL_ROOT_GID)) { if (XFS_IS_GQUOTA_ON(mp)) oldg = xfs_qm_vop_chown(tp, dp, &dp->i_gdquot, gdqp); inode->i_gid = GLOBAL_ROOT_GID; } if (dp->i_projid != 0) { if (XFS_IS_PQUOTA_ON(mp)) oldp = xfs_qm_vop_chown(tp, dp, &dp->i_pdquot, pdqp); dp->i_projid = 0; } dp->i_diflags &= ~(XFS_DIFLAG_REALTIME | XFS_DIFLAG_RTINHERIT); xfs_trans_log_inode(tp, dp, XFS_ILOG_CORE); XFS_STATS_INC(mp, xs_ig_attrchg); if (xfs_has_wsync(mp)) xfs_trans_set_sync(tp); error = xfs_trans_commit(tp); xfs_qm_dqrele(oldu); xfs_qm_dqrele(oldg); xfs_qm_dqrele(oldp); out_dqrele: xfs_qm_dqrele(udqp); xfs_qm_dqrele(gdqp); xfs_qm_dqrele(pdqp); return error; } #define ORPHANAGE "lost+found" /* Create the orphanage directory, and set sc->orphanage to it. 
*/ int xrep_orphanage_create( struct xfs_scrub *sc) { struct xfs_mount *mp = sc->mp; struct dentry *root_dentry, *orphanage_dentry; struct inode *root_inode = VFS_I(sc->mp->m_rootip); struct inode *orphanage_inode; int error; if (xfs_is_shutdown(mp)) return -EIO; if (xfs_is_readonly(mp)) { sc->orphanage = NULL; return 0; } ASSERT(sc->tp == NULL); ASSERT(sc->orphanage == NULL); /* Find the dentry for the root directory... */ root_dentry = d_find_alias(root_inode); if (!root_dentry) { error = -EFSCORRUPTED; goto out; } /* ...which is a directory, right? */ if (!d_is_dir(root_dentry)) { error = -EFSCORRUPTED; goto out_dput_root; } /* Try to find the orphanage directory. */ inode_lock_nested(root_inode, I_MUTEX_PARENT); orphanage_dentry = lookup_one_len(ORPHANAGE, root_dentry, strlen(ORPHANAGE)); if (IS_ERR(orphanage_dentry)) { error = PTR_ERR(orphanage_dentry); goto out_unlock_root; } /* * Nothing found? Call mkdir to create the orphanage. Create the * directory without other-user access because we're live and someone * could have been relying partly on minimal access to a parent * directory to control access to a file we put in here. */ if (d_really_is_negative(orphanage_dentry)) { error = vfs_mkdir(&nop_mnt_idmap, root_inode, orphanage_dentry, 0750); if (error) goto out_dput_orphanage; } /* Not a directory? Bail out. */ if (!d_is_dir(orphanage_dentry)) { error = -ENOTDIR; goto out_dput_orphanage; } /* * Grab a reference to the orphanage. This /should/ succeed since * we hold the root directory locked and therefore nobody can delete * the orphanage. */ orphanage_inode = igrab(d_inode(orphanage_dentry)); if (!orphanage_inode) { error = -ENOENT; goto out_dput_orphanage; } /* Make sure the orphanage is owned by root. */ error = xrep_chown_orphanage(sc, XFS_I(orphanage_inode)); if (error) goto out_dput_orphanage; /* Stash the reference for later and bail out. 
*/ sc->orphanage = XFS_I(orphanage_inode); sc->orphanage_ilock_flags = 0; out_dput_orphanage: dput(orphanage_dentry); out_unlock_root: inode_unlock(VFS_I(sc->mp->m_rootip)); out_dput_root: dput(root_dentry); out: return error; } void xrep_orphanage_ilock( struct xfs_scrub *sc, unsigned int ilock_flags) { sc->orphanage_ilock_flags |= ilock_flags; xfs_ilock(sc->orphanage, ilock_flags); } bool xrep_orphanage_ilock_nowait( struct xfs_scrub *sc, unsigned int ilock_flags) { if (xfs_ilock_nowait(sc->orphanage, ilock_flags)) { sc->orphanage_ilock_flags |= ilock_flags; return true; } return false; } void xrep_orphanage_iunlock( struct xfs_scrub *sc, unsigned int ilock_flags) { xfs_iunlock(sc->orphanage, ilock_flags); sc->orphanage_ilock_flags &= ~ilock_flags; } /* Grab the IOLOCK of the orphanage and sc->ip. */ int xrep_orphanage_iolock_two( struct xfs_scrub *sc) { int error = 0; while (true) { if (xchk_should_terminate(sc, &error)) return error; /* * Normal XFS takes the IOLOCK before grabbing a transaction. * Scrub holds a transaction, which means that we can't block * on either IOLOCK. */ if (xrep_orphanage_ilock_nowait(sc, XFS_IOLOCK_EXCL)) { if (xchk_ilock_nowait(sc, XFS_IOLOCK_EXCL)) break; xrep_orphanage_iunlock(sc, XFS_IOLOCK_EXCL); } delay(1); } return 0; } /* Release the orphanage. */ void xrep_orphanage_rele( struct xfs_scrub *sc) { if (!sc->orphanage) return; if (sc->orphanage_ilock_flags) xfs_iunlock(sc->orphanage, sc->orphanage_ilock_flags); xchk_irele(sc, sc->orphanage); sc->orphanage = NULL; } /* Adoption moves a file into /lost+found */ /* Can the orphanage adopt @sc->ip? */ bool xrep_orphanage_can_adopt( struct xfs_scrub *sc) { ASSERT(sc->ip != NULL); if (!sc->orphanage) return false; if (sc->ip == sc->orphanage) return false; if (xchk_inode_is_sb_rooted(sc->ip)) return false; if (xfs_is_internal_inode(sc->ip)) return false; return true; } /* * Create a new transaction to send a child to the orphanage. 
* * Allocate a new transaction with sufficient disk space to handle the * adoption, take ILOCK_EXCL of the orphanage and sc->ip, joins them to the * transaction, and reserve quota to reparent the latter. Caller must hold the * IOLOCK of the orphanage and sc->ip. */ int xrep_adoption_trans_alloc( struct xfs_scrub *sc, struct xrep_adoption *adopt) { struct xfs_mount *mp = sc->mp; unsigned int child_blkres = 0; int error; ASSERT(sc->tp == NULL); ASSERT(sc->ip != NULL); ASSERT(sc->orphanage != NULL); ASSERT(sc->ilock_flags & XFS_IOLOCK_EXCL); ASSERT(sc->orphanage_ilock_flags & XFS_IOLOCK_EXCL); ASSERT(!(sc->ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))); ASSERT(!(sc->orphanage_ilock_flags & (XFS_ILOCK_SHARED | XFS_ILOCK_EXCL))); /* Compute the worst case space reservation that we need. */ adopt->sc = sc; adopt->orphanage_blkres = xfs_link_space_res(mp, MAXNAMELEN); if (S_ISDIR(VFS_I(sc->ip)->i_mode)) child_blkres = xfs_rename_space_res(mp, 0, false, xfs_name_dotdot.len, false); if (xfs_has_parent(mp)) child_blkres += XFS_ADDAFORK_SPACE_RES(mp); adopt->child_blkres = child_blkres; /* * Allocate a transaction to link the child into the parent, along with * enough disk space to handle expansion of both the orphanage and the * dotdot entry of a child directory. */ error = xfs_trans_alloc(mp, &M_RES(mp)->tr_link, adopt->orphanage_blkres + adopt->child_blkres, 0, 0, &sc->tp); if (error) return error; xfs_lock_two_inodes(sc->orphanage, XFS_ILOCK_EXCL, sc->ip, XFS_ILOCK_EXCL); sc->ilock_flags |= XFS_ILOCK_EXCL; sc->orphanage_ilock_flags |= XFS_ILOCK_EXCL; xfs_trans_ijoin(sc->tp, sc->orphanage, 0); xfs_trans_ijoin(sc->tp, sc->ip, 0); /* * Reserve enough quota in the orphan directory to add the new name. * Normally the orphanage should have user/group/project ids of zero * and hence is not subject to quota enforcement, but we're allowed to * exceed quota to reattach disconnected parts of the directory tree. 
*/ error = xfs_trans_reserve_quota_nblks(sc->tp, sc->orphanage, adopt->orphanage_blkres, 0, true); if (error) goto out_cancel; /* * Reserve enough quota in the child directory to change dotdot. * Here we're also allowed to exceed file quota to repair inconsistent * metadata. */ if (adopt->child_blkres) { error = xfs_trans_reserve_quota_nblks(sc->tp, sc->ip, adopt->child_blkres, 0, true); if (error) goto out_cancel; } return 0; out_cancel: xchk_trans_cancel(sc); xrep_orphanage_iunlock(sc, XFS_ILOCK_EXCL); xchk_iunlock(sc, XFS_ILOCK_EXCL); return error; } /* * Compute the xfs_name for the directory entry that we're adding to the * orphanage. Caller must hold ILOCKs of sc->ip and the orphanage and must not * reuse namebuf until the adoption completes or is dissolved. */ int xrep_adoption_compute_name( struct xrep_adoption *adopt, struct xfs_name *xname) { struct xfs_scrub *sc = adopt->sc; char *namebuf = (void *)xname->name; xfs_ino_t ino; unsigned int incr = 0; int error = 0; adopt->xname = xname; xname->len = snprintf(namebuf, MAXNAMELEN, "%llu", sc->ip->i_ino); xname->type = xfs_mode_to_ftype(VFS_I(sc->ip)->i_mode); /* Make sure the filename is unique in the lost+found. */ error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino); while (error == 0 && incr < 10000) { xname->len = snprintf(namebuf, MAXNAMELEN, "%llu.%u", sc->ip->i_ino, ++incr); error = xchk_dir_lookup(sc, sc->orphanage, xname, &ino); } if (error == 0) { /* We already have 10,000 entries in the orphanage? */ return -EFSCORRUPTED; } if (error != -ENOENT) return error; return 0; } /* * Make sure the dcache does not have a positive dentry for the name we've * chosen. The caller should have checked with the ondisk directory, so any * discrepancy is a sign that something is seriously wrong. 
*/ static int xrep_adoption_check_dcache( struct xrep_adoption *adopt) { struct qstr qname = QSTR_INIT(adopt->xname->name, adopt->xname->len); struct xfs_scrub *sc = adopt->sc; struct dentry *d_orphanage, *d_child; int error = 0; d_orphanage = d_find_alias(VFS_I(sc->orphanage)); if (!d_orphanage) return 0; d_child = d_hash_and_lookup(d_orphanage, &qname); if (d_child) { trace_xrep_adoption_check_child(sc->mp, d_child); if (d_is_positive(d_child)) { ASSERT(d_is_negative(d_child)); error = -EFSCORRUPTED; } dput(d_child); } dput(d_orphanage); return error; } /* * Invalidate all dentries for the name that was added to the orphanage * directory, and all dentries pointing to the child inode that was moved. * * There should not be any positive entries for the name, since we've * maintained our lock on the orphanage directory. */ static void xrep_adoption_zap_dcache( struct xrep_adoption *adopt) { struct qstr qname = QSTR_INIT(adopt->xname->name, adopt->xname->len); struct xfs_scrub *sc = adopt->sc; struct dentry *d_orphanage, *d_child; /* Invalidate all dentries for the adoption name */ d_orphanage = d_find_alias(VFS_I(sc->orphanage)); if (!d_orphanage) return; d_child = d_hash_and_lookup(d_orphanage, &qname); while (d_child != NULL) { trace_xrep_adoption_invalidate_child(sc->mp, d_child); ASSERT(d_is_negative(d_child)); d_invalidate(d_child); dput(d_child); d_child = d_lookup(d_orphanage, &qname); } dput(d_orphanage); /* Invalidate all the dentries pointing down to this file. */ while ((d_child = d_find_alias(VFS_I(sc->ip))) != NULL) { trace_xrep_adoption_invalidate_child(sc->mp, d_child); d_invalidate(d_child); dput(d_child); } } /* * If we have to add an attr fork ahead of a parent pointer update, how much * space should we ask for? 
*/ static inline int xrep_adoption_attr_sizeof( const struct xrep_adoption *adopt) { return sizeof(struct xfs_attr_sf_hdr) + xfs_attr_sf_entsize_byname(sizeof(struct xfs_parent_rec), adopt->xname->len); } /* * Move the current file to the orphanage under the computed name. * * Returns with a dirty transaction so that the caller can handle any other * work, such as fixing up unlinked lists or resetting link counts. */ int xrep_adoption_move( struct xrep_adoption *adopt) { struct xfs_scrub *sc = adopt->sc; bool isdir = S_ISDIR(VFS_I(sc->ip)->i_mode); int error; trace_xrep_adoption_reparent(sc->orphanage, adopt->xname, sc->ip->i_ino); error = xrep_adoption_check_dcache(adopt); if (error) return error; /* * If this filesystem has parent pointers, ensure that the file being * moved to the orphanage has an attribute fork. This is required * because the parent pointer code does not itself add attr forks. */ if (!xfs_inode_has_attr_fork(sc->ip) && xfs_has_parent(sc->mp)) { int sf_size = xrep_adoption_attr_sizeof(adopt); error = xfs_bmap_add_attrfork(sc->tp, sc->ip, sf_size, true); if (error) return error; } /* Create the new name in the orphanage. */ error = xfs_dir_createname(sc->tp, sc->orphanage, adopt->xname, sc->ip->i_ino, adopt->orphanage_blkres); if (error) return error; /* * Bump the link count of the orphanage if we just added a * subdirectory, and update its timestamps. */ xfs_trans_ichgtime(sc->tp, sc->orphanage, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG); if (isdir) xfs_bumplink(sc->tp, sc->orphanage); xfs_trans_log_inode(sc->tp, sc->orphanage, XFS_ILOG_CORE); /* Bump the link count of the child. */ if (adopt->bump_child_nlink) { xfs_bumplink(sc->tp, sc->ip); xfs_trans_log_inode(sc->tp, sc->ip, XFS_ILOG_CORE); } /* Replace the dotdot entry if the child is a subdirectory. 
*/ if (isdir) { error = xfs_dir_replace(sc->tp, sc->ip, &xfs_name_dotdot, sc->orphanage->i_ino, adopt->child_blkres); if (error) return error; } /* Add a parent pointer from the file back to the lost+found. */ if (xfs_has_parent(sc->mp)) { error = xfs_parent_addname(sc->tp, &adopt->ppargs, sc->orphanage, adopt->xname, sc->ip); if (error) return error; } /* * Notify dirent hooks that we moved the file to /lost+found, and * finish all the deferred work so that we know the adoption is fully * recorded in the log. */ xfs_dir_update_hook(sc->orphanage, sc->ip, 1, adopt->xname); /* Remove negative dentries from the lost+found's dcache */ xrep_adoption_zap_dcache(adopt); return 0; } /* * Roll to a clean scrub transaction so that we can release the orphanage, * even if xrep_adoption_move was not called. * * Commits all the work and deferred ops attached to an adoption request and * rolls to a clean scrub transaction. On success, returns 0 with the scrub * context holding a clean transaction with no inodes joined. On failure, * returns negative errno with no scrub transaction. All inode locks are * still held after this function returns. */ int xrep_adoption_trans_roll( struct xrep_adoption *adopt) { struct xfs_scrub *sc = adopt->sc; int error; trace_xrep_adoption_trans_roll(sc->orphanage, sc->ip, !!(sc->tp->t_flags & XFS_TRANS_DIRTY)); /* Finish all the deferred ops to commit all repairs. */ error = xrep_defer_finish(sc); if (error) return error; /* Roll the transaction once more to detach the inodes. */ return xfs_trans_roll(&sc->tp); }