diff options
Diffstat (limited to 'fs/xfs')
-rw-r--r-- | fs/xfs/libxfs/xfs_log_format.h | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_quota_defs.h | 3 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_rtbitmap.c | 30 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_shared.h | 1 | ||||
-rw-r--r-- | fs/xfs/xfs_aops.c | 251 | ||||
-rw-r--r-- | fs/xfs/xfs_bmap_util.c | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_dquot.c | 129 | ||||
-rw-r--r-- | fs/xfs/xfs_file.c | 82 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.h | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_log_recover.c | 282 | ||||
-rw-r--r-- | fs/xfs/xfs_qm.c | 55 | ||||
-rw-r--r-- | fs/xfs/xfs_qm.h | 48 | ||||
-rw-r--r-- | fs/xfs/xfs_qm_syscalls.c | 27 | ||||
-rw-r--r-- | fs/xfs/xfs_quotaops.c | 36 | ||||
-rw-r--r-- | fs/xfs/xfs_trace.h | 9 | ||||
-rw-r--r-- | fs/xfs/xfs_trans_dquot.c | 15 |
16 files changed, 614 insertions, 362 deletions
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h index 265314690415..1be26ecdbebf 100644 --- a/fs/xfs/libxfs/xfs_log_format.h +++ b/fs/xfs/libxfs/xfs_log_format.h @@ -495,6 +495,8 @@ enum xfs_blft { XFS_BLFT_ATTR_LEAF_BUF, XFS_BLFT_ATTR_RMT_BUF, XFS_BLFT_SB_BUF, + XFS_BLFT_RTBITMAP_BUF, + XFS_BLFT_RTSUMMARY_BUF, XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS), }; diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h index f51078f1e92a..8eed51275bb3 100644 --- a/fs/xfs/libxfs/xfs_quota_defs.h +++ b/fs/xfs/libxfs/xfs_quota_defs.h @@ -37,7 +37,7 @@ typedef __uint16_t xfs_qwarncnt_t; #define XFS_DQ_PROJ 0x0002 /* project quota */ #define XFS_DQ_GROUP 0x0004 /* a group quota */ #define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */ -#define XFS_DQ_FREEING 0x0010 /* dquot is beeing torn down */ +#define XFS_DQ_FREEING 0x0010 /* dquot is being torn down */ #define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP) @@ -116,6 +116,7 @@ typedef __uint16_t xfs_qwarncnt_t; #define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */ #define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */ #define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */ +#define XFS_QMOPT_DQNEXT 0x0008000 /* return next dquot >= this ID */ /* * flags to xfs_trans_mod_dquot to indicate which field needs to be diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 9b59ffa1fc19..33806e0c2bc6 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -42,6 +42,31 @@ */ /* + * Real time buffers need verifiers to avoid runtime warnings during IO. + * We don't have anything to verify, however, so these are just dummy + * operations. + */ +static void +xfs_rtbuf_verify_read( + struct xfs_buf *bp) +{ + return; +} + +static void +xfs_rtbuf_verify_write( + struct xfs_buf *bp) +{ + return; +} + +const struct xfs_buf_ops xfs_rtbuf_ops = { + .name = "rtbuf", + .verify_read = xfs_rtbuf_verify_read, + .verify_write = xfs_rtbuf_verify_write, +}; + +/* * Get a buffer for the bitmap or summary file block specified. * The buffer is returned read and locked. */ @@ -68,9 +93,12 @@ xfs_rtbuf_get( ASSERT(map.br_startblock != NULLFSBLOCK); error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp, XFS_FSB_TO_DADDR(mp, map.br_startblock), - mp->m_bsize, 0, &bp, NULL); + mp->m_bsize, 0, &bp, &xfs_rtbuf_ops); if (error) return error; + + xfs_trans_buf_set_type(tp, bp, issum ? XFS_BLFT_RTSUMMARY_BUF + : XFS_BLFT_RTBITMAP_BUF); *bpp = bp; return 0; } diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index 15c3ceb845b9..81ac870834da 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -53,6 +53,7 @@ extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops; extern const struct xfs_buf_ops xfs_sb_buf_ops; extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops; extern const struct xfs_buf_ops xfs_symlink_buf_ops; +extern const struct xfs_buf_ops xfs_rtbuf_ops; /* * Transaction types. Used to distinguish types of buffers. These never reach diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index 0c8dacea411e..14ac9822b303 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -36,6 +36,10 @@ #include <linux/pagevec.h> #include <linux/writeback.h> +/* flags for direct write completions */ +#define XFS_DIO_FLAG_UNWRITTEN (1 << 0) +#define XFS_DIO_FLAG_APPEND (1 << 1) + void xfs_count_page_state( struct page *page, @@ -1240,27 +1244,8 @@ xfs_vm_releasepage( } /* - * When we map a DIO buffer, we may need to attach an ioend that describes the - * type of write IO we are doing. This passes to the completion function the - * operations it needs to perform. If the mapping is for an overwrite wholly - * within the EOF then we don't need an ioend and so we don't allocate one. - * This avoids the unnecessary overhead of allocating and freeing ioends for - * workloads that don't require transactions on IO completion. - * - * If we get multiple mappings in a single IO, we might be mapping different - * types. But because the direct IO can only have a single private pointer, we - * need to ensure that: - * - * a) i) the ioend spans the entire region of unwritten mappings; or - * ii) the ioend spans all the mappings that cross or are beyond EOF; and - * b) if it contains unwritten extents, it is *permanently* marked as such - * - * We could do this by chaining ioends like buffered IO does, but we only - * actually get one IO completion callback from the direct IO, and that spans - * the entire IO regardless of how many mappings and IOs are needed to complete - * the DIO. There is only going to be one reference to the ioend and its life - * cycle is constrained by the DIO completion code. hence we don't need - * reference counting here. + * When we map a DIO buffer, we may need to pass flags to + * xfs_end_io_direct_write to tell it what kind of write IO we are doing. * * Note that for DIO, an IO to the highest supported file block offset (i.e. * 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64 @@ -1268,68 +1253,26 @@ xfs_vm_releasepage( * extending the file size. We won't know for sure until IO completion is run * and the actual max write offset is communicated to the IO completion * routine. - * - * For DAX page faults, we are preparing to never see unwritten extents here, - * nor should we ever extend the inode size. Hence we will soon have nothing to - * do here for this case, ensuring we don't have to provide an IO completion - * callback to free an ioend that we don't actually need for a fault into the - * page at offset (2^63 - 1FSB) bytes. */ - static void xfs_map_direct( struct inode *inode, struct buffer_head *bh_result, struct xfs_bmbt_irec *imap, - xfs_off_t offset, - bool dax_fault) + xfs_off_t offset) { - struct xfs_ioend *ioend; + uintptr_t *flags = (uintptr_t *)&bh_result->b_private; xfs_off_t size = bh_result->b_size; - int type; - - if (ISUNWRITTEN(imap)) - type = XFS_IO_UNWRITTEN; - else - type = XFS_IO_OVERWRITE; - - trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap); - if (dax_fault) { - ASSERT(type == XFS_IO_OVERWRITE); - trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type, - imap); - return; - } - - if (bh_result->b_private) { - ioend = bh_result->b_private; - ASSERT(ioend->io_size > 0); - ASSERT(offset >= ioend->io_offset); - if (offset + size > ioend->io_offset + ioend->io_size) - ioend->io_size = offset - ioend->io_offset + size; - - if (type == XFS_IO_UNWRITTEN && type != ioend->io_type) - ioend->io_type = XFS_IO_UNWRITTEN; - - trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset, - ioend->io_size, ioend->io_type, - imap); - } else if (type == XFS_IO_UNWRITTEN || - offset + size > i_size_read(inode) || - offset + size < 0) { - ioend = xfs_alloc_ioend(inode, type); - ioend->io_offset = offset; - ioend->io_size = size; + trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size, + ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, imap); - bh_result->b_private = ioend; + if (ISUNWRITTEN(imap)) { + *flags |= XFS_DIO_FLAG_UNWRITTEN; + set_buffer_defer_completion(bh_result); + } else if (offset + size > i_size_read(inode) || offset + size < 0) { + *flags |= XFS_DIO_FLAG_APPEND; set_buffer_defer_completion(bh_result); - - trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type, - imap); - } else { - trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type, - imap); } } @@ -1500,9 +1443,12 @@ __xfs_get_blocks( if (ISUNWRITTEN(&imap)) set_buffer_unwritten(bh_result); /* direct IO needs special help */ - if (create && direct) - xfs_map_direct(inode, bh_result, &imap, offset, - dax_fault); + if (create && direct) { + if (dax_fault) + ASSERT(!ISUNWRITTEN(&imap)); + else + xfs_map_direct(inode, bh_result, &imap, offset); + } } /* @@ -1572,42 +1518,50 @@ xfs_get_blocks_dax_fault( return __xfs_get_blocks(inode, iblock, bh_result, create, true, true); } -static void -__xfs_end_io_direct_write( - struct inode *inode, - struct xfs_ioend *ioend, +/* + * Complete a direct I/O write request. + * + * xfs_map_direct passes us some flags in the private data to tell us what to + * do. If no flags are set, then the write IO is an overwrite wholly within + * the existing allocated file size and so there is nothing for us to do. + * + * Note that in this case the completion can be called in interrupt context, + * whereas if we have flags set we will always be called in task context + * (i.e. from a workqueue). + */ +STATIC int +xfs_end_io_direct_write( + struct kiocb *iocb, loff_t offset, - ssize_t size) + ssize_t size, + void *private) { - struct xfs_mount *mp = XFS_I(inode)->i_mount; + struct inode *inode = file_inode(iocb->ki_filp); + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + uintptr_t flags = (uintptr_t)private; + int error = 0; - if (XFS_FORCED_SHUTDOWN(mp) || ioend->io_error) - goto out_end_io; + trace_xfs_end_io_direct_write(ip, offset, size); - /* - * dio completion end_io functions are only called on writes if more - * than 0 bytes was written. - */ - ASSERT(size > 0); + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; - /* - * The ioend only maps whole blocks, while the IO may be sector aligned. - * Hence the ioend offset/size may not match the IO offset/size exactly. - * Because we don't map overwrites within EOF into the ioend, the offset - * may not match, but only if the endio spans EOF. Either way, write - * the IO sizes into the ioend so that completion processing does the - * right thing. - */ - ASSERT(offset + size <= ioend->io_offset + ioend->io_size); - ioend->io_size = size; - ioend->io_offset = offset; + if (size <= 0) + return size; /* - * The ioend tells us whether we are doing unwritten extent conversion + * The flags tell us whether we are doing unwritten extent conversions * or an append transaction that updates the on-disk file size. These * cases are the only cases where we should *potentially* be needing * to update the VFS inode size. - * + */ + if (flags == 0) { + ASSERT(offset + size <= i_size_read(inode)); + return 0; + } + + /* * We need to update the in-core inode size here so that we don't end up * with the on-disk inode size being outside the in-core inode size. We * have no other method of updating EOF for AIO, so always do it here @@ -1618,91 +1572,56 @@ __xfs_end_io_direct_write( * here can result in EOF moving backwards and Bad Things Happen when * that occurs. */ - spin_lock(&XFS_I(inode)->i_flags_lock); + spin_lock(&ip->i_flags_lock); if (offset + size > i_size_read(inode)) i_size_write(inode, offset + size); - spin_unlock(&XFS_I(inode)->i_flags_lock); + spin_unlock(&ip->i_flags_lock); - /* - * If we are doing an append IO that needs to update the EOF on disk, - * do the transaction reserve now so we can use common end io - * processing. Stashing the error (if there is one) in the ioend will - * result in the ioend processing passing on the error if it is - * possible as we can't return it from here. - */ - if (ioend->io_type == XFS_IO_OVERWRITE) - ioend->io_error = xfs_setfilesize_trans_alloc(ioend); + if (flags & XFS_DIO_FLAG_UNWRITTEN) { + trace_xfs_end_io_direct_write_unwritten(ip, offset, size); -out_end_io: - xfs_end_io(&ioend->io_work); - return; -} + error = xfs_iomap_write_unwritten(ip, offset, size); + } else if (flags & XFS_DIO_FLAG_APPEND) { + struct xfs_trans *tp; -/* - * Complete a direct I/O write request. - * - * The ioend structure is passed from __xfs_get_blocks() to tell us what to do. - * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite - * wholly within the EOF and so there is nothing for us to do. Note that in this - * case the completion can be called in interrupt context, whereas if we have an - * ioend we will always be called in task context (i.e. from a workqueue). - */ -STATIC void -xfs_end_io_direct_write( - struct kiocb *iocb, - loff_t offset, - ssize_t size, - void *private) -{ - struct inode *inode = file_inode(iocb->ki_filp); - struct xfs_ioend *ioend = private; + trace_xfs_end_io_direct_write_append(ip, offset, size); - trace_xfs_gbmap_direct_endio(XFS_I(inode), offset, size, - ioend ? ioend->io_type : 0, NULL); - - if (!ioend) { - ASSERT(offset + size <= i_size_read(inode)); - return; + tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS); + error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0); + if (error) { + xfs_trans_cancel(tp); + return error; + } + error = xfs_setfilesize(ip, tp, offset, size); } - __xfs_end_io_direct_write(inode, ioend, offset, size); + return error; } -static inline ssize_t -xfs_vm_do_dio( - struct inode *inode, +STATIC ssize_t +xfs_vm_direct_IO( struct kiocb *iocb, struct iov_iter *iter, - loff_t offset, - void (*endio)(struct kiocb *iocb, - loff_t offset, - ssize_t size, - void *private), - int flags) + loff_t offset) { + struct inode *inode = iocb->ki_filp->f_mapping->host; + dio_iodone_t *endio = NULL; + int flags = 0; struct block_device *bdev; - if (IS_DAX(inode)) + if (iov_iter_rw(iter) == WRITE) { + endio = xfs_end_io_direct_write; + flags = DIO_ASYNC_EXTEND; + } + + if (IS_DAX(inode)) { return dax_do_io(iocb, inode, iter, offset, xfs_get_blocks_direct, endio, 0); + } bdev = xfs_find_bdev_for_inode(inode); return __blockdev_direct_IO(iocb, inode, bdev, iter, offset, - xfs_get_blocks_direct, endio, NULL, flags); -} - -STATIC ssize_t -xfs_vm_direct_IO( - struct kiocb *iocb, - struct iov_iter *iter, - loff_t offset) -{ - struct inode *inode = iocb->ki_filp->f_mapping->host; - - if (iov_iter_rw(iter) == WRITE) - return xfs_vm_do_dio(inode, iocb, iter, offset, - xfs_end_io_direct_write, DIO_ASYNC_EXTEND); - return xfs_vm_do_dio(inode, iocb, iter, offset, NULL, 0); + xfs_get_blocks_direct, endio, NULL, flags); } /* diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c index 45ec9e40150c..07ef29b9b464 100644 --- a/fs/xfs/xfs_bmap_util.c +++ b/fs/xfs/xfs_bmap_util.c @@ -202,10 +202,12 @@ xfs_bmap_rtalloc( ralen = MAXEXTLEN / mp->m_sb.sb_rextsize; /* - * Lock out other modifications to the RT bitmap inode. + * Lock out modifications to both the RT bitmap and summary inodes */ xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL); xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL); + xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL); /* * If it's an allocation to an empty file at offset 0, diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c index 9c44d38dcd1f..316b2a1bdba5 100644 --- a/fs/xfs/xfs_dquot.c +++ b/fs/xfs/xfs_dquot.c @@ -92,26 +92,28 @@ xfs_qm_adjust_dqlimits( { struct xfs_quotainfo *q = mp->m_quotainfo; struct xfs_disk_dquot *d = &dq->q_core; + struct xfs_def_quota *defq; int prealloc = 0; ASSERT(d->d_id); + defq = xfs_get_defquota(dq, q); - if (q->qi_bsoftlimit && !d->d_blk_softlimit) { - d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit); + if (defq->bsoftlimit && !d->d_blk_softlimit) { + d->d_blk_softlimit = cpu_to_be64(defq->bsoftlimit); prealloc = 1; } - if (q->qi_bhardlimit && !d->d_blk_hardlimit) { - d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit); + if (defq->bhardlimit && !d->d_blk_hardlimit) { + d->d_blk_hardlimit = cpu_to_be64(defq->bhardlimit); prealloc = 1; } - if (q->qi_isoftlimit && !d->d_ino_softlimit) - d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit); - if (q->qi_ihardlimit && !d->d_ino_hardlimit) - d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit); - if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit) - d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit); - if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit) - d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit); + if (defq->isoftlimit && !d->d_ino_softlimit) + d->d_ino_softlimit = cpu_to_be64(defq->isoftlimit); + if (defq->ihardlimit && !d->d_ino_hardlimit) + d->d_ino_hardlimit = cpu_to_be64(defq->ihardlimit); + if (defq->rtbsoftlimit && !d->d_rtb_softlimit) + d->d_rtb_softlimit = cpu_to_be64(defq->rtbsoftlimit); + if (defq->rtbhardlimit && !d->d_rtb_hardlimit) + d->d_rtb_hardlimit = cpu_to_be64(defq->rtbhardlimit); if (prealloc) xfs_dquot_set_prealloc_limits(dq); @@ -232,7 +234,8 @@ xfs_qm_init_dquot_blk( { struct xfs_quotainfo *q = mp->m_quotainfo; xfs_dqblk_t *d; - int curid, i; + xfs_dqid_t curid; + int i; ASSERT(tp); ASSERT(xfs_buf_islocked(bp)); @@ -243,7 +246,6 @@ xfs_qm_init_dquot_blk( * ID of the first dquot in the block - id's are zero based. */ curid = id - (id % q->qi_dqperchunk); - ASSERT(curid >= 0); memset(d, 0, BBTOB(q->qi_dqchunklen)); for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) { d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC); @@ -464,12 +466,13 @@ xfs_qm_dqtobp( struct xfs_bmbt_irec map; int nmaps = 1, error; struct xfs_buf *bp; - struct xfs_inode *quotip = xfs_dq_to_quota_inode(dqp); + struct xfs_inode *quotip; struct xfs_mount *mp = dqp->q_mount; xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id); struct xfs_trans *tp = (tpp ? *tpp : NULL); uint lock_mode; + quotip = xfs_quota_inode(dqp->q_mount, dqp->dq_flags); dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk; lock_mode = xfs_ilock_data_map_shared(quotip); @@ -685,6 +688,56 @@ error0: } /* + * Advance to the next id in the current chunk, or if at the + * end of the chunk, skip ahead to first id in next allocated chunk + * using the SEEK_DATA interface. + */ +int +xfs_dq_get_next_id( + xfs_mount_t *mp, + uint type, + xfs_dqid_t *id, + loff_t eof) +{ + struct xfs_inode *quotip; + xfs_fsblock_t start; + loff_t offset; + uint lock; + xfs_dqid_t next_id; + int error = 0; + + /* Simple advance */ + next_id = *id + 1; + + /* If new ID is within the current chunk, advancing it sufficed */ + if (next_id % mp->m_quotainfo->qi_dqperchunk) { + *id = next_id; + return 0; + } + + /* Nope, next_id is now past the current chunk, so find the next one */ + start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk; + + quotip = xfs_quota_inode(mp, type); + lock = xfs_ilock_data_map_shared(quotip); + + offset = __xfs_seek_hole_data(VFS_I(quotip), XFS_FSB_TO_B(mp, start), + eof, SEEK_DATA); + if (offset < 0) + error = offset; + + xfs_iunlock(quotip, lock); + + /* -ENXIO is essentially "no more data" */ + if (error) + return (error == -ENXIO ? -ENOENT: error); + + /* Convert next data offset back to a quota id */ + *id = XFS_B_TO_FSB(mp, offset) * mp->m_quotainfo->qi_dqperchunk; + return 0; +} + +/* * Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a * a locked dquot, doing an allocation (if requested) as needed. * When both an inode and an id are given, the inode's id takes precedence. @@ -704,6 +757,7 @@ xfs_qm_dqget( struct xfs_quotainfo *qi = mp->m_quotainfo; struct radix_tree_root *tree = xfs_dquot_tree(qi, type); struct xfs_dquot *dqp; + loff_t eof = 0; int error; ASSERT(XFS_IS_QUOTA_RUNNING(mp)); @@ -731,6 +785,21 @@ xfs_qm_dqget( } #endif + /* Get the end of the quota file if we need it */ + if (flags & XFS_QMOPT_DQNEXT) { + struct xfs_inode *quotip; + xfs_fileoff_t last; + uint lock_mode; + + quotip = xfs_quota_inode(mp, type); + lock_mode = xfs_ilock_data_map_shared(quotip); + error = xfs_bmap_last_offset(quotip, &last, XFS_DATA_FORK); + xfs_iunlock(quotip, lock_mode); + if (error) + return error; + eof = XFS_FSB_TO_B(mp, last); + } + restart: mutex_lock(&qi->qi_tree_lock); dqp = radix_tree_lookup(tree, id); @@ -744,6 +813,18 @@ restart: goto restart; } + /* uninit / unused quota found in radix tree, keep looking */ + if (flags & XFS_QMOPT_DQNEXT) { + if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { + xfs_dqunlock(dqp); + mutex_unlock(&qi->qi_tree_lock); + error = xfs_dq_get_next_id(mp, type, &id, eof); + if (error) + return error; + goto restart; + } + } + dqp->q_nrefs++; mutex_unlock(&qi->qi_tree_lock); @@ -770,6 +851,13 @@ restart: if (ip) xfs_ilock(ip, XFS_ILOCK_EXCL); + /* If we are asked to find next active id, keep looking */ + if (error == -ENOENT && (flags & XFS_QMOPT_DQNEXT)) { + error = xfs_dq_get_next_id(mp, type, &id, eof); + if (!error) + goto restart; + } + if (error) return error; @@ -820,6 +908,17 @@ restart: qi->qi_dquots++; mutex_unlock(&qi->qi_tree_lock); + /* If we are asked to find next active id, keep looking */ + if (flags & XFS_QMOPT_DQNEXT) { + if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) { + xfs_qm_dqput(dqp); + error = xfs_dq_get_next_id(mp, type, &id, eof); + if (error) + return error; + goto restart; + } + } + dqret: ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL)); trace_xfs_dqget_miss(dqp); diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c index 52883ac3cf84..70a4b5a2802e 100644 --- a/fs/xfs/xfs_file.c +++ b/fs/xfs/xfs_file.c @@ -1337,31 +1337,31 @@ out: return found; } -STATIC loff_t -xfs_seek_hole_data( - struct file *file, +/* + * caller must lock inode with xfs_ilock_data_map_shared, + * can we craft an appropriate ASSERT? + * + * end is because the VFS-level lseek interface is defined such that any + * offset past i_size shall return -ENXIO, but we use this for quota code + * which does not maintain i_size, and we want to SEEK_DATA past i_size. + */ +loff_t +__xfs_seek_hole_data( + struct inode *inode, loff_t start, + loff_t end, int whence) { - struct inode *inode = file->f_mapping->host; struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; loff_t uninitialized_var(offset); - xfs_fsize_t isize; xfs_fileoff_t fsbno; - xfs_filblks_t end; - uint lock; + xfs_filblks_t lastbno; int error; - if (XFS_FORCED_SHUTDOWN(mp)) - return -EIO; - - lock = xfs_ilock_data_map_shared(ip); - - isize = i_size_read(inode); - if (start >= isize) { + if (start >= end) { error = -ENXIO; - goto out_unlock; + goto out_error; } /* @@ -1369,22 +1369,22 @@ xfs_seek_hole_data( * by fsbno to the end block of the file. */ fsbno = XFS_B_TO_FSBT(mp, start); - end = XFS_B_TO_FSB(mp, isize); + lastbno = XFS_B_TO_FSB(mp, end); for (;;) { struct xfs_bmbt_irec map[2]; int nmap = 2; unsigned int i; - error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap, + error = xfs_bmapi_read(ip, fsbno, lastbno - fsbno, map, &nmap, XFS_BMAPI_ENTIRE); if (error) - goto out_unlock; + goto out_error; /* No extents at given offset, must be beyond EOF */ if (nmap == 0) { error = -ENXIO; - goto out_unlock; + goto out_error; } for (i = 0; i < nmap; i++) { @@ -1426,7 +1426,7 @@ xfs_seek_hole_data( * hole at the end of any file). */ if (whence == SEEK_HOLE) { - offset = isize; + offset = end; break; } /* @@ -1434,7 +1434,7 @@ xfs_seek_hole_data( */ ASSERT(whence == SEEK_DATA); error = -ENXIO; - goto out_unlock; + goto out_error; } ASSERT(i > 1); @@ -1445,14 +1445,14 @@ xfs_seek_hole_data( */ fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount; start = XFS_FSB_TO_B(mp, fsbno); - if (start >= isize) { + if (start >= end) { if (whence == SEEK_HOLE) { - offset = isize; + offset = end; break; } ASSERT(whence == SEEK_DATA); error = -ENXIO; - goto out_unlock; + goto out_error; } } @@ -1464,7 +1464,39 @@ out: * situation in particular. */ if (whence == SEEK_HOLE) - offset = min_t(loff_t, offset, isize); + offset = min_t(loff_t, offset, end); + + return offset; + +out_error: + return error; +} + +STATIC loff_t +xfs_seek_hole_data( + struct file *file, + loff_t start, + int whence) +{ + struct inode *inode = file->f_mapping->host; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + uint lock; + loff_t offset, end; + int error = 0; + + if (XFS_FORCED_SHUTDOWN(mp)) + return -EIO; + + lock = xfs_ilock_data_map_shared(ip); + + end = i_size_read(inode); + offset = __xfs_seek_hole_data(inode, start, end, whence); + if (offset < 0) { + error = offset; + goto out_unlock; + } + offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes); out_unlock: diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index ca9e11989cbd..ed7e9339c7e9 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -437,6 +437,8 @@ int xfs_update_prealloc_flags(struct xfs_inode *ip, int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset, xfs_fsize_t isize, bool *did_zeroing); int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count); +loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start, + loff_t eof, int whence); /* from xfs_iops.c */ diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index da37beb76f6e..2c449a25c4f6 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -1109,27 +1109,10 @@ xlog_verify_head( bool tmp_wrapped; /* - * Search backwards through the log looking for the log record header - * block. This wraps all the way back around to the head so something is - * seriously wrong if we can't find it. - */ - found = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, rhead_blk, - rhead, wrapped); - if (found < 0) - return found; - if (!found) { - xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); - return -EIO; - } - - *tail_blk = BLOCK_LSN(be64_to_cpu((*rhead)->h_tail_lsn)); - - /* - * Now that we have a tail block, check the head of the log for torn - * writes. Search again until we hit the tail or the maximum number of - * log record I/Os that could have been in flight at one time. Use a - * temporary buffer so we don't trash the rhead/bp pointer from the - * call above. + * Check the head of the log for torn writes. Search backwards from the + * head until we hit the tail or the maximum number of log record I/Os + * that could have been in flight at one time. Use a temporary buffer so + * we don't trash the rhead/bp pointers from the caller. */ tmp_bp = xlog_get_bp(log, 1); if (!tmp_bp) @@ -1216,6 +1199,115 @@ xlog_verify_head( } /* + * Check whether the head of the log points to an unmount record. In other + * words, determine whether the log is clean. If so, update the in-core state + * appropriately. + */ +static int +xlog_check_unmount_rec( + struct xlog *log, + xfs_daddr_t *head_blk, + xfs_daddr_t *tail_blk, + struct xlog_rec_header *rhead, + xfs_daddr_t rhead_blk, + struct xfs_buf *bp, + bool *clean) +{ + struct xlog_op_header *op_head; + xfs_daddr_t umount_data_blk; + xfs_daddr_t after_umount_blk; + int hblks; + int error; + char *offset; + + *clean = false; + + /* + * Look for unmount record. If we find it, then we know there was a + * clean unmount. Since 'i' could be the last block in the physical + * log, we convert to a log block before comparing to the head_blk. + * + * Save the current tail lsn to use to pass to xlog_clear_stale_blocks() + * below. We won't want to clear the unmount record if there is one, so + * we pass the lsn of the unmount record rather than the block after it. + */ + if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { + int h_size = be32_to_cpu(rhead->h_size); + int h_version = be32_to_cpu(rhead->h_version); + + if ((h_version & XLOG_VERSION_2) && + (h_size > XLOG_HEADER_CYCLE_SIZE)) { + hblks = h_size / XLOG_HEADER_CYCLE_SIZE; + if (h_size % XLOG_HEADER_CYCLE_SIZE) + hblks++; + } else { + hblks = 1; + } + } else { + hblks = 1; + } + after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len)); + after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize); + if (*head_blk == after_umount_blk && + be32_to_cpu(rhead->h_num_logops) == 1) { + umount_data_blk = rhead_blk + hblks; + umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize); + error = xlog_bread(log, umount_data_blk, 1, bp, &offset); + if (error) + return error; + + op_head = (struct xlog_op_header *)offset; + if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { + /* + * Set tail and last sync so that newly written log + * records will point recovery to after the current + * unmount record. + */ + xlog_assign_atomic_lsn(&log->l_tail_lsn, + log->l_curr_cycle, after_umount_blk); + xlog_assign_atomic_lsn(&log->l_last_sync_lsn, + log->l_curr_cycle, after_umount_blk); + *tail_blk = after_umount_blk; + + *clean = true; + } + } + + return 0; +} + +static void +xlog_set_state( + struct xlog *log, + xfs_daddr_t head_blk, + struct xlog_rec_header *rhead, + xfs_daddr_t rhead_blk, + bool bump_cycle) +{ + /* + * Reset log values according to the state of the log when we + * crashed. In the case where head_blk == 0, we bump curr_cycle + * one because the next write starts a new cycle rather than + * continuing the cycle of the last good log record. At this + * point we have guaranteed that all partial log records have been + * accounted for. Therefore, we know that the last good log record + * written was complete and ended exactly on the end boundary + * of the physical log. + */ + log->l_prev_block = rhead_blk; + log->l_curr_block = (int)head_blk; + log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); + if (bump_cycle) + log->l_curr_cycle++; + atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); + atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); + xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle, + BBTOB(log->l_curr_block)); + xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle, + BBTOB(log->l_curr_block)); +} + +/* * Find the sync block number or the tail of the log. * * This will be the block number of the last record to have its @@ -1238,22 +1330,20 @@ xlog_find_tail( xfs_daddr_t *tail_blk) { xlog_rec_header_t *rhead; - xlog_op_header_t *op_head; char *offset = NULL; xfs_buf_t *bp; int error; - xfs_daddr_t umount_data_blk; - xfs_daddr_t after_umount_blk; xfs_daddr_t rhead_blk; xfs_lsn_t tail_lsn; - int hblks; bool wrapped = false; + bool clean = false; /* * Find previous log record */ if ((error = xlog_find_head(log, head_blk))) return error; + ASSERT(*head_blk < INT_MAX); bp = xlog_get_bp(log, 1); if (!bp) @@ -1271,100 +1361,75 @@ xlog_find_tail( } /* - * Trim the head block back to skip over torn records. We can have - * multiple log I/Os in flight at any time, so we assume CRC failures - * back through the previous several records are torn writes and skip - * them. + * Search backwards through the log looking for the log record header + * block. This wraps all the way back around to the head so something is + * seriously wrong if we can't find it. */ - ASSERT(*head_blk < INT_MAX); - error = xlog_verify_head(log, head_blk, tail_blk, bp, &rhead_blk, - &rhead, &wrapped); - if (error) - goto done; + error = xlog_rseek_logrec_hdr(log, *head_blk, *head_blk, 1, bp, + &rhead_blk, &rhead, &wrapped); + if (error < 0) + return error; + if (!error) { + xfs_warn(log->l_mp, "%s: couldn't find sync record", __func__); + return -EIO; + } + *tail_blk = BLOCK_LSN(be64_to_cpu(rhead->h_tail_lsn)); /* - * Reset log values according to the state of the log when we - * crashed. In the case where head_blk == 0, we bump curr_cycle - * one because the next write starts a new cycle rather than - * continuing the cycle of the last good log record. At this - * point we have guaranteed that all partial log records have been - * accounted for. Therefore, we know that the last good log record - * written was complete and ended exactly on the end boundary - * of the physical log. + * Set the log state based on the current head record. */ - log->l_prev_block = rhead_blk; - log->l_curr_block = (int)*head_blk; - log->l_curr_cycle = be32_to_cpu(rhead->h_cycle); - if (wrapped) - log->l_curr_cycle++; - atomic64_set(&log->l_tail_lsn, be64_to_cpu(rhead->h_tail_lsn)); - atomic64_set(&log->l_last_sync_lsn, be64_to_cpu(rhead->h_lsn)); - xlog_assign_grant_head(&log->l_reserve_head.grant, log->l_curr_cycle, - BBTOB(log->l_curr_block)); - xlog_assign_grant_head(&log->l_write_head.grant, log->l_curr_cycle, - BBTOB(log->l_curr_block)); + xlog_set_state(log, *head_blk, rhead, rhead_blk, wrapped); + tail_lsn = atomic64_read(&log->l_tail_lsn); /* - * Look for unmount record. If we find it, then we know there - * was a clean unmount. Since 'i' could be the last block in - * the physical log, we convert to a log block before comparing - * to the head_blk. + * Look for an unmount record at the head of the log. This sets the log + * state to determine whether recovery is necessary. + */ + error = xlog_check_unmount_rec(log, head_blk, tail_blk, rhead, + rhead_blk, bp, &clean); + if (error) + goto done; + + /* + * Verify the log head if the log is not clean (e.g., we have anything + * but an unmount record at the head). This uses CRC verification to + * detect and trim torn writes. If discovered, CRC failures are + * considered torn writes and the log head is trimmed accordingly. * - * Save the current tail lsn to use to pass to - * xlog_clear_stale_blocks() below. We won't want to clear the - * unmount record if there is one, so we pass the lsn of the - * unmount record rather than the block after it. + * Note that we can only run CRC verification when the log is dirty + * because there's no guarantee that the log data behind an unmount + * record is compatible with the current architecture. */ - if (xfs_sb_version_haslogv2(&log->l_mp->m_sb)) { - int h_size = be32_to_cpu(rhead->h_size); - int h_version = be32_to_cpu(rhead->h_version); + if (!clean) { + xfs_daddr_t orig_head = *head_blk; - if ((h_version & XLOG_VERSION_2) && - (h_size > XLOG_HEADER_CYCLE_SIZE)) { - hblks = h_size / XLOG_HEADER_CYCLE_SIZE; - if (h_size % XLOG_HEADER_CYCLE_SIZE) - hblks++; - } else { - hblks = 1; - } - } else { - hblks = 1; - } - after_umount_blk = rhead_blk + hblks + BTOBB(be32_to_cpu(rhead->h_len)); - after_umount_blk = do_mod(after_umount_blk, log->l_logBBsize); - tail_lsn = atomic64_read(&log->l_tail_lsn); - if (*head_blk == after_umount_blk && - be32_to_cpu(rhead->h_num_logops) == 1) { - umount_data_blk = rhead_blk + hblks; - umount_data_blk = do_mod(umount_data_blk, log->l_logBBsize); - error = xlog_bread(log, umount_data_blk, 1, bp, &offset); + error = xlog_verify_head(log, head_blk, tail_blk, bp, + &rhead_blk, &rhead, &wrapped); if (error) goto done; - op_head = (xlog_op_header_t *)offset; - if (op_head->oh_flags & XLOG_UNMOUNT_TRANS) { - /* - * Set tail and last sync so that newly written - * log records will point recovery to after the - * current unmount record. - */ - xlog_assign_atomic_lsn(&log->l_tail_lsn, - log->l_curr_cycle, after_umount_blk); - xlog_assign_atomic_lsn(&log->l_last_sync_lsn, - log->l_curr_cycle, after_umount_blk); - *tail_blk = after_umount_blk; - - /* - * Note that the unmount was clean. If the unmount - * was not clean, we need to know this to rebuild the - * superblock counters from the perag headers if we - * have a filesystem using non-persistent counters. - */ - log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; + /* update in-core state again if the head changed */ + if (*head_blk != orig_head) { + xlog_set_state(log, *head_blk, rhead, rhead_blk, + wrapped); + tail_lsn = atomic64_read(&log->l_tail_lsn); + error = xlog_check_unmount_rec(log, head_blk, tail_blk, + rhead, rhead_blk, bp, + &clean); + if (error) + goto done; } } /* + * Note that the unmount was clean. If the unmount was not clean, we + * need to know this to rebuild the superblock counters from the perag + * headers if we have a filesystem using non-persistent counters. + */ + if (clean) + log->l_mp->m_flags |= XFS_MOUNT_WAS_CLEAN; + + /* * Make sure that there are no blocks in front of the head * with the same cycle number as the head. This can happen * because we allow multiple outstanding log writes concurrently, @@ -2473,6 +2538,13 @@ xlog_recover_validate_buf_type( } bp->b_ops = &xfs_sb_buf_ops; break; +#ifdef CONFIG_XFS_RT + case XFS_BLFT_RTBITMAP_BUF: + case XFS_BLFT_RTSUMMARY_BUF: + /* no magic numbers for verification of RT buffers */ + bp->b_ops = &xfs_rtbuf_ops; + break; +#endif /* CONFIG_XFS_RT */ default: xfs_warn(mp, "Unknown buffer type %d!", xfs_blft_from_flags(buf_f)); @@ -4491,7 +4563,7 @@ xlog_recover_process( * know precisely what failed. */ if (pass == XLOG_RECOVER_CRCPASS) { - if (rhead->h_crc && crc != le32_to_cpu(rhead->h_crc)) + if (rhead->h_crc && crc != rhead->h_crc) return -EFSBADCRC; return 0; } @@ -4502,7 +4574,7 @@ xlog_recover_process( * zero CRC check prevents warnings from being emitted when upgrading * the kernel from one that does not add CRCs by default. */ - if (crc != le32_to_cpu(rhead->h_crc)) { + if (crc != rhead->h_crc) { if (rhead->h_crc || xfs_sb_version_hascrc(&log->l_mp->m_sb)) { xfs_alert(log->l_mp, "log record CRC mismatch: found 0x%x, expected 0x%x.", diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index 532ab79d38fe..be125e1758c1 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -560,6 +560,37 @@ xfs_qm_shrink_count( return list_lru_shrink_count(&qi->qi_lru, sc); } +STATIC void +xfs_qm_set_defquota( + xfs_mount_t *mp, + uint type, + xfs_quotainfo_t *qinf) +{ + xfs_dquot_t *dqp; + struct xfs_def_quota *defq; + int error; + + error = xfs_qm_dqread(mp, 0, type, XFS_QMOPT_DOWARN, &dqp); + + if (!error) { + xfs_disk_dquot_t *ddqp = &dqp->q_core; + + defq = xfs_get_defquota(dqp, qinf); + + /* + * Timers and warnings have been already set, let's just set the + * default limits for this quota type + */ + defq->bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit); + defq->bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit); + defq->ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit); + defq->isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit); + defq->rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); + defq->rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); + xfs_qm_dqdestroy(dqp); + } +} + /* * This initializes all the quota information that's kept in the * mount structure @@ -606,19 +637,19 @@ xfs_qm_init_quotainfo( * We try to get the limits from the superuser's limits fields. * This is quite hacky, but it is standard quota practice. * - * We look at the USR dquot with id == 0 first, but if user quotas - * are not enabled we goto the GRP dquot with id == 0. - * We don't really care to keep separate default limits for user - * and group quotas, at least not at this point. - * * Since we may not have done a quotacheck by this point, just read * the dquot without attaching it to any hashtables or lists. + * + * Timers and warnings are globally set by the first timer found in + * user/group/proj quota types, otherwise a default value is used. + * This should be split into different fields per quota type. */ error = xfs_qm_dqread(mp, 0, XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER : (XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP : XFS_DQ_PROJ), XFS_QMOPT_DOWARN, &dqp); + if (!error) { xfs_disk_dquot_t *ddqp = &dqp->q_core; @@ -639,13 +670,6 @@ xfs_qm_init_quotainfo( be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT; qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ? be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT; - qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit); - qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit); - qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit); - qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit); - qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit); - qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit); - xfs_qm_dqdestroy(dqp); } else { qinf->qi_btimelimit = XFS_QM_BTIMELIMIT; @@ -656,6 +680,13 @@ xfs_qm_init_quotainfo( qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT; } + if (XFS_IS_UQUOTA_RUNNING(mp)) + xfs_qm_set_defquota(mp, XFS_DQ_USER, qinf); + if (XFS_IS_GQUOTA_RUNNING(mp)) + xfs_qm_set_defquota(mp, XFS_DQ_GROUP, qinf); + if (XFS_IS_PQUOTA_RUNNING(mp)) + xfs_qm_set_defquota(mp, XFS_DQ_PROJ, qinf); + qinf->qi_shrinker.count_objects = xfs_qm_shrink_count; qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan; qinf->qi_shrinker.seeks = DEFAULT_SEEKS; diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h index 996a04064894..2975a822e9f0 100644 --- a/fs/xfs/xfs_qm.h +++ b/fs/xfs/xfs_qm.h @@ -53,6 +53,15 @@ extern struct kmem_zone *xfs_qm_dqtrxzone; */ #define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1 +struct xfs_def_quota { + xfs_qcnt_t bhardlimit; /* default data blk hard limit */ + xfs_qcnt_t bsoftlimit; /* default data blk soft limit */ + xfs_qcnt_t ihardlimit; /* default inode count hard limit */ + xfs_qcnt_t isoftlimit; /* default inode count soft limit */ + xfs_qcnt_t rtbhardlimit; /* default realtime blk hard limit */ + xfs_qcnt_t rtbsoftlimit; /* default realtime blk soft limit */ +}; + /* * Various quota information for individual filesystems. * The mount structure keeps a pointer to this. @@ -76,12 +85,9 @@ typedef struct xfs_quotainfo { struct mutex qi_quotaofflock;/* to serialize quotaoff */ xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */ uint qi_dqperchunk; /* # ondisk dqs in above chunk */ - xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */ - xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */ - xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */ - xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */ - xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */ - xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */ + struct xfs_def_quota qi_usr_default; + struct xfs_def_quota qi_grp_default; + struct xfs_def_quota qi_prj_default; struct shrinker qi_shrinker; } xfs_quotainfo_t; @@ -104,15 +110,15 @@ xfs_dquot_tree( } static inline struct xfs_inode * -xfs_dq_to_quota_inode(struct xfs_dquot *dqp) +xfs_quota_inode(xfs_mount_t *mp, uint dq_flags) { - switch (dqp->dq_flags & XFS_DQ_ALLTYPES) { + switch (dq_flags & XFS_DQ_ALLTYPES) { case XFS_DQ_USER: - return dqp->q_mount->m_quotainfo->qi_uquotaip; + return mp->m_quotainfo->qi_uquotaip; case XFS_DQ_GROUP: - return dqp->q_mount->m_quotainfo->qi_gquotaip; + return mp->m_quotainfo->qi_gquotaip; case XFS_DQ_PROJ: - return dqp->q_mount->m_quotainfo->qi_pquotaip; + return mp->m_quotainfo->qi_pquotaip; default: ASSERT(0); } @@ -164,11 +170,27 @@ extern void xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint); /* quota ops */ extern int xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint); -extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t, - uint, struct qc_dqblk *); +extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t *, + uint, struct qc_dqblk *, uint); extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint, struct qc_dqblk *); extern int xfs_qm_scall_quotaon(struct xfs_mount *, uint); extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint); +static inline struct xfs_def_quota * +xfs_get_defquota(struct xfs_dquot *dqp, struct xfs_quotainfo *qi) +{ + struct xfs_def_quota *defq; + + if (XFS_QM_ISUDQ(dqp)) + defq = &qi->qi_usr_default; + else if (XFS_QM_ISGDQ(dqp)) + defq = &qi->qi_grp_default; + else { + ASSERT(XFS_QM_ISPDQ(dqp)); + defq = &qi->qi_prj_default; + } + return defq; +} + #endif /* __XFS_QM_H__ */ diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c index 3640c6e896af..f4d0e0a8f517 100644 --- a/fs/xfs/xfs_qm_syscalls.c +++ b/fs/xfs/xfs_qm_syscalls.c @@ -404,6 +404,7 @@ xfs_qm_scall_setqlim( struct xfs_disk_dquot *ddq; struct xfs_dquot *dqp; struct xfs_trans *tp; + struct xfs_def_quota *defq; int error; xfs_qcnt_t hard, soft; @@ -431,6 +432,8 @@ xfs_qm_scall_setqlim( ASSERT(error != -ENOENT); goto out_unlock; } + + defq = xfs_get_defquota(dqp, q); xfs_dqunlock(dqp); tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM); @@ -458,8 +461,8 @@ xfs_qm_scall_setqlim( ddq->d_blk_softlimit = cpu_to_be64(soft); xfs_dquot_set_prealloc_limits(dqp); if (id == 0) { - q->qi_bhardlimit = hard; - q->qi_bsoftlimit = soft; + defq->bhardlimit = hard; + defq->bsoftlimit = soft; } } else { xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft); @@ -474,8 +477,8 @@ xfs_qm_scall_setqlim( ddq->d_rtb_hardlimit = cpu_to_be64(hard); ddq->d_rtb_softlimit = cpu_to_be64(soft); if (id == 0) { - q->qi_rtbhardlimit = hard; - q->qi_rtbsoftlimit = soft; + defq->rtbhardlimit = hard; + defq->rtbsoftlimit = soft; } } else { xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft); @@ -491,8 +494,8 @@ xfs_qm_scall_setqlim( ddq->d_ino_hardlimit = cpu_to_be64(hard); ddq->d_ino_softlimit = cpu_to_be64(soft); if (id == 0) { - q->qi_ihardlimit = hard; - q->qi_isoftlimit = soft; + defq->ihardlimit = hard; + defq->isoftlimit = soft; } } else { xfs_debug(mp, "ihard %Ld < isoft %Ld", hard, soft); @@ -635,9 +638,10 @@ out: int xfs_qm_scall_getquota( struct xfs_mount *mp, - xfs_dqid_t id, + xfs_dqid_t *id, uint type, - struct qc_dqblk *dst) + struct qc_dqblk *dst, + uint dqget_flags) { struct xfs_dquot *dqp; int error; @@ -647,7 +651,7 @@ xfs_qm_scall_getquota( * we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't * exist, we'll get ENOENT back. */ - error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp); + error = xfs_qm_dqget(mp, NULL, *id, type, dqget_flags, &dqp); if (error) return error; @@ -660,6 +664,9 @@ xfs_qm_scall_getquota( goto out_put; } + /* Fill in the ID we actually read from disk */ + *id = be32_to_cpu(dqp->q_core.d_id); + memset(dst, 0, sizeof(*dst)); dst->d_spc_hardlimit = XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit)); @@ -701,7 +708,7 @@ xfs_qm_scall_getquota( if (((XFS_IS_UQUOTA_ENFORCED(mp) && type == XFS_DQ_USER) || (XFS_IS_GQUOTA_ENFORCED(mp) && type == XFS_DQ_GROUP) || (XFS_IS_PQUOTA_ENFORCED(mp) && type == XFS_DQ_PROJ)) && - id != 0) { + *id != 0) { if ((dst->d_space > dst->d_spc_softlimit) && (dst->d_spc_softlimit > 0)) { ASSERT(dst->d_spc_timer != 0); diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c index 7795e0d01382..f82d79a8c694 100644 --- a/fs/xfs/xfs_quotaops.c +++ b/fs/xfs/xfs_quotaops.c @@ -231,14 +231,45 @@ xfs_fs_get_dqblk( struct qc_dqblk *qdq) { struct xfs_mount *mp = XFS_M(sb); + xfs_dqid_t id; if (!XFS_IS_QUOTA_RUNNING(mp)) return -ENOSYS; if (!XFS_IS_QUOTA_ON(mp)) return -ESRCH; - return xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid), - xfs_quota_type(qid.type), qdq); + id = from_kqid(&init_user_ns, qid); + return xfs_qm_scall_getquota(mp, &id, + xfs_quota_type(qid.type), qdq, 0); +} + +/* Return quota info for active quota >= this qid */ +STATIC int +xfs_fs_get_nextdqblk( + struct super_block *sb, + struct kqid *qid, + struct qc_dqblk *qdq) +{ + int ret; + struct xfs_mount *mp = XFS_M(sb); + xfs_dqid_t id; + + if (!XFS_IS_QUOTA_RUNNING(mp)) + return -ENOSYS; + if (!XFS_IS_QUOTA_ON(mp)) + return -ESRCH; + + id = from_kqid(&init_user_ns, *qid); + ret = xfs_qm_scall_getquota(mp, &id, + xfs_quota_type(qid->type), qdq, + XFS_QMOPT_DQNEXT); + if (ret) + return ret; + + /* ID may be different, so convert back what we got */ + *qid = make_kqid(current_user_ns(), qid->type, id); + return 0; + } STATIC int @@ -267,5 +298,6 @@ const struct quotactl_ops xfs_quotactl_operations = { .quota_disable = xfs_quota_disable, .rm_xquota = xfs_fs_rm_xquota, .get_dqblk = xfs_fs_get_dqblk, + .get_nextdqblk = xfs_fs_get_nextdqblk, .set_dqblk = xfs_fs_set_dqblk, }; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 391d797cb53f..c8d58426008e 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -1296,11 +1296,7 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_found); DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc); DEFINE_IOMAP_EVENT(xfs_get_blocks_found); DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc); -DEFINE_IOMAP_EVENT(xfs_gbmap_direct); -DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new); -DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update); -DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none); -DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio); +DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct); DECLARE_EVENT_CLASS(xfs_simple_io_class, TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count), @@ -1340,6 +1336,9 @@ DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert); DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound); DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize); DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof); +DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write); +DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_unwritten); +DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_append); DECLARE_EVENT_CLASS(xfs_itrunc_class, TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size), diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index 995170194df0..c3d547211d16 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -609,17 +609,20 @@ xfs_trans_dqresv( xfs_qcnt_t total_count; xfs_qcnt_t *resbcountp; xfs_quotainfo_t *q = mp->m_quotainfo; + struct xfs_def_quota *defq; xfs_dqlock(dqp); + defq = xfs_get_defquota(dqp, q); + if (flags & XFS_TRANS_DQ_RES_BLKS) { hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit); if (!hardlimit) - hardlimit = q->qi_bhardlimit; + hardlimit = defq->bhardlimit; softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit); if (!softlimit) - softlimit = q->qi_bsoftlimit; + softlimit = defq->bsoftlimit; timer = be32_to_cpu(dqp->q_core.d_btimer); warns = be16_to_cpu(dqp->q_core.d_bwarns); warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit; @@ -628,10 +631,10 @@ xfs_trans_dqresv( ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS); hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit); if (!hardlimit) - hardlimit = q->qi_rtbhardlimit; + hardlimit = defq->rtbhardlimit; softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit); if (!softlimit) - softlimit = q->qi_rtbsoftlimit; + softlimit = defq->rtbsoftlimit; timer = be32_to_cpu(dqp->q_core.d_rtbtimer); warns = be16_to_cpu(dqp->q_core.d_rtbwarns); warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit; @@ -672,10 +675,10 @@ xfs_trans_dqresv( warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit; hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit); if (!hardlimit) - hardlimit = q->qi_ihardlimit; + hardlimit = defq->ihardlimit; softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit); if (!softlimit) - softlimit = q->qi_isoftlimit; + softlimit = defq->isoftlimit; if (hardlimit && total_count > hardlimit) { xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN); |