diff options
Diffstat (limited to 'fs')
82 files changed, 1089 insertions, 982 deletions
diff --git a/fs/eventfd.c b/fs/eventfd.c index df466ef81ddd..e265b6dd4f34 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -182,11 +182,14 @@ static __poll_t eventfd_poll(struct file *file, poll_table *wait) return events; } -static void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) +void eventfd_ctx_do_read(struct eventfd_ctx *ctx, __u64 *cnt) { + lockdep_assert_held(&ctx->wqh.lock); + *cnt = (ctx->flags & EFD_SEMAPHORE) ? 1 : ctx->count; ctx->count -= *cnt; } +EXPORT_SYMBOL_GPL(eventfd_ctx_do_read); /** * eventfd_ctx_remove_wait_queue - Read the current counter and removes wait queue. diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 10b81e69db74..a829af074eb5 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -389,19 +389,24 @@ static bool ep_busy_loop_end(void *p, unsigned long start_time) * * we must do our busy polling with irqs enabled */ -static void ep_busy_loop(struct eventpoll *ep, int nonblock) +static bool ep_busy_loop(struct eventpoll *ep, int nonblock) { unsigned int napi_id = READ_ONCE(ep->napi_id); - if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on()) + if ((napi_id >= MIN_NAPI_ID) && net_busy_loop_on()) { napi_busy_loop(napi_id, nonblock ? NULL : ep_busy_loop_end, ep, false, BUSY_POLL_BUDGET); -} - -static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep) -{ - if (ep->napi_id) + if (ep_events_available(ep)) + return true; + /* + * Busy poll timed out. Drop NAPI ID for now, we can add + * it back in when we have moved a socket with a valid NAPI + * ID onto the ready list. + */ ep->napi_id = 0; + return false; + } + return false; } /* @@ -441,12 +446,9 @@ static inline void ep_set_busy_poll_napi_id(struct epitem *epi) #else -static inline void ep_busy_loop(struct eventpoll *ep, int nonblock) -{ -} - -static inline void ep_reset_busy_poll_napi_id(struct eventpoll *ep) +static inline bool ep_busy_loop(struct eventpoll *ep, int nonblock) { + return false; } static inline void ep_set_busy_poll_napi_id(struct epitem *epi) @@ -1625,6 +1627,14 @@ static int ep_send_events(struct eventpoll *ep, poll_table pt; int res = 0; + /* + * Always short-circuit for fatal signals to allow threads to make a + * timely exit without the chance of finding more events available and + * fetching repeatedly. + */ + if (fatal_signal_pending(current)) + return -EINTR; + init_poll_funcptr(&pt, NULL); mutex_lock(&ep->mtx); @@ -1702,15 +1712,25 @@ static int ep_send_events(struct eventpoll *ep, return res; } -static inline struct timespec64 ep_set_mstimeout(long ms) +static struct timespec64 *ep_timeout_to_timespec(struct timespec64 *to, long ms) { - struct timespec64 now, ts = { - .tv_sec = ms / MSEC_PER_SEC, - .tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC), - }; + struct timespec64 now; + + if (ms < 0) + return NULL; + + if (!ms) { + to->tv_sec = 0; + to->tv_nsec = 0; + return to; + } + + to->tv_sec = ms / MSEC_PER_SEC; + to->tv_nsec = NSEC_PER_MSEC * (ms % MSEC_PER_SEC); ktime_get_ts64(&now); - return timespec64_add_safe(now, ts); + *to = timespec64_add_safe(now, *to); + return to; } /** @@ -1722,8 +1742,8 @@ static inline struct timespec64 ep_set_mstimeout(long ms) * stored. * @maxevents: Size (in terms of number of events) of the caller event buffer. * @timeout: Maximum timeout for the ready events fetch operation, in - * milliseconds. If the @timeout is zero, the function will not block, - * while if the @timeout is less than zero, the function will block + * timespec. If the timeout is zero, the function will not block, + * while if the @timeout ptr is NULL, the function will block * until at least one event has been retrieved (or an error * occurred). * @@ -1731,55 +1751,59 @@ static inline struct timespec64 ep_set_mstimeout(long ms) * error code, in case of error. */ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events, - int maxevents, long timeout) + int maxevents, struct timespec64 *timeout) { - int res = 0, eavail, timed_out = 0; + int res, eavail, timed_out = 0; u64 slack = 0; wait_queue_entry_t wait; ktime_t expires, *to = NULL; lockdep_assert_irqs_enabled(); - if (timeout > 0) { - struct timespec64 end_time = ep_set_mstimeout(timeout); - - slack = select_estimate_accuracy(&end_time); + if (timeout && (timeout->tv_sec | timeout->tv_nsec)) { + slack = select_estimate_accuracy(timeout); to = &expires; - *to = timespec64_to_ktime(end_time); - } else if (timeout == 0) { + *to = timespec64_to_ktime(*timeout); + } else if (timeout) { /* * Avoid the unnecessary trip to the wait queue loop, if the - * caller specified a non blocking operation. We still need - * lock because we could race and not see an epi being added - * to the ready list while in irq callback. Thus incorrectly - * returning 0 back to userspace. + * caller specified a non blocking operation. */ timed_out = 1; - - write_lock_irq(&ep->lock); - eavail = ep_events_available(ep); - write_unlock_irq(&ep->lock); - - goto send_events; } -fetch_events: + /* + * This call is racy: We may or may not see events that are being added + * to the ready list under the lock (e.g., in IRQ callbacks). For, cases + * with a non-zero timeout, this thread will check the ready list under + * lock and will added to the wait queue. For, cases with a zero + * timeout, the user by definition should not care and will have to + * recheck again. + */ + eavail = ep_events_available(ep); + + while (1) { + if (eavail) { + /* + * Try to transfer events to user space. In case we get + * 0 events and there's still timeout left over, we go + * trying again in search of more luck. + */ + res = ep_send_events(ep, events, maxevents); + if (res) + return res; + } - if (!ep_events_available(ep)) - ep_busy_loop(ep, timed_out); + if (timed_out) + return 0; - eavail = ep_events_available(ep); - if (eavail) - goto send_events; + eavail = ep_busy_loop(ep, timed_out); + if (eavail) + continue; - /* - * Busy poll timed out. Drop NAPI ID for now, we can add - * it back in when we have moved a socket with a valid NAPI - * ID onto the ready list. - */ - ep_reset_busy_poll_napi_id(ep); + if (signal_pending(current)) + return -EINTR; - do { /* * Internally init_wait() uses autoremove_wake_function(), * thus wait entry is removed from the wait queue on each @@ -1809,55 +1833,38 @@ fetch_events: * important. */ eavail = ep_events_available(ep); - if (!eavail) { - if (signal_pending(current)) - res = -EINTR; - else - __add_wait_queue_exclusive(&ep->wq, &wait); - } - write_unlock_irq(&ep->lock); - - if (eavail || res) - break; - - if (!schedule_hrtimeout_range(to, slack, HRTIMER_MODE_ABS)) { - timed_out = 1; - break; - } - - /* We were woken up, thus go and try to harvest some events */ - eavail = 1; - - } while (0); + if (!eavail) + __add_wait_queue_exclusive(&ep->wq, &wait); - __set_current_state(TASK_RUNNING); - - if (!list_empty_careful(&wait.entry)) { - write_lock_irq(&ep->lock); - __remove_wait_queue(&ep->wq, &wait); write_unlock_irq(&ep->lock); - } -send_events: - if (fatal_signal_pending(current)) { + if (!eavail) + timed_out = !schedule_hrtimeout_range(to, slack, + HRTIMER_MODE_ABS); + __set_current_state(TASK_RUNNING); + /* - * Always short-circuit for fatal signals to allow - * threads to make a timely exit without the chance of - * finding more events available and fetching - * repeatedly. + * We were woken up, thus go and try to harvest some events. + * If timed out and still on the wait queue, recheck eavail + * carefully under lock, below. */ - res = -EINTR; - } - /* - * Try to transfer events to user space. In case we get 0 events and - * there's still timeout left over, we go trying again in search of - * more luck. - */ - if (!res && eavail && - !(res = ep_send_events(ep, events, maxevents)) && !timed_out) - goto fetch_events; + eavail = 1; - return res; + if (!list_empty_careful(&wait.entry)) { + write_lock_irq(&ep->lock); + /* + * If the thread timed out and is not on the wait queue, + * it means that the thread was woken up after its + * timeout expired before it could reacquire the lock. + * Thus, when wait.entry is empty, it needs to harvest + * events. + */ + if (timed_out) + eavail = list_empty(&wait.entry); + __remove_wait_queue(&ep->wq, &wait); + write_unlock_irq(&ep->lock); + } + } } /** @@ -2176,7 +2183,7 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, * part of the user space epoll_wait(2). */ static int do_epoll_wait(int epfd, struct epoll_event __user *events, - int maxevents, int timeout) + int maxevents, struct timespec64 *to) { int error; struct fd f; @@ -2210,7 +2217,7 @@ static int do_epoll_wait(int epfd, struct epoll_event __user *events, ep = f.file->private_data; /* Time to fish for events ... */ - error = ep_poll(ep, events, maxevents, timeout); + error = ep_poll(ep, events, maxevents, to); error_fput: fdput(f); @@ -2220,16 +2227,19 @@ error_fput: SYSCALL_DEFINE4(epoll_wait, int, epfd, struct epoll_event __user *, events, int, maxevents, int, timeout) { - return do_epoll_wait(epfd, events, maxevents, timeout); + struct timespec64 to; + + return do_epoll_wait(epfd, events, maxevents, + ep_timeout_to_timespec(&to, timeout)); } /* * Implement the event wait interface for the eventpoll file. It is the kernel * part of the user space epoll_pwait(2). */ -SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, - int, maxevents, int, timeout, const sigset_t __user *, sigmask, - size_t, sigsetsize) +static int do_epoll_pwait(int epfd, struct epoll_event __user *events, + int maxevents, struct timespec64 *to, + const sigset_t __user *sigmask, size_t sigsetsize) { int error; @@ -2241,18 +2251,47 @@ SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, if (error) return error; - error = do_epoll_wait(epfd, events, maxevents, timeout); + error = do_epoll_wait(epfd, events, maxevents, to); + restore_saved_sigmask_unless(error == -EINTR); return error; } +SYSCALL_DEFINE6(epoll_pwait, int, epfd, struct epoll_event __user *, events, + int, maxevents, int, timeout, const sigset_t __user *, sigmask, + size_t, sigsetsize) +{ + struct timespec64 to; + + return do_epoll_pwait(epfd, events, maxevents, + ep_timeout_to_timespec(&to, timeout), + sigmask, sigsetsize); +} + +SYSCALL_DEFINE6(epoll_pwait2, int, epfd, struct epoll_event __user *, events, + int, maxevents, const struct __kernel_timespec __user *, timeout, + const sigset_t __user *, sigmask, size_t, sigsetsize) +{ + struct timespec64 ts, *to = NULL; + + if (timeout) { + if (get_timespec64(&ts, timeout)) + return -EFAULT; + to = &ts; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) + return -EINVAL; + } + + return do_epoll_pwait(epfd, events, maxevents, to, + sigmask, sigsetsize); +} + #ifdef CONFIG_COMPAT -COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, - struct epoll_event __user *, events, - int, maxevents, int, timeout, - const compat_sigset_t __user *, sigmask, - compat_size_t, sigsetsize) +static int do_compat_epoll_pwait(int epfd, struct epoll_event __user *events, + int maxevents, struct timespec64 *timeout, + const compat_sigset_t __user *sigmask, + compat_size_t sigsetsize) { long err; @@ -2265,10 +2304,46 @@ COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, return err; err = do_epoll_wait(epfd, events, maxevents, timeout); + restore_saved_sigmask_unless(err == -EINTR); return err; } + +COMPAT_SYSCALL_DEFINE6(epoll_pwait, int, epfd, + struct epoll_event __user *, events, + int, maxevents, int, timeout, + const compat_sigset_t __user *, sigmask, + compat_size_t, sigsetsize) +{ + struct timespec64 to; + + return do_compat_epoll_pwait(epfd, events, maxevents, + ep_timeout_to_timespec(&to, timeout), + sigmask, sigsetsize); +} + +COMPAT_SYSCALL_DEFINE6(epoll_pwait2, int, epfd, + struct epoll_event __user *, events, + int, maxevents, + const struct __kernel_timespec __user *, timeout, + const compat_sigset_t __user *, sigmask, + compat_size_t, sigsetsize) +{ + struct timespec64 ts, *to = NULL; + + if (timeout) { + if (get_timespec64(&ts, timeout)) + return -EFAULT; + to = &ts; + if (poll_select_set_timeout(to, ts.tv_sec, ts.tv_nsec)) + return -EINVAL; + } + + return do_compat_epoll_pwait(epfd, events, maxevents, to, + sigmask, sigsetsize); +} + #endif static int __init eventpoll_init(void) diff --git a/fs/file.c b/fs/file.c index 8434e0afecc7..c0b60961c672 100644 --- a/fs/file.c +++ b/fs/file.c @@ -694,8 +694,10 @@ int __close_range(unsigned fd, unsigned max_fd, unsigned int flags) * If the requested range is greater than the current maximum, * we're closing everything so only copy all file descriptors * beneath the lowest file descriptor. + * If the caller requested all fds to be made cloexec copy all + * of the file descriptors since they still want to use them. */ - if (max_fd >= cur_max) + if (!(flags & CLOSE_RANGE_CLOEXEC) && (max_fd >= cur_max)) max_unshare_fds = fd; ret = unshare_fd(CLONE_FILES, max_unshare_fds, &fds); diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c index 35a6fd103761..d87a5bc3607b 100644 --- a/fs/gfs2/glock.c +++ b/fs/gfs2/glock.c @@ -857,12 +857,6 @@ static void delete_work_func(struct work_struct *work) clear_bit(GLF_PENDING_DELETE, &gl->gl_flags); spin_unlock(&gl->gl_lockref.lock); - /* If someone's using this glock to create a new dinode, the block must - have been freed by another node, then re-used, in which case our - iopen callback is too late after the fact. Ignore it. */ - if (test_bit(GLF_INODE_CREATING, &gl->gl_flags)) - goto out; - if (test_bit(GLF_DEMOTE, &gl->gl_flags)) { /* * If we can evict the inode, give the remote node trying to @@ -2112,8 +2106,6 @@ static const char *gflags2str(char *buf, const struct gfs2_glock *gl) *p++ = 'o'; if (test_bit(GLF_BLOCKING, gflags)) *p++ = 'b'; - if (test_bit(GLF_INODE_CREATING, gflags)) - *p++ = 'c'; if (test_bit(GLF_PENDING_DELETE, gflags)) *p++ = 'P'; if (test_bit(GLF_FREEING, gflags)) diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index f8858d995b24..8e1ab8ed4abc 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -348,7 +348,6 @@ enum { GLF_LRU = 13, GLF_OBJECT = 14, /* Used only for tracing */ GLF_BLOCKING = 15, - GLF_INODE_CREATING = 16, /* Inode creation occurring */ GLF_PENDING_DELETE = 17, GLF_FREEING = 18, /* Wait for glock to be freed */ }; diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index 65ae4fc28ede..c1b77e8d6b1c 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -36,6 +36,10 @@ #include "super.h" #include "glops.h" +static const struct inode_operations gfs2_file_iops; +static const struct inode_operations gfs2_dir_iops; +static const struct inode_operations gfs2_symlink_iops; + static int iget_test(struct inode *inode, void *opaque) { u64 no_addr = *(u64 *)opaque; @@ -605,7 +609,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, struct inode *inode = NULL; struct gfs2_inode *dip = GFS2_I(dir), *ip; struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode); - struct gfs2_glock *io_gl = NULL; + struct gfs2_glock *io_gl; int error, free_vfs_inode = 1; u32 aflags = 0; unsigned blocks = 1; @@ -746,8 +750,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, init_dinode(dip, ip, symname); gfs2_trans_end(sdp); - BUG_ON(test_and_set_bit(GLF_INODE_CREATING, &io_gl->gl_flags)); - error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh); if (error) goto fail_gunlock2; @@ -793,7 +795,6 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry, gfs2_glock_dq_uninit(ghs); gfs2_qa_put(ip); gfs2_glock_dq_uninit(ghs + 1); - clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); gfs2_glock_put(io_gl); gfs2_qa_put(dip); return error; @@ -802,7 +803,6 @@ fail_gunlock3: glock_clear_object(io_gl, ip); gfs2_glock_dq_uninit(&ip->i_iopen_gh); fail_gunlock2: - clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags); glock_clear_object(io_gl, ip); gfs2_glock_put(io_gl); fail_free_inode: @@ -2136,7 +2136,7 @@ static int gfs2_update_time(struct inode *inode, struct timespec64 *time, return generic_update_time(inode, time, flags); } -const struct inode_operations gfs2_file_iops = { +static const struct inode_operations gfs2_file_iops = { .permission = gfs2_permission, .setattr = gfs2_setattr, .getattr = gfs2_getattr, @@ -2147,7 +2147,7 @@ const struct inode_operations gfs2_file_iops = { .update_time = gfs2_update_time, }; -const struct inode_operations gfs2_dir_iops = { +static const struct inode_operations gfs2_dir_iops = { .create = gfs2_create, .lookup = gfs2_lookup, .link = gfs2_link, @@ -2168,7 +2168,7 @@ const struct inode_operations gfs2_dir_iops = { .atomic_open = gfs2_atomic_open, }; -const struct inode_operations gfs2_symlink_iops = { +static const struct inode_operations gfs2_symlink_iops = { .get_link = gfs2_get_link, .permission = gfs2_permission, .setattr = gfs2_setattr, diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h index b52ecf4ffe63..8073b8d2c7fa 100644 --- a/fs/gfs2/inode.h +++ b/fs/gfs2/inode.h @@ -107,9 +107,6 @@ extern int gfs2_open_common(struct inode *inode, struct file *file); extern loff_t gfs2_seek_data(struct file *file, loff_t offset); extern loff_t gfs2_seek_hole(struct file *file, loff_t offset); -extern const struct inode_operations gfs2_file_iops; -extern const struct inode_operations gfs2_dir_iops; -extern const struct inode_operations gfs2_symlink_iops; extern const struct file_operations gfs2_file_fops_nolock; extern const struct file_operations gfs2_dir_fops_nolock; diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index b3d951ab8068..2f56acc41c04 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -353,7 +353,6 @@ int gfs2_statfs_sync(struct super_block *sb, int type) struct buffer_head *m_bh, *l_bh; int error; - sb_start_write(sb); error = gfs2_glock_nq_init(m_ip->i_gl, LM_ST_EXCLUSIVE, GL_NOCACHE, &gh); if (error) @@ -392,7 +391,6 @@ out_bh: out_unlock: gfs2_glock_dq_uninit(&gh); out: - sb_end_write(sb); return error; } diff --git a/fs/gfs2/util.c b/fs/gfs2/util.c index 0fba3bf64189..a374397f4273 100644 --- a/fs/gfs2/util.c +++ b/fs/gfs2/util.c @@ -137,7 +137,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp) gfs2_glock_dq(&sdp->sd_jinode_gh); if (test_bit(SDF_FS_FROZEN, &sdp->sd_flags)) { /* Make sure gfs2_unfreeze works if partially-frozen */ - flush_workqueue(gfs2_freeze_wq); + flush_work(&sdp->sd_freeze_work); atomic_set(&sdp->sd_freeze_state, SFS_FROZEN); thaw_super(sdp->sd_vfs); } else { diff --git a/fs/gfs2/util.h b/fs/gfs2/util.h index d7562981b3a0..a4443dd8a94b 100644 --- a/fs/gfs2/util.h +++ b/fs/gfs2/util.h @@ -151,7 +151,7 @@ extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd, bool verbose); #define gfs2_io_error(sdp) \ -gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__); +gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__) void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, @@ -159,10 +159,10 @@ void gfs2_io_error_bh_i(struct gfs2_sbd *sdp, struct buffer_head *bh, bool withdraw); #define gfs2_io_error_bh_wd(sdp, bh) \ -gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, true); +gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, true) #define gfs2_io_error_bh(sdp, bh) \ -gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, false); +gfs2_io_error_bh_i((sdp), (bh), __func__, __FILE__, __LINE__, false) extern struct kmem_cache *gfs2_glock_cachep; diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h index 4fd9be4cbc98..40e203b6e5c1 100644 --- a/fs/jffs2/debug.h +++ b/fs/jffs2/debug.h @@ -13,6 +13,7 @@ #ifndef _JFFS2_DEBUG_H_ #define _JFFS2_DEBUG_H_ +#include <linux/printk.h> #include <linux/sched.h> #ifndef CONFIG_JFFS2_FS_DEBUG @@ -99,73 +100,73 @@ do { \ #ifdef JFFS2_DBG_READINODE_MESSAGES #define dbg_readinode(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_readinode(fmt, ...) +#define dbg_readinode(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif #ifdef JFFS2_DBG_READINODE2_MESSAGES #define dbg_readinode2(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_readinode2(fmt, ...) +#define dbg_readinode2(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* Fragtree build debugging messages */ #ifdef JFFS2_DBG_FRAGTREE_MESSAGES #define dbg_fragtree(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_fragtree(fmt, ...) +#define dbg_fragtree(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif #ifdef JFFS2_DBG_FRAGTREE2_MESSAGES #define dbg_fragtree2(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_fragtree2(fmt, ...) +#define dbg_fragtree2(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* Directory entry list manilulation debugging messages */ #ifdef JFFS2_DBG_DENTLIST_MESSAGES #define dbg_dentlist(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_dentlist(fmt, ...) +#define dbg_dentlist(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* Print the messages about manipulating node_refs */ #ifdef JFFS2_DBG_NODEREF_MESSAGES #define dbg_noderef(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_noderef(fmt, ...) +#define dbg_noderef(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* Manipulations with the list of inodes (JFFS2 inocache) */ #ifdef JFFS2_DBG_INOCACHE_MESSAGES #define dbg_inocache(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_inocache(fmt, ...) +#define dbg_inocache(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* Summary debugging messages */ #ifdef JFFS2_DBG_SUMMARY_MESSAGES #define dbg_summary(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_summary(fmt, ...) +#define dbg_summary(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* File system build messages */ #ifdef JFFS2_DBG_FSBUILD_MESSAGES #define dbg_fsbuild(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_fsbuild(fmt, ...) +#define dbg_fsbuild(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* Watch the object allocations */ #ifdef JFFS2_DBG_MEMALLOC_MESSAGES #define dbg_memalloc(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_memalloc(fmt, ...) +#define dbg_memalloc(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* Watch the XATTR subsystem */ #ifdef JFFS2_DBG_XATTR_MESSAGES #define dbg_xattr(fmt, ...) JFFS2_DEBUG(fmt, ##__VA_ARGS__) #else -#define dbg_xattr(fmt, ...) +#define dbg_xattr(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif /* "Sanity" checks */ diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h index 778275f48a87..5a7091746f68 100644 --- a/fs/jffs2/jffs2_fs_sb.h +++ b/fs/jffs2/jffs2_fs_sb.h @@ -38,6 +38,7 @@ struct jffs2_mount_opts { * users. This is implemented simply by means of not allowing the * latter users to write to the file system if the amount if the * available space is less then 'rp_size'. */ + bool set_rp_size; unsigned int rp_size; }; diff --git a/fs/jffs2/nodelist.h b/fs/jffs2/nodelist.h index 8ff4d1a1e774..2e98fa277dab 100644 --- a/fs/jffs2/nodelist.h +++ b/fs/jffs2/nodelist.h @@ -349,14 +349,14 @@ static inline struct jffs2_node_frag *frag_last(struct rb_root *root) #define frag_parent(frag) rb_entry(rb_parent(&(frag)->rb), struct jffs2_node_frag, rb) #define frag_left(frag) rb_entry((frag)->rb.rb_left, struct jffs2_node_frag, rb) #define frag_right(frag) rb_entry((frag)->rb.rb_right, struct jffs2_node_frag, rb) -#define frag_erase(frag, list) rb_erase(&frag->rb, list); +#define frag_erase(frag, list) rb_erase(&frag->rb, list) #define tn_next(tn) rb_entry(rb_next(&(tn)->rb), struct jffs2_tmp_dnode_info, rb) #define tn_prev(tn) rb_entry(rb_prev(&(tn)->rb), struct jffs2_tmp_dnode_info, rb) #define tn_parent(tn) rb_entry(rb_parent(&(tn)->rb), struct jffs2_tmp_dnode_info, rb) #define tn_left(tn) rb_entry((tn)->rb.rb_left, struct jffs2_tmp_dnode_info, rb) #define tn_right(tn) rb_entry((tn)->rb.rb_right, struct jffs2_tmp_dnode_info, rb) -#define tn_erase(tn, list) rb_erase(&tn->rb, list); +#define tn_erase(tn, list) rb_erase(&tn->rb, list) #define tn_last(list) rb_entry(rb_last(list), struct jffs2_tmp_dnode_info, rb) #define tn_first(list) rb_entry(rb_first(list), struct jffs2_tmp_dnode_info, rb) diff --git a/fs/jffs2/readinode.c b/fs/jffs2/readinode.c index 2f6f0b140c05..03b4f99614be 100644 --- a/fs/jffs2/readinode.c +++ b/fs/jffs2/readinode.c @@ -672,6 +672,22 @@ static inline int read_direntry(struct jffs2_sb_info *c, struct jffs2_raw_node_r jffs2_free_full_dirent(fd); return -EIO; } + +#ifdef CONFIG_JFFS2_SUMMARY + /* + * we use CONFIG_JFFS2_SUMMARY because without it, we + * have checked it while mounting + */ + crc = crc32(0, fd->name, rd->nsize); + if (unlikely(crc != je32_to_cpu(rd->name_crc))) { + JFFS2_NOTICE("name CRC failed on dirent node at" + "%#08x: read %#08x,calculated %#08x\n", + ref_offset(ref), je32_to_cpu(rd->node_crc), crc); + jffs2_mark_node_obsolete(c, ref); + jffs2_free_full_dirent(fd); + return 0; + } +#endif } fd->nhash = full_name_hash(NULL, fd->name, rd->nsize); diff --git a/fs/jffs2/super.c b/fs/jffs2/super.c index 05d7878dfad1..81ca58c10b72 100644 --- a/fs/jffs2/super.c +++ b/fs/jffs2/super.c @@ -88,7 +88,7 @@ static int jffs2_show_options(struct seq_file *s, struct dentry *root) if (opts->override_compr) seq_printf(s, ",compr=%s", jffs2_compr_name(opts->compr)); - if (opts->rp_size) + if (opts->set_rp_size) seq_printf(s, ",rp_size=%u", opts->rp_size / 1024); return 0; @@ -202,11 +202,8 @@ static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param) case Opt_rp_size: if (result.uint_32 > UINT_MAX / 1024) return invalf(fc, "jffs2: rp_size unrepresentable"); - opt = result.uint_32 * 1024; - if (opt > c->mtd->size) - return invalf(fc, "jffs2: Too large reserve pool specified, max is %llu KB", - c->mtd->size / 1024); - c->mount_opts.rp_size = opt; + c->mount_opts.rp_size = result.uint_32 * 1024; + c->mount_opts.set_rp_size = true; break; default: return -EINVAL; @@ -215,11 +212,30 @@ static int jffs2_parse_param(struct fs_context *fc, struct fs_parameter *param) return 0; } +static inline void jffs2_update_mount_opts(struct fs_context *fc) +{ + struct jffs2_sb_info *new_c = fc->s_fs_info; + struct jffs2_sb_info *c = JFFS2_SB_INFO(fc->root->d_sb); + + mutex_lock(&c->alloc_sem); + if (new_c->mount_opts.override_compr) { + c->mount_opts.override_compr = new_c->mount_opts.override_compr; + c->mount_opts.compr = new_c->mount_opts.compr; + } + if (new_c->mount_opts.set_rp_size) { + c->mount_opts.set_rp_size = new_c->mount_opts.set_rp_size; + c->mount_opts.rp_size = new_c->mount_opts.rp_size; + } + mutex_unlock(&c->alloc_sem); +} + static int jffs2_reconfigure(struct fs_context *fc) { struct super_block *sb = fc->root->d_sb; sync_filesystem(sb); + jffs2_update_mount_opts(fc); + return jffs2_do_remount_fs(sb, fc); } @@ -249,6 +265,10 @@ static int jffs2_fill_super(struct super_block *sb, struct fs_context *fc) c->mtd = sb->s_mtd; c->os_priv = sb; + if (c->mount_opts.rp_size > c->mtd->size) + return invalf(fc, "jffs2: Too large reserve pool specified, max is %llu KB", + c->mtd->size / 1024); + /* Initialize JFFS2 superblock locks, the further initialization will * be done later */ mutex_init(&c->alloc_sem); diff --git a/fs/ubifs/auth.c b/fs/ubifs/auth.c index 0886d835f597..51a7c8c2c3f0 100644 --- a/fs/ubifs/auth.c +++ b/fs/ubifs/auth.c @@ -337,8 +337,10 @@ int ubifs_init_authentication(struct ubifs_info *c) c->authenticated = true; c->log_hash = ubifs_hash_get_desc(c); - if (IS_ERR(c->log_hash)) + if (IS_ERR(c->log_hash)) { + err = PTR_ERR(c->log_hash); goto out_free_hmac; + } err = 0; diff --git a/fs/ubifs/commit.c b/fs/ubifs/commit.c index b5cdac9b0368..c4fc1047fc07 100644 --- a/fs/ubifs/commit.c +++ b/fs/ubifs/commit.c @@ -701,13 +701,13 @@ out: out_dump: ubifs_err(c, "dumping index node (iip=%d)", i->iip); - ubifs_dump_node(c, idx); + ubifs_dump_node(c, idx, ubifs_idx_node_sz(c, c->fanout)); list_del(&i->list); kfree(i); if (!list_empty(&list)) { i = list_entry(list.prev, struct idx_node, list); ubifs_err(c, "dumping parent index node"); - ubifs_dump_node(c, &i->idx); + ubifs_dump_node(c, &i->idx, ubifs_idx_node_sz(c, c->fanout)); } out_free: while (!list_empty(&list)) { diff --git a/fs/ubifs/debug.c b/fs/ubifs/debug.c index ebff43f8009c..1bbb9fe661b1 100644 --- a/fs/ubifs/debug.c +++ b/fs/ubifs/debug.c @@ -291,9 +291,9 @@ void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode) kfree(pdent); } -void ubifs_dump_node(const struct ubifs_info *c, const void *node) +void ubifs_dump_node(const struct ubifs_info *c, const void *node, int node_len) { - int i, n; + int i, n, type, safe_len, max_node_len, min_node_len; union ubifs_key key; const struct ubifs_ch *ch = node; char key_buf[DBG_KEY_BUF_LEN]; @@ -306,10 +306,40 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) return; } + /* Skip dumping unknown type node */ + type = ch->node_type; + if (type < 0 || type >= UBIFS_NODE_TYPES_CNT) { + pr_err("node type %d was not recognized\n", type); + return; + } + spin_lock(&dbg_lock); dump_ch(node); - switch (ch->node_type) { + if (c->ranges[type].max_len == 0) { + max_node_len = min_node_len = c->ranges[type].len; + } else { + max_node_len = c->ranges[type].max_len; + min_node_len = c->ranges[type].min_len; + } + safe_len = le32_to_cpu(ch->len); + safe_len = safe_len > 0 ? safe_len : 0; + safe_len = min3(safe_len, max_node_len, node_len); + if (safe_len < min_node_len) { + pr_err("node len(%d) is too short for %s, left %d bytes:\n", + safe_len, dbg_ntype(type), + safe_len > UBIFS_CH_SZ ? + safe_len - (int)UBIFS_CH_SZ : 0); + if (safe_len > UBIFS_CH_SZ) + print_hex_dump(KERN_ERR, "", DUMP_PREFIX_OFFSET, 32, 1, + (void *)node + UBIFS_CH_SZ, + safe_len - UBIFS_CH_SZ, 0); + goto out_unlock; + } + if (safe_len != le32_to_cpu(ch->len)) + pr_err("\ttruncated node length %d\n", safe_len); + + switch (type) { case UBIFS_PAD_NODE: { const struct ubifs_pad_node *pad = node; @@ -453,7 +483,8 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) pr_err("\tnlen %d\n", nlen); pr_err("\tname "); - if (nlen > UBIFS_MAX_NLEN) + if (nlen > UBIFS_MAX_NLEN || + nlen > safe_len - UBIFS_DENT_NODE_SZ) pr_err("(bad name length, not printing, bad or corrupted node)"); else { for (i = 0; i < nlen && dent->name[i]; i++) @@ -467,7 +498,6 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) case UBIFS_DATA_NODE: { const struct ubifs_data_node *dn = node; - int dlen = le32_to_cpu(ch->len) - UBIFS_DATA_NODE_SZ; key_read(c, &dn->key, &key); pr_err("\tkey %s\n", @@ -475,10 +505,13 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) pr_err("\tsize %u\n", le32_to_cpu(dn->size)); pr_err("\tcompr_typ %d\n", (int)le16_to_cpu(dn->compr_type)); - pr_err("\tdata size %d\n", dlen); - pr_err("\tdata:\n"); + pr_err("\tdata size %u\n", + le32_to_cpu(ch->len) - (unsigned int)UBIFS_DATA_NODE_SZ); + pr_err("\tdata (length = %d):\n", + safe_len - (int)UBIFS_DATA_NODE_SZ); print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1, - (void *)&dn->data, dlen, 0); + (void *)&dn->data, + safe_len - (int)UBIFS_DATA_NODE_SZ, 0); break; } case UBIFS_TRUN_NODE: @@ -495,13 +528,16 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) case UBIFS_IDX_NODE: { const struct ubifs_idx_node *idx = node; + int max_child_cnt = (safe_len - UBIFS_IDX_NODE_SZ) / + (ubifs_idx_node_sz(c, 1) - + UBIFS_IDX_NODE_SZ); - n = le16_to_cpu(idx->child_cnt); - pr_err("\tchild_cnt %d\n", n); + n = min_t(int, le16_to_cpu(idx->child_cnt), max_child_cnt); + pr_err("\tchild_cnt %d\n", (int)le16_to_cpu(idx->child_cnt)); pr_err("\tlevel %d\n", (int)le16_to_cpu(idx->level)); pr_err("\tBranches:\n"); - for (i = 0; i < n && i < c->fanout - 1; i++) { + for (i = 0; i < n && i < c->fanout; i++) { const struct ubifs_branch *br; br = ubifs_idx_branch(c, idx, i); @@ -525,7 +561,7 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) le64_to_cpu(orph->cmt_no) & LLONG_MAX); pr_err("\tlast node flag %llu\n", (unsigned long long)(le64_to_cpu(orph->cmt_no)) >> 63); - n = (le32_to_cpu(ch->len) - UBIFS_ORPH_NODE_SZ) >> 3; + n = (safe_len - UBIFS_ORPH_NODE_SZ) >> 3; pr_err("\t%d orphan inode numbers:\n", n); for (i = 0; i < n; i++) pr_err("\t ino %llu\n", @@ -537,9 +573,10 @@ void ubifs_dump_node(const struct ubifs_info *c, const void *node) break; } default: - pr_err("node type %d was not recognized\n", - (int)ch->node_type); + pr_err("node type %d was not recognized\n", type); } + +out_unlock: spin_unlock(&dbg_lock); } @@ -764,7 +801,7 @@ void ubifs_dump_lpt_info(struct ubifs_info *c) pr_err("\tnnode_sz: %d\n", c->nnode_sz); pr_err("\tltab_sz: %d\n", c->ltab_sz); pr_err("\tlsave_sz: %d\n", c->lsave_sz); - pr_err("\tbig_lpt: %d\n", c->big_lpt); + pr_err("\tbig_lpt: %u\n", c->big_lpt); pr_err("\tlpt_hght: %d\n", c->lpt_hght); pr_err("\tpnode_cnt: %d\n", c->pnode_cnt); pr_err("\tnnode_cnt: %d\n", c->nnode_cnt); @@ -791,22 +828,6 @@ void ubifs_dump_lpt_info(struct ubifs_info *c) spin_unlock(&dbg_lock); } -void ubifs_dump_sleb(const struct ubifs_info *c, - const struct ubifs_scan_leb *sleb, int offs) -{ - struct ubifs_scan_node *snod; - - pr_err("(pid %d) start dumping scanned data from LEB %d:%d\n", - current->pid, sleb->lnum, offs); - - list_for_each_entry(snod, &sleb->nodes, list) { - cond_resched(); - pr_err("Dumping node at LEB %d:%d len %d\n", - sleb->lnum, snod->offs, snod->len); - ubifs_dump_node(c, snod->node); - } -} - void ubifs_dump_leb(const struct ubifs_info *c, int lnum) { struct ubifs_scan_leb *sleb; @@ -834,7 +855,7 @@ void ubifs_dump_leb(const struct ubifs_info *c, int lnum) cond_resched(); pr_err("Dumping node at LEB %d:%d len %d\n", lnum, snod->offs, snod->len); - ubifs_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); } pr_err("(pid %d) finish dumping LEB %d\n", current->pid, lnum); @@ -1012,7 +1033,7 @@ void dbg_save_space_info(struct ubifs_info *c) * * This function compares current flash space information with the information * which was saved when the 'dbg_save_space_info()' function was called. - * Returns zero if the information has not changed, and %-EINVAL it it has + * Returns zero if the information has not changed, and %-EINVAL if it has * changed. */ int dbg_check_space_info(struct ubifs_info *c) @@ -1212,7 +1233,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, ubifs_err(c, "but it should have key %s according to tnc", dbg_snprintf_key(c, &zbr1->key, key_buf, DBG_KEY_BUF_LEN)); - ubifs_dump_node(c, dent1); + ubifs_dump_node(c, dent1, UBIFS_MAX_DENT_NODE_SZ); goto out_free; } @@ -1224,7 +1245,7 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, ubifs_err(c, "but it should have key %s according to tnc", dbg_snprintf_key(c, &zbr2->key, key_buf, DBG_KEY_BUF_LEN)); - ubifs_dump_node(c, dent2); + ubifs_dump_node(c, dent2, UBIFS_MAX_DENT_NODE_SZ); goto out_free; } @@ -1243,9 +1264,9 @@ static int dbg_check_key_order(struct ubifs_info *c, struct ubifs_zbranch *zbr1, dbg_snprintf_key(c, &key, key_buf, DBG_KEY_BUF_LEN)); ubifs_msg(c, "first node at %d:%d\n", zbr1->lnum, zbr1->offs); - ubifs_dump_node(c, dent1); + ubifs_dump_node(c, dent1, UBIFS_MAX_DENT_NODE_SZ); ubifs_msg(c, "second node at %d:%d\n", zbr2->lnum, zbr2->offs); - ubifs_dump_node(c, dent2); + ubifs_dump_node(c, dent2, UBIFS_MAX_DENT_NODE_SZ); out_free: kfree(dent2); @@ -2110,7 +2131,7 @@ out: out_dump: ubifs_msg(c, "dump of node at LEB %d:%d", zbr->lnum, zbr->offs); - ubifs_dump_node(c, node); + ubifs_dump_node(c, node, zbr->len); out_free: kfree(node); return err; @@ -2243,7 +2264,7 @@ out_dump: ubifs_msg(c, "dump of the inode %lu sitting in LEB %d:%d", (unsigned long)fscki->inum, zbr->lnum, zbr->offs); - ubifs_dump_node(c, ino); + ubifs_dump_node(c, ino, zbr->len); kfree(ino); return -EINVAL; } @@ -2314,12 +2335,12 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) if (sa->type != UBIFS_DATA_NODE) { ubifs_err(c, "bad node type %d", sa->type); - ubifs_dump_node(c, sa->node); + ubifs_dump_node(c, sa->node, c->leb_size - sa->offs); return -EINVAL; } if (sb->type != UBIFS_DATA_NODE) { ubifs_err(c, "bad node type %d", sb->type); - ubifs_dump_node(c, sb->node); + ubifs_dump_node(c, sb->node, c->leb_size - sb->offs); return -EINVAL; } @@ -2350,8 +2371,8 @@ int dbg_check_data_nodes_order(struct ubifs_info *c, struct list_head *head) return 0; error_dump: - ubifs_dump_node(c, sa->node); - ubifs_dump_node(c, sb->node); + ubifs_dump_node(c, sa->node, c->leb_size - sa->offs); + ubifs_dump_node(c, sb->node, c->leb_size - sb->offs); return -EINVAL; } @@ -2382,13 +2403,13 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) if (sa->type != UBIFS_INO_NODE && sa->type != UBIFS_DENT_NODE && sa->type != UBIFS_XENT_NODE) { ubifs_err(c, "bad node type %d", sa->type); - ubifs_dump_node(c, sa->node); + ubifs_dump_node(c, sa->node, c->leb_size - sa->offs); return -EINVAL; } if (sb->type != UBIFS_INO_NODE && sb->type != UBIFS_DENT_NODE && sb->type != UBIFS_XENT_NODE) { ubifs_err(c, "bad node type %d", sb->type); - ubifs_dump_node(c, sb->node); + ubifs_dump_node(c, sb->node, c->leb_size - sb->offs); return -EINVAL; } @@ -2438,11 +2459,10 @@ int dbg_check_nondata_nodes_order(struct ubifs_info *c, struct list_head *head) error_dump: ubifs_msg(c, "dumping first node"); - ubifs_dump_node(c, sa->node); + ubifs_dump_node(c, sa->node, c->leb_size - sa->offs); ubifs_msg(c, "dumping second node"); - ubifs_dump_node(c, sb->node); + ubifs_dump_node(c, sb->node, c->leb_size - sb->offs); return -EINVAL; - return 0; } static inline int chance(unsigned int n, unsigned int out_of) diff --git a/fs/ubifs/debug.h b/fs/ubifs/debug.h index 7763639a426b..ed966108da80 100644 --- a/fs/ubifs/debug.h +++ b/fs/ubifs/debug.h @@ -242,7 +242,8 @@ const char *dbg_get_key_dump(const struct ubifs_info *c, const char *dbg_snprintf_key(const struct ubifs_info *c, const union ubifs_key *key, char *buffer, int len); void ubifs_dump_inode(struct ubifs_info *c, const struct inode *inode); -void ubifs_dump_node(const struct ubifs_info *c, const void *node); +void ubifs_dump_node(const struct ubifs_info *c, const void *node, + int node_len); void ubifs_dump_budget_req(const struct ubifs_budget_req *req); void ubifs_dump_lstats(const struct ubifs_lp_stats *lst); void ubifs_dump_budg(struct ubifs_info *c, const struct ubifs_budg_info *bi); @@ -251,8 +252,6 @@ void ubifs_dump_lprop(const struct ubifs_info *c, void ubifs_dump_lprops(struct ubifs_info *c); void ubifs_dump_lpt_info(struct ubifs_info *c); void ubifs_dump_leb(const struct ubifs_info *c, int lnum); -void ubifs_dump_sleb(const struct ubifs_info *c, - const struct ubifs_scan_leb *sleb, int offs); void ubifs_dump_znode(const struct ubifs_info *c, const struct ubifs_znode *znode); void ubifs_dump_heap(struct ubifs_info *c, struct ubifs_lpt_heap *heap, diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 7949d7c9aa8c..9a6b8660425a 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -844,7 +844,7 @@ out_fname: * * This function checks if directory @dir is empty. Returns zero if the * directory is empty, %-ENOTEMPTY if it is not, and other negative error codes - * in case of of errors. + * in case of errors. */ int ubifs_check_dir_empty(struct inode *dir) { @@ -1632,9 +1632,7 @@ const struct inode_operations ubifs_dir_inode_operations = { .rename = ubifs_rename, .setattr = ubifs_setattr, .getattr = ubifs_getattr, -#ifdef CONFIG_UBIFS_FS_XATTR .listxattr = ubifs_listxattr, -#endif .update_time = ubifs_update_time, .tmpfile = ubifs_tmpfile, }; diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c index b77d1637bbbc..2bc7780d2963 100644 --- a/fs/ubifs/file.c +++ b/fs/ubifs/file.c @@ -92,7 +92,7 @@ static int read_block(struct inode *inode, void *addr, unsigned int block, dump: ubifs_err(c, "bad data node (block %u, inode %lu)", block, inode->i_ino); - ubifs_dump_node(c, dn); + ubifs_dump_node(c, dn, UBIFS_MAX_DATA_NODE_SZ); return -EINVAL; } @@ -205,7 +205,7 @@ static void release_new_page_budget(struct ubifs_info *c) * @c: UBIFS file-system description object * * This is a helper function which releases budget corresponding to the budget - * of changing one one page of data which already exists on the flash media. + * of changing one page of data which already exists on the flash media. */ static void release_existing_page_budget(struct ubifs_info *c) { @@ -1645,9 +1645,7 @@ const struct address_space_operations ubifs_file_address_operations = { const struct inode_operations ubifs_file_inode_operations = { .setattr = ubifs_setattr, .getattr = ubifs_getattr, -#ifdef CONFIG_UBIFS_FS_XATTR .listxattr = ubifs_listxattr, -#endif .update_time = ubifs_update_time, }; @@ -1655,9 +1653,7 @@ const struct inode_operations ubifs_symlink_inode_operations = { .get_link = ubifs_get_link, .setattr = ubifs_setattr, .getattr = ubifs_getattr, -#ifdef CONFIG_UBIFS_FS_XATTR .listxattr = ubifs_listxattr, -#endif .update_time = ubifs_update_time, }; diff --git a/fs/ubifs/io.c b/fs/ubifs/io.c index 7e4bfaf2871f..00b61dba62b7 100644 --- a/fs/ubifs/io.c +++ b/fs/ubifs/io.c @@ -198,6 +198,7 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum) * ubifs_check_node - check node. * @c: UBIFS file-system description object * @buf: node to check + * @len: node length * @lnum: logical eraseblock number * @offs: offset within the logical eraseblock * @quiet: print no messages @@ -222,10 +223,10 @@ int ubifs_is_mapped(const struct ubifs_info *c, int lnum) * This function returns zero in case of success and %-EUCLEAN in case of bad * CRC or magic. */ -int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, - int offs, int quiet, int must_chk_crc) +int ubifs_check_node(const struct ubifs_info *c, const void *buf, int len, + int lnum, int offs, int quiet, int must_chk_crc) { - int err = -EINVAL, type, node_len, dump_node = 1; + int err = -EINVAL, type, node_len; uint32_t crc, node_crc, magic; const struct ubifs_ch *ch = buf; @@ -278,22 +279,10 @@ int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, out_len: if (!quiet) ubifs_err(c, "bad node length %d", node_len); - if (type == UBIFS_DATA_NODE && node_len > UBIFS_DATA_NODE_SZ) - dump_node = 0; out: if (!quiet) { ubifs_err(c, "bad node at LEB %d:%d", lnum, offs); - if (dump_node) { - ubifs_dump_node(c, buf); - } else { - int safe_len = min3(node_len, c->leb_size - offs, - (int)UBIFS_MAX_DATA_NODE_SZ); - pr_err("\tprevent out-of-bounds memory access\n"); - pr_err("\ttruncated data node length %d\n", safe_len); - pr_err("\tcorrupted data node:\n"); - print_hex_dump(KERN_ERR, "\t", DUMP_PREFIX_OFFSET, 32, 1, - buf, safe_len, 0); - } + ubifs_dump_node(c, buf, len); dump_stack(); } return err; @@ -319,7 +308,7 @@ void ubifs_pad(const struct ubifs_info *c, void *buf, int pad) { uint32_t crc; - ubifs_assert(c, pad >= 0 && !(pad & 7)); + ubifs_assert(c, pad >= 0); if (pad >= UBIFS_PAD_NODE_SZ) { struct ubifs_ch *ch = buf; @@ -730,7 +719,7 @@ out_timers: int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) { struct ubifs_info *c = wbuf->c; - int err, written, n, aligned_len = ALIGN(len, 8); + int err, n, written = 0, aligned_len = ALIGN(len, 8); dbg_io("%d bytes (%s) to jhead %s wbuf at LEB %d:%d", len, dbg_ntype(((struct ubifs_ch *)buf)->node_type), @@ -764,6 +753,10 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) * write-buffer. */ memcpy(wbuf->buf + wbuf->used, buf, len); + if (aligned_len > len) { + ubifs_assert(c, aligned_len - len < 8); + ubifs_pad(c, wbuf->buf + wbuf->used + len, aligned_len - len); + } if (aligned_len == wbuf->avail) { dbg_io("flush jhead %s wbuf to LEB %d:%d", @@ -793,8 +786,6 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) goto exit; } - written = 0; - if (wbuf->used) { /* * The node is large enough and does not fit entirely within @@ -856,13 +847,18 @@ int ubifs_wbuf_write_nolock(struct ubifs_wbuf *wbuf, void *buf, int len) } spin_lock(&wbuf->lock); - if (aligned_len) + if (aligned_len) { /* * And now we have what's left and what does not take whole * max. write unit, so write it to the write-buffer and we are * done. */ memcpy(wbuf->buf, buf + written, len); + if (aligned_len > len) { + ubifs_assert(c, aligned_len - len < 8); + ubifs_pad(c, wbuf->buf + len, aligned_len - len); + } + } if (c->leb_size - wbuf->offs >= c->max_write_size) wbuf->size = c->max_write_size; @@ -890,7 +886,7 @@ exit: out: ubifs_err(c, "cannot write %d bytes to LEB %d:%d, error %d", len, wbuf->lnum, wbuf->offs, err); - ubifs_dump_node(c, buf); + ubifs_dump_node(c, buf, written + len); dump_stack(); ubifs_dump_leb(c, wbuf->lnum); return err; @@ -933,7 +929,7 @@ int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum, err = ubifs_leb_write(c, lnum, buf, offs, buf_len); if (err) - ubifs_dump_node(c, buf); + ubifs_dump_node(c, buf, len); return err; } @@ -1016,7 +1012,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, goto out; } - err = ubifs_check_node(c, buf, lnum, offs, 0, 0); + err = ubifs_check_node(c, buf, len, lnum, offs, 0, 0); if (err) { ubifs_err(c, "expected node type %d", type); return err; @@ -1032,7 +1028,7 @@ int ubifs_read_node_wbuf(struct ubifs_wbuf *wbuf, void *buf, int type, int len, out: ubifs_err(c, "bad node at LEB %d:%d", lnum, offs); - ubifs_dump_node(c, buf); + ubifs_dump_node(c, buf, len); dump_stack(); return -EINVAL; } @@ -1046,7 +1042,7 @@ out: * @lnum: logical eraseblock number * @offs: offset within the logical eraseblock * - * This function reads a node of known type and and length, checks it and + * This function reads a node of known type and length, checks it and * stores in @buf. Returns zero in case of success, %-EUCLEAN if CRC mismatched * and a negative error code in case of failure. */ @@ -1072,7 +1068,7 @@ int ubifs_read_node(const struct ubifs_info *c, void *buf, int type, int len, goto out; } - err = ubifs_check_node(c, buf, lnum, offs, 0, 0); + err = ubifs_check_node(c, buf, len, lnum, offs, 0, 0); if (err) { ubifs_errc(c, "expected node type %d", type); return err; @@ -1090,7 +1086,7 @@ out: ubifs_errc(c, "bad node at LEB %d:%d, LEB mapping status %d", lnum, offs, ubi_is_mapped(c->ubi, lnum)); if (!c->probing) { - ubifs_dump_node(c, buf); + ubifs_dump_node(c, buf, len); dump_stack(); } return -EINVAL; diff --git a/fs/ubifs/journal.c b/fs/ubifs/journal.c index 091c2ad8f211..03410ae0813a 100644 --- a/fs/ubifs/journal.c +++ b/fs/ubifs/journal.c @@ -1559,7 +1559,8 @@ int ubifs_jnl_truncate(struct ubifs_info *c, const struct inode *inode, if (dn_len <= 0 || dn_len > UBIFS_BLOCK_SIZE) { ubifs_err(c, "bad data node (block %u, inode %lu)", blk, inode->i_ino); - ubifs_dump_node(c, dn); + ubifs_dump_node(c, dn, sz - UBIFS_INO_NODE_SZ - + UBIFS_TRUN_NODE_SZ); goto out_free; } diff --git a/fs/ubifs/lpt.c b/fs/ubifs/lpt.c index 6e0a153b7194..778a22bf9a92 100644 --- a/fs/ubifs/lpt.c +++ b/fs/ubifs/lpt.c @@ -851,7 +851,7 @@ int ubifs_create_dflt_lpt(struct ubifs_info *c, int *main_lebs, int lpt_first, dbg_lp("lsave_sz %d", c->lsave_sz); dbg_lp("lsave_cnt %d", c->lsave_cnt); dbg_lp("lpt_hght %d", c->lpt_hght); - dbg_lp("big_lpt %d", c->big_lpt); + dbg_lp("big_lpt %u", c->big_lpt); dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); @@ -1824,7 +1824,7 @@ static int lpt_init_rd(struct ubifs_info *c) dbg_lp("lsave_sz %d", c->lsave_sz); dbg_lp("lsave_cnt %d", c->lsave_cnt); dbg_lp("lpt_hght %d", c->lpt_hght); - dbg_lp("big_lpt %d", c->big_lpt); + dbg_lp("big_lpt %u", c->big_lpt); dbg_lp("LPT root is at %d:%d", c->lpt_lnum, c->lpt_offs); dbg_lp("LPT head is at %d:%d", c->nhead_lnum, c->nhead_offs); dbg_lp("LPT ltab is at %d:%d", c->ltab_lnum, c->ltab_offs); diff --git a/fs/ubifs/master.c b/fs/ubifs/master.c index 911d0555b9f2..0df9a3dd0aaa 100644 --- a/fs/ubifs/master.c +++ b/fs/ubifs/master.c @@ -314,7 +314,7 @@ static int validate_master(const struct ubifs_info *c) out: ubifs_err(c, "bad master node at offset %d error %d", c->mst_offs, err); - ubifs_dump_node(c, c->mst_node); + ubifs_dump_node(c, c->mst_node, c->mst_node_alsz); return -EINVAL; } @@ -392,7 +392,7 @@ int ubifs_read_master(struct ubifs_info *c) if (c->leb_cnt < old_leb_cnt || c->leb_cnt < UBIFS_MIN_LEB_CNT) { ubifs_err(c, "bad leb_cnt on master node"); - ubifs_dump_node(c, c->mst_node); + ubifs_dump_node(c, c->mst_node, c->mst_node_alsz); return -EINVAL; } diff --git a/fs/ubifs/orphan.c b/fs/ubifs/orphan.c index 0fb61956146d..4909321d84cf 100644 --- a/fs/ubifs/orphan.c +++ b/fs/ubifs/orphan.c @@ -646,7 +646,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, if (snod->type != UBIFS_ORPH_NODE) { ubifs_err(c, "invalid node type %d in orphan area at %d:%d", snod->type, sleb->lnum, snod->offs); - ubifs_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node, + c->leb_size - snod->offs); err = -EINVAL; goto out_free; } @@ -674,7 +675,8 @@ static int do_kill_orphans(struct ubifs_info *c, struct ubifs_scan_leb *sleb, if (!first) { ubifs_err(c, "out of order commit number %llu in orphan node at %d:%d", cmt_no, sleb->lnum, snod->offs); - ubifs_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node, + c->leb_size - snod->offs); err = -EINVAL; goto out_free; } diff --git a/fs/ubifs/recovery.c b/fs/ubifs/recovery.c index f116f7b3f9e5..f0d51dd21c9e 100644 --- a/fs/ubifs/recovery.c +++ b/fs/ubifs/recovery.c @@ -352,11 +352,11 @@ out_free: ubifs_err(c, "failed to recover master node"); if (mst1) { ubifs_err(c, "dumping first master node"); - ubifs_dump_node(c, mst1); + ubifs_dump_node(c, mst1, c->leb_size - ((void *)mst1 - buf1)); } if (mst2) { ubifs_err(c, "dumping second master node"); - ubifs_dump_node(c, mst2); + ubifs_dump_node(c, mst2, c->leb_size - ((void *)mst2 - buf2)); } vfree(buf2); vfree(buf1); @@ -469,7 +469,7 @@ static int no_more_nodes(const struct ubifs_info *c, void *buf, int len, * The area after the common header size is not empty, so the common * header must be intact. Check it. */ - if (ubifs_check_node(c, buf, lnum, offs, 1, 0) != -EUCLEAN) { + if (ubifs_check_node(c, buf, len, lnum, offs, 1, 0) != -EUCLEAN) { dbg_rcvry("unexpected bad common header at %d:%d", lnum, offs); return 0; } diff --git a/fs/ubifs/replay.c b/fs/ubifs/replay.c index 2f8d8f4f411a..79801c9a5b87 100644 --- a/fs/ubifs/replay.c +++ b/fs/ubifs/replay.c @@ -574,7 +574,7 @@ static int authenticate_sleb_hash(struct ubifs_info *c, struct shash_desc *log_h * @c: UBIFS file-system description object * @sleb: the scan LEB to authenticate * @log_hash: - * @is_last: if true, this is is the last LEB + * @is_last: if true, this is the last LEB * * This function iterates over the buds of a single LEB authenticating all buds * with the authentication nodes on this LEB. Authentication nodes are written @@ -827,7 +827,7 @@ out: out_dump: ubifs_err(c, "bad node is at LEB %d:%d", lnum, snod->offs); - ubifs_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); ubifs_scan_destroy(sleb); return -EINVAL; } @@ -1123,7 +1123,7 @@ out: out_dump: ubifs_err(c, "log error detected while replaying the log at LEB %d:%d", lnum, offs + snod->offs); - ubifs_dump_node(c, snod->node); + ubifs_dump_node(c, snod->node, c->leb_size - snod->offs); ubifs_scan_destroy(sleb); return -EINVAL; } diff --git a/fs/ubifs/sb.c b/fs/ubifs/sb.c index c0d3e4008d23..c160f718c288 100644 --- a/fs/ubifs/sb.c +++ b/fs/ubifs/sb.c @@ -503,7 +503,7 @@ static int validate_sb(struct ubifs_info *c, struct ubifs_sb_node *sup) failed: ubifs_err(c, "bad superblock, error %d", err); - ubifs_dump_node(c, sup); + ubifs_dump_node(c, sup, ALIGN(UBIFS_SB_NODE_SZ, c->min_io_size)); return -EINVAL; } diff --git a/fs/ubifs/scan.c b/fs/ubifs/scan.c index c69cdb5e65bc..84a9157dcc32 100644 --- a/fs/ubifs/scan.c +++ b/fs/ubifs/scan.c @@ -76,7 +76,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, dbg_scan("scanning %s at LEB %d:%d", dbg_ntype(ch->node_type), lnum, offs); - if (ubifs_check_node(c, buf, lnum, offs, quiet, 1)) + if (ubifs_check_node(c, buf, len, lnum, offs, quiet, 1)) return SCANNED_A_CORRUPT_NODE; if (ch->node_type == UBIFS_PAD_NODE) { @@ -90,7 +90,7 @@ int ubifs_scan_a_node(const struct ubifs_info *c, void *buf, int len, int lnum, if (!quiet) { ubifs_err(c, "bad pad node at LEB %d:%d", lnum, offs); - ubifs_dump_node(c, pad); + ubifs_dump_node(c, pad, len); } return SCANNED_A_BAD_PAD_NODE; } diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c index cb3acfb7dd1f..138b9426c6c1 100644 --- a/fs/ubifs/super.c +++ b/fs/ubifs/super.c @@ -253,7 +253,7 @@ struct inode *ubifs_iget(struct super_block *sb, unsigned long inum) out_invalid: ubifs_err(c, "inode %lu validation failed, error %d", inode->i_ino, err); - ubifs_dump_node(c, ino); + ubifs_dump_node(c, ino, UBIFS_MAX_INO_NODE_SZ); ubifs_dump_inode(c, inode); err = -EINVAL; out_ino: @@ -1572,7 +1572,7 @@ static int mount_ubifs(struct ubifs_info *c) dbg_gen("main area LEBs: %d (%d - %d)", c->main_lebs, c->main_first, c->leb_cnt - 1); dbg_gen("index LEBs: %d", c->lst.idx_lebs); - dbg_gen("total index bytes: %lld (%lld KiB, %lld MiB)", + dbg_gen("total index bytes: %llu (%llu KiB, %llu MiB)", c->bi.old_idx_sz, c->bi.old_idx_sz >> 10, c->bi.old_idx_sz >> 20); dbg_gen("key hash type: %d", c->key_hash_type); @@ -2207,9 +2207,7 @@ static int ubifs_fill_super(struct super_block *sb, void *data, int silent) if (c->max_inode_sz > MAX_LFS_FILESIZE) sb->s_maxbytes = c->max_inode_sz = MAX_LFS_FILESIZE; sb->s_op = &ubifs_super_operations; -#ifdef CONFIG_UBIFS_FS_XATTR sb->s_xattr = ubifs_xattr_handlers; -#endif fscrypt_set_ops(sb, &ubifs_crypt_operations); mutex_lock(&c->umount_mutex); diff --git a/fs/ubifs/tnc.c b/fs/ubifs/tnc.c index 894f1ab14616..488f3da7a6c6 100644 --- a/fs/ubifs/tnc.c +++ b/fs/ubifs/tnc.c @@ -316,7 +316,7 @@ static int lnc_add(struct ubifs_info *c, struct ubifs_zbranch *zbr, err = ubifs_validate_entry(c, dent); if (err) { dump_stack(); - ubifs_dump_node(c, dent); + ubifs_dump_node(c, dent, zbr->len); return err; } @@ -349,7 +349,7 @@ static int lnc_add_directly(struct ubifs_info *c, struct ubifs_zbranch *zbr, err = ubifs_validate_entry(c, node); if (err) { dump_stack(); - ubifs_dump_node(c, node); + ubifs_dump_node(c, node, zbr->len); return err; } @@ -377,7 +377,7 @@ static void lnc_free(struct ubifs_zbranch *zbr) * * This function reads a "hashed" node defined by @zbr from the leaf node cache * (in it is there) or from the hash media, in which case the node is also - * added to LNC. Returns zero in case of success or a negative negative error + * added to LNC. Returns zero in case of success or a negative error * code in case of failure. */ static int tnc_read_hashed_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, @@ -1699,7 +1699,7 @@ static int validate_data_node(struct ubifs_info *c, void *buf, goto out_err; } - err = ubifs_check_node(c, buf, zbr->lnum, zbr->offs, 0, 0); + err = ubifs_check_node(c, buf, zbr->len, zbr->lnum, zbr->offs, 0, 0); if (err) { ubifs_err(c, "expected node type %d", UBIFS_DATA_NODE); goto out; @@ -1733,7 +1733,7 @@ out_err: err = -EINVAL; out: ubifs_err(c, "bad node at LEB %d:%d", zbr->lnum, zbr->offs); - ubifs_dump_node(c, buf); + ubifs_dump_node(c, buf, zbr->len); dump_stack(); return err; } diff --git a/fs/ubifs/tnc_misc.c b/fs/ubifs/tnc_misc.c index ccaf94ea5be3..4d686e34e64d 100644 --- a/fs/ubifs/tnc_misc.c +++ b/fs/ubifs/tnc_misc.c @@ -390,7 +390,7 @@ static int read_znode(struct ubifs_info *c, struct ubifs_zbranch *zzbr, out_dump: ubifs_err(c, "bad indexing node at LEB %d:%d, error %d", lnum, offs, err); - ubifs_dump_node(c, idx); + ubifs_dump_node(c, idx, c->max_idx_node_sz); kfree(idx); return -EINVAL; } @@ -455,8 +455,7 @@ out: * @node: node is returned here * * This function reads a node defined by @zbr from the flash media. Returns - * zero in case of success or a negative negative error code in case of - * failure. + * zero in case of success or a negative error code in case of failure. */ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, void *node) @@ -489,7 +488,7 @@ int ubifs_tnc_read_node(struct ubifs_info *c, struct ubifs_zbranch *zbr, zbr->lnum, zbr->offs); dbg_tnck(key, "looked for key "); dbg_tnck(&key1, "but found node's key "); - ubifs_dump_node(c, node); + ubifs_dump_node(c, node, zbr->len); return -EINVAL; } diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index 4ffd832e3b93..fc2cdde3b549 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1719,8 +1719,8 @@ int ubifs_write_node(struct ubifs_info *c, void *node, int len, int lnum, int offs); int ubifs_write_node_hmac(struct ubifs_info *c, void *buf, int len, int lnum, int offs, int hmac_offs); -int ubifs_check_node(const struct ubifs_info *c, const void *buf, int lnum, - int offs, int quiet, int must_chk_crc); +int ubifs_check_node(const struct ubifs_info *c, const void *buf, int len, + int lnum, int offs, int quiet, int must_chk_crc); void ubifs_init_node(struct ubifs_info *c, void *buf, int len, int pad); void ubifs_crc_node(struct ubifs_info *c, void *buf, int len); void ubifs_prepare_node(struct ubifs_info *c, void *buf, int len, int pad); @@ -2000,17 +2000,19 @@ int ubifs_getattr(const struct path *path, struct kstat *stat, int ubifs_check_dir_empty(struct inode *dir); /* xattr.c */ -extern const struct xattr_handler *ubifs_xattr_handlers[]; -ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); int ubifs_xattr_set(struct inode *host, const char *name, const void *value, size_t size, int flags, bool check_lock); ssize_t ubifs_xattr_get(struct inode *host, const char *name, void *buf, size_t size); #ifdef CONFIG_UBIFS_FS_XATTR +extern const struct xattr_handler *ubifs_xattr_handlers[]; +ssize_t ubifs_listxattr(struct dentry *dentry, char *buffer, size_t size); void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum); int ubifs_purge_xattrs(struct inode *host); #else +#define ubifs_listxattr NULL +#define ubifs_xattr_handlers NULL static inline void ubifs_evict_xattr_inode(struct ubifs_info *c, ino_t xattr_inum) { } static inline int ubifs_purge_xattrs(struct inode *host) diff --git a/fs/xfs/libxfs/xfs_alloc.c b/fs/xfs/libxfs/xfs_alloc.c index 15640015be9d..7cb9f064ac64 100644 --- a/fs/xfs/libxfs/xfs_alloc.c +++ b/fs/xfs/libxfs/xfs_alloc.c @@ -690,9 +690,9 @@ xfs_alloc_read_agfl( xfs_mount_t *mp, /* mount point structure */ xfs_trans_t *tp, /* transaction pointer */ xfs_agnumber_t agno, /* allocation group number */ - xfs_buf_t **bpp) /* buffer for the ag free block array */ + struct xfs_buf **bpp) /* buffer for the ag free block array */ { - xfs_buf_t *bp; /* return value */ + struct xfs_buf *bp; /* return value */ int error; ASSERT(agno != NULLAGNUMBER); @@ -2647,12 +2647,12 @@ out_no_agbp: int /* error */ xfs_alloc_get_freelist( xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* buffer containing the agf structure */ + struct xfs_buf *agbp, /* buffer containing the agf structure */ xfs_agblock_t *bnop, /* block address retrieved from freelist */ int btreeblk) /* destination is a AGF btree */ { struct xfs_agf *agf = agbp->b_addr; - xfs_buf_t *agflbp;/* buffer for a.g. freelist structure */ + struct xfs_buf *agflbp;/* buffer for a.g. freelist structure */ xfs_agblock_t bno; /* block number returned */ __be32 *agfl_bno; int error; @@ -2711,7 +2711,7 @@ xfs_alloc_get_freelist( void xfs_alloc_log_agf( xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *bp, /* buffer for a.g. freelist header */ + struct xfs_buf *bp, /* buffer for a.g. freelist header */ int fields) /* mask of fields to be logged (XFS_AGF_...) */ { int first; /* first byte offset */ @@ -2757,7 +2757,7 @@ xfs_alloc_pagf_init( xfs_agnumber_t agno, /* allocation group number */ int flags) /* XFS_ALLOC_FLAGS_... */ { - xfs_buf_t *bp; + struct xfs_buf *bp; int error; error = xfs_alloc_read_agf(mp, tp, agno, flags, &bp); @@ -2772,8 +2772,8 @@ xfs_alloc_pagf_init( int /* error */ xfs_alloc_put_freelist( xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *agbp, /* buffer for a.g. freelist header */ - xfs_buf_t *agflbp,/* buffer for a.g. free block array */ + struct xfs_buf *agbp, /* buffer for a.g. freelist header */ + struct xfs_buf *agflbp,/* buffer for a.g. free block array */ xfs_agblock_t bno, /* block being freed */ int btreeblk) /* block came from a AGF btree */ { diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index d9a692484eae..bc446418e227 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -321,7 +321,7 @@ xfs_bmap_check_leaf_extents( struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, whichfork); struct xfs_btree_block *block; /* current btree block */ xfs_fsblock_t bno; /* block # of "block" */ - xfs_buf_t *bp; /* buffer for "block" */ + struct xfs_buf *bp; /* buffer for "block" */ int error; /* error return value */ xfs_extnum_t i=0, j; /* index into the extents list */ int level; /* btree level, for checking */ @@ -592,7 +592,7 @@ xfs_bmap_btree_to_extents( struct xfs_btree_block *rblock = ifp->if_broot; struct xfs_btree_block *cblock;/* child btree block */ xfs_fsblock_t cbno; /* child block number */ - xfs_buf_t *cbp; /* child block's buffer */ + struct xfs_buf *cbp; /* child block's buffer */ int error; /* error return value */ __be64 *pp; /* ptr to block address */ struct xfs_owner_info oinfo; @@ -830,7 +830,7 @@ xfs_bmap_local_to_extents( int flags; /* logging flags returned */ struct xfs_ifork *ifp; /* inode fork pointer */ xfs_alloc_arg_t args; /* allocation arguments */ - xfs_buf_t *bp; /* buffer for extent block */ + struct xfs_buf *bp; /* buffer for extent block */ struct xfs_bmbt_irec rec; struct xfs_iext_cursor icur; @@ -6226,23 +6226,17 @@ xfs_bmap_validate_extent( struct xfs_bmbt_irec *irec) { struct xfs_mount *mp = ip->i_mount; - xfs_fsblock_t endfsb; - bool isrt; - isrt = XFS_IS_REALTIME_INODE(ip); - endfsb = irec->br_startblock + irec->br_blockcount - 1; - if (isrt && whichfork == XFS_DATA_FORK) { - if (!xfs_verify_rtbno(mp, irec->br_startblock)) - return __this_address; - if (!xfs_verify_rtbno(mp, endfsb)) + if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) + return __this_address; + + if (XFS_IS_REALTIME_INODE(ip) && whichfork == XFS_DATA_FORK) { + if (!xfs_verify_rtext(mp, irec->br_startblock, + irec->br_blockcount)) return __this_address; } else { - if (!xfs_verify_fsbno(mp, irec->br_startblock)) - return __this_address; - if (!xfs_verify_fsbno(mp, endfsb)) - return __this_address; - if (XFS_FSB_TO_AGNO(mp, irec->br_startblock) != - XFS_FSB_TO_AGNO(mp, endfsb)) + if (!xfs_verify_fsbext(mp, irec->br_startblock, + irec->br_blockcount)) return __this_address; } if (irec->br_state != XFS_EXT_NORM && whichfork != XFS_DATA_FORK) diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index ecec604e6e4d..976659190d27 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -639,8 +639,6 @@ xfs_bmbt_change_owner( ASSERT(XFS_IFORK_PTR(ip, whichfork)->if_format == XFS_DINODE_FMT_BTREE); cur = xfs_bmbt_init_cursor(ip->i_mount, tp, ip, whichfork); - if (!cur) - return -ENOMEM; cur->bc_ino.flags |= XFS_BTCUR_BMBT_INVALID_OWNER; error = xfs_btree_change_owner(cur, new_owner, buffer_list); diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c index 2d25bab68764..c4d7a9241dc3 100644 --- a/fs/xfs/libxfs/xfs_btree.c +++ b/fs/xfs/libxfs/xfs_btree.c @@ -397,7 +397,7 @@ xfs_btree_dup_cursor( xfs_btree_cur_t *cur, /* input cursor */ xfs_btree_cur_t **ncur) /* output cursor */ { - xfs_buf_t *bp; /* btree block's buffer pointer */ + struct xfs_buf *bp; /* btree block's buffer pointer */ int error; /* error return value */ int i; /* level number of btree block */ xfs_mount_t *mp; /* mount structure for filesystem */ @@ -701,7 +701,7 @@ xfs_btree_firstrec( int level) /* level to change */ { struct xfs_btree_block *block; /* generic btree block pointer */ - xfs_buf_t *bp; /* buffer containing block */ + struct xfs_buf *bp; /* buffer containing block */ /* * Get the block pointer for this level. @@ -731,7 +731,7 @@ xfs_btree_lastrec( int level) /* level to change */ { struct xfs_btree_block *block; /* generic btree block pointer */ - xfs_buf_t *bp; /* buffer containing block */ + struct xfs_buf *bp; /* buffer containing block */ /* * Get the block pointer for this level. @@ -993,7 +993,7 @@ STATIC void xfs_btree_setbuf( xfs_btree_cur_t *cur, /* btree cursor */ int lev, /* level in btree */ - xfs_buf_t *bp) /* new buffer to set */ + struct xfs_buf *bp) /* new buffer to set */ { struct xfs_btree_block *b; /* btree block */ @@ -1636,7 +1636,7 @@ xfs_btree_decrement( int *stat) /* success/failure */ { struct xfs_btree_block *block; - xfs_buf_t *bp; + struct xfs_buf *bp; int error; /* error return value */ int lev; union xfs_btree_ptr ptr; @@ -4070,7 +4070,7 @@ xfs_btree_delrec( * surviving block, and log it. */ xfs_btree_set_numrecs(left, lrecs + rrecs); - xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB), + xfs_btree_get_sibling(cur, right, &cptr, XFS_BB_RIGHTSIB); xfs_btree_set_sibling(cur, left, &cptr, XFS_BB_RIGHTSIB); xfs_btree_log_block(cur, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB); diff --git a/fs/xfs/libxfs/xfs_format.h b/fs/xfs/libxfs/xfs_format.h index dd764da08f6f..630388b72dbe 100644 --- a/fs/xfs/libxfs/xfs_format.h +++ b/fs/xfs/libxfs/xfs_format.h @@ -468,11 +468,13 @@ xfs_sb_has_ro_compat_feature( #define XFS_SB_FEAT_INCOMPAT_SPINODES (1 << 1) /* sparse inode chunks */ #define XFS_SB_FEAT_INCOMPAT_META_UUID (1 << 2) /* metadata UUID */ #define XFS_SB_FEAT_INCOMPAT_BIGTIME (1 << 3) /* large timestamps */ +#define XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR (1 << 4) /* needs xfs_repair */ #define XFS_SB_FEAT_INCOMPAT_ALL \ (XFS_SB_FEAT_INCOMPAT_FTYPE| \ XFS_SB_FEAT_INCOMPAT_SPINODES| \ XFS_SB_FEAT_INCOMPAT_META_UUID| \ - XFS_SB_FEAT_INCOMPAT_BIGTIME) + XFS_SB_FEAT_INCOMPAT_BIGTIME| \ + XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR) #define XFS_SB_FEAT_INCOMPAT_UNKNOWN ~XFS_SB_FEAT_INCOMPAT_ALL static inline bool @@ -584,6 +586,12 @@ static inline bool xfs_sb_version_hasinobtcounts(struct xfs_sb *sbp) (sbp->sb_features_ro_compat & XFS_SB_FEAT_RO_COMPAT_INOBTCNT); } +static inline bool xfs_sb_version_needsrepair(struct xfs_sb *sbp) +{ + return XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 && + (sbp->sb_features_incompat & XFS_SB_FEAT_INCOMPAT_NEEDSREPAIR); +} + /* * end of superblock version macros */ @@ -625,7 +633,6 @@ xfs_is_quota_inode(struct xfs_sb *sbp, xfs_ino_t ino) #define XFS_B_TO_FSB(mp,b) \ ((((uint64_t)(b)) + (mp)->m_blockmask) >> (mp)->m_sb.sb_blocklog) #define XFS_B_TO_FSBT(mp,b) (((uint64_t)(b)) >> (mp)->m_sb.sb_blocklog) -#define XFS_B_FSB_OFFSET(mp,b) ((b) & (mp)->m_blockmask) /* * Allocation group header diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c index 974e71bc4a3a..69b228fce81a 100644 --- a/fs/xfs/libxfs/xfs_ialloc.c +++ b/fs/xfs/libxfs/xfs_ialloc.c @@ -607,13 +607,13 @@ error: /* * Allocate new inodes in the allocation group specified by agbp. - * Return 0 for success, else error code. + * Returns 0 if inodes were allocated in this AG; 1 if there was no space + * in this AG; or the usual negative error code. */ STATIC int xfs_ialloc_ag_alloc( struct xfs_trans *tp, - struct xfs_buf *agbp, - int *alloc) + struct xfs_buf *agbp) { struct xfs_agi *agi; struct xfs_alloc_arg args; @@ -795,10 +795,9 @@ sparse_alloc: allocmask = (1 << (newlen / XFS_INODES_PER_HOLEMASK_BIT)) - 1; } - if (args.fsbno == NULLFSBLOCK) { - *alloc = 0; - return 0; - } + if (args.fsbno == NULLFSBLOCK) + return 1; + ASSERT(args.len == args.minlen); /* @@ -903,7 +902,6 @@ sparse_alloc: */ xfs_trans_mod_sb(tp, XFS_TRANS_SB_ICOUNT, (long)newlen); xfs_trans_mod_sb(tp, XFS_TRANS_SB_IFREE, (long)newlen); - *alloc = 1; return 0; } @@ -1570,7 +1568,7 @@ xfs_dialloc_ag_update_inobt( * The caller selected an AG for us, and made sure that free inodes are * available. */ -STATIC int +int xfs_dialloc_ag( struct xfs_trans *tp, struct xfs_buf *agbp, @@ -1682,65 +1680,78 @@ error_cur: return error; } +static int +xfs_dialloc_roll( + struct xfs_trans **tpp, + struct xfs_buf *agibp) +{ + struct xfs_trans *tp = *tpp; + struct xfs_dquot_acct *dqinfo; + int error; + + /* + * Hold to on to the agibp across the commit so no other allocation can + * come in and take the free inodes we just allocated for our caller. + */ + xfs_trans_bhold(tp, agibp); + + /* + * We want the quota changes to be associated with the next transaction, + * NOT this one. So, detach the dqinfo from this and attach it to the + * next transaction. + */ + dqinfo = tp->t_dqinfo; + tp->t_dqinfo = NULL; + + error = xfs_trans_roll(&tp); + + /* Re-attach the quota info that we detached from prev trx. */ + tp->t_dqinfo = dqinfo; + + *tpp = tp; + if (error) + return error; + xfs_trans_bjoin(tp, agibp); + return 0; +} + /* - * Allocate an inode on disk. - * - * Mode is used to tell whether the new inode will need space, and whether it - * is a directory. + * Select and prepare an AG for inode allocation. * - * This function is designed to be called twice if it has to do an allocation - * to make more free inodes. On the first call, *IO_agbp should be set to NULL. - * If an inode is available without having to performn an allocation, an inode - * number is returned. In this case, *IO_agbp is set to NULL. If an allocation - * needs to be done, xfs_dialloc returns the current AGI buffer in *IO_agbp. - * The caller should then commit the current transaction, allocate a - * new transaction, and call xfs_dialloc() again, passing in the previous value - * of *IO_agbp. IO_agbp should be held across the transactions. Since the AGI - * buffer is locked across the two calls, the second call is guaranteed to have - * a free inode available. + * Mode is used to tell whether the new inode is a directory and hence where to + * locate it. * - * Once we successfully pick an inode its number is returned and the on-disk - * data structures are updated. The inode itself is not read in, since doing so - * would break ordering constraints with xfs_reclaim. + * This function will ensure that the selected AG has free inodes available to + * allocate from. The selected AGI will be returned locked to the caller, and it + * will allocate more free inodes if required. If no free inodes are found or + * can be allocated, no AGI will be returned. */ int -xfs_dialloc( - struct xfs_trans *tp, +xfs_dialloc_select_ag( + struct xfs_trans **tpp, xfs_ino_t parent, umode_t mode, - struct xfs_buf **IO_agbp, - xfs_ino_t *inop) + struct xfs_buf **IO_agbp) { - struct xfs_mount *mp = tp->t_mountp; + struct xfs_mount *mp = (*tpp)->t_mountp; struct xfs_buf *agbp; xfs_agnumber_t agno; int error; - int ialloced; - int noroom = 0; + bool noroom = false; xfs_agnumber_t start_agno; struct xfs_perag *pag; struct xfs_ino_geometry *igeo = M_IGEO(mp); - int okalloc = 1; + bool okalloc = true; - if (*IO_agbp) { - /* - * If the caller passes in a pointer to the AGI buffer, - * continue where we left off before. In this case, we - * know that the allocation group has free inodes. - */ - agbp = *IO_agbp; - goto out_alloc; - } + *IO_agbp = NULL; /* * We do not have an agbp, so select an initial allocation * group for inode allocation. */ - start_agno = xfs_ialloc_ag_select(tp, parent, mode); - if (start_agno == NULLAGNUMBER) { - *inop = NULLFSINO; + start_agno = xfs_ialloc_ag_select(*tpp, parent, mode); + if (start_agno == NULLAGNUMBER) return 0; - } /* * If we have already hit the ceiling of inode blocks then clear @@ -1753,8 +1764,8 @@ xfs_dialloc( if (igeo->maxicount && percpu_counter_read_positive(&mp->m_icount) + igeo->ialloc_inos > igeo->maxicount) { - noroom = 1; - okalloc = 0; + noroom = true; + okalloc = false; } /* @@ -1771,9 +1782,9 @@ xfs_dialloc( } if (!pag->pagi_init) { - error = xfs_ialloc_pagi_init(mp, tp, agno); + error = xfs_ialloc_pagi_init(mp, *tpp, agno); if (error) - goto out_error; + break; } /* @@ -1786,64 +1797,59 @@ xfs_dialloc( * Then read in the AGI buffer and recheck with the AGI buffer * lock held. */ - error = xfs_ialloc_read_agi(mp, tp, agno, &agbp); + error = xfs_ialloc_read_agi(mp, *tpp, agno, &agbp); if (error) - goto out_error; + break; if (pag->pagi_freecount) { xfs_perag_put(pag); - goto out_alloc; + goto found_ag; } if (!okalloc) goto nextag_relse_buffer; + error = xfs_ialloc_ag_alloc(*tpp, agbp); + if (error < 0) { + xfs_trans_brelse(*tpp, agbp); - error = xfs_ialloc_ag_alloc(tp, agbp, &ialloced); - if (error) { - xfs_trans_brelse(tp, agbp); - - if (error != -ENOSPC) - goto out_error; - - xfs_perag_put(pag); - *inop = NULLFSINO; - return 0; + if (error == -ENOSPC) + error = 0; + break; } - if (ialloced) { + if (error == 0) { /* - * We successfully allocated some inodes, return - * the current context to the caller so that it - * can commit the current transaction and call - * us again where we left off. + * We successfully allocated space for an inode cluster + * in this AG. Roll the transaction so that we can + * allocate one of the new inodes. */ ASSERT(pag->pagi_freecount > 0); xfs_perag_put(pag); - *IO_agbp = agbp; - *inop = NULLFSINO; - return 0; + error = xfs_dialloc_roll(tpp, agbp); + if (error) { + xfs_buf_relse(agbp); + return error; + } + goto found_ag; } nextag_relse_buffer: - xfs_trans_brelse(tp, agbp); + xfs_trans_brelse(*tpp, agbp); nextag: xfs_perag_put(pag); if (++agno == mp->m_sb.sb_agcount) agno = 0; - if (agno == start_agno) { - *inop = NULLFSINO; + if (agno == start_agno) return noroom ? -ENOSPC : 0; - } } -out_alloc: - *IO_agbp = NULL; - return xfs_dialloc_ag(tp, agbp, parent, inop); -out_error: xfs_perag_put(pag); return error; +found_ag: + *IO_agbp = agbp; + return 0; } /* @@ -2453,7 +2459,7 @@ out_map: void xfs_ialloc_log_agi( xfs_trans_t *tp, /* transaction pointer */ - xfs_buf_t *bp, /* allocation group header buffer */ + struct xfs_buf *bp, /* allocation group header buffer */ int fields) /* bitmask of fields to log */ { int first; /* first byte number */ @@ -2674,7 +2680,7 @@ xfs_ialloc_pagi_init( xfs_trans_t *tp, /* transaction pointer */ xfs_agnumber_t agno) /* allocation group number */ { - xfs_buf_t *bp = NULL; + struct xfs_buf *bp = NULL; int error; error = xfs_ialloc_read_agi(mp, tp, agno, &bp); diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h index 72b3468b97b1..3511086a7ae1 100644 --- a/fs/xfs/libxfs/xfs_ialloc.h +++ b/fs/xfs/libxfs/xfs_ialloc.h @@ -37,30 +37,26 @@ xfs_make_iptr(struct xfs_mount *mp, struct xfs_buf *b, int o) * Mode is used to tell whether the new inode will need space, and whether * it is a directory. * - * To work within the constraint of one allocation per transaction, - * xfs_dialloc() is designed to be called twice if it has to do an - * allocation to make more free inodes. If an inode is - * available without an allocation, agbp would be set to the current - * agbp and alloc_done set to false. - * If an allocation needed to be done, agbp would be set to the - * inode header of the allocation group and alloc_done set to true. - * The caller should then commit the current transaction and allocate a new - * transaction. xfs_dialloc() should then be called again with - * the agbp value returned from the previous call. - * - * Once we successfully pick an inode its number is returned and the - * on-disk data structures are updated. The inode itself is not read - * in, since doing so would break ordering constraints with xfs_reclaim. - * - * *agbp should be set to NULL on the first call, *alloc_done set to FALSE. + * There are two phases to inode allocation: selecting an AG and ensuring + * that it contains free inodes, followed by allocating one of the free + * inodes. xfs_dialloc_select_ag() does the former and returns a locked AGI + * to the caller, ensuring that followup call to xfs_dialloc_ag() will + * have free inodes to allocate from. xfs_dialloc_ag() will return the inode + * number of the free inode we allocated. */ int /* error */ -xfs_dialloc( - struct xfs_trans *tp, /* transaction pointer */ +xfs_dialloc_select_ag( + struct xfs_trans **tpp, /* double pointer of transaction */ xfs_ino_t parent, /* parent inode (directory) */ umode_t mode, /* mode bits for new inode */ - struct xfs_buf **agbp, /* buf for a.g. inode header */ - xfs_ino_t *inop); /* inode number allocated */ + struct xfs_buf **IO_agbp); + +int +xfs_dialloc_ag( + struct xfs_trans *tp, + struct xfs_buf *agbp, + xfs_ino_t parent, + xfs_ino_t *inop); /* * Free disk inode. Carefully avoids touching the incore inode, all diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c index cc919a2ee870..4c5831646bd9 100644 --- a/fs/xfs/libxfs/xfs_ialloc_btree.c +++ b/fs/xfs/libxfs/xfs_ialloc_btree.c @@ -672,11 +672,6 @@ xfs_inobt_cur( return error; cur = xfs_inobt_init_cursor(mp, tp, *agi_bpp, agno, which); - if (!cur) { - xfs_trans_brelse(tp, *agi_bpp); - *agi_bpp = NULL; - return -ENOMEM; - } *curpp = cur; return 0; } diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c index c667c63f2cb0..4d7410e49db4 100644 --- a/fs/xfs/libxfs/xfs_inode_buf.c +++ b/fs/xfs/libxfs/xfs_inode_buf.c @@ -547,10 +547,6 @@ xfs_dinode_verify( if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags & XFS_DIFLAG_REALTIME)) return __this_address; - /* don't let reflink and dax mix */ - if ((flags2 & XFS_DIFLAG2_REFLINK) && (flags2 & XFS_DIFLAG2_DAX)) - return __this_address; - /* COW extent size hint validation */ fa = xfs_inode_validate_cowextsize(mp, be32_to_cpu(dip->di_cowextsize), mode, flags, flags2); diff --git a/fs/xfs/libxfs/xfs_refcount.c b/fs/xfs/libxfs/xfs_refcount.c index 2076627243b0..2037b9f23069 100644 --- a/fs/xfs/libxfs/xfs_refcount.c +++ b/fs/xfs/libxfs/xfs_refcount.c @@ -1179,10 +1179,6 @@ xfs_refcount_finish_one( return error; rcur = xfs_refcountbt_init_cursor(mp, tp, agbp, agno); - if (!rcur) { - error = -ENOMEM; - goto out_cur; - } rcur->bc_ag.refc.nr_ops = nr_ops; rcur->bc_ag.refc.shape_changes = shape_changes; } @@ -1217,11 +1213,6 @@ xfs_refcount_finish_one( trace_xfs_refcount_finish_one_leftover(mp, agno, type, bno, blockcount, new_agbno, *new_len); return error; - -out_cur: - xfs_trans_brelse(tp, agbp); - - return error; } /* diff --git a/fs/xfs/libxfs/xfs_rmap.c b/fs/xfs/libxfs/xfs_rmap.c index 2668ebe02865..10e0cf9949a2 100644 --- a/fs/xfs/libxfs/xfs_rmap.c +++ b/fs/xfs/libxfs/xfs_rmap.c @@ -2404,10 +2404,6 @@ xfs_rmap_finish_one( return -EFSCORRUPTED; rcur = xfs_rmapbt_init_cursor(mp, tp, agbp, agno); - if (!rcur) { - error = -ENOMEM; - goto out_cur; - } } *pcur = rcur; @@ -2446,11 +2442,6 @@ xfs_rmap_finish_one( error = -EFSCORRUPTED; } return error; - -out_cur: - xfs_trans_brelse(tp, agbp); - - return error; } /* diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c index 6c1aba16113c..fe3a49575ff3 100644 --- a/fs/xfs/libxfs/xfs_rtbitmap.c +++ b/fs/xfs/libxfs/xfs_rtbitmap.c @@ -56,9 +56,9 @@ xfs_rtbuf_get( xfs_trans_t *tp, /* transaction pointer */ xfs_rtblock_t block, /* block number in bitmap or summary */ int issum, /* is summary not bitmap */ - xfs_buf_t **bpp) /* output: buffer for the block */ + struct xfs_buf **bpp) /* output: buffer for the block */ { - xfs_buf_t *bp; /* block buffer, result */ + struct xfs_buf *bp; /* block buffer, result */ xfs_inode_t *ip; /* bitmap or summary inode */ xfs_bmbt_irec_t map; int nmap = 1; @@ -101,7 +101,7 @@ xfs_rtfind_back( xfs_rtword_t *b; /* current word in buffer */ int bit; /* bit number in the word */ xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ + struct xfs_buf *bp; /* buf for the block */ xfs_rtword_t *bufp; /* starting word in buffer */ int error; /* error value */ xfs_rtblock_t firstbit; /* first useful bit in the word */ @@ -276,7 +276,7 @@ xfs_rtfind_forw( xfs_rtword_t *b; /* current word in buffer */ int bit; /* bit number in the word */ xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ + struct xfs_buf *bp; /* buf for the block */ xfs_rtword_t *bufp; /* starting word in buffer */ int error; /* error value */ xfs_rtblock_t i; /* current bit number rel. to start */ @@ -447,11 +447,11 @@ xfs_rtmodify_summary_int( int log, /* log2 of extent size */ xfs_rtblock_t bbno, /* bitmap block number */ int delta, /* change to make to summary info */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_suminfo_t *sum) /* out: summary info for this block */ { - xfs_buf_t *bp; /* buffer for the summary block */ + struct xfs_buf *bp; /* buffer for the summary block */ int error; /* error value */ xfs_fsblock_t sb; /* summary fsblock */ int so; /* index into the summary file */ @@ -517,7 +517,7 @@ xfs_rtmodify_summary( int log, /* log2 of extent size */ xfs_rtblock_t bbno, /* bitmap block number */ int delta, /* change to make to summary info */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb) /* in/out: summary block number */ { return xfs_rtmodify_summary_int(mp, tp, log, bbno, @@ -539,7 +539,7 @@ xfs_rtmodify_range( xfs_rtword_t *b; /* current word in buffer */ int bit; /* bit number in the word */ xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ + struct xfs_buf *bp; /* buf for the block */ xfs_rtword_t *bufp; /* starting word in buffer */ int error; /* error value */ xfs_rtword_t *first; /* first used word in the buffer */ @@ -690,7 +690,7 @@ xfs_rtfree_range( xfs_trans_t *tp, /* transaction pointer */ xfs_rtblock_t start, /* starting block to free */ xfs_extlen_t len, /* length to free */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb) /* in/out: summary block number */ { xfs_rtblock_t end; /* end of the freed extent */ @@ -773,7 +773,7 @@ xfs_rtcheck_range( xfs_rtword_t *b; /* current word in buffer */ int bit; /* bit number in the word */ xfs_rtblock_t block; /* bitmap block number */ - xfs_buf_t *bp; /* buf for the block */ + struct xfs_buf *bp; /* buf for the block */ xfs_rtword_t *bufp; /* starting word in buffer */ int error; /* error value */ xfs_rtblock_t i; /* current bit number rel. to start */ @@ -969,7 +969,7 @@ xfs_rtfree_extent( int error; /* error value */ xfs_mount_t *mp; /* file system mount structure */ xfs_fsblock_t sb; /* summary file block number */ - xfs_buf_t *sumbp = NULL; /* summary file block buffer */ + struct xfs_buf *sumbp = NULL; /* summary file block buffer */ mp = tp->t_mountp; diff --git a/fs/xfs/libxfs/xfs_sb.c b/fs/xfs/libxfs/xfs_sb.c index 5aeafa59ed27..bbda117e5d85 100644 --- a/fs/xfs/libxfs/xfs_sb.c +++ b/fs/xfs/libxfs/xfs_sb.c @@ -360,21 +360,18 @@ xfs_validate_sb_common( } } - if (sbp->sb_unit) { - if (!xfs_sb_version_hasdalign(sbp) || - sbp->sb_unit > sbp->sb_width || - (sbp->sb_width % sbp->sb_unit) != 0) { - xfs_notice(mp, "SB stripe unit sanity check failed"); - return -EFSCORRUPTED; - } - } else if (xfs_sb_version_hasdalign(sbp)) { + /* + * Either (sb_unit and !hasdalign) or (!sb_unit and hasdalign) + * would imply the image is corrupted. + */ + if (!!sbp->sb_unit ^ xfs_sb_version_hasdalign(sbp)) { xfs_notice(mp, "SB stripe alignment sanity check failed"); return -EFSCORRUPTED; - } else if (sbp->sb_width) { - xfs_notice(mp, "SB stripe width sanity check failed"); - return -EFSCORRUPTED; } + if (!xfs_validate_stripe_geometry(mp, XFS_FSB_TO_B(mp, sbp->sb_unit), + XFS_FSB_TO_B(mp, sbp->sb_width), 0, false)) + return -EFSCORRUPTED; if (xfs_sb_version_hascrc(&mp->m_sb) && sbp->sb_blocksize < XFS_MIN_CRC_BLOCKSIZE) { @@ -383,17 +380,6 @@ xfs_validate_sb_common( } /* - * Until this is fixed only page-sized or smaller data blocks work. - */ - if (unlikely(sbp->sb_blocksize > PAGE_SIZE)) { - xfs_warn(mp, - "File system with blocksize %d bytes. " - "Only pagesize (%ld) or less will currently work.", - sbp->sb_blocksize, PAGE_SIZE); - return -ENOSYS; - } - - /* * Currently only very few inode sizes are supported. */ switch (sbp->sb_inodesize) { @@ -408,22 +394,6 @@ xfs_validate_sb_common( return -ENOSYS; } - if (xfs_sb_validate_fsb_count(sbp, sbp->sb_dblocks) || - xfs_sb_validate_fsb_count(sbp, sbp->sb_rblocks)) { - xfs_warn(mp, - "file system too large to be mounted on this system."); - return -EFBIG; - } - - /* - * Don't touch the filesystem if a user tool thinks it owns the primary - * superblock. mkfs doesn't clear the flag from secondary supers, so - * we don't check them at all. - */ - if (XFS_BUF_ADDR(bp) == XFS_SB_DADDR && sbp->sb_inprogress) { - xfs_warn(mp, "Offline file system operation in progress!"); - return -EFSCORRUPTED; - } return 0; } @@ -1233,3 +1203,61 @@ xfs_sb_get_secondary( *bpp = bp; return 0; } + +/* + * sunit, swidth, sectorsize(optional with 0) should be all in bytes, + * so users won't be confused by values in error messages. + */ +bool +xfs_validate_stripe_geometry( + struct xfs_mount *mp, + __s64 sunit, + __s64 swidth, + int sectorsize, + bool silent) +{ + if (swidth > INT_MAX) { + if (!silent) + xfs_notice(mp, +"stripe width (%lld) is too large", swidth); + return false; + } + + if (sunit > swidth) { + if (!silent) + xfs_notice(mp, +"stripe unit (%lld) is larger than the stripe width (%lld)", sunit, swidth); + return false; + } + + if (sectorsize && (int)sunit % sectorsize) { + if (!silent) + xfs_notice(mp, +"stripe unit (%lld) must be a multiple of the sector size (%d)", + sunit, sectorsize); + return false; + } + + if (sunit && !swidth) { + if (!silent) + xfs_notice(mp, +"invalid stripe unit (%lld) and stripe width of 0", sunit); + return false; + } + + if (!sunit && swidth) { + if (!silent) + xfs_notice(mp, +"invalid stripe width (%lld) and stripe unit of 0", swidth); + return false; + } + + if (sunit && (int)swidth % (int)sunit) { + if (!silent) + xfs_notice(mp, +"stripe width (%lld) must be a multiple of the stripe unit (%lld)", + swidth, sunit); + return false; + } + return true; +} diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h index 92465a9a5162..f79f9dc632b6 100644 --- a/fs/xfs/libxfs/xfs_sb.h +++ b/fs/xfs/libxfs/xfs_sb.h @@ -42,4 +42,7 @@ extern int xfs_sb_get_secondary(struct xfs_mount *mp, struct xfs_trans *tp, xfs_agnumber_t agno, struct xfs_buf **bpp); +extern bool xfs_validate_stripe_geometry(struct xfs_mount *mp, + __s64 sunit, __s64 swidth, int sectorsize, bool silent); + #endif /* __XFS_SB_H__ */ diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h index c795ae47b3c9..8c61a461bf7b 100644 --- a/fs/xfs/libxfs/xfs_shared.h +++ b/fs/xfs/libxfs/xfs_shared.h @@ -62,7 +62,6 @@ void xfs_log_get_max_trans_res(struct xfs_mount *mp, #define XFS_TRANS_SB_DIRTY 0x02 /* superblock is modified */ #define XFS_TRANS_PERM_LOG_RES 0x04 /* xact took a permanent log res */ #define XFS_TRANS_SYNC 0x08 /* make commit synchronous */ -#define XFS_TRANS_DQ_DIRTY 0x10 /* at least one dquot in trx dirty */ #define XFS_TRANS_RESERVE 0x20 /* OK to use reserved data blocks */ #define XFS_TRANS_NO_WRITECOUNT 0x40 /* do not elevate SB writecount */ #define XFS_TRANS_RES_FDBLKS 0x80 /* reserve newly freed blocks */ diff --git a/fs/xfs/libxfs/xfs_types.c b/fs/xfs/libxfs/xfs_types.c index 4f595546a639..b254fbeaaa50 100644 --- a/fs/xfs/libxfs/xfs_types.c +++ b/fs/xfs/libxfs/xfs_types.c @@ -61,6 +61,29 @@ xfs_verify_fsbno( return xfs_verify_agbno(mp, agno, XFS_FSB_TO_AGBNO(mp, fsbno)); } +/* + * Verify that a data device extent is fully contained inside the filesystem, + * does not cross an AG boundary, and does not point at static metadata. + */ +bool +xfs_verify_fsbext( + struct xfs_mount *mp, + xfs_fsblock_t fsbno, + xfs_fsblock_t len) +{ + if (fsbno + len <= fsbno) + return false; + + if (!xfs_verify_fsbno(mp, fsbno)) + return false; + + if (!xfs_verify_fsbno(mp, fsbno + len - 1)) + return false; + + return XFS_FSB_TO_AGNO(mp, fsbno) == + XFS_FSB_TO_AGNO(mp, fsbno + len - 1); +} + /* Calculate the first and last possible inode number in an AG. */ void xfs_agino_range( @@ -175,6 +198,22 @@ xfs_verify_rtbno( return rtbno < mp->m_sb.sb_rblocks; } +/* Verify that a realtime device extent is fully contained inside the volume. */ +bool +xfs_verify_rtext( + struct xfs_mount *mp, + xfs_rtblock_t rtbno, + xfs_rtblock_t len) +{ + if (rtbno + len <= rtbno) + return false; + + if (!xfs_verify_rtbno(mp, rtbno)) + return false; + + return xfs_verify_rtbno(mp, rtbno + len - 1); +} + /* Calculate the range of valid icount values. */ void xfs_icount_range( @@ -219,3 +258,28 @@ xfs_verify_dablk( return dabno <= max_dablk; } + +/* Check that a file block offset does not exceed the maximum. */ +bool +xfs_verify_fileoff( + struct xfs_mount *mp, + xfs_fileoff_t off) +{ + return off <= XFS_MAX_FILEOFF; +} + +/* Check that a range of file block offsets do not exceed the maximum. */ +bool +xfs_verify_fileext( + struct xfs_mount *mp, + xfs_fileoff_t off, + xfs_fileoff_t len) +{ + if (off + len <= off) + return false; + + if (!xfs_verify_fileoff(mp, off)) + return false; + + return xfs_verify_fileoff(mp, off + len - 1); +} diff --git a/fs/xfs/libxfs/xfs_types.h b/fs/xfs/libxfs/xfs_types.h index 397d94775440..064bd6e8c922 100644 --- a/fs/xfs/libxfs/xfs_types.h +++ b/fs/xfs/libxfs/xfs_types.h @@ -184,6 +184,8 @@ xfs_agblock_t xfs_ag_block_count(struct xfs_mount *mp, xfs_agnumber_t agno); bool xfs_verify_agbno(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agblock_t agbno); bool xfs_verify_fsbno(struct xfs_mount *mp, xfs_fsblock_t fsbno); +bool xfs_verify_fsbext(struct xfs_mount *mp, xfs_fsblock_t fsbno, + xfs_fsblock_t len); void xfs_agino_range(struct xfs_mount *mp, xfs_agnumber_t agno, xfs_agino_t *first, xfs_agino_t *last); @@ -195,9 +197,14 @@ bool xfs_verify_ino(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_internal_inum(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_dir_ino(struct xfs_mount *mp, xfs_ino_t ino); bool xfs_verify_rtbno(struct xfs_mount *mp, xfs_rtblock_t rtbno); +bool xfs_verify_rtext(struct xfs_mount *mp, xfs_rtblock_t rtbno, + xfs_rtblock_t len); bool xfs_verify_icount(struct xfs_mount *mp, unsigned long long icount); bool xfs_verify_dablk(struct xfs_mount *mp, xfs_fileoff_t off); void xfs_icount_range(struct xfs_mount *mp, unsigned long long *min, unsigned long long *max); +bool xfs_verify_fileoff(struct xfs_mount *mp, xfs_fileoff_t off); +bool xfs_verify_fileext(struct xfs_mount *mp, xfs_fileoff_t off, + xfs_fileoff_t len); #endif /* __XFS_TYPES_H__ */ diff --git a/fs/xfs/scrub/agheader_repair.c b/fs/xfs/scrub/agheader_repair.c index 401f71579ce6..23690f824ffa 100644 --- a/fs/xfs/scrub/agheader_repair.c +++ b/fs/xfs/scrub/agheader_repair.c @@ -829,8 +829,6 @@ xrep_agi_calc_from_btrees( cur = xfs_inobt_init_cursor(mp, sc->tp, agi_bp, sc->sa.agno, XFS_BTNUM_FINO); - if (error) - goto err; error = xfs_btree_count_blocks(cur, &blocks); if (error) goto err; diff --git a/fs/xfs/scrub/bmap.c b/fs/xfs/scrub/bmap.c index fed56d213a3f..33559c3a4bc3 100644 --- a/fs/xfs/scrub/bmap.c +++ b/fs/xfs/scrub/bmap.c @@ -319,7 +319,6 @@ xchk_bmap_iextent( struct xfs_bmbt_irec *irec) { struct xfs_mount *mp = info->sc->mp; - xfs_filblks_t end; int error = 0; /* @@ -330,6 +329,10 @@ xchk_bmap_iextent( xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); + if (!xfs_verify_fileext(mp, irec->br_startoff, irec->br_blockcount)) + xchk_fblock_set_corrupt(info->sc, info->whichfork, + irec->br_startoff); + xchk_bmap_dirattr_extent(ip, info, irec); /* There should never be a "hole" extent in either extent list. */ @@ -349,20 +352,12 @@ xchk_bmap_iextent( if (irec->br_blockcount > MAXEXTLEN) xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); - if (irec->br_startblock + irec->br_blockcount <= irec->br_startblock) - xchk_fblock_set_corrupt(info->sc, info->whichfork, - irec->br_startoff); - end = irec->br_startblock + irec->br_blockcount - 1; if (info->is_rt && - (!xfs_verify_rtbno(mp, irec->br_startblock) || - !xfs_verify_rtbno(mp, end))) + !xfs_verify_rtext(mp, irec->br_startblock, irec->br_blockcount)) xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); if (!info->is_rt && - (!xfs_verify_fsbno(mp, irec->br_startblock) || - !xfs_verify_fsbno(mp, end) || - XFS_FSB_TO_AGNO(mp, irec->br_startblock) != - XFS_FSB_TO_AGNO(mp, end))) + !xfs_verify_fsbext(mp, irec->br_startblock, irec->br_blockcount)) xchk_fblock_set_corrupt(info->sc, info->whichfork, irec->br_startoff); @@ -563,10 +558,6 @@ xchk_bmap_check_ag_rmaps( return error; cur = xfs_rmapbt_init_cursor(sc->mp, sc->tp, agf, agno); - if (!cur) { - error = -ENOMEM; - goto out_agf; - } sbcri.sc = sc; sbcri.whichfork = whichfork; @@ -575,7 +566,6 @@ xchk_bmap_check_ag_rmaps( error = 0; xfs_btree_del_cursor(cur, error); -out_agf: xfs_trans_brelse(sc->tp, agf); return error; } diff --git a/fs/xfs/scrub/common.c b/fs/xfs/scrub/common.c index 18876056e5e0..8ea6d4aa3f55 100644 --- a/fs/xfs/scrub/common.c +++ b/fs/xfs/scrub/common.c @@ -466,8 +466,6 @@ xchk_ag_btcur_init( /* Set up a bnobt cursor for cross-referencing. */ sa->bno_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp, agno, XFS_BTNUM_BNO); - if (!sa->bno_cur) - goto err; } if (sa->agf_bp && @@ -475,8 +473,6 @@ xchk_ag_btcur_init( /* Set up a cntbt cursor for cross-referencing. */ sa->cnt_cur = xfs_allocbt_init_cursor(mp, sc->tp, sa->agf_bp, agno, XFS_BTNUM_CNT); - if (!sa->cnt_cur) - goto err; } /* Set up a inobt cursor for cross-referencing. */ @@ -484,8 +480,6 @@ xchk_ag_btcur_init( xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_INO)) { sa->ino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, agno, XFS_BTNUM_INO); - if (!sa->ino_cur) - goto err; } /* Set up a finobt cursor for cross-referencing. */ @@ -493,8 +487,6 @@ xchk_ag_btcur_init( xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_FINO)) { sa->fino_cur = xfs_inobt_init_cursor(mp, sc->tp, sa->agi_bp, agno, XFS_BTNUM_FINO); - if (!sa->fino_cur) - goto err; } /* Set up a rmapbt cursor for cross-referencing. */ @@ -502,8 +494,6 @@ xchk_ag_btcur_init( xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_RMAP)) { sa->rmap_cur = xfs_rmapbt_init_cursor(mp, sc->tp, sa->agf_bp, agno); - if (!sa->rmap_cur) - goto err; } /* Set up a refcountbt cursor for cross-referencing. */ @@ -511,13 +501,9 @@ xchk_ag_btcur_init( xchk_ag_btree_healthy_enough(sc, sa->pag, XFS_BTNUM_REFC)) { sa->refc_cur = xfs_refcountbt_init_cursor(mp, sc->tp, sa->agf_bp, agno); - if (!sa->refc_cur) - goto err; } return 0; -err: - return -ENOMEM; } /* Release the AG header context and btree cursors. */ diff --git a/fs/xfs/scrub/dir.c b/fs/xfs/scrub/dir.c index b045e95c2ea7..178b3455a170 100644 --- a/fs/xfs/scrub/dir.c +++ b/fs/xfs/scrub/dir.c @@ -66,8 +66,18 @@ xchk_dir_check_ftype( * eofblocks cleanup (which allocates what would be a nested * transaction), we can't use DONTCACHE here because DONTCACHE * inodes can trigger immediate inactive cleanup of the inode. + * + * If _iget returns -EINVAL or -ENOENT then the child inode number is + * garbage and the directory is corrupt. If the _iget returns + * -EFSCORRUPTED or -EFSBADCRC then the child is corrupt which is a + * cross referencing error. Any other error is an operational error. */ error = xfs_iget(mp, sdc->sc->tp, inum, 0, 0, &ip); + if (error == -EINVAL || error == -ENOENT) { + error = -EFSCORRUPTED; + xchk_fblock_process_error(sdc->sc, XFS_DATA_FORK, 0, &error); + goto out; + } if (!xchk_fblock_xref_process_error(sdc->sc, XFS_DATA_FORK, offset, &error)) goto out; @@ -105,6 +115,7 @@ xchk_dir_actor( struct xfs_name xname; xfs_ino_t lookup_ino; xfs_dablk_t offset; + bool checked_ftype = false; int error = 0; sdc = container_of(dir_iter, struct xchk_dir_ctx, dir_iter); @@ -133,6 +144,7 @@ xchk_dir_actor( if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR) xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); + checked_ftype = true; if (ino != ip->i_ino) xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); @@ -144,6 +156,7 @@ xchk_dir_actor( if (xfs_sb_version_hasftype(&mp->m_sb) && type != DT_DIR) xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); + checked_ftype = true; if (ip->i_ino == mp->m_sb.sb_rootino && ino != ip->i_ino) xchk_fblock_set_corrupt(sdc->sc, XFS_DATA_FORK, offset); @@ -167,9 +180,11 @@ xchk_dir_actor( } /* Verify the file type. This function absorbs error codes. */ - error = xchk_dir_check_ftype(sdc, offset, lookup_ino, type); - if (error) - goto out; + if (!checked_ftype) { + error = xchk_dir_check_ftype(sdc, offset, lookup_ino, type); + if (error) + goto out; + } out: /* * A negative error code returned here is supposed to cause the diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c index bb25ff1b770d..faf65eb5bd31 100644 --- a/fs/xfs/scrub/inode.c +++ b/fs/xfs/scrub/inode.c @@ -185,10 +185,6 @@ xchk_inode_flags2( if ((flags & XFS_DIFLAG_REALTIME) && (flags2 & XFS_DIFLAG2_REFLINK)) goto bad; - /* dax and reflink make no sense, currently */ - if ((flags2 & XFS_DIFLAG2_DAX) && (flags2 & XFS_DIFLAG2_REFLINK)) - goto bad; - /* no bigtime iflag without the bigtime feature */ if (xfs_dinode_has_bigtime(dip) && !xfs_sb_version_hasbigtime(&mp->m_sb)) diff --git a/fs/xfs/scrub/parent.c b/fs/xfs/scrub/parent.c index 855aa8bcab64..66c35f6dfc24 100644 --- a/fs/xfs/scrub/parent.c +++ b/fs/xfs/scrub/parent.c @@ -164,13 +164,13 @@ xchk_parent_validate( * can't use DONTCACHE here because DONTCACHE inodes can trigger * immediate inactive cleanup of the inode. * - * If _iget returns -EINVAL then the parent inode number is garbage - * and the directory is corrupt. If the _iget returns -EFSCORRUPTED - * or -EFSBADCRC then the parent is corrupt which is a cross - * referencing error. Any other error is an operational error. + * If _iget returns -EINVAL or -ENOENT then the parent inode number is + * garbage and the directory is corrupt. If the _iget returns + * -EFSCORRUPTED or -EFSBADCRC then the parent is corrupt which is a + * cross referencing error. Any other error is an operational error. */ error = xfs_iget(mp, sc->tp, dnum, XFS_IGET_UNTRUSTED, 0, &dp); - if (error == -EINVAL) { + if (error == -EINVAL || error == -ENOENT) { error = -EFSCORRUPTED; xchk_fblock_process_error(sc, XFS_DATA_FORK, 0, &error); goto out; diff --git a/fs/xfs/scrub/rtbitmap.c b/fs/xfs/scrub/rtbitmap.c index 76e4ffe0315b..d409ca592178 100644 --- a/fs/xfs/scrub/rtbitmap.c +++ b/fs/xfs/scrub/rtbitmap.c @@ -52,9 +52,7 @@ xchk_rtbitmap_rec( startblock = rec->ar_startext * tp->t_mountp->m_sb.sb_rextsize; blockcount = rec->ar_extcount * tp->t_mountp->m_sb.sb_rextsize; - if (startblock + blockcount <= startblock || - !xfs_verify_rtbno(sc->mp, startblock) || - !xfs_verify_rtbno(sc->mp, startblock + blockcount - 1)) + if (!xfs_verify_rtext(sc->mp, startblock, blockcount)) xchk_fblock_set_corrupt(sc, XFS_DATA_FORK, 0); return 0; } diff --git a/fs/xfs/xfs_acl.c b/fs/xfs/xfs_acl.c index c544951a0c07..779cb73b3d00 100644 --- a/fs/xfs/xfs_acl.c +++ b/fs/xfs/xfs_acl.c @@ -16,6 +16,7 @@ #include "xfs_acl.h" #include "xfs_da_format.h" #include "xfs_da_btree.h" +#include "xfs_trans.h" #include <linux/posix_acl_xattr.h> @@ -212,21 +213,28 @@ __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) } static int -xfs_set_mode(struct inode *inode, umode_t mode) +xfs_acl_set_mode( + struct inode *inode, + umode_t mode) { - int error = 0; - - if (mode != inode->i_mode) { - struct iattr iattr; + struct xfs_inode *ip = XFS_I(inode); + struct xfs_mount *mp = ip->i_mount; + struct xfs_trans *tp; + int error; - iattr.ia_valid = ATTR_MODE | ATTR_CTIME; - iattr.ia_mode = mode; - iattr.ia_ctime = current_time(inode); + error = xfs_trans_alloc(mp, &M_RES(mp)->tr_ichange, 0, 0, 0, &tp); + if (error) + return error; - error = xfs_setattr_nonsize(XFS_I(inode), &iattr, XFS_ATTR_NOACL); - } + xfs_ilock(ip, XFS_ILOCK_EXCL); + xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL); + inode->i_mode = mode; + inode->i_ctime = current_time(inode); + xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE); - return error; + if (mp->m_flags & XFS_MOUNT_WSYNC) + xfs_trans_set_sync(tp); + return xfs_trans_commit(tp); } int @@ -251,18 +259,14 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type) } set_acl: - error = __xfs_set_acl(inode, acl, type); - if (error) - return error; - /* * We set the mode after successfully updating the ACL xattr because the * xattr update can fail at ENOSPC and we don't want to change the mode * if the ACL update hasn't been applied. */ - if (set_mode) - error = xfs_set_mode(inode, mode); - + error = __xfs_set_acl(inode, acl, type); + if (!error && set_mode && mode != inode->i_mode) + error = xfs_acl_set_mode(inode, mode); return error; } diff --git a/fs/xfs/xfs_bmap_item.c b/fs/xfs/xfs_bmap_item.c index 9e16a4d0f97c..93e4d8ae6e92 100644 --- a/fs/xfs/xfs_bmap_item.c +++ b/fs/xfs/xfs_bmap_item.c @@ -417,6 +417,40 @@ const struct xfs_defer_op_type xfs_bmap_update_defer_type = { .cancel_item = xfs_bmap_update_cancel_item, }; +/* Is this recovered BUI ok? */ +static inline bool +xfs_bui_validate( + struct xfs_mount *mp, + struct xfs_bui_log_item *buip) +{ + struct xfs_map_extent *bmap; + + /* Only one mapping operation per BUI... */ + if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) + return false; + + bmap = &buip->bui_format.bui_extents[0]; + + if (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS) + return false; + + switch (bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK) { + case XFS_BMAP_MAP: + case XFS_BMAP_UNMAP: + break; + default: + return false; + } + + if (!xfs_verify_ino(mp, bmap->me_owner)) + return false; + + if (!xfs_verify_fileext(mp, bmap->me_startoff, bmap->me_len)) + return false; + + return xfs_verify_fsbext(mp, bmap->me_startblock, bmap->me_len); +} + /* * Process a bmap update intent item that was recovered from the log. * We need to update some inode's bmbt. @@ -433,47 +467,24 @@ xfs_bui_item_recover( struct xfs_mount *mp = lip->li_mountp; struct xfs_map_extent *bmap; struct xfs_bud_log_item *budp; - xfs_fsblock_t startblock_fsb; - xfs_fsblock_t inode_fsb; xfs_filblks_t count; xfs_exntst_t state; unsigned int bui_type; int whichfork; int error = 0; - /* Only one mapping operation per BUI... */ - if (buip->bui_format.bui_nextents != XFS_BUI_MAX_FAST_EXTENTS) + if (!xfs_bui_validate(mp, buip)) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &buip->bui_format, sizeof(buip->bui_format)); return -EFSCORRUPTED; + } - /* - * First check the validity of the extent described by the - * BUI. If anything is bad, then toss the BUI. - */ bmap = &buip->bui_format.bui_extents[0]; - startblock_fsb = XFS_BB_TO_FSB(mp, - XFS_FSB_TO_DADDR(mp, bmap->me_startblock)); - inode_fsb = XFS_BB_TO_FSB(mp, XFS_FSB_TO_DADDR(mp, - XFS_INO_TO_FSB(mp, bmap->me_owner))); state = (bmap->me_flags & XFS_BMAP_EXTENT_UNWRITTEN) ? XFS_EXT_UNWRITTEN : XFS_EXT_NORM; whichfork = (bmap->me_flags & XFS_BMAP_EXTENT_ATTR_FORK) ? XFS_ATTR_FORK : XFS_DATA_FORK; bui_type = bmap->me_flags & XFS_BMAP_EXTENT_TYPE_MASK; - switch (bui_type) { - case XFS_BMAP_MAP: - case XFS_BMAP_UNMAP: - break; - default: - return -EFSCORRUPTED; - } - if (startblock_fsb == 0 || - bmap->me_len == 0 || - inode_fsb == 0 || - startblock_fsb >= mp->m_sb.sb_dblocks || - bmap->me_len >= mp->m_sb.sb_agblocks || - inode_fsb >= mp->m_sb.sb_dblocks || - (bmap->me_flags & ~XFS_BMAP_EXTENT_FLAGS)) - return -EFSCORRUPTED; /* Grab the inode. */ error = xfs_iget(mp, NULL, bmap->me_owner, 0, 0, &ip); diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index 4e4cf91f4f9f..f8400bbd6473 100644 --- a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -278,7 +278,7 @@ _xfs_buf_alloc( */ STATIC int _xfs_buf_get_pages( - xfs_buf_t *bp, + struct xfs_buf *bp, int page_count) { /* Make sure that we have a page list */ @@ -302,7 +302,7 @@ _xfs_buf_get_pages( */ STATIC void _xfs_buf_free_pages( - xfs_buf_t *bp) + struct xfs_buf *bp) { if (bp->b_pages != bp->b_page_array) { kmem_free(bp->b_pages); @@ -319,7 +319,7 @@ _xfs_buf_free_pages( */ static void xfs_buf_free( - xfs_buf_t *bp) + struct xfs_buf *bp) { trace_xfs_buf_free(bp, _RET_IP_); @@ -352,7 +352,7 @@ xfs_buf_free( */ STATIC int xfs_buf_allocate_memory( - xfs_buf_t *bp, + struct xfs_buf *bp, uint flags) { size_t size; @@ -463,7 +463,7 @@ out_free_pages: */ STATIC int _xfs_buf_map_pages( - xfs_buf_t *bp, + struct xfs_buf *bp, uint flags) { ASSERT(bp->b_flags & _XBF_PAGES); @@ -590,7 +590,7 @@ xfs_buf_find( struct xfs_buf **found_bp) { struct xfs_perag *pag; - xfs_buf_t *bp; + struct xfs_buf *bp; struct xfs_buf_map cmap = { .bm_bn = map[0].bm_bn }; xfs_daddr_t eofs; int i; @@ -762,7 +762,7 @@ found: int _xfs_buf_read( - xfs_buf_t *bp, + struct xfs_buf *bp, xfs_buf_flags_t flags) { ASSERT(!(flags & XBF_WRITE)); @@ -1005,7 +1005,7 @@ xfs_buf_get_uncached( */ void xfs_buf_hold( - xfs_buf_t *bp) + struct xfs_buf *bp) { trace_xfs_buf_hold(bp, _RET_IP_); atomic_inc(&bp->b_hold); @@ -1017,7 +1017,7 @@ xfs_buf_hold( */ void xfs_buf_rele( - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_perag *pag = bp->b_pag; bool release; @@ -1161,7 +1161,7 @@ xfs_buf_unlock( STATIC void xfs_buf_wait_unpin( - xfs_buf_t *bp) + struct xfs_buf *bp) { DECLARE_WAITQUEUE (wait, current); @@ -1373,7 +1373,7 @@ xfs_buf_ioend_work( struct work_struct *work) { struct xfs_buf *bp = - container_of(work, xfs_buf_t, b_ioend_work); + container_of(work, struct xfs_buf, b_ioend_work); xfs_buf_ioend(bp); } @@ -1388,7 +1388,7 @@ xfs_buf_ioend_async( void __xfs_buf_ioerror( - xfs_buf_t *bp, + struct xfs_buf *bp, int error, xfs_failaddr_t failaddr) { diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h index bfd2907e7bc4..5d91a31298a4 100644 --- a/fs/xfs/xfs_buf.h +++ b/fs/xfs/xfs_buf.h @@ -124,7 +124,7 @@ struct xfs_buf_ops { xfs_failaddr_t (*verify_struct)(struct xfs_buf *bp); }; -typedef struct xfs_buf { +struct xfs_buf { /* * first cacheline holds all the fields needed for an uncontended cache * hit to be fully processed. The semaphore straddles the cacheline @@ -190,7 +190,7 @@ typedef struct xfs_buf { int b_last_error; const struct xfs_buf_ops *b_ops; -} xfs_buf_t; +}; /* Finding and Reading Buffers */ struct xfs_buf *xfs_buf_incore(struct xfs_buftarg *target, @@ -253,16 +253,16 @@ int _xfs_buf_read(struct xfs_buf *bp, xfs_buf_flags_t flags); void xfs_buf_hold(struct xfs_buf *bp); /* Releasing Buffers */ -extern void xfs_buf_rele(xfs_buf_t *); +extern void xfs_buf_rele(struct xfs_buf *); /* Locking and Unlocking Buffers */ -extern int xfs_buf_trylock(xfs_buf_t *); -extern void xfs_buf_lock(xfs_buf_t *); -extern void xfs_buf_unlock(xfs_buf_t *); +extern int xfs_buf_trylock(struct xfs_buf *); +extern void xfs_buf_lock(struct xfs_buf *); +extern void xfs_buf_unlock(struct xfs_buf *); #define xfs_buf_islocked(bp) \ ((bp)->b_sema.count <= 0) -static inline void xfs_buf_relse(xfs_buf_t *bp) +static inline void xfs_buf_relse(struct xfs_buf *bp) { xfs_buf_unlock(bp); xfs_buf_rele(bp); diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c index 0356f2e340a1..dc0be2a639cc 100644 --- a/fs/xfs/xfs_buf_item.c +++ b/fs/xfs/xfs_buf_item.c @@ -412,7 +412,7 @@ xfs_buf_item_unpin( int remove) { struct xfs_buf_log_item *bip = BUF_ITEM(lip); - xfs_buf_t *bp = bip->bli_buf; + struct xfs_buf *bp = bip->bli_buf; int stale = bip->bli_flags & XFS_BLI_STALE; int freed; @@ -942,7 +942,7 @@ xfs_buf_item_free( */ void xfs_buf_item_relse( - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; diff --git a/fs/xfs/xfs_extfree_item.c b/fs/xfs/xfs_extfree_item.c index 6c11bfc3d452..93223ebb3372 100644 --- a/fs/xfs/xfs_extfree_item.c +++ b/fs/xfs/xfs_extfree_item.c @@ -578,6 +578,15 @@ const struct xfs_defer_op_type xfs_agfl_free_defer_type = { .cancel_item = xfs_extent_free_cancel_item, }; +/* Is this recovered EFI ok? */ +static inline bool +xfs_efi_validate_ext( + struct xfs_mount *mp, + struct xfs_extent *extp) +{ + return xfs_verify_fsbext(mp, extp->ext_start, extp->ext_len); +} + /* * Process an extent free intent item that was recovered from * the log. We need to free the extents that it describes. @@ -592,7 +601,6 @@ xfs_efi_item_recover( struct xfs_efd_log_item *efdp; struct xfs_trans *tp; struct xfs_extent *extp; - xfs_fsblock_t startblock_fsb; int i; int error = 0; @@ -602,14 +610,13 @@ xfs_efi_item_recover( * just toss the EFI. */ for (i = 0; i < efip->efi_format.efi_nextents; i++) { - extp = &efip->efi_format.efi_extents[i]; - startblock_fsb = XFS_BB_TO_FSB(mp, - XFS_FSB_TO_DADDR(mp, extp->ext_start)); - if (startblock_fsb == 0 || - extp->ext_len == 0 || - startblock_fsb >= mp->m_sb.sb_dblocks || - extp->ext_len >= mp->m_sb.sb_agblocks) + if (!xfs_efi_validate_ext(mp, + &efip->efi_format.efi_extents[i])) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &efip->efi_format, + sizeof(efip->efi_format)); return -EFSCORRUPTED; + } } error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, 0, 0, 0, &tp); diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c index b7c5783a031c..959ce91a3755 100644 --- a/fs/xfs/xfs_fsops.c +++ b/fs/xfs/xfs_fsops.c @@ -28,7 +28,7 @@ xfs_growfs_data_private( xfs_mount_t *mp, /* mount point for filesystem */ xfs_growfs_data_t *in) /* growfs data input struct */ { - xfs_buf_t *bp; + struct xfs_buf *bp; int error; xfs_agnumber_t nagcount; xfs_agnumber_t nagimax = 0; diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index 2bfbcf28b1bd..b7352bc4c815 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -761,68 +761,26 @@ xfs_inode_inherit_flags2( } /* - * Allocate an inode on disk and return a copy of its in-core version. - * The in-core inode is locked exclusively. Set mode, nlink, and rdev - * appropriately within the inode. The uid and gid for the inode are - * set according to the contents of the given cred structure. - * - * Use xfs_dialloc() to allocate the on-disk inode. If xfs_dialloc() - * has a free inode available, call xfs_iget() to obtain the in-core - * version of the allocated inode. Finally, fill in the inode and - * log its initial contents. In this case, ialloc_context would be - * set to NULL. - * - * If xfs_dialloc() does not have an available inode, it will replenish - * its supply by doing an allocation. Since we can only do one - * allocation within a transaction without deadlocks, we must commit - * the current transaction before returning the inode itself. - * In this case, therefore, we will set ialloc_context and return. - * The caller should then commit the current transaction, start a new - * transaction, and call xfs_ialloc() again to actually get the inode. - * - * To ensure that some other process does not grab the inode that - * was allocated during the first call to xfs_ialloc(), this routine - * also returns the [locked] bp pointing to the head of the freelist - * as ialloc_context. The caller should hold this buffer across - * the commit and pass it back into this routine on the second call. - * - * If we are allocating quota inodes, we do not have a parent inode - * to attach to or associate with (i.e. pip == NULL) because they - * are not linked into the directory structure - they are attached - * directly to the superblock - and so have no parent. + * Initialise a newly allocated inode and return the in-core inode to the + * caller locked exclusively. */ static int -xfs_ialloc( - xfs_trans_t *tp, - xfs_inode_t *pip, - umode_t mode, - xfs_nlink_t nlink, - dev_t rdev, - prid_t prid, - xfs_buf_t **ialloc_context, - xfs_inode_t **ipp) -{ - struct xfs_mount *mp = tp->t_mountp; - xfs_ino_t ino; - xfs_inode_t *ip; - uint flags; - int error; - struct timespec64 tv; - struct inode *inode; - - /* - * Call the space management code to pick - * the on-disk inode to be allocated. - */ - error = xfs_dialloc(tp, pip ? pip->i_ino : 0, mode, - ialloc_context, &ino); - if (error) - return error; - if (*ialloc_context || ino == NULLFSINO) { - *ipp = NULL; - return 0; - } - ASSERT(*ialloc_context == NULL); +xfs_init_new_inode( + struct xfs_trans *tp, + struct xfs_inode *pip, + xfs_ino_t ino, + umode_t mode, + xfs_nlink_t nlink, + dev_t rdev, + prid_t prid, + struct xfs_inode **ipp) +{ + struct xfs_mount *mp = tp->t_mountp; + struct xfs_inode *ip; + unsigned int flags; + int error; + struct timespec64 tv; + struct inode *inode; /* * Protect against obviously corrupt allocation btree records. Later @@ -837,14 +795,13 @@ xfs_ialloc( } /* - * Get the in-core inode with the lock held exclusively. - * This is because we're setting fields here we need - * to prevent others from looking at until we're done. + * Get the in-core inode with the lock held exclusively to prevent + * others from looking at until we're done. */ - error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, - XFS_ILOCK_EXCL, &ip); + error = xfs_iget(mp, tp, ino, XFS_IGET_CREATE, XFS_ILOCK_EXCL, &ip); if (error) return error; + ASSERT(ip != NULL); inode = VFS_I(ip); inode->i_mode = mode; @@ -932,143 +889,51 @@ xfs_ialloc( } /* - * Allocates a new inode from disk and return a pointer to the - * incore copy. This routine will internally commit the current - * transaction and allocate a new one if the Space Manager needed - * to do an allocation to replenish the inode free-list. - * - * This routine is designed to be called from xfs_create and - * xfs_create_dir. + * Allocates a new inode from disk and return a pointer to the incore copy. This + * routine will internally commit the current transaction and allocate a new one + * if we needed to allocate more on-disk free inodes to perform the requested + * operation. * + * If we are allocating quota inodes, we do not have a parent inode to attach to + * or associate with (i.e. dp == NULL) because they are not linked into the + * directory structure - they are attached directly to the superblock - and so + * have no parent. */ int xfs_dir_ialloc( - xfs_trans_t **tpp, /* input: current transaction; - output: may be a new transaction. */ - xfs_inode_t *dp, /* directory within whose allocate - the inode. */ - umode_t mode, - xfs_nlink_t nlink, - dev_t rdev, - prid_t prid, /* project id */ - xfs_inode_t **ipp) /* pointer to inode; it will be - locked. */ -{ - xfs_trans_t *tp; - xfs_inode_t *ip; - xfs_buf_t *ialloc_context = NULL; - int code; - void *dqinfo; - uint tflags; - - tp = *tpp; - ASSERT(tp->t_flags & XFS_TRANS_PERM_LOG_RES); + struct xfs_trans **tpp, + struct xfs_inode *dp, + umode_t mode, + xfs_nlink_t nlink, + dev_t rdev, + prid_t prid, + struct xfs_inode **ipp) +{ + struct xfs_buf *agibp; + xfs_ino_t parent_ino = dp ? dp->i_ino : 0; + xfs_ino_t ino; + int error; - /* - * xfs_ialloc will return a pointer to an incore inode if - * the Space Manager has an available inode on the free - * list. Otherwise, it will do an allocation and replenish - * the freelist. Since we can only do one allocation per - * transaction without deadlocks, we will need to commit the - * current transaction and start a new one. We will then - * need to call xfs_ialloc again to get the inode. - * - * If xfs_ialloc did an allocation to replenish the freelist, - * it returns the bp containing the head of the freelist as - * ialloc_context. We will hold a lock on it across the - * transaction commit so that no other process can steal - * the inode(s) that we've just allocated. - */ - code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, &ialloc_context, - &ip); + ASSERT((*tpp)->t_flags & XFS_TRANS_PERM_LOG_RES); /* - * Return an error if we were unable to allocate a new inode. - * This should only happen if we run out of space on disk or - * encounter a disk error. + * Call the space management code to pick the on-disk inode to be + * allocated. */ - if (code) { - *ipp = NULL; - return code; - } - if (!ialloc_context && !ip) { - *ipp = NULL; - return -ENOSPC; - } - - /* - * If the AGI buffer is non-NULL, then we were unable to get an - * inode in one operation. We need to commit the current - * transaction and call xfs_ialloc() again. It is guaranteed - * to succeed the second time. - */ - if (ialloc_context) { - /* - * Normally, xfs_trans_commit releases all the locks. - * We call bhold to hang on to the ialloc_context across - * the commit. Holding this buffer prevents any other - * processes from doing any allocations in this - * allocation group. - */ - xfs_trans_bhold(tp, ialloc_context); - - /* - * We want the quota changes to be associated with the next - * transaction, NOT this one. So, detach the dqinfo from this - * and attach it to the next transaction. - */ - dqinfo = NULL; - tflags = 0; - if (tp->t_dqinfo) { - dqinfo = (void *)tp->t_dqinfo; - tp->t_dqinfo = NULL; - tflags = tp->t_flags & XFS_TRANS_DQ_DIRTY; - tp->t_flags &= ~(XFS_TRANS_DQ_DIRTY); - } - - code = xfs_trans_roll(&tp); - - /* - * Re-attach the quota info that we detached from prev trx. - */ - if (dqinfo) { - tp->t_dqinfo = dqinfo; - tp->t_flags |= tflags; - } - - if (code) { - xfs_buf_relse(ialloc_context); - *tpp = tp; - *ipp = NULL; - return code; - } - xfs_trans_bjoin(tp, ialloc_context); - - /* - * Call ialloc again. Since we've locked out all - * other allocations in this allocation group, - * this call should always succeed. - */ - code = xfs_ialloc(tp, dp, mode, nlink, rdev, prid, - &ialloc_context, &ip); - - /* - * If we get an error at this point, return to the caller - * so that the current transaction can be aborted. - */ - if (code) { - *tpp = tp; - *ipp = NULL; - return code; - } - ASSERT(!ialloc_context && ip); + error = xfs_dialloc_select_ag(tpp, parent_ino, mode, &agibp); + if (error) + return error; - } + if (!agibp) + return -ENOSPC; - *ipp = ip; - *tpp = tp; + /* Allocate an inode from the selected AG */ + error = xfs_dialloc_ag(*tpp, agibp, parent_ino, &ino); + if (error) + return error; + ASSERT(ino != NULLFSINO); - return 0; + return xfs_init_new_inode(*tpp, dp, ino, mode, nlink, rdev, prid, ipp); } /* @@ -1521,7 +1386,7 @@ xfs_itruncate_extents_flags( * the page cache can't scale that far. */ first_unmap_block = XFS_B_TO_FSB(mp, (xfs_ufsize_t)new_size); - if (first_unmap_block >= XFS_MAX_FILEOFF) { + if (!xfs_verify_fileoff(mp, first_unmap_block)) { WARN_ON_ONCE(first_unmap_block > XFS_MAX_FILEOFF); return 0; } diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h index 751a3d1d7d84..eca333f5f715 100644 --- a/fs/xfs/xfs_inode.h +++ b/fs/xfs/xfs_inode.h @@ -407,9 +407,9 @@ void xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode, xfs_extlen_t xfs_get_extsz_hint(struct xfs_inode *ip); xfs_extlen_t xfs_get_cowextsz_hint(struct xfs_inode *ip); -int xfs_dir_ialloc(struct xfs_trans **, struct xfs_inode *, umode_t, - xfs_nlink_t, dev_t, prid_t, - struct xfs_inode **); +int xfs_dir_ialloc(struct xfs_trans **tpp, struct xfs_inode *dp, umode_t mode, + xfs_nlink_t nlink, dev_t dev, prid_t prid, + struct xfs_inode **ipp); static inline int xfs_itruncate_extents( diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 1414ab79eacf..67c8dc9de8aa 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -206,10 +206,8 @@ xfs_generic_create( xfs_finish_inode_setup(ip); out_free_acl: - if (default_acl) - posix_acl_release(default_acl); - if (acl) - posix_acl_release(acl); + posix_acl_release(default_acl); + posix_acl_release(acl); return error; out_cleanup_inode: @@ -648,11 +646,10 @@ xfs_vn_change_ok( * Caution: The caller of this function is responsible for calling * setattr_prepare() or otherwise verifying the change is fine. */ -int +static int xfs_setattr_nonsize( struct xfs_inode *ip, - struct iattr *iattr, - int flags) + struct iattr *iattr) { xfs_mount_t *mp = ip->i_mount; struct inode *inode = VFS_I(ip); @@ -809,7 +806,7 @@ xfs_setattr_nonsize( * to attr_set. No previous user of the generic * Posix ACL code seems to care about this issue either. */ - if ((mask & ATTR_MODE) && !(flags & XFS_ATTR_NOACL)) { + if (mask & ATTR_MODE) { error = posix_acl_chmod(inode, inode->i_mode); if (error) return error; @@ -826,22 +823,6 @@ out_dqrele: return error; } -int -xfs_vn_setattr_nonsize( - struct dentry *dentry, - struct iattr *iattr) -{ - struct xfs_inode *ip = XFS_I(d_inode(dentry)); - int error; - - trace_xfs_setattr(ip); - - error = xfs_vn_change_ok(dentry, iattr); - if (error) - return error; - return xfs_setattr_nonsize(ip, iattr, 0); -} - /* * Truncate file. Must have write permission and not be a directory. * @@ -881,7 +862,7 @@ xfs_setattr_size( * Use the regular setattr path to update the timestamps. */ iattr->ia_valid &= ~ATTR_SIZE; - return xfs_setattr_nonsize(ip, iattr, 0); + return xfs_setattr_nonsize(ip, iattr); } /* @@ -1069,11 +1050,11 @@ xfs_vn_setattr( struct dentry *dentry, struct iattr *iattr) { + struct inode *inode = d_inode(dentry); + struct xfs_inode *ip = XFS_I(inode); int error; if (iattr->ia_valid & ATTR_SIZE) { - struct inode *inode = d_inode(dentry); - struct xfs_inode *ip = XFS_I(inode); uint iolock; xfs_ilock(ip, XFS_MMAPLOCK_EXCL); @@ -1088,7 +1069,11 @@ xfs_vn_setattr( error = xfs_vn_setattr_size(dentry, iattr); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); } else { - error = xfs_vn_setattr_nonsize(dentry, iattr); + trace_xfs_setattr(ip); + + error = xfs_vn_change_ok(dentry, iattr); + if (!error) + error = xfs_setattr_nonsize(ip, iattr); } return error; diff --git a/fs/xfs/xfs_iops.h b/fs/xfs/xfs_iops.h index 4d24ff309f59..99ca745c1071 100644 --- a/fs/xfs/xfs_iops.h +++ b/fs/xfs/xfs_iops.h @@ -13,15 +13,7 @@ extern const struct file_operations xfs_dir_file_operations; extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size); -/* - * Internal setattr interfaces. - */ -#define XFS_ATTR_NOACL 0x01 /* Don't call posix_acl_chmod */ - extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr); -extern int xfs_setattr_nonsize(struct xfs_inode *ip, struct iattr *vap, - int flags); -extern int xfs_vn_setattr_nonsize(struct dentry *dentry, struct iattr *vap); extern int xfs_vn_setattr_size(struct dentry *dentry, struct iattr *vap); #endif /* __XFS_IOPS_H__ */ diff --git a/fs/xfs/xfs_iwalk.c b/fs/xfs/xfs_iwalk.c index 2a45138831e3..eae3aff9bc97 100644 --- a/fs/xfs/xfs_iwalk.c +++ b/fs/xfs/xfs_iwalk.c @@ -363,7 +363,7 @@ xfs_iwalk_run_callbacks( /* Delete cursor but remember the last record we cached... */ xfs_iwalk_del_inobt(tp, curpp, agi_bpp, 0); irec = &iwag->recs[iwag->nr_recs - 1]; - ASSERT(next_agino == irec->ir_startino + XFS_INODES_PER_CHUNK); + ASSERT(next_agino >= irec->ir_startino + XFS_INODES_PER_CHUNK); error = xfs_iwalk_ag_recs(iwag); if (error) diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c index 87886b7f77da..97f31308de03 100644 --- a/fs/xfs/xfs_log_recover.c +++ b/fs/xfs/xfs_log_recover.c @@ -2559,8 +2559,11 @@ xlog_recover_process_intents( spin_unlock(&ailp->ail_lock); error = lip->li_ops->iop_recover(lip, &capture_list); spin_lock(&ailp->ail_lock); - if (error) + if (error) { + trace_xlog_intent_recovery_failed(log->l_mp, error, + lip->li_ops->iop_recover); break; + } } xfs_trans_ail_cursor_done(&cur); @@ -2628,7 +2631,7 @@ xlog_recover_clear_agi_bucket( { xfs_trans_t *tp; xfs_agi_t *agi; - xfs_buf_t *agibp; + struct xfs_buf *agibp; int offset; int error; @@ -2746,7 +2749,7 @@ xlog_recover_process_iunlinks( xfs_mount_t *mp; xfs_agnumber_t agno; xfs_agi_t *agi; - xfs_buf_t *agibp; + struct xfs_buf *agibp; xfs_agino_t agino; int bucket; int error; @@ -3498,8 +3501,8 @@ xlog_recover_check_summary( struct xlog *log) { xfs_mount_t *mp; - xfs_buf_t *agfbp; - xfs_buf_t *agibp; + struct xfs_buf *agfbp; + struct xfs_buf *agibp; xfs_agnumber_t agno; uint64_t freeblks; uint64_t itotal; diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c index b2a9abee8b2b..c134eb4aeaa8 100644 --- a/fs/xfs/xfs_qm.c +++ b/fs/xfs/xfs_qm.c @@ -737,15 +737,15 @@ xfs_qm_destroy_quotainfo( */ STATIC int xfs_qm_qino_alloc( - xfs_mount_t *mp, - xfs_inode_t **ip, - uint flags) + struct xfs_mount *mp, + struct xfs_inode **ipp, + unsigned int flags) { - xfs_trans_t *tp; - int error; - bool need_alloc = true; + struct xfs_trans *tp; + int error; + bool need_alloc = true; - *ip = NULL; + *ipp = NULL; /* * With superblock that doesn't have separate pquotino, we * share an inode between gquota and pquota. If the on-disk @@ -771,7 +771,7 @@ xfs_qm_qino_alloc( return -EFSCORRUPTED; } if (ino != NULLFSINO) { - error = xfs_iget(mp, NULL, ino, 0, 0, ip); + error = xfs_iget(mp, NULL, ino, 0, 0, ipp); if (error) return error; mp->m_sb.sb_gquotino = NULLFSINO; @@ -787,7 +787,7 @@ xfs_qm_qino_alloc( return error; if (need_alloc) { - error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ip); + error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ipp); if (error) { xfs_trans_cancel(tp); return error; @@ -812,11 +812,11 @@ xfs_qm_qino_alloc( mp->m_sb.sb_qflags = mp->m_qflags & XFS_ALL_QUOTA_ACCT; } if (flags & XFS_QMOPT_UQUOTA) - mp->m_sb.sb_uquotino = (*ip)->i_ino; + mp->m_sb.sb_uquotino = (*ipp)->i_ino; else if (flags & XFS_QMOPT_GQUOTA) - mp->m_sb.sb_gquotino = (*ip)->i_ino; + mp->m_sb.sb_gquotino = (*ipp)->i_ino; else - mp->m_sb.sb_pquotino = (*ip)->i_ino; + mp->m_sb.sb_pquotino = (*ipp)->i_ino; spin_unlock(&mp->m_sb_lock); xfs_log_sb(tp); @@ -826,7 +826,7 @@ xfs_qm_qino_alloc( xfs_alert(mp, "%s failed (error %d)!", __func__, error); } if (need_alloc) - xfs_finish_inode_setup(*ip); + xfs_finish_inode_setup(*ipp); return error; } diff --git a/fs/xfs/xfs_refcount_item.c b/fs/xfs/xfs_refcount_item.c index 7529eb63ce94..07ebccbbf4df 100644 --- a/fs/xfs/xfs_refcount_item.c +++ b/fs/xfs/xfs_refcount_item.c @@ -417,6 +417,31 @@ const struct xfs_defer_op_type xfs_refcount_update_defer_type = { .cancel_item = xfs_refcount_update_cancel_item, }; +/* Is this recovered CUI ok? */ +static inline bool +xfs_cui_validate_phys( + struct xfs_mount *mp, + struct xfs_phys_extent *refc) +{ + if (!xfs_sb_version_hasreflink(&mp->m_sb)) + return false; + + if (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS) + return false; + + switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) { + case XFS_REFCOUNT_INCREASE: + case XFS_REFCOUNT_DECREASE: + case XFS_REFCOUNT_ALLOC_COW: + case XFS_REFCOUNT_FREE_COW: + break; + default: + return false; + } + + return xfs_verify_fsbext(mp, refc->pe_startblock, refc->pe_len); +} + /* * Process a refcount update intent item that was recovered from the log. * We need to update the refcountbt. @@ -433,11 +458,9 @@ xfs_cui_item_recover( struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; struct xfs_mount *mp = lip->li_mountp; - xfs_fsblock_t startblock_fsb; xfs_fsblock_t new_fsb; xfs_extlen_t new_len; unsigned int refc_type; - bool op_ok; bool requeue_only = false; enum xfs_refcount_intent_type type; int i; @@ -449,26 +472,13 @@ xfs_cui_item_recover( * just toss the CUI. */ for (i = 0; i < cuip->cui_format.cui_nextents; i++) { - refc = &cuip->cui_format.cui_extents[i]; - startblock_fsb = XFS_BB_TO_FSB(mp, - XFS_FSB_TO_DADDR(mp, refc->pe_startblock)); - switch (refc->pe_flags & XFS_REFCOUNT_EXTENT_TYPE_MASK) { - case XFS_REFCOUNT_INCREASE: - case XFS_REFCOUNT_DECREASE: - case XFS_REFCOUNT_ALLOC_COW: - case XFS_REFCOUNT_FREE_COW: - op_ok = true; - break; - default: - op_ok = false; - break; - } - if (!op_ok || startblock_fsb == 0 || - refc->pe_len == 0 || - startblock_fsb >= mp->m_sb.sb_dblocks || - refc->pe_len >= mp->m_sb.sb_agblocks || - (refc->pe_flags & ~XFS_REFCOUNT_EXTENT_FLAGS)) + if (!xfs_cui_validate_phys(mp, + &cuip->cui_format.cui_extents[i])) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &cuip->cui_format, + sizeof(cuip->cui_format)); return -EFSCORRUPTED; + } } /* diff --git a/fs/xfs/xfs_rmap_item.c b/fs/xfs/xfs_rmap_item.c index 7adc996ca6e3..49cebd68b672 100644 --- a/fs/xfs/xfs_rmap_item.c +++ b/fs/xfs/xfs_rmap_item.c @@ -460,6 +460,42 @@ const struct xfs_defer_op_type xfs_rmap_update_defer_type = { .cancel_item = xfs_rmap_update_cancel_item, }; +/* Is this recovered RUI ok? */ +static inline bool +xfs_rui_validate_map( + struct xfs_mount *mp, + struct xfs_map_extent *rmap) +{ + if (!xfs_sb_version_hasrmapbt(&mp->m_sb)) + return false; + + if (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS) + return false; + + switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) { + case XFS_RMAP_EXTENT_MAP: + case XFS_RMAP_EXTENT_MAP_SHARED: + case XFS_RMAP_EXTENT_UNMAP: + case XFS_RMAP_EXTENT_UNMAP_SHARED: + case XFS_RMAP_EXTENT_CONVERT: + case XFS_RMAP_EXTENT_CONVERT_SHARED: + case XFS_RMAP_EXTENT_ALLOC: + case XFS_RMAP_EXTENT_FREE: + break; + default: + return false; + } + + if (!XFS_RMAP_NON_INODE_OWNER(rmap->me_owner) && + !xfs_verify_ino(mp, rmap->me_owner)) + return false; + + if (!xfs_verify_fileext(mp, rmap->me_startoff, rmap->me_len)) + return false; + + return xfs_verify_fsbext(mp, rmap->me_startblock, rmap->me_len); +} + /* * Process an rmap update intent item that was recovered from the log. * We need to update the rmapbt. @@ -475,10 +511,8 @@ xfs_rui_item_recover( struct xfs_trans *tp; struct xfs_btree_cur *rcur = NULL; struct xfs_mount *mp = lip->li_mountp; - xfs_fsblock_t startblock_fsb; enum xfs_rmap_intent_type type; xfs_exntst_t state; - bool op_ok; int i; int whichfork; int error = 0; @@ -489,30 +523,13 @@ xfs_rui_item_recover( * just toss the RUI. */ for (i = 0; i < ruip->rui_format.rui_nextents; i++) { - rmap = &ruip->rui_format.rui_extents[i]; - startblock_fsb = XFS_BB_TO_FSB(mp, - XFS_FSB_TO_DADDR(mp, rmap->me_startblock)); - switch (rmap->me_flags & XFS_RMAP_EXTENT_TYPE_MASK) { - case XFS_RMAP_EXTENT_MAP: - case XFS_RMAP_EXTENT_MAP_SHARED: - case XFS_RMAP_EXTENT_UNMAP: - case XFS_RMAP_EXTENT_UNMAP_SHARED: - case XFS_RMAP_EXTENT_CONVERT: - case XFS_RMAP_EXTENT_CONVERT_SHARED: - case XFS_RMAP_EXTENT_ALLOC: - case XFS_RMAP_EXTENT_FREE: - op_ok = true; - break; - default: - op_ok = false; - break; - } - if (!op_ok || startblock_fsb == 0 || - rmap->me_len == 0 || - startblock_fsb >= mp->m_sb.sb_dblocks || - rmap->me_len >= mp->m_sb.sb_agblocks || - (rmap->me_flags & ~XFS_RMAP_EXTENT_FLAGS)) + if (!xfs_rui_validate_map(mp, + &ruip->rui_format.rui_extents[i])) { + XFS_CORRUPTION_ERROR(__func__, XFS_ERRLEVEL_LOW, mp, + &ruip->rui_format, + sizeof(ruip->rui_format)); return -EFSCORRUPTED; + } } error = xfs_trans_alloc(mp, &M_RES(mp)->tr_itruncate, diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c index ede1baf31413..b4999fb01ff7 100644 --- a/fs/xfs/xfs_rtalloc.c +++ b/fs/xfs/xfs_rtalloc.c @@ -32,7 +32,7 @@ xfs_rtget_summary( xfs_trans_t *tp, /* transaction pointer */ int log, /* log2 of extent size */ xfs_rtblock_t bbno, /* bitmap block number */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_suminfo_t *sum) /* out: summary info for this block */ { @@ -50,7 +50,7 @@ xfs_rtany_summary( int low, /* low log2 extent size */ int high, /* high log2 extent size */ xfs_rtblock_t bbno, /* bitmap block number */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ int *stat) /* out: any good extents here? */ { @@ -104,7 +104,7 @@ xfs_rtcopy_summary( xfs_trans_t *tp) /* transaction pointer */ { xfs_rtblock_t bbno; /* bitmap block number */ - xfs_buf_t *bp; /* summary buffer */ + struct xfs_buf *bp; /* summary buffer */ int error; /* error return value */ int log; /* summary level number (log length) */ xfs_suminfo_t sum; /* summary data */ @@ -144,7 +144,7 @@ xfs_rtallocate_range( xfs_trans_t *tp, /* transaction pointer */ xfs_rtblock_t start, /* start block to allocate */ xfs_extlen_t len, /* length to allocate */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb) /* in/out: summary block number */ { xfs_rtblock_t end; /* end of the allocated extent */ @@ -226,7 +226,7 @@ xfs_rtallocate_extent_block( xfs_extlen_t maxlen, /* maximum length to allocate */ xfs_extlen_t *len, /* out: actual length allocated */ xfs_rtblock_t *nextp, /* out: next block to try */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock) /* out: start block allocated */ @@ -345,7 +345,7 @@ xfs_rtallocate_extent_exact( xfs_extlen_t minlen, /* minimum length to allocate */ xfs_extlen_t maxlen, /* maximum length to allocate */ xfs_extlen_t *len, /* out: actual length allocated */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock) /* out: start block allocated */ @@ -424,7 +424,7 @@ xfs_rtallocate_extent_near( xfs_extlen_t minlen, /* minimum length to allocate */ xfs_extlen_t maxlen, /* maximum length to allocate */ xfs_extlen_t *len, /* out: actual length allocated */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock) /* out: start block allocated */ @@ -626,7 +626,7 @@ xfs_rtallocate_extent_size( xfs_extlen_t minlen, /* minimum length to allocate */ xfs_extlen_t maxlen, /* maximum length to allocate */ xfs_extlen_t *len, /* out: actual length allocated */ - xfs_buf_t **rbpp, /* in/out: summary block buffer */ + struct xfs_buf **rbpp, /* in/out: summary block buffer */ xfs_fsblock_t *rsb, /* in/out: summary block number */ xfs_extlen_t prod, /* extent product factor */ xfs_rtblock_t *rtblock) /* out: start block allocated */ @@ -900,7 +900,7 @@ xfs_growfs_rt( xfs_growfs_rt_t *in) /* growfs rt input struct */ { xfs_rtblock_t bmbno; /* bitmap block number */ - xfs_buf_t *bp; /* temporary buffer */ + struct xfs_buf *bp; /* temporary buffer */ int error; /* error return value */ xfs_mount_t *nmp; /* new (fake) mount structure */ xfs_rfsblock_t nrblocks; /* new number of realtime blocks */ @@ -1151,7 +1151,7 @@ xfs_rtallocate_extent( int error; /* error value */ xfs_rtblock_t r; /* result allocated block */ xfs_fsblock_t sb; /* summary file block number */ - xfs_buf_t *sumbp; /* summary file block buffer */ + struct xfs_buf *sumbp; /* summary file block buffer */ ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL)); ASSERT(minlen > 0 && minlen <= maxlen); diff --git a/fs/xfs/xfs_rtalloc.h b/fs/xfs/xfs_rtalloc.h index 93e77b221355..ed885620589c 100644 --- a/fs/xfs/xfs_rtalloc.h +++ b/fs/xfs/xfs_rtalloc.h @@ -115,10 +115,10 @@ int xfs_rtmodify_range(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtblock_t start, xfs_extlen_t len, int val); int xfs_rtmodify_summary_int(struct xfs_mount *mp, struct xfs_trans *tp, int log, xfs_rtblock_t bbno, int delta, - xfs_buf_t **rbpp, xfs_fsblock_t *rsb, + struct xfs_buf **rbpp, xfs_fsblock_t *rsb, xfs_suminfo_t *sum); int xfs_rtmodify_summary(struct xfs_mount *mp, struct xfs_trans *tp, int log, - xfs_rtblock_t bbno, int delta, xfs_buf_t **rbpp, + xfs_rtblock_t bbno, int delta, struct xfs_buf **rbpp, xfs_fsblock_t *rsb); int xfs_rtfree_range(struct xfs_mount *mp, struct xfs_trans *tp, xfs_rtblock_t start, xfs_extlen_t len, diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index e3e229e52512..813be879a5e5 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -199,10 +199,12 @@ xfs_fs_show_options( seq_printf(m, ",swidth=%d", (int)XFS_FSB_TO_BB(mp, mp->m_swidth)); - if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD)) - seq_puts(m, ",usrquota"); - else if (mp->m_qflags & XFS_UQUOTA_ACCT) - seq_puts(m, ",uqnoenforce"); + if (mp->m_qflags & XFS_UQUOTA_ACCT) { + if (mp->m_qflags & XFS_UQUOTA_ENFD) + seq_puts(m, ",usrquota"); + else + seq_puts(m, ",uqnoenforce"); + } if (mp->m_qflags & XFS_PQUOTA_ACCT) { if (mp->m_qflags & XFS_PQUOTA_ENFD) @@ -1159,7 +1161,7 @@ suffix_kstrtoint( * NOTE: mp->m_super is NULL here! */ static int -xfs_fc_parse_param( +xfs_fs_parse_param( struct fs_context *fc, struct fs_parameter *param) { @@ -1317,7 +1319,7 @@ xfs_fc_parse_param( } static int -xfs_fc_validate_params( +xfs_fs_validate_params( struct xfs_mount *mp) { /* @@ -1386,7 +1388,7 @@ xfs_fc_validate_params( } static int -xfs_fc_fill_super( +xfs_fs_fill_super( struct super_block *sb, struct fs_context *fc) { @@ -1396,7 +1398,7 @@ xfs_fc_fill_super( mp->m_super = sb; - error = xfs_fc_validate_params(mp); + error = xfs_fs_validate_params(mp); if (error) goto out_free_names; @@ -1467,6 +1469,45 @@ xfs_fc_fill_super( #endif } + /* Filesystem claims it needs repair, so refuse the mount. */ + if (xfs_sb_version_needsrepair(&mp->m_sb)) { + xfs_warn(mp, "Filesystem needs repair. Please run xfs_repair."); + error = -EFSCORRUPTED; + goto out_free_sb; + } + + /* + * Don't touch the filesystem if a user tool thinks it owns the primary + * superblock. mkfs doesn't clear the flag from secondary supers, so + * we don't check them at all. + */ + if (mp->m_sb.sb_inprogress) { + xfs_warn(mp, "Offline file system operation in progress!"); + error = -EFSCORRUPTED; + goto out_free_sb; + } + + /* + * Until this is fixed only page-sized or smaller data blocks work. + */ + if (mp->m_sb.sb_blocksize > PAGE_SIZE) { + xfs_warn(mp, + "File system with blocksize %d bytes. " + "Only pagesize (%ld) or less will currently work.", + mp->m_sb.sb_blocksize, PAGE_SIZE); + error = -ENOSYS; + goto out_free_sb; + } + + /* Ensure this filesystem fits in the page cache limits */ + if (xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_dblocks) || + xfs_sb_validate_fsb_count(&mp->m_sb, mp->m_sb.sb_rblocks)) { + xfs_warn(mp, + "file system too large to be mounted on this system."); + error = -EFBIG; + goto out_free_sb; + } + /* * XFS block mappings use 54 bits to store the logical block offset. * This should suffice to handle the maximum file size that the VFS @@ -1478,7 +1519,7 @@ xfs_fc_fill_super( * Avoid integer overflow by comparing the maximum bmbt offset to the * maximum pagecache offset in units of fs blocks. */ - if (XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE) > XFS_MAX_FILEOFF) { + if (!xfs_verify_fileoff(mp, XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE))) { xfs_warn(mp, "MAX_LFS_FILESIZE block offset (%llu) exceeds extent map maximum (%llu)!", XFS_B_TO_FSBT(mp, MAX_LFS_FILESIZE), @@ -1621,10 +1662,10 @@ xfs_fc_fill_super( } static int -xfs_fc_get_tree( +xfs_fs_get_tree( struct fs_context *fc) { - return get_tree_bdev(fc, xfs_fc_fill_super); + return get_tree_bdev(fc, xfs_fs_fill_super); } static int @@ -1743,7 +1784,7 @@ xfs_remount_ro( * silently ignore all options that we can't actually change. */ static int -xfs_fc_reconfigure( +xfs_fs_reconfigure( struct fs_context *fc) { struct xfs_mount *mp = XFS_M(fc->root->d_sb); @@ -1756,7 +1797,7 @@ xfs_fc_reconfigure( if (XFS_SB_VERSION_NUM(&mp->m_sb) == XFS_SB_VERSION_5) fc->sb_flags |= SB_I_VERSION; - error = xfs_fc_validate_params(new_mp); + error = xfs_fs_validate_params(new_mp); if (error) return error; @@ -1793,7 +1834,7 @@ xfs_fc_reconfigure( return 0; } -static void xfs_fc_free( +static void xfs_fs_free( struct fs_context *fc) { struct xfs_mount *mp = fc->s_fs_info; @@ -1809,10 +1850,10 @@ static void xfs_fc_free( } static const struct fs_context_operations xfs_context_ops = { - .parse_param = xfs_fc_parse_param, - .get_tree = xfs_fc_get_tree, - .reconfigure = xfs_fc_reconfigure, - .free = xfs_fc_free, + .parse_param = xfs_fs_parse_param, + .get_tree = xfs_fs_get_tree, + .reconfigure = xfs_fs_reconfigure, + .free = xfs_fs_free, }; static int xfs_init_fs_context( diff --git a/fs/xfs/xfs_symlink.c b/fs/xfs/xfs_symlink.c index 8e88a7ca387e..1f43fd7f3209 100644 --- a/fs/xfs/xfs_symlink.c +++ b/fs/xfs/xfs_symlink.c @@ -154,7 +154,7 @@ xfs_symlink( const char *cur_chunk; int byte_cnt; int n; - xfs_buf_t *bp; + struct xfs_buf *bp; prid_t prid; struct xfs_dquot *udqp = NULL; struct xfs_dquot *gdqp = NULL; @@ -365,7 +365,7 @@ STATIC int xfs_inactive_symlink_rmt( struct xfs_inode *ip) { - xfs_buf_t *bp; + struct xfs_buf *bp; int done; int error; int i; diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h index 86951652d3ed..5a263ae3d4f0 100644 --- a/fs/xfs/xfs_trace.h +++ b/fs/xfs/xfs_trace.h @@ -103,6 +103,24 @@ DEFINE_ATTR_LIST_EVENT(xfs_attr_list_notfound); DEFINE_ATTR_LIST_EVENT(xfs_attr_leaf_list); DEFINE_ATTR_LIST_EVENT(xfs_attr_node_list); +TRACE_EVENT(xlog_intent_recovery_failed, + TP_PROTO(struct xfs_mount *mp, int error, void *function), + TP_ARGS(mp, error, function), + TP_STRUCT__entry( + __field(dev_t, dev) + __field(int, error) + __field(void *, function) + ), + TP_fast_assign( + __entry->dev = mp->m_super->s_dev; + __entry->error = error; + __entry->function = function; + ), + TP_printk("dev %d:%d error %d function %pS", + MAJOR(__entry->dev), MINOR(__entry->dev), + __entry->error, __entry->function) +); + DECLARE_EVENT_CLASS(xfs_perag_class, TP_PROTO(struct xfs_mount *mp, xfs_agnumber_t agno, int refcount, unsigned long caller_ip), diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c index c94e71f741b6..e72730f85af1 100644 --- a/fs/xfs/xfs_trans.c +++ b/fs/xfs/xfs_trans.c @@ -465,7 +465,7 @@ xfs_trans_apply_sb_deltas( xfs_trans_t *tp) { xfs_dsb_t *sbp; - xfs_buf_t *bp; + struct xfs_buf *bp; int whole = 0; bp = xfs_trans_getsb(tp); diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c index 42d63b830cb9..9aced0a00003 100644 --- a/fs/xfs/xfs_trans_buf.c +++ b/fs/xfs/xfs_trans_buf.c @@ -121,7 +121,7 @@ xfs_trans_get_buf_map( xfs_buf_flags_t flags, struct xfs_buf **bpp) { - xfs_buf_t *bp; + struct xfs_buf *bp; struct xfs_buf_log_item *bip; int error; @@ -401,7 +401,7 @@ xfs_trans_brelse( void xfs_trans_bhold( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -422,7 +422,7 @@ xfs_trans_bhold( void xfs_trans_bhold_release( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -538,7 +538,7 @@ xfs_trans_log_buf( void xfs_trans_binval( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; int i; @@ -593,7 +593,7 @@ xfs_trans_binval( void xfs_trans_inode_buf( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -618,7 +618,7 @@ xfs_trans_inode_buf( void xfs_trans_stale_inode_buf( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -643,7 +643,7 @@ xfs_trans_stale_inode_buf( void xfs_trans_inode_alloc_buf( xfs_trans_t *tp, - xfs_buf_t *bp) + struct xfs_buf *bp) { struct xfs_buf_log_item *bip = bp->b_log_item; @@ -737,7 +737,7 @@ xfs_trans_buf_copy_type( void xfs_trans_dquot_buf( xfs_trans_t *tp, - xfs_buf_t *bp, + struct xfs_buf *bp, uint type) { struct xfs_buf_log_item *bip = bp->b_log_item; diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c index fe45b0c3970c..28b8ac701919 100644 --- a/fs/xfs/xfs_trans_dquot.c +++ b/fs/xfs/xfs_trans_dquot.c @@ -84,13 +84,6 @@ xfs_trans_dup_dqinfo( xfs_trans_alloc_dqinfo(ntp); - /* - * Because the quota blk reservation is carried forward, - * it is also necessary to carry forward the DQ_DIRTY flag. - */ - if (otp->t_flags & XFS_TRANS_DQ_DIRTY) - ntp->t_flags |= XFS_TRANS_DQ_DIRTY; - for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { oqa = otp->t_dqinfo->dqs[j]; nqa = ntp->t_dqinfo->dqs[j]; @@ -143,9 +136,6 @@ xfs_trans_mod_dquot_byino( xfs_is_quota_inode(&mp->m_sb, ip->i_ino)) return; - if (tp->t_dqinfo == NULL) - xfs_trans_alloc_dqinfo(tp); - if (XFS_IS_UQUOTA_ON(mp) && ip->i_udquot) (void) xfs_trans_mod_dquot(tp, ip->i_udquot, field, delta); if (XFS_IS_GQUOTA_ON(mp) && ip->i_gdquot) @@ -204,6 +194,9 @@ xfs_trans_mod_dquot( ASSERT(XFS_IS_QUOTA_RUNNING(tp->t_mountp)); qtrx = NULL; + if (!delta) + return; + if (tp->t_dqinfo == NULL) xfs_trans_alloc_dqinfo(tp); /* @@ -215,10 +208,8 @@ xfs_trans_mod_dquot( if (qtrx->qt_dquot == NULL) qtrx->qt_dquot = dqp; - if (delta) { - trace_xfs_trans_mod_dquot_before(qtrx); - trace_xfs_trans_mod_dquot(tp, dqp, field, delta); - } + trace_xfs_trans_mod_dquot_before(qtrx); + trace_xfs_trans_mod_dquot(tp, dqp, field, delta); switch (field) { /* regular disk blk reservation */ @@ -271,10 +262,7 @@ xfs_trans_mod_dquot( ASSERT(0); } - if (delta) - trace_xfs_trans_mod_dquot_after(qtrx); - - tp->t_flags |= XFS_TRANS_DQ_DIRTY; + trace_xfs_trans_mod_dquot_after(qtrx); } @@ -351,7 +339,7 @@ xfs_trans_apply_dquot_deltas( int64_t totalbdelta; int64_t totalrtbdelta; - if (!(tp->t_flags & XFS_TRANS_DQ_DIRTY)) + if (!tp->t_dqinfo) return; ASSERT(tp->t_dqinfo); @@ -493,7 +481,7 @@ xfs_trans_unreserve_and_mod_dquots( struct xfs_dqtrx *qtrx, *qa; bool locked; - if (!tp->t_dqinfo || !(tp->t_flags & XFS_TRANS_DQ_DIRTY)) + if (!tp->t_dqinfo) return; for (j = 0; j < XFS_QM_TRANS_DQTYPES; j++) { @@ -698,16 +686,10 @@ xfs_trans_dqresv( * because we don't have the luxury of a transaction envelope then. */ if (tp) { - ASSERT(tp->t_dqinfo); ASSERT(flags & XFS_QMOPT_RESBLK_MASK); - if (nblks != 0) - xfs_trans_mod_dquot(tp, dqp, - flags & XFS_QMOPT_RESBLK_MASK, - nblks); - if (ninos != 0) - xfs_trans_mod_dquot(tp, dqp, - XFS_TRANS_DQ_RES_INOS, - ninos); + xfs_trans_mod_dquot(tp, dqp, flags & XFS_QMOPT_RESBLK_MASK, + nblks); + xfs_trans_mod_dquot(tp, dqp, XFS_TRANS_DQ_RES_INOS, ninos); } ASSERT(dqp->q_blk.reserved >= dqp->q_blk.count); ASSERT(dqp->q_rtb.reserved >= dqp->q_rtb.count); @@ -752,9 +734,6 @@ xfs_trans_reserve_quota_bydquots( if (!XFS_IS_QUOTA_RUNNING(mp) || !XFS_IS_QUOTA_ON(mp)) return 0; - if (tp && tp->t_dqinfo == NULL) - xfs_trans_alloc_dqinfo(tp); - ASSERT(flags & XFS_QMOPT_RESBLK_MASK); if (udqp) { |