summaryrefslogtreecommitdiff
path: root/fs/ext4/orphan.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4/orphan.c')
-rw-r--r--fs/ext4/orphan.c340
1 files changed, 301 insertions, 39 deletions
diff --git a/fs/ext4/orphan.c b/fs/ext4/orphan.c
index 1f2fa2ef53bd..ac928f60234a 100644
--- a/fs/ext4/orphan.c
+++ b/fs/ext4/orphan.c
@@ -8,6 +8,52 @@
#include "ext4.h"
#include "ext4_jbd2.h"
+static int ext4_orphan_file_add(handle_t *handle, struct inode *inode)
+{
+ int i, j;
+ struct ext4_orphan_info *oi = &EXT4_SB(inode->i_sb)->s_orphan_info;
+ int ret = 0;
+ __le32 *bdata;
+ int inodes_per_ob = ext4_inodes_per_orphan_block(inode->i_sb);
+
+ spin_lock(&oi->of_lock);
+ for (i = 0; i < oi->of_blocks && !oi->of_binfo[i].ob_free_entries; i++);
+ if (i == oi->of_blocks) {
+ spin_unlock(&oi->of_lock);
+ /*
+ * For now we don't grow or shrink orphan file. We just use
+ * whatever was allocated at mke2fs time. The additional
+ * credits we would have to reserve for each orphan inode
+ * operation just don't seem worth it.
+ */
+ return -ENOSPC;
+ }
+ oi->of_binfo[i].ob_free_entries--;
+ spin_unlock(&oi->of_lock);
+
+ /*
+ * Get access to orphan block. We have dropped of_lock but since we
+ * have decremented number of free entries we are guaranteed free entry
+ * in our block.
+ */
+ ret = ext4_journal_get_write_access(handle, inode->i_sb,
+ oi->of_binfo[i].ob_bh, EXT4_JTR_ORPHAN_FILE);
+ if (ret)
+ return ret;
+
+ bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data);
+ spin_lock(&oi->of_lock);
+ /* Find empty slot in a block */
+ for (j = 0; j < inodes_per_ob && bdata[j]; j++);
+ BUG_ON(j == inodes_per_ob);
+ bdata[j] = cpu_to_le32(inode->i_ino);
+ EXT4_I(inode)->i_orphan_idx = i * inodes_per_ob + j;
+ ext4_set_inode_state(inode, EXT4_STATE_ORPHAN_FILE);
+ spin_unlock(&oi->of_lock);
+
+ return ext4_handle_dirty_metadata(handle, NULL, oi->of_binfo[i].ob_bh);
+}
+
/*
* ext4_orphan_add() links an unlinked or truncated inode into a list of
* such inodes, starting at the superblock, in case we crash before the
@@ -34,10 +80,10 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
!inode_is_locked(inode));
/*
- * Exit early if inode already is on orphan list. This is a big speedup
- * since we don't have to contend on the global s_orphan_lock.
+ * Inode orphaned in orphan file or in orphan list?
*/
- if (!list_empty(&EXT4_I(inode)->i_orphan))
+ if (ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE) ||
+ !list_empty(&EXT4_I(inode)->i_orphan))
return 0;
/*
@@ -49,6 +95,16 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
ASSERT((S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
S_ISLNK(inode->i_mode)) || inode->i_nlink == 0);
+ if (sbi->s_orphan_info.of_blocks) {
+ err = ext4_orphan_file_add(handle, inode);
+ /*
+ * Fallback to normal orphan list of orphan file is
+ * out of space
+ */
+ if (err != -ENOSPC)
+ return err;
+ }
+
BUFFER_TRACE(sbi->s_sbh, "get_write_access");
err = ext4_journal_get_write_access(handle, sb, sbi->s_sbh,
EXT4_JTR_NONE);
@@ -103,6 +159,39 @@ out:
return err;
}
+static int ext4_orphan_file_del(handle_t *handle, struct inode *inode)
+{
+ struct ext4_orphan_info *oi = &EXT4_SB(inode->i_sb)->s_orphan_info;
+ __le32 *bdata;
+ int blk, off;
+ int inodes_per_ob = ext4_inodes_per_orphan_block(inode->i_sb);
+ int ret = 0;
+
+ if (!handle)
+ goto out;
+ blk = EXT4_I(inode)->i_orphan_idx / inodes_per_ob;
+ off = EXT4_I(inode)->i_orphan_idx % inodes_per_ob;
+ if (WARN_ON_ONCE(blk >= oi->of_blocks))
+ goto out;
+
+ ret = ext4_journal_get_write_access(handle, inode->i_sb,
+ oi->of_binfo[blk].ob_bh, EXT4_JTR_ORPHAN_FILE);
+ if (ret)
+ goto out;
+
+ bdata = (__le32 *)(oi->of_binfo[blk].ob_bh->b_data);
+ spin_lock(&oi->of_lock);
+ bdata[off] = 0;
+ oi->of_binfo[blk].ob_free_entries++;
+ spin_unlock(&oi->of_lock);
+ ret = ext4_handle_dirty_metadata(handle, NULL, oi->of_binfo[blk].ob_bh);
+out:
+ ext4_clear_inode_state(inode, EXT4_STATE_ORPHAN_FILE);
+ INIT_LIST_HEAD(&EXT4_I(inode)->i_orphan);
+
+ return ret;
+}
+
/*
* ext4_orphan_del() removes an unlinked or truncated inode from the list
* of such inodes stored on disk, because it is finally being cleaned up.
@@ -121,6 +210,9 @@ int ext4_orphan_del(handle_t *handle, struct inode *inode)
WARN_ON_ONCE(!(inode->i_state & (I_NEW | I_FREEING)) &&
!inode_is_locked(inode));
+ if (ext4_test_inode_state(inode, EXT4_STATE_ORPHAN_FILE))
+ return ext4_orphan_file_del(handle, inode);
+
/* Do this quick check before taking global s_orphan_lock. */
if (list_empty(&ei->i_orphan))
return 0;
@@ -200,6 +292,46 @@ static int ext4_quota_on_mount(struct super_block *sb, int type)
}
#endif
+static void ext4_process_orphan(struct inode *inode,
+ int *nr_truncates, int *nr_orphans)
+{
+ struct super_block *sb = inode->i_sb;
+ int ret;
+
+ dquot_initialize(inode);
+ if (inode->i_nlink) {
+ if (test_opt(sb, DEBUG))
+ ext4_msg(sb, KERN_DEBUG,
+ "%s: truncating inode %lu to %lld bytes",
+ __func__, inode->i_ino, inode->i_size);
+ jbd_debug(2, "truncating inode %lu to %lld bytes\n",
+ inode->i_ino, inode->i_size);
+ inode_lock(inode);
+ truncate_inode_pages(inode->i_mapping, inode->i_size);
+ ret = ext4_truncate(inode);
+ if (ret) {
+ /*
+ * We need to clean up the in-core orphan list
+ * manually if ext4_truncate() failed to get a
+ * transaction handle.
+ */
+ ext4_orphan_del(NULL, inode);
+ ext4_std_error(inode->i_sb, ret);
+ }
+ inode_unlock(inode);
+ (*nr_truncates)++;
+ } else {
+ if (test_opt(sb, DEBUG))
+ ext4_msg(sb, KERN_DEBUG,
+ "%s: deleting unreferenced inode %lu",
+ __func__, inode->i_ino);
+ jbd_debug(2, "deleting unreferenced inode %lu\n",
+ inode->i_ino);
+ (*nr_orphans)++;
+ }
+ iput(inode); /* The delete magic happens here! */
+}
+
/* ext4_orphan_cleanup() walks a singly-linked list of inodes (starting at
* the superblock) which were deleted from all directories, but held open by
* a process at the time of a crash. We walk the list and try to delete these
@@ -220,12 +352,17 @@ static int ext4_quota_on_mount(struct super_block *sb, int type)
void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es)
{
unsigned int s_flags = sb->s_flags;
- int ret, nr_orphans = 0, nr_truncates = 0;
+ int nr_orphans = 0, nr_truncates = 0;
+ struct inode *inode;
+ int i, j;
#ifdef CONFIG_QUOTA
int quota_update = 0;
- int i;
#endif
- if (!es->s_last_orphan) {
+ __le32 *bdata;
+ struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
+ int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
+
+ if (!es->s_last_orphan && !oi->of_blocks) {
jbd_debug(4, "no orphan inodes to clean up\n");
return;
}
@@ -289,8 +426,6 @@ void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es)
#endif
while (es->s_last_orphan) {
- struct inode *inode;
-
/*
* We may have encountered an error during cleanup; if
* so, skip the rest.
@@ -308,38 +443,21 @@ void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es)
}
list_add(&EXT4_I(inode)->i_orphan, &EXT4_SB(sb)->s_orphan);
- dquot_initialize(inode);
- if (inode->i_nlink) {
- if (test_opt(sb, DEBUG))
- ext4_msg(sb, KERN_DEBUG,
- "%s: truncating inode %lu to %lld bytes",
- __func__, inode->i_ino, inode->i_size);
- jbd_debug(2, "truncating inode %lu to %lld bytes\n",
- inode->i_ino, inode->i_size);
- inode_lock(inode);
- truncate_inode_pages(inode->i_mapping, inode->i_size);
- ret = ext4_truncate(inode);
- if (ret) {
- /*
- * We need to clean up the in-core orphan list
- * manually if ext4_truncate() failed to get a
- * transaction handle.
- */
- ext4_orphan_del(NULL, inode);
- ext4_std_error(inode->i_sb, ret);
- }
- inode_unlock(inode);
- nr_truncates++;
- } else {
- if (test_opt(sb, DEBUG))
- ext4_msg(sb, KERN_DEBUG,
- "%s: deleting unreferenced inode %lu",
- __func__, inode->i_ino);
- jbd_debug(2, "deleting unreferenced inode %lu\n",
- inode->i_ino);
- nr_orphans++;
+ ext4_process_orphan(inode, &nr_truncates, &nr_orphans);
+ }
+
+ for (i = 0; i < oi->of_blocks; i++) {
+ bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data);
+ for (j = 0; j < inodes_per_ob; j++) {
+ if (!bdata[j])
+ continue;
+ inode = ext4_orphan_get(sb, le32_to_cpu(bdata[j]));
+ if (IS_ERR(inode))
+ continue;
+ ext4_set_inode_state(inode, EXT4_STATE_ORPHAN_FILE);
+ EXT4_I(inode)->i_orphan_idx = i * inodes_per_ob + j;
+ ext4_process_orphan(inode, &nr_truncates, &nr_orphans);
}
- iput(inode); /* The delete magic happens here! */
}
#define PLURAL(x) (x), ((x) == 1) ? "" : "s"
@@ -361,3 +479,147 @@ void ext4_orphan_cleanup(struct super_block *sb, struct ext4_super_block *es)
#endif
sb->s_flags = s_flags; /* Restore SB_RDONLY status */
}
+
+void ext4_release_orphan_info(struct super_block *sb)
+{
+ int i;
+ struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
+
+ if (!oi->of_blocks)
+ return;
+ for (i = 0; i < oi->of_blocks; i++)
+ brelse(oi->of_binfo[i].ob_bh);
+ kfree(oi->of_binfo);
+}
+
+static struct ext4_orphan_block_tail *ext4_orphan_block_tail(
+ struct super_block *sb,
+ struct buffer_head *bh)
+{
+ return (struct ext4_orphan_block_tail *)(bh->b_data + sb->s_blocksize -
+ sizeof(struct ext4_orphan_block_tail));
+}
+
+static int ext4_orphan_file_block_csum_verify(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ __u32 calculated;
+ int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
+ struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
+ struct ext4_orphan_block_tail *ot;
+ __le64 dsk_block_nr = cpu_to_le64(bh->b_blocknr);
+
+ if (!ext4_has_metadata_csum(sb))
+ return 1;
+
+ ot = ext4_orphan_block_tail(sb, bh);
+ calculated = ext4_chksum(EXT4_SB(sb), oi->of_csum_seed,
+ (__u8 *)&dsk_block_nr, sizeof(dsk_block_nr));
+ calculated = ext4_chksum(EXT4_SB(sb), calculated, (__u8 *)bh->b_data,
+ inodes_per_ob * sizeof(__u32));
+ return le32_to_cpu(ot->ob_checksum) == calculated;
+}
+
+/* This gets called only when checksumming is enabled */
+void ext4_orphan_file_block_trigger(struct jbd2_buffer_trigger_type *triggers,
+ struct buffer_head *bh,
+ void *data, size_t size)
+{
+ struct super_block *sb = EXT4_TRIGGER(triggers)->sb;
+ __u32 csum;
+ int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
+ struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
+ struct ext4_orphan_block_tail *ot;
+ __le64 dsk_block_nr = cpu_to_le64(bh->b_blocknr);
+
+ csum = ext4_chksum(EXT4_SB(sb), oi->of_csum_seed,
+ (__u8 *)&dsk_block_nr, sizeof(dsk_block_nr));
+ csum = ext4_chksum(EXT4_SB(sb), csum, (__u8 *)data,
+ inodes_per_ob * sizeof(__u32));
+ ot = ext4_orphan_block_tail(sb, bh);
+ ot->ob_checksum = cpu_to_le32(csum);
+}
+
+int ext4_init_orphan_info(struct super_block *sb)
+{
+ struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
+ struct inode *inode;
+ int i, j;
+ int ret;
+ int free;
+ __le32 *bdata;
+ int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
+ struct ext4_orphan_block_tail *ot;
+ ino_t orphan_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_orphan_file_inum);
+
+ spin_lock_init(&oi->of_lock);
+
+ if (!ext4_has_feature_orphan_file(sb))
+ return 0;
+
+ inode = ext4_iget(sb, orphan_ino, EXT4_IGET_SPECIAL);
+ if (IS_ERR(inode)) {
+ ext4_msg(sb, KERN_ERR, "get orphan inode failed");
+ return PTR_ERR(inode);
+ }
+ oi->of_blocks = inode->i_size >> sb->s_blocksize_bits;
+ oi->of_csum_seed = EXT4_I(inode)->i_csum_seed;
+ oi->of_binfo = kmalloc(oi->of_blocks*sizeof(struct ext4_orphan_block),
+ GFP_KERNEL);
+ if (!oi->of_binfo) {
+ ret = -ENOMEM;
+ goto out_put;
+ }
+ for (i = 0; i < oi->of_blocks; i++) {
+ oi->of_binfo[i].ob_bh = ext4_bread(NULL, inode, i, 0);
+ if (IS_ERR(oi->of_binfo[i].ob_bh)) {
+ ret = PTR_ERR(oi->of_binfo[i].ob_bh);
+ goto out_free;
+ }
+ if (!oi->of_binfo[i].ob_bh) {
+ ret = -EIO;
+ goto out_free;
+ }
+ ot = ext4_orphan_block_tail(sb, oi->of_binfo[i].ob_bh);
+ if (le32_to_cpu(ot->ob_magic) != EXT4_ORPHAN_BLOCK_MAGIC) {
+ ext4_error(sb, "orphan file block %d: bad magic", i);
+ ret = -EIO;
+ goto out_free;
+ }
+ if (!ext4_orphan_file_block_csum_verify(sb,
+ oi->of_binfo[i].ob_bh)) {
+ ext4_error(sb, "orphan file block %d: bad checksum", i);
+ ret = -EIO;
+ goto out_free;
+ }
+ bdata = (__le32 *)(oi->of_binfo[i].ob_bh->b_data);
+ free = 0;
+ for (j = 0; j < inodes_per_ob; j++)
+ if (bdata[j] == 0)
+ free++;
+ oi->of_binfo[i].ob_free_entries = free;
+ }
+ iput(inode);
+ return 0;
+out_free:
+ for (i--; i >= 0; i--)
+ brelse(oi->of_binfo[i].ob_bh);
+ kfree(oi->of_binfo);
+out_put:
+ iput(inode);
+ return ret;
+}
+
+int ext4_orphan_file_empty(struct super_block *sb)
+{
+ struct ext4_orphan_info *oi = &EXT4_SB(sb)->s_orphan_info;
+ int i;
+ int inodes_per_ob = ext4_inodes_per_orphan_block(sb);
+
+ if (!ext4_has_feature_orphan_file(sb))
+ return 1;
+ for (i = 0; i < oi->of_blocks; i++)
+ if (oi->of_binfo[i].ob_free_entries != inodes_per_ob)
+ return 0;
+ return 1;
+}