summary | refs | log | tree | commit | diff
path: root/fs/bcachefs
diff options
context:
space:
mode:
author: Kent Overstreet <kent.overstreet@gmail.com>, 2021-01-16 15:40:33 -0500
committer: Kent Overstreet <kent.overstreet@linux.dev>, 2023-10-22 17:08:51 -0400
commit: 280249b9d9b9a62562ddeb5429a7d29d2f03ba1c (patch)
tree: 3f67835c9f401afb6062d819a4ace409ae7d7144 /fs/bcachefs
parent: ed9d58a2b1ddbc38816571638ee114b7efb9f279 (diff)
bcachefs: Correctly order flushes and journal writes on multi device filesystems
All writes prior to a journal write need to be flushed before the
journal write itself happens. On single device filesystems, it suffices
to mark the write with REQ_PREFLUSH|REQ_FUA, but on multi device
filesystems we need to issue flushes to every device - and wait for them
to complete - before issuing the journal writes. Previously, we were
issuing flushes to every device, but we weren't waiting for them to
complete before issuing the journal writes.

Signed-off-by: Kent Overstreet <kent.overstreet@gmail.com>
Signed-off-by: Kent Overstreet <kent.overstreet@linux.dev>
Diffstat (limited to 'fs/bcachefs')
-rw-r--r-- fs/bcachefs/io.c | 3
-rw-r--r-- fs/bcachefs/journal.c | 1
-rw-r--r-- fs/bcachefs/journal.h | 5
-rw-r--r-- fs/bcachefs/journal_io.c | 99
-rw-r--r-- fs/bcachefs/journal_types.h | 1
5 files changed, 65 insertions, 44 deletions
diff --git a/fs/bcachefs/io.c b/fs/bcachefs/io.c
index bc1e2dc04850..8a4d05eee381 100644
--- a/fs/bcachefs/io.c
+++ b/fs/bcachefs/io.c
@@ -509,9 +509,6 @@ void bch2_submit_wbio_replicas(struct bch_write_bio *wbio, struct bch_fs *c,
n->submit_time = local_clock();
n->bio.bi_iter.bi_sector = ptr->offset;
- if (!journal_flushes_device(ca))
- n->bio.bi_opf |= REQ_FUA;
-
if (likely(n->have_ioref)) {
this_cpu_add(ca->io_done->sectors[WRITE][type],
bio_sectors(&n->bio));
diff --git a/fs/bcachefs/journal.c b/fs/bcachefs/journal.c
index e90fe042302f..6f84a5dd06bc 100644
--- a/fs/bcachefs/journal.c
+++ b/fs/bcachefs/journal.c
@@ -81,6 +81,7 @@ static void bch2_journal_buf_init(struct journal *j)
bkey_extent_init(&buf->key);
buf->noflush = false;
buf->must_flush = false;
+ buf->separate_flush = false;
memset(buf->has_inode, 0, sizeof(buf->has_inode));
diff --git a/fs/bcachefs/journal.h b/fs/bcachefs/journal.h
index df353a18011b..547c735ce3cb 100644
--- a/fs/bcachefs/journal.h
+++ b/fs/bcachefs/journal.h
@@ -496,11 +496,6 @@ static inline int bch2_journal_error(struct journal *j)
struct bch_dev;
-static inline bool journal_flushes_device(struct bch_dev *ca)
-{
- return true;
-}
-
static inline void bch2_journal_set_replay_done(struct journal *j)
{
BUG_ON(!test_bit(JOURNAL_STARTED, &j->flags));
diff --git a/fs/bcachefs/journal_io.c b/fs/bcachefs/journal_io.c
index f6c9681badea..40da18d778a3 100644
--- a/fs/bcachefs/journal_io.c
+++ b/fs/bcachefs/journal_io.c
@@ -1188,6 +1188,51 @@ static void journal_write_endio(struct bio *bio)
percpu_ref_put(&ca->io_ref);
}
+static void do_journal_write(struct closure *cl)
+{
+ struct journal *j = container_of(cl, struct journal, io);
+ struct bch_fs *c = container_of(j, struct bch_fs, journal);
+ struct bch_dev *ca;
+ struct journal_buf *w = journal_last_unwritten_buf(j);
+ struct bch_extent_ptr *ptr;
+ struct bio *bio;
+ unsigned sectors = vstruct_sectors(w->data, c->block_bits);
+
+ extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
+ ca = bch_dev_bkey_exists(c, ptr->dev);
+ if (!percpu_ref_tryget(&ca->io_ref)) {
+ /* XXX: fix this */
+ bch_err(c, "missing device for journal write\n");
+ continue;
+ }
+
+ this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal],
+ sectors);
+
+ bio = ca->journal.bio;
+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META);
+ bio->bi_iter.bi_sector = ptr->offset;
+ bio->bi_end_io = journal_write_endio;
+ bio->bi_private = ca;
+
+ if (!JSET_NO_FLUSH(w->data))
+ bio->bi_opf |= REQ_FUA;
+ if (!JSET_NO_FLUSH(w->data) && !w->separate_flush)
+ bio->bi_opf |= REQ_PREFLUSH;
+
+ bch2_bio_map(bio, w->data, sectors << 9);
+
+ trace_journal_write(bio);
+ closure_bio_submit(bio, cl);
+
+ ca->journal.bucket_seq[ca->journal.cur_idx] =
+ le64_to_cpu(w->data->seq);
+ }
+
+ continue_at(cl, journal_write_done, system_highpri_wq);
+ return;
+}
+
void bch2_journal_write(struct closure *cl)
{
struct journal *j = container_of(cl, struct journal, io);
@@ -1197,9 +1242,8 @@ void bch2_journal_write(struct closure *cl)
struct jset_entry *start, *end;
struct jset *jset;
struct bio *bio;
- struct bch_extent_ptr *ptr;
bool validate_before_checksum = false;
- unsigned i, sectors, bytes, u64s;
+ unsigned i, sectors, bytes, u64s, nr_rw_members = 0;
int ret;
BUG_ON(BCH_SB_CLEAN(c->disk_sb.sb));
@@ -1329,45 +1373,28 @@ retry_alloc:
if (c->opts.nochanges)
goto no_io;
- extent_for_each_ptr(bkey_i_to_s_extent(&w->key), ptr) {
- ca = bch_dev_bkey_exists(c, ptr->dev);
- if (!percpu_ref_tryget(&ca->io_ref)) {
- /* XXX: fix this */
- bch_err(c, "missing device for journal write\n");
- continue;
- }
-
- this_cpu_add(ca->io_done->sectors[WRITE][BCH_DATA_journal],
- sectors);
+ for_each_rw_member(ca, c, i)
+ nr_rw_members++;
- bio = ca->journal.bio;
- bio_reset(bio, ca->disk_sb.bdev, REQ_OP_WRITE|REQ_SYNC|REQ_META);
- bio->bi_iter.bi_sector = ptr->offset;
- bio->bi_end_io = journal_write_endio;
- bio->bi_private = ca;
- if (!JSET_NO_FLUSH(jset))
- bio->bi_opf |= REQ_PREFLUSH|REQ_FUA;
- bch2_bio_map(bio, jset, sectors << 9);
+ if (nr_rw_members > 1)
+ w->separate_flush = true;
- trace_journal_write(bio);
- closure_bio_submit(bio, cl);
+ if (!JSET_NO_FLUSH(jset) && w->separate_flush) {
+ for_each_rw_member(ca, c, i) {
+ percpu_ref_get(&ca->io_ref);
- ca->journal.bucket_seq[ca->journal.cur_idx] = le64_to_cpu(jset->seq);
+ bio = ca->journal.bio;
+ bio_reset(bio, ca->disk_sb.bdev, REQ_OP_FLUSH);
+ bio->bi_end_io = journal_write_endio;
+ bio->bi_private = ca;
+ closure_bio_submit(bio, cl);
+ }
}
- if (!JSET_NO_FLUSH(jset)) {
- for_each_rw_member(ca, c, i)
- if (journal_flushes_device(ca) &&
- !bch2_bkey_has_device(bkey_i_to_s_c(&w->key), i)) {
- percpu_ref_get(&ca->io_ref);
-
- bio = ca->journal.bio;
- bio_reset(bio, ca->disk_sb.bdev, REQ_OP_FLUSH);
- bio->bi_end_io = journal_write_endio;
- bio->bi_private = ca;
- closure_bio_submit(bio, cl);
- }
- }
+ bch2_bucket_seq_cleanup(c);
+
+ continue_at(cl, do_journal_write, system_highpri_wq);
+ return;
no_io:
bch2_bucket_seq_cleanup(c);
diff --git a/fs/bcachefs/journal_types.h b/fs/bcachefs/journal_types.h
index 150e691d5317..8ad10e46dd5d 100644
--- a/fs/bcachefs/journal_types.h
+++ b/fs/bcachefs/journal_types.h
@@ -31,6 +31,7 @@ struct journal_buf {
unsigned u64s_reserved;
bool noflush; /* write has already been kicked off, and was noflush */
bool must_flush; /* something wants a flush */
+ bool separate_flush;
/* bloom filter: */
unsigned long has_inode[1024 / sizeof(unsigned long)];
};