diff options
author | Song Liu <songliubraving@fb.com> | 2016-11-17 15:24:38 -0800 |
---|---|---|
committer | Shaohua Li <shli@fb.com> | 2016-11-18 13:26:07 -0800 |
commit | 2ded370373a400c20cf0c6e941e724e61582a867 (patch) | |
tree | 704038326bbbe6f2e7fddd1b98b1f0ae1d3d9f44 /drivers/md/raid5-cache.c | |
parent | 937621c36e0ea1af2aceeaea412ba3bd80247199 (diff) |
md/r5cache: State machine for raid5-cache write back mode
This patch adds state machine for raid5-cache. With log device, the
raid456 array could operate in two different modes (r5c_journal_mode):
- write-back (R5C_MODE_WRITE_BACK)
- write-through (R5C_MODE_WRITE_THROUGH)
Existing code of raid5-cache only has write-through mode. For write-back
cache, it is necessary to extend the state machine.
With write-back cache, every stripe could operate in two different
phases:
- caching
- writing-out
In caching phase, the stripe handles writes as:
- write to journal
- return IO
In writing-out phase, the stripe behaviors as a stripe in write through
mode R5C_MODE_WRITE_THROUGH.
STRIPE_R5C_CACHING is added to sh->state to differentiate caching and
writing-out phase.
Please note: this is a "no-op" patch for raid5-cache write-through
mode.
The following detailed explanation is copied from the raid5-cache.c:
/*
* raid5 cache state machine
*
* With rhe RAID cache, each stripe works in two phases:
* - caching phase
* - writing-out phase
*
* These two phases are controlled by bit STRIPE_R5C_CACHING:
* if STRIPE_R5C_CACHING == 0, the stripe is in writing-out phase
* if STRIPE_R5C_CACHING == 1, the stripe is in caching phase
*
* When there is no journal, or the journal is in write-through mode,
* the stripe is always in writing-out phase.
*
* For write-back journal, the stripe is sent to caching phase on write
* (r5c_handle_stripe_dirtying). r5c_make_stripe_write_out() kicks off
* the write-out phase by clearing STRIPE_R5C_CACHING.
*
* Stripes in caching phase do not write the raid disks. Instead, all
* writes are committed from the log device. Therefore, a stripe in
* caching phase handles writes as:
* - write to log device
* - return IO
*
* Stripes in writing-out phase handle writes as:
* - calculate parity
* - write pending data and parity to journal
* - write data and parity to raid disks
* - return IO for pending writes
*/
Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Diffstat (limited to 'drivers/md/raid5-cache.c')
-rw-r--r-- | drivers/md/raid5-cache.c | 143 |
1 files changed, 140 insertions, 3 deletions
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 33fc85015147..02a554434747 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -40,6 +40,47 @@ */ #define R5L_POOL_SIZE 4 +/* + * r5c journal modes of the array: write-back or write-through. + * write-through mode has identical behavior as existing log only + * implementation. + */ +enum r5c_journal_mode { + R5C_JOURNAL_MODE_WRITE_THROUGH = 0, + R5C_JOURNAL_MODE_WRITE_BACK = 1, +}; + +/* + * raid5 cache state machine + * + * With rhe RAID cache, each stripe works in two phases: + * - caching phase + * - writing-out phase + * + * These two phases are controlled by bit STRIPE_R5C_CACHING: + * if STRIPE_R5C_CACHING == 0, the stripe is in writing-out phase + * if STRIPE_R5C_CACHING == 1, the stripe is in caching phase + * + * When there is no journal, or the journal is in write-through mode, + * the stripe is always in writing-out phase. + * + * For write-back journal, the stripe is sent to caching phase on write + * (r5c_try_caching_write). r5c_make_stripe_write_out() kicks off + * the write-out phase by clearing STRIPE_R5C_CACHING. + * + * Stripes in caching phase do not write the raid disks. Instead, all + * writes are committed from the log device. Therefore, a stripe in + * caching phase handles writes as: + * - write to log device + * - return IO + * + * Stripes in writing-out phase handle writes as: + * - calculate parity + * - write pending data and parity to journal + * - write data and parity to raid disks + * - return IO for pending writes + */ + struct r5l_log { struct md_rdev *rdev; @@ -96,6 +137,9 @@ struct r5l_log { spinlock_t no_space_stripes_lock; bool need_cache_flush; + + /* for r5c_cache */ + enum r5c_journal_mode r5c_journal_mode; }; /* @@ -133,6 +177,12 @@ enum r5l_io_unit_state { IO_UNIT_STRIPE_END = 3, /* stripes data finished writing to raid */ }; +bool r5c_is_writeback(struct r5l_log *log) +{ + return (log != NULL && + log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_BACK); +} + static sector_t r5l_ring_add(struct r5l_log *log, sector_t start, sector_t inc) { start += inc; @@ -168,12 +218,51 @@ static void __r5l_set_io_unit_state(struct r5l_io_unit *io, io->state = state; } +/* + * Put the stripe into writing-out phase by clearing STRIPE_R5C_CACHING. + * This function should only be called in write-back mode. + */ +static void r5c_make_stripe_write_out(struct stripe_head *sh) +{ + struct r5conf *conf = sh->raid_conf; + struct r5l_log *log = conf->log; + + BUG_ON(!r5c_is_writeback(log)); + + WARN_ON(!test_bit(STRIPE_R5C_CACHING, &sh->state)); + clear_bit(STRIPE_R5C_CACHING, &sh->state); +} + +/* + * Setting proper flags after writing (or flushing) data and/or parity to the + * log device. This is called from r5l_log_endio() or r5l_log_flush_endio(). + */ +static void r5c_finish_cache_stripe(struct stripe_head *sh) +{ + struct r5l_log *log = sh->raid_conf->log; + + if (log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) { + BUG_ON(test_bit(STRIPE_R5C_CACHING, &sh->state)); + /* + * Set R5_InJournal for parity dev[pd_idx]. This means + * all data AND parity in the journal. For RAID 6, it is + * NOT necessary to set the flag for dev[qd_idx], as the + * two parities are written out together. + */ + set_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags); + } else + BUG(); /* write-back logic in next patch */ +} + static void r5l_io_run_stripes(struct r5l_io_unit *io) { struct stripe_head *sh, *next; list_for_each_entry_safe(sh, next, &io->stripe_list, log_list) { list_del_init(&sh->log_list); + + r5c_finish_cache_stripe(sh); + set_bit(STRIPE_HANDLE, &sh->state); raid5_release_stripe(sh); } @@ -412,18 +501,19 @@ static int r5l_log_stripe(struct r5l_log *log, struct stripe_head *sh, r5l_append_payload_page(log, sh->dev[i].page); } - if (sh->qd_idx >= 0) { + if (parity_pages == 2) { r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY, sh->sector, sh->dev[sh->pd_idx].log_checksum, sh->dev[sh->qd_idx].log_checksum, true); r5l_append_payload_page(log, sh->dev[sh->pd_idx].page); r5l_append_payload_page(log, sh->dev[sh->qd_idx].page); - } else { + } else if (parity_pages == 1) { r5l_append_payload_meta(log, R5LOG_PAYLOAD_PARITY, sh->sector, sh->dev[sh->pd_idx].log_checksum, 0, false); r5l_append_payload_page(log, sh->dev[sh->pd_idx].page); - } + } else /* Just writing data, not parity, in caching phase */ + BUG_ON(parity_pages != 0); list_add_tail(&sh->log_list, &io->stripe_list); atomic_inc(&io->pending_stripe); @@ -455,6 +545,8 @@ int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh) return -EAGAIN; } + WARN_ON(test_bit(STRIPE_R5C_CACHING, &sh->state)); + for (i = 0; i < sh->disks; i++) { void *addr; @@ -1112,6 +1204,49 @@ static void r5l_write_super(struct r5l_log *log, sector_t cp) set_bit(MD_CHANGE_DEVS, &mddev->flags); } +/* + * Try handle write operation in caching phase. This function should only + * be called in write-back mode. + * + * If all outstanding writes can be handled in caching phase, returns 0 + * If writes requires write-out phase, call r5c_make_stripe_write_out() + * and returns -EAGAIN + */ +int r5c_try_caching_write(struct r5conf *conf, + struct stripe_head *sh, + struct stripe_head_state *s, + int disks) +{ + struct r5l_log *log = conf->log; + + BUG_ON(!r5c_is_writeback(log)); + + /* more write-back logic in next patches */ + r5c_make_stripe_write_out(sh); + return -EAGAIN; +} + +/* + * clean up the stripe (clear R5_InJournal for dev[pd_idx] etc.) after the + * stripe is committed to RAID disks. + */ +void r5c_finish_stripe_write_out(struct r5conf *conf, + struct stripe_head *sh, + struct stripe_head_state *s) +{ + if (!conf->log || + !test_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags)) + return; + + WARN_ON(test_bit(STRIPE_R5C_CACHING, &sh->state)); + clear_bit(R5_InJournal, &sh->dev[sh->pd_idx].flags); + + if (conf->log->r5c_journal_mode == R5C_JOURNAL_MODE_WRITE_THROUGH) + return; + BUG(); /* write-back logic in following patches */ +} + + static int r5l_load_log(struct r5l_log *log) { struct md_rdev *rdev = log->rdev; @@ -1249,6 +1384,8 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev) INIT_LIST_HEAD(&log->no_space_stripes); spin_lock_init(&log->no_space_stripes_lock); + log->r5c_journal_mode = R5C_JOURNAL_MODE_WRITE_THROUGH; + if (r5l_load_log(log)) goto error; |