author      Linus Torvalds <torvalds@linux-foundation.org>   2012-12-17 13:39:11 -0800
committer   Linus Torvalds <torvalds@linux-foundation.org>   2012-12-17 13:39:11 -0800
commit      9228ff90387e276ad67b10c0eb525c9d6a57d5e9 (patch)
tree        e7c87b68daba7cf7ca4c342c6b52165bd78fbe16 /drivers/block/drbd/drbd_bitmap.c
parent      9360b53661a2c7754517b2925580055bacc8ec38 (diff)
parent      d2ec180c23a5a1bfe34d8638b0342a47c00cf70f (diff)
Merge branch 'for-3.8/drivers' of git://git.kernel.dk/linux-block
Pull block driver update from Jens Axboe:
"Now that the core bits are in, here are the driver bits for 3.8. The
branch contains:
- A huge pile of drbd bits that were dumped from the 3.7 merge
window. Following that, it was made perfectly clear both that there
will be no more over-the-wall pulls and how the situation on
individual pulls can be improved.
- A few cleanups from Akinobu Mita for drbd and cciss.
- Queue improvement for loop from Lukas. This grew into adding a
generic interface for waiting on/checking an event while holding a
specific lock, allowing this to be pulled out of md; loop and drbd
now use it as well (see the sketch after this list).
- A few fixes for xen back/front block driver from Roger Pau Monne.
- Partition improvements from Stephen Warren, allowing partition
UUID to be used as an identifier."
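
A note on the generic interface mentioned above: wait_event_lock_irq() lets a caller sleep on a wait queue while the spinlock it holds is dropped for the duration of the sleep and re-acquired before the condition is rechecked. The following is a minimal, hypothetical sketch of that usage pattern; the my_dev structure and my_consumer() function are made up for illustration, and only the wait_event_lock_irq() call itself comes from this pull:

    #include <linux/list.h>
    #include <linux/spinlock.h>
    #include <linux/wait.h>

    struct my_dev {                 /* hypothetical example device */
            spinlock_t lock;
            struct list_head work;
            wait_queue_head_t wq;
    };

    static void my_consumer(struct my_dev *d)
    {
            spin_lock_irq(&d->lock);
            /* Sleeps with d->lock released and returns with it re-acquired
             * once work has been queued -- the pattern previously private
             * to md that loop and drbd now share via the generic helper. */
            wait_event_lock_irq(d->wq, !list_empty(&d->work), d->lock);
            /* ... dequeue and process work under d->lock ... */
            spin_unlock_irq(&d->lock);
    }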
* 'for-3.8/drivers' of git://git.kernel.dk/linux-block: (609 commits)
drbd: update Kconfig to match current dependencies
drbd: Fix drbdsetup wait-connect, wait-sync etc... commands
drbd: close race between drbd_set_role and drbd_connect
drbd: respect no-md-barriers setting also when changed online via disk-options
drbd: Remove obsolete check
drbd: fixup after wait_event_lock_irq() addition to generic code
loop: Limit the number of requests in the bio list
wait: add wait_event_lock_irq() interface
xen-blkfront: free allocated page
xen-blkback: move free persistent grants code
block: partition: msdos: provide UUIDs for partitions
init: reduce PARTUUID min length to 1 from 36
block: store partition_meta_info.uuid as a string
cciss: use check_signature()
cciss: cleanup bitops usage
drbd: use copy_highpage
drbd: if the replication link breaks during handshake, keep retrying
drbd: check return of kmalloc in receive_uuids
drbd: Broadcast sync progress no more often than once per second
drbd: don't try to clear bits once the disk has failed
...
Diffstat (limited to 'drivers/block/drbd/drbd_bitmap.c')
-rw-r--r--   drivers/block/drbd/drbd_bitmap.c   249
1 file changed, 133 insertions(+), 116 deletions(-)
diff --git a/drivers/block/drbd/drbd_bitmap.c b/drivers/block/drbd/drbd_bitmap.c
index d84566496746..8dc29502dc08 100644
--- a/drivers/block/drbd/drbd_bitmap.c
+++ b/drivers/block/drbd/drbd_bitmap.c
@@ -119,13 +119,9 @@ static void __bm_print_lock_info(struct drbd_conf *mdev, const char *func)
     if (!__ratelimit(&drbd_ratelimit_state))
         return;
     dev_err(DEV, "FIXME %s in %s, bitmap locked for '%s' by %s\n",
-        current == mdev->receiver.task ? "receiver" :
-        current == mdev->asender.task ? "asender" :
-        current == mdev->worker.task ? "worker" : current->comm,
-        func, b->bm_why ?: "?",
-        b->bm_task == mdev->receiver.task ? "receiver" :
-        b->bm_task == mdev->asender.task ? "asender" :
-        b->bm_task == mdev->worker.task ? "worker" : "?");
+        drbd_task_to_thread_name(mdev->tconn, current),
+        func, b->bm_why ?: "?",
+        drbd_task_to_thread_name(mdev->tconn, b->bm_task));
 }

 void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)
@@ -142,13 +138,9 @@ void drbd_bm_lock(struct drbd_conf *mdev, char *why, enum bm_flag flags)

     if (trylock_failed) {
         dev_warn(DEV, "%s going to '%s' but bitmap already locked for '%s' by %s\n",
-            current == mdev->receiver.task ? "receiver" :
-            current == mdev->asender.task ? "asender" :
-            current == mdev->worker.task ? "worker" : current->comm,
-            why, b->bm_why ?: "?",
-            b->bm_task == mdev->receiver.task ? "receiver" :
-            b->bm_task == mdev->asender.task ? "asender" :
-            b->bm_task == mdev->worker.task ? "worker" : "?");
+            drbd_task_to_thread_name(mdev->tconn, current),
+            why, b->bm_why ?: "?",
+            drbd_task_to_thread_name(mdev->tconn, b->bm_task));
         mutex_lock(&b->bm_change);
     }
     if (BM_LOCKED_MASK & b->bm_flags)
@@ -196,6 +188,9 @@ void drbd_bm_unlock(struct drbd_conf *mdev)
 /* to mark for lazy writeout once syncer cleared all clearable bits,
  * we if bits have been cleared since last IO. */
 #define BM_PAGE_LAZY_WRITEOUT 28
+/* pages marked with this "HINT" will be considered for writeout
+ * on activity log transactions */
+#define BM_PAGE_HINT_WRITEOUT 27

 /* store_page_idx uses non-atomic assignment. It is only used directly after
  * allocating the page.  All other bm_set_page_* and bm_clear_page_* need to
@@ -227,8 +222,7 @@ static void bm_page_unlock_io(struct drbd_conf *mdev, int page_nr)
 {
     struct drbd_bitmap *b = mdev->bitmap;
     void *addr = &page_private(b->bm_pages[page_nr]);
-    clear_bit(BM_PAGE_IO_LOCK, addr);
-    smp_mb__after_clear_bit();
+    clear_bit_unlock(BM_PAGE_IO_LOCK, addr);
     wake_up(&mdev->bitmap->bm_io_wait);
 }

@@ -246,6 +240,27 @@ static void bm_set_page_need_writeout(struct page *page)
     set_bit(BM_PAGE_NEED_WRITEOUT, &page_private(page));
 }

+/**
+ * drbd_bm_mark_for_writeout() - mark a page with a "hint" to be considered for writeout
+ * @mdev:	DRBD device.
+ * @page_nr:	the bitmap page to mark with the "hint" flag
+ *
+ * From within an activity log transaction, we mark a few pages with these
+ * hints, then call drbd_bm_write_hinted(), which will only write out changed
+ * pages which are flagged with this mark.
+ */
+void drbd_bm_mark_for_writeout(struct drbd_conf *mdev, int page_nr)
+{
+    struct page *page;
+    if (page_nr >= mdev->bitmap->bm_number_of_pages) {
+        dev_warn(DEV, "BAD: page_nr: %u, number_of_pages: %u\n",
+             page_nr, (int)mdev->bitmap->bm_number_of_pages);
+        return;
+    }
+    page = mdev->bitmap->bm_pages[page_nr];
+    set_bit(BM_PAGE_HINT_WRITEOUT, &page_private(page));
+}
+
 static int bm_test_page_unchanged(struct page *page)
 {
     volatile const unsigned long *addr = &page_private(page);
@@ -373,14 +388,16 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
         return old_pages;

     /* Trying kmalloc first, falling back to vmalloc.
-     * GFP_KERNEL is ok, as this is done when a lower level disk is
-     * "attached" to the drbd.  Context is receiver thread or cqueue
-     * thread.  As we have no disk yet, we are not in the IO path,
-     * not even the IO path of the peer. */
+     * GFP_NOIO, as this is called while drbd IO is "suspended",
+     * and during resize or attach on diskless Primary,
+     * we must not block on IO to ourselves.
+     * Context is receiver thread or dmsetup. */
     bytes = sizeof(struct page *)*want;
-    new_pages = kzalloc(bytes, GFP_KERNEL);
+    new_pages = kzalloc(bytes, GFP_NOIO);
     if (!new_pages) {
-        new_pages = vzalloc(bytes);
+        new_pages = __vmalloc(bytes,
+                GFP_NOIO | __GFP_HIGHMEM | __GFP_ZERO,
+                PAGE_KERNEL);
         if (!new_pages)
             return NULL;
         vmalloced = 1;
@@ -390,7 +407,7 @@ static struct page **bm_realloc_pages(struct drbd_bitmap *b, unsigned long want)
         for (i = 0; i < have; i++)
             new_pages[i] = old_pages[i];
         for (; i < want; i++) {
-            page = alloc_page(GFP_HIGHUSER);
+            page = alloc_page(GFP_NOIO | __GFP_HIGHMEM);
             if (!page) {
                 bm_free_pages(new_pages + have, i - have);
                 bm_vk_free(new_pages, vmalloced);
@@ -439,7 +456,8 @@ int drbd_bm_init(struct drbd_conf *mdev)

 sector_t drbd_bm_capacity(struct drbd_conf *mdev)
 {
-    ERR_IF(!mdev->bitmap) return 0;
+    if (!expect(mdev->bitmap))
+        return 0;
     return mdev->bitmap->bm_dev_capacity;
 }

@@ -447,7 +465,8 @@ sector_t drbd_bm_capacity(struct drbd_conf *mdev)
  */
 void drbd_bm_cleanup(struct drbd_conf *mdev)
 {
-    ERR_IF (!mdev->bitmap) return;
+    if (!expect(mdev->bitmap))
+        return;
     bm_free_pages(mdev->bitmap->bm_pages, mdev->bitmap->bm_number_of_pages);
     bm_vk_free(mdev->bitmap->bm_pages, (BM_P_VMALLOCED & mdev->bitmap->bm_flags));
     kfree(mdev->bitmap);
@@ -610,7 +629,8 @@ int drbd_bm_resize(struct drbd_conf *mdev, sector_t capacity, int set_new_bits)
     int err = 0, growing;
     int opages_vmalloced;

-    ERR_IF(!b) return -ENOMEM;
+    if (!expect(b))
+        return -ENOMEM;

     drbd_bm_lock(mdev, "resize", BM_LOCKED_MASK);

@@ -732,8 +752,10 @@ unsigned long _drbd_bm_total_weight(struct drbd_conf *mdev)
     unsigned long s;
     unsigned long flags;

-    ERR_IF(!b) return 0;
-    ERR_IF(!b->bm_pages) return 0;
+    if (!expect(b))
+        return 0;
+    if (!expect(b->bm_pages))
+        return 0;

     spin_lock_irqsave(&b->bm_lock, flags);
     s = b->bm_set;
@@ -756,8 +778,10 @@ unsigned long drbd_bm_total_weight(struct drbd_conf *mdev)
 size_t drbd_bm_words(struct drbd_conf *mdev)
 {
     struct drbd_bitmap *b = mdev->bitmap;
-    ERR_IF(!b) return 0;
-    ERR_IF(!b->bm_pages) return 0;
+    if (!expect(b))
+        return 0;
+    if (!expect(b->bm_pages))
+        return 0;

     return b->bm_words;
 }
@@ -765,7 +789,8 @@ size_t drbd_bm_words(struct drbd_conf *mdev)
 unsigned long drbd_bm_bits(struct drbd_conf *mdev)
 {
     struct drbd_bitmap *b = mdev->bitmap;
-    ERR_IF(!b) return 0;
+    if (!expect(b))
+        return 0;

     return b->bm_bits;
 }
@@ -786,8 +811,10 @@ void drbd_bm_merge_lel(struct drbd_conf *mdev, size_t offset, size_t number,

     end = offset + number;

-    ERR_IF(!b) return;
-    ERR_IF(!b->bm_pages) return;
+    if (!expect(b))
+        return;
+    if (!expect(b->bm_pages))
+        return;
     if (number == 0)
         return;
     WARN_ON(offset >= b->bm_words);
@@ -831,8 +858,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,

     end = offset + number;

-    ERR_IF(!b) return;
-    ERR_IF(!b->bm_pages) return;
+    if (!expect(b))
+        return;
+    if (!expect(b->bm_pages))
+        return;

     spin_lock_irq(&b->bm_lock);
     if ((offset >= b->bm_words) ||
@@ -860,8 +889,10 @@ void drbd_bm_get_lel(struct drbd_conf *mdev, size_t offset, size_t number,
 void drbd_bm_set_all(struct drbd_conf *mdev)
 {
     struct drbd_bitmap *b = mdev->bitmap;
-    ERR_IF(!b) return;
-    ERR_IF(!b->bm_pages) return;
+    if (!expect(b))
+        return;
+    if (!expect(b->bm_pages))
+        return;

     spin_lock_irq(&b->bm_lock);
     bm_memset(b, 0, 0xff, b->bm_words);
@@ -874,8 +905,10 @@ void drbd_bm_set_all(struct drbd_conf *mdev)
 void drbd_bm_clear_all(struct drbd_conf *mdev)
 {
     struct drbd_bitmap *b = mdev->bitmap;
-    ERR_IF(!b) return;
-    ERR_IF(!b->bm_pages) return;
+    if (!expect(b))
+        return;
+    if (!expect(b->bm_pages))
+        return;

     spin_lock_irq(&b->bm_lock);
     bm_memset(b, 0, 0, b->bm_words);
@@ -889,7 +922,8 @@ struct bm_aio_ctx {
     unsigned int done;
     unsigned flags;
 #define BM_AIO_COPY_PAGES	1
-#define BM_WRITE_ALL_PAGES	2
+#define BM_AIO_WRITE_HINTED	2
+#define BM_WRITE_ALL_PAGES	4
     int error;
     struct kref kref;
 };
@@ -977,17 +1011,11 @@ static void bm_page_io_async(struct bm_aio_ctx *ctx, int page_nr, int rw) __must
     bm_set_page_unchanged(b->bm_pages[page_nr]);

     if (ctx->flags & BM_AIO_COPY_PAGES) {
-        void *src, *dest;
         page = mempool_alloc(drbd_md_io_page_pool, __GFP_HIGHMEM|__GFP_WAIT);
-        dest = kmap_atomic(page);
-        src = kmap_atomic(b->bm_pages[page_nr]);
-        memcpy(dest, src, PAGE_SIZE);
-        kunmap_atomic(src);
-        kunmap_atomic(dest);
+        copy_highpage(page, b->bm_pages[page_nr]);
         bm_store_page_idx(page, page_nr);
     } else
         page = b->bm_pages[page_nr];
-
     bio->bi_bdev = mdev->ldev->md_bdev;
     bio->bi_sector = on_disk_sector;
     /* bio_add_page of a single page to an empty bio will always succeed,
@@ -1060,6 +1088,11 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
         if (lazy_writeout_upper_idx && i == lazy_writeout_upper_idx)
             break;
         if (rw & WRITE) {
+            if ((flags & BM_AIO_WRITE_HINTED) &&
+                !test_and_clear_bit(BM_PAGE_HINT_WRITEOUT,
+                    &page_private(b->bm_pages[i])))
+                continue;
+
             if (!(flags & BM_WRITE_ALL_PAGES) &&
                 bm_test_page_unchanged(b->bm_pages[i])) {
                 dynamic_dev_dbg(DEV, "skipped bm write for idx %u\n", i);
@@ -1088,13 +1121,15 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
      * "in_flight reached zero, all done" event.
      */
     if (!atomic_dec_and_test(&ctx->in_flight))
-        wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
+        wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done);
     else
         kref_put(&ctx->kref, &bm_aio_ctx_destroy);

-    dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
-            rw == WRITE ? "WRITE" : "READ",
-            count, jiffies - now);
+    /* summary for global bitmap IO */
+    if (flags == 0)
+        dev_info(DEV, "bitmap %s of %u pages took %lu jiffies\n",
+            rw == WRITE ? "WRITE" : "READ",
+            count, jiffies - now);

     if (ctx->error) {
         dev_alert(DEV, "we had at least one MD IO ERROR during bitmap IO\n");
@@ -1103,7 +1138,7 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
     }

     if (atomic_read(&ctx->in_flight))
-        err = -EIO; /* Disk failed during IO... */
+        err = -EIO; /* Disk timeout/force-detach during IO... */

     now = jiffies;
     if (rw == WRITE) {
@@ -1115,8 +1150,9 @@ static int bm_rw(struct drbd_conf *mdev, int rw, unsigned flags, unsigned lazy_w
     }
     now = b->bm_set;

-    dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
-         ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);
+    if (flags == 0)
+        dev_info(DEV, "%s (%lu bits) marked out-of-sync by on disk bit-map.\n",
+             ppsize(ppb, now << (BM_BLOCK_SHIFT-10)), now);

     kref_put(&ctx->kref, &bm_aio_ctx_destroy);
     return err;
@@ -1179,9 +1215,17 @@ int drbd_bm_write_copy_pages(struct drbd_conf *mdev) __must_hold(local)
     return bm_rw(mdev, WRITE, BM_AIO_COPY_PAGES, 0);
 }

+/**
+ * drbd_bm_write_hinted() - Write bitmap pages with "hint" marks, if they have changed.
+ * @mdev:	DRBD device.
+ */
+int drbd_bm_write_hinted(struct drbd_conf *mdev) __must_hold(local)
+{
+    return bm_rw(mdev, WRITE, BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES, 0);
+}
+
 /**
- * drbd_bm_write_page: Writes a PAGE_SIZE aligned piece of bitmap
+ * drbd_bm_write_page() - Writes a PAGE_SIZE aligned piece of bitmap
  * @mdev:	DRBD device.
  * @idx:	bitmap page index
  *
@@ -1222,11 +1266,11 @@ int drbd_bm_write_page(struct drbd_conf *mdev, unsigned int idx) __must_hold(loc
     }

     bm_page_io_async(ctx, idx, WRITE_SYNC);
-    wait_until_done_or_disk_failure(mdev, mdev->ldev, &ctx->done);
+    wait_until_done_or_force_detached(mdev, mdev->ldev, &ctx->done);

     if (ctx->error)
         drbd_chk_io_error(mdev, 1, DRBD_META_IO_ERROR);
-        /* that should force detach, so the in memory bitmap will be
+        /* that causes us to detach, so the in memory bitmap will be
          * gone in a moment as well. */

     mdev->bm_writ_cnt++;
@@ -1289,8 +1333,10 @@ static unsigned long bm_find_next(struct drbd_conf *mdev,
     struct drbd_bitmap *b = mdev->bitmap;
     unsigned long i = DRBD_END_OF_BITMAP;

-    ERR_IF(!b) return i;
-    ERR_IF(!b->bm_pages) return i;
+    if (!expect(b))
+        return i;
+    if (!expect(b->bm_pages))
+        return i;

     spin_lock_irq(&b->bm_lock);
     if (BM_DONT_TEST & b->bm_flags)
@@ -1391,8 +1437,10 @@ static int bm_change_bits_to(struct drbd_conf *mdev, const unsigned long s,
     struct drbd_bitmap *b = mdev->bitmap;
     int c = 0;

-    ERR_IF(!b) return 1;
-    ERR_IF(!b->bm_pages) return 0;
+    if (!expect(b))
+        return 1;
+    if (!expect(b->bm_pages))
+        return 0;

     spin_lock_irqsave(&b->bm_lock, flags);
     if ((val ? BM_DONT_SET : BM_DONT_CLEAR) & b->bm_flags)
@@ -1423,13 +1471,21 @@ static inline void bm_set_full_words_within_one_page(struct drbd_bitmap *b,
 {
     int i;
     int bits;
+    int changed = 0;
     unsigned long *paddr = kmap_atomic(b->bm_pages[page_nr]);
     for (i = first_word; i < last_word; i++) {
         bits = hweight_long(paddr[i]);
         paddr[i] = ~0UL;
-        b->bm_set += BITS_PER_LONG - bits;
+        changed += BITS_PER_LONG - bits;
     }
     kunmap_atomic(paddr);
+    if (changed) {
+        /* We only need lazy writeout, the information is still in the
+         * remote bitmap as well, and is reconstructed during the next
+         * bitmap exchange, if lost locally due to a crash. */
+        bm_set_page_lazy_writeout(b->bm_pages[page_nr]);
+        b->bm_set += changed;
+    }
 }

 /* Same thing as drbd_bm_set_bits,
@@ -1524,8 +1580,10 @@ int drbd_bm_test_bit(struct drbd_conf *mdev, const unsigned long bitnr)
     unsigned long *p_addr;
     int i;

-    ERR_IF(!b) return 0;
-    ERR_IF(!b->bm_pages) return 0;
+    if (!expect(b))
+        return 0;
+    if (!expect(b->bm_pages))
+        return 0;

     spin_lock_irqsave(&b->bm_lock, flags);
     if (BM_DONT_TEST & b->bm_flags)
@@ -1559,8 +1617,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
      * robust in case we screwed up elsewhere, in that case pretend there
      * was one dirty bit in the requested area, so we won't try to do a
      * local read there (no bitmap probably implies no disk) */
-    ERR_IF(!b) return 1;
-    ERR_IF(!b->bm_pages) return 1;
+    if (!expect(b))
+        return 1;
+    if (!expect(b->bm_pages))
+        return 1;

     spin_lock_irqsave(&b->bm_lock, flags);
     if (BM_DONT_TEST & b->bm_flags)
@@ -1573,11 +1633,10 @@ int drbd_bm_count_bits(struct drbd_conf *mdev, const unsigned long s, const unsi
                 bm_unmap(p_addr);
             p_addr = bm_map_pidx(b, idx);
         }
-        ERR_IF (bitnr >= b->bm_bits) {
-            dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
-        } else {
+        if (expect(bitnr < b->bm_bits))
             c += (0 != test_bit_le(bitnr - (page_nr << (PAGE_SHIFT+3)), p_addr));
-        }
+        else
+            dev_err(DEV, "bitnr=%lu bm_bits=%lu\n", bitnr, b->bm_bits);
     }
     if (p_addr)
         bm_unmap(p_addr);
@@ -1607,8 +1666,10 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
     unsigned long flags;
     unsigned long *p_addr, *bm;

-    ERR_IF(!b) return 0;
-    ERR_IF(!b->bm_pages) return 0;
+    if (!expect(b))
+        return 0;
+    if (!expect(b->bm_pages))
+        return 0;

     spin_lock_irqsave(&b->bm_lock, flags);
     if (BM_DONT_TEST & b->bm_flags)
@@ -1630,47 +1691,3 @@ int drbd_bm_e_weight(struct drbd_conf *mdev, unsigned long enr)
     spin_unlock_irqrestore(&b->bm_lock, flags);
     return count;
 }
-
-/* Set all bits covered by the AL-extent al_enr.
- * Returns number of bits changed. */
-unsigned long drbd_bm_ALe_set_all(struct drbd_conf *mdev, unsigned long al_enr)
-{
-    struct drbd_bitmap *b = mdev->bitmap;
-    unsigned long *p_addr, *bm;
-    unsigned long weight;
-    unsigned long s, e;
-    int count, i, do_now;
-    ERR_IF(!b) return 0;
-    ERR_IF(!b->bm_pages) return 0;
-
-    spin_lock_irq(&b->bm_lock);
-    if (BM_DONT_SET & b->bm_flags)
-        bm_print_lock_info(mdev);
-    weight = b->bm_set;
-
-    s = al_enr * BM_WORDS_PER_AL_EXT;
-    e = min_t(size_t, s + BM_WORDS_PER_AL_EXT, b->bm_words);
-    /* assert that s and e are on the same page */
-    D_ASSERT((e-1) >> (PAGE_SHIFT - LN2_BPL + 3)
-          == s >> (PAGE_SHIFT - LN2_BPL + 3));
-    count = 0;
-    if (s < b->bm_words) {
-        i = do_now = e-s;
-        p_addr = bm_map_pidx(b, bm_word_to_page_idx(b, s));
-        bm = p_addr + MLPP(s);
-        while (i--) {
-            count += hweight_long(*bm);
-            *bm = -1UL;
-            bm++;
-        }
-        bm_unmap(p_addr);
-        b->bm_set += do_now*BITS_PER_LONG - count;
-        if (e == b->bm_words)
-            b->bm_set -= bm_clear_surplus(b);
-    } else {
-        dev_err(DEV, "start offset (%lu) too large in drbd_bm_ALe_set_all\n", s);
-    }
-    weight = b->bm_set - weight;
-    spin_unlock_irq(&b->bm_lock);
-    return weight;
-}
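
For orientation, the two entry points added by this diff (drbd_bm_mark_for_writeout() and drbd_bm_write_hinted()) are meant to be used together from activity-log transaction code: mark the bitmap pages covering the extents a transaction touches, then write out only the marked pages that actually changed. The sketch below is hypothetical glue, not code from the patch; the extent-to-page helper and the transaction handling are placeholders:

    /* hypothetical caller, for illustration only */
    static void example_al_transaction(struct drbd_conf *mdev,
                                       const unsigned int *extents, int n)
    {
            int i;

            /* 1) hint the bitmap pages covering the updated extents
             *    (sets BM_PAGE_HINT_WRITEOUT on each page) */
            for (i = 0; i < n; i++)
                    drbd_bm_mark_for_writeout(mdev,
                            hypothetical_extent_to_bm_page(extents[i]));

            /* ... commit the activity-log transaction itself ... */

            /* 2) write out only hinted pages that actually changed;
             *    bm_rw() runs with BM_AIO_WRITE_HINTED | BM_AIO_COPY_PAGES */
            drbd_bm_write_hinted(mdev);
    }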