From fd7c092e711ebab55b2688d3859d95dfd0301f73 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 1 Mar 2013 22:45:44 +0000 Subject: dm: fix truncated status strings Avoid returning a truncated table or status string instead of setting the DM_BUFFER_FULL_FLAG when the last target of a table fills the buffer. When processing a table or status request, the function retrieve_status calls ti->type->status. If ti->type->status returns non-zero, retrieve_status assumes that the buffer overflowed and sets DM_BUFFER_FULL_FLAG. However, targets don't return non-zero values from their status method on overflow. Most targets returns always zero. If a buffer overflow happens in a target that is not the last in the table, it gets noticed during the next iteration of the loop in retrieve_status; but if a buffer overflow happens in the last target, it goes unnoticed and erroneously truncated data is returned. In the current code, the targets behave in the following way: * dm-crypt returns -ENOMEM if there is not enough space to store the key, but it returns 0 on all other overflows. * dm-thin returns errors from the status method if a disk error happened. This is incorrect because retrieve_status doesn't check the error code, it assumes that all non-zero values mean buffer overflow. * all the other targets always return 0. This patch changes the ti->type->status function to return void (because most targets don't use the return code). Overflow is detected in retrieve_status: if the status method fills up the remaining space completely, it is assumed that buffer overflow happened. Cc: stable@vger.kernel.org Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- include/linux/device-mapper.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index bf6afa2fc432..a5cda3ea6b88 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -68,8 +68,8 @@ typedef void (*dm_postsuspend_fn) (struct dm_target *ti); typedef int (*dm_preresume_fn) (struct dm_target *ti); typedef void (*dm_resume_fn) (struct dm_target *ti); -typedef int (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, - unsigned status_flags, char *result, unsigned maxlen); +typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, + unsigned status_flags, char *result, unsigned maxlen); typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv); -- cgit v1.2.3-70-g09d2 From 55a62eef8d1b50ceff3b7bf46851103bdcc7e5b0 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 1 Mar 2013 22:45:47 +0000 Subject: dm: rename request variables to bios Use 'bio' in the name of variables and functions that deal with bios rather than 'request' to avoid confusion with the normal block layer use of 'request'. No functional changes. Signed-off-by: Alasdair G Kergon --- drivers/md/dm-crypt.c | 6 +++--- drivers/md/dm-delay.c | 4 ++-- drivers/md/dm-flakey.c | 4 ++-- drivers/md/dm-linear.c | 6 +++--- drivers/md/dm-mpath.c | 4 ++-- drivers/md/dm-raid.c | 2 +- drivers/md/dm-raid1.c | 4 ++-- drivers/md/dm-snap.c | 10 +++++----- drivers/md/dm-stripe.c | 20 +++++++++---------- drivers/md/dm-table.c | 10 +++++----- drivers/md/dm-target.c | 2 +- drivers/md/dm-thin.c | 12 +++++------ drivers/md/dm-zero.c | 2 +- drivers/md/dm.c | 46 +++++++++++++++++++++---------------------- include/linux/device-mapper.h | 30 ++++++++++++++-------------- 15 files changed, 81 insertions(+), 81 deletions(-) (limited to 'include') diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index 2ae151e59c0c..13c15480d940 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1637,7 +1637,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (opt_params == 1 && opt_string && !strcasecmp(opt_string, "allow_discards")) - ti->num_discard_requests = 1; + ti->num_discard_bios = 1; else if (opt_params) { ret = -EINVAL; ti->error = "Invalid feature arguments"; @@ -1665,7 +1665,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; ti->discard_zeroes_data_unsupported = true; return 0; @@ -1726,7 +1726,7 @@ static void crypt_status(struct dm_target *ti, status_type_t type, DMEMIT(" %llu %s %llu", (unsigned long long)cc->iv_offset, cc->dev->name, (unsigned long long)cc->start); - if (ti->num_discard_requests) + if (ti->num_discard_bios) DMEMIT(" 1 allow_discards"); break; diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index c0d03b006e40..496d5f3646a5 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -198,8 +198,8 @@ out: mutex_init(&dc->timer_lock); atomic_set(&dc->may_delay, 1); - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; ti->private = dc; return 0; diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 5d6c04cceee0..7fcf21cb4ff8 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -216,8 +216,8 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; ti->per_bio_data_size = sizeof(struct per_bio_data); ti->private = fc; return 0; diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 5be301c1e809..4f99d267340c 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -53,9 +53,9 @@ static int linear_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; - ti->num_write_same_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; + ti->num_write_same_bios = 1; ti->private = lc; return 0; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index d267bb5705e9..51bb81676be3 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -905,8 +905,8 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, goto bad; } - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; return 0; diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 5a578d89da2d..9a01d1e4c783 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1151,7 +1151,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv) INIT_WORK(&rs->md.event_work, do_table_event); ti->private = rs; - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; mutex_lock(&rs->md.reconfig_mutex); ret = md_run(&rs->md); diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 7f2419099b50..e2ea97723e10 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1072,8 +1072,8 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) goto err_free_context; - ti->num_flush_requests = 1; - ti->num_discard_requests = 1; + ti->num_flush_bios = 1; + ti->num_discard_bios = 1; ti->per_bio_data_size = sizeof(struct dm_raid1_bio_record); ti->discard_zeroes_data_unsupported = true; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 6e45e3774eab..5d78027e07ac 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1037,7 +1037,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) int i; int r = -EINVAL; char *origin_path, *cow_path; - unsigned args_used, num_flush_requests = 1; + unsigned args_used, num_flush_bios = 1; fmode_t origin_mode = FMODE_READ; if (argc != 4) { @@ -1047,7 +1047,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) } if (dm_target_is_snapshot_merge(ti)) { - num_flush_requests = 2; + num_flush_bios = 2; origin_mode = FMODE_WRITE; } @@ -1127,7 +1127,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) spin_lock_init(&s->tracked_chunk_lock); ti->private = s; - ti->num_flush_requests = num_flush_requests; + ti->num_flush_bios = num_flush_bios; ti->per_bio_data_size = sizeof(struct dm_snap_tracked_chunk); /* Add snapshot to the list of snapshots for this origin */ @@ -1691,7 +1691,7 @@ static int snapshot_merge_map(struct dm_target *ti, struct bio *bio) init_tracked_chunk(bio); if (bio->bi_rw & REQ_FLUSH) { - if (!dm_bio_get_target_request_nr(bio)) + if (!dm_bio_get_target_bio_nr(bio)) bio->bi_bdev = s->origin->bdev; else bio->bi_bdev = s->cow->bdev; @@ -2102,7 +2102,7 @@ static int origin_ctr(struct dm_target *ti, unsigned int argc, char **argv) } ti->private = dev; - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; return 0; } diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c index aaecefa63935..d8837d313f54 100644 --- a/drivers/md/dm-stripe.c +++ b/drivers/md/dm-stripe.c @@ -160,9 +160,9 @@ static int stripe_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) return r; - ti->num_flush_requests = stripes; - ti->num_discard_requests = stripes; - ti->num_write_same_requests = stripes; + ti->num_flush_bios = stripes; + ti->num_discard_bios = stripes; + ti->num_write_same_bios = stripes; sc->chunk_size = chunk_size; if (chunk_size & (chunk_size - 1)) @@ -276,19 +276,19 @@ static int stripe_map(struct dm_target *ti, struct bio *bio) { struct stripe_c *sc = ti->private; uint32_t stripe; - unsigned target_request_nr; + unsigned target_bio_nr; if (bio->bi_rw & REQ_FLUSH) { - target_request_nr = dm_bio_get_target_request_nr(bio); - BUG_ON(target_request_nr >= sc->stripes); - bio->bi_bdev = sc->stripe[target_request_nr].dev->bdev; + target_bio_nr = dm_bio_get_target_bio_nr(bio); + BUG_ON(target_bio_nr >= sc->stripes); + bio->bi_bdev = sc->stripe[target_bio_nr].dev->bdev; return DM_MAPIO_REMAPPED; } if (unlikely(bio->bi_rw & REQ_DISCARD) || unlikely(bio->bi_rw & REQ_WRITE_SAME)) { - target_request_nr = dm_bio_get_target_request_nr(bio); - BUG_ON(target_request_nr >= sc->stripes); - return stripe_map_range(sc, bio, target_request_nr); + target_bio_nr = dm_bio_get_target_bio_nr(bio); + BUG_ON(target_bio_nr >= sc->stripes); + return stripe_map_range(sc, bio, target_bio_nr); } stripe_map_sector(sc, bio->bi_sector, &stripe, &bio->bi_sector); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index c95405d04726..e50dad0c65f4 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -822,8 +822,8 @@ int dm_table_add_target(struct dm_table *t, const char *type, t->highs[t->num_targets++] = tgt->begin + tgt->len - 1; - if (!tgt->num_discard_requests && tgt->discards_supported) - DMWARN("%s: %s: ignoring discards_supported because num_discard_requests is zero.", + if (!tgt->num_discard_bios && tgt->discards_supported) + DMWARN("%s: %s: ignoring discards_supported because num_discard_bios is zero.", dm_device_name(t->md), type); return 0; @@ -1359,7 +1359,7 @@ static bool dm_table_supports_flush(struct dm_table *t, unsigned flush) while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); - if (!ti->num_flush_requests) + if (!ti->num_flush_bios) continue; if (ti->flush_supported) @@ -1438,7 +1438,7 @@ static bool dm_table_supports_write_same(struct dm_table *t) while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); - if (!ti->num_write_same_requests) + if (!ti->num_write_same_bios) return false; if (!ti->type->iterate_devices || @@ -1656,7 +1656,7 @@ bool dm_table_supports_discards(struct dm_table *t) while (i < dm_table_get_num_targets(t)) { ti = dm_table_get_target(t, i++); - if (!ti->num_discard_requests) + if (!ti->num_discard_bios) continue; if (ti->discards_supported) diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c index 617d21a77256..37ba5db71cd9 100644 --- a/drivers/md/dm-target.c +++ b/drivers/md/dm-target.c @@ -116,7 +116,7 @@ static int io_err_ctr(struct dm_target *tt, unsigned int argc, char **args) /* * Return error for discards instead of -EOPNOTSUPP */ - tt->num_discard_requests = 1; + tt->num_discard_bios = 1; return 0; } diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 088f6b34f599..303e11da7c2a 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -1944,7 +1944,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) pt->data_dev = data_dev; pt->low_water_blocks = low_water_blocks; pt->adjusted_pf = pt->requested_pf = pf; - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; /* * Only need to enable discards if the pool should pass @@ -1952,7 +1952,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv) * processing will cause mappings to be removed from the btree. */ if (pf.discard_enabled && pf.discard_passdown) { - ti->num_discard_requests = 1; + ti->num_discard_bios = 1; /* * Setting 'discards_supported' circumvents the normal @@ -2593,17 +2593,17 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv) if (r) goto bad_thin_open; - ti->num_flush_requests = 1; + ti->num_flush_bios = 1; ti->flush_supported = true; ti->per_bio_data_size = sizeof(struct dm_thin_endio_hook); /* In case the pool supports discards, pass them on. */ if (tc->pool->pf.discard_enabled) { ti->discards_supported = true; - ti->num_discard_requests = 1; + ti->num_discard_bios = 1; ti->discard_zeroes_data_unsupported = true; - /* Discard requests must be split on a block boundary */ - ti->split_discard_requests = true; + /* Discard bios must be split on a block boundary */ + ti->split_discard_bios = true; } dm_put(pool_md); diff --git a/drivers/md/dm-zero.c b/drivers/md/dm-zero.c index 69a5c3b3b340..c99003e0d47a 100644 --- a/drivers/md/dm-zero.c +++ b/drivers/md/dm-zero.c @@ -25,7 +25,7 @@ static int zero_ctr(struct dm_target *ti, unsigned int argc, char **argv) /* * Silently drop discards, avoiding -EOPNOTSUPP. */ - ti->num_discard_requests = 1; + ti->num_discard_bios = 1; return 0; } diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 4521d72637c2..caef71befc43 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1103,7 +1103,7 @@ static void clone_bio(struct dm_target_io *tio, struct bio *bio, static struct dm_target_io *alloc_tio(struct clone_info *ci, struct dm_target *ti, int nr_iovecs, - unsigned target_request_nr) + unsigned target_bio_nr) { struct dm_target_io *tio; struct bio *clone; @@ -1114,15 +1114,15 @@ static struct dm_target_io *alloc_tio(struct clone_info *ci, tio->io = ci->io; tio->ti = ti; memset(&tio->info, 0, sizeof(tio->info)); - tio->target_request_nr = target_request_nr; + tio->target_bio_nr = target_bio_nr; return tio; } static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, - unsigned request_nr, sector_t len) + unsigned target_bio_nr, sector_t len) { - struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, request_nr); + struct dm_target_io *tio = alloc_tio(ci, ti, ci->bio->bi_max_vecs, target_bio_nr); struct bio *clone = &tio->clone; /* @@ -1137,13 +1137,13 @@ static void __issue_target_request(struct clone_info *ci, struct dm_target *ti, __map_bio(tio); } -static void __issue_target_requests(struct clone_info *ci, struct dm_target *ti, - unsigned num_requests, sector_t len) +static void __issue_target_bios(struct clone_info *ci, struct dm_target *ti, + unsigned num_bios, sector_t len) { - unsigned request_nr; + unsigned target_bio_nr; - for (request_nr = 0; request_nr < num_requests; request_nr++) - __issue_target_request(ci, ti, request_nr, len); + for (target_bio_nr = 0; target_bio_nr < num_bios; target_bio_nr++) + __issue_target_request(ci, ti, target_bio_nr, len); } static int __clone_and_map_empty_flush(struct clone_info *ci) @@ -1153,7 +1153,7 @@ static int __clone_and_map_empty_flush(struct clone_info *ci) BUG_ON(bio_has_data(ci->bio)); while ((ti = dm_table_get_target(ci->map, target_nr++))) - __issue_target_requests(ci, ti, ti->num_flush_requests, 0); + __issue_target_bios(ci, ti, ti->num_flush_bios, 0); return 0; } @@ -1173,32 +1173,32 @@ static void __clone_and_map_simple(struct clone_info *ci, struct dm_target *ti) ci->sector_count = 0; } -typedef unsigned (*get_num_requests_fn)(struct dm_target *ti); +typedef unsigned (*get_num_bios_fn)(struct dm_target *ti); -static unsigned get_num_discard_requests(struct dm_target *ti) +static unsigned get_num_discard_bios(struct dm_target *ti) { - return ti->num_discard_requests; + return ti->num_discard_bios; } -static unsigned get_num_write_same_requests(struct dm_target *ti) +static unsigned get_num_write_same_bios(struct dm_target *ti) { - return ti->num_write_same_requests; + return ti->num_write_same_bios; } typedef bool (*is_split_required_fn)(struct dm_target *ti); static bool is_split_required_for_discard(struct dm_target *ti) { - return ti->split_discard_requests; + return ti->split_discard_bios; } static int __clone_and_map_changing_extent_only(struct clone_info *ci, - get_num_requests_fn get_num_requests, + get_num_bios_fn get_num_bios, is_split_required_fn is_split_required) { struct dm_target *ti; sector_t len; - unsigned num_requests; + unsigned num_bios; do { ti = dm_table_find_target(ci->map, ci->sector); @@ -1211,8 +1211,8 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci, * reconfiguration might also have changed that since the * check was performed. */ - num_requests = get_num_requests ? get_num_requests(ti) : 0; - if (!num_requests) + num_bios = get_num_bios ? get_num_bios(ti) : 0; + if (!num_bios) return -EOPNOTSUPP; if (is_split_required && !is_split_required(ti)) @@ -1220,7 +1220,7 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci, else len = min(ci->sector_count, max_io_len(ci->sector, ti)); - __issue_target_requests(ci, ti, num_requests, len); + __issue_target_bios(ci, ti, num_bios, len); ci->sector += len; } while (ci->sector_count -= len); @@ -1230,13 +1230,13 @@ static int __clone_and_map_changing_extent_only(struct clone_info *ci, static int __clone_and_map_discard(struct clone_info *ci) { - return __clone_and_map_changing_extent_only(ci, get_num_discard_requests, + return __clone_and_map_changing_extent_only(ci, get_num_discard_bios, is_split_required_for_discard); } static int __clone_and_map_write_same(struct clone_info *ci) { - return __clone_and_map_changing_extent_only(ci, get_num_write_same_requests, NULL); + return __clone_and_map_changing_extent_only(ci, get_num_write_same_bios, NULL); } static int __clone_and_map(struct clone_info *ci) diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a5cda3ea6b88..d5f984b07466 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -187,26 +187,26 @@ struct dm_target { uint32_t max_io_len; /* - * A number of zero-length barrier requests that will be submitted + * A number of zero-length barrier bios that will be submitted * to the target for the purpose of flushing cache. * - * The request number can be accessed with dm_bio_get_target_request_nr. - * It is a responsibility of the target driver to remap these requests + * The bio number can be accessed with dm_bio_get_target_bio_nr. + * It is a responsibility of the target driver to remap these bios * to the real underlying devices. */ - unsigned num_flush_requests; + unsigned num_flush_bios; /* - * The number of discard requests that will be submitted to the target. - * The request number can be accessed with dm_bio_get_target_request_nr. + * The number of discard bios that will be submitted to the target. + * The bio number can be accessed with dm_bio_get_target_bio_nr. */ - unsigned num_discard_requests; + unsigned num_discard_bios; /* - * The number of WRITE SAME requests that will be submitted to the target. - * The request number can be accessed with dm_bio_get_target_request_nr. + * The number of WRITE SAME bios that will be submitted to the target. + * The bio number can be accessed with dm_bio_get_target_bio_nr. */ - unsigned num_write_same_requests; + unsigned num_write_same_bios; /* * The minimum number of extra bytes allocated in each bio for the @@ -233,10 +233,10 @@ struct dm_target { bool discards_supported:1; /* - * Set if the target required discard request to be split + * Set if the target required discard bios to be split * on max_io_len boundary. */ - bool split_discard_requests:1; + bool split_discard_bios:1; /* * Set if this target does not return zeroes on discarded blocks. @@ -261,7 +261,7 @@ struct dm_target_io { struct dm_io *io; struct dm_target *ti; union map_info info; - unsigned target_request_nr; + unsigned target_bio_nr; struct bio clone; }; @@ -275,9 +275,9 @@ static inline struct bio *dm_bio_from_per_bio_data(void *data, size_t data_size) return (struct bio *)((char *)data + data_size + offsetof(struct dm_target_io, clone)); } -static inline unsigned dm_bio_get_target_request_nr(const struct bio *bio) +static inline unsigned dm_bio_get_target_bio_nr(const struct bio *bio) { - return container_of(bio, struct dm_target_io, clone)->target_request_nr; + return container_of(bio, struct dm_target_io, clone)->target_bio_nr; } int dm_register_target(struct target_type *t); -- cgit v1.2.3-70-g09d2 From 02cde50b7ea74557d32ff778c73809322445ccd2 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 1 Mar 2013 22:45:49 +0000 Subject: dm ioctl: optimize functions without variable params Device-mapper ioctls receive and send data in a buffer supplied by userspace. The buffer has two parts. The first part contains a 'struct dm_ioctl' and has a fixed size. The second part depends on the ioctl and has a variable size. This patch recognises the specific ioctls that do not use the variable part of the buffer and skips allocating memory for it. In particular, when a device is suspended and a resume ioctl is sent, this now avoid memory allocation completely. The variable "struct dm_ioctl tmp" is moved from the function copy_params to its caller ctl_ioctl and renamed to param_kernel. It is used directly when the ioctl function doesn't need any arguments. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 52 ++++++++++++++++++++++++++++--------------- include/uapi/linux/dm-ioctl.h | 6 ++--- 2 files changed, 37 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 9ae11b2994f8..7eb0682d574f 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1560,7 +1560,8 @@ static int check_version(unsigned int cmd, struct dm_ioctl __user *user) return r; } -#define DM_PARAMS_VMALLOC 0x0001 /* Params alloced with vmalloc not kmalloc */ +#define DM_PARAMS_KMALLOC 0x0001 /* Params alloced with kmalloc */ +#define DM_PARAMS_VMALLOC 0x0002 /* Params alloced with vmalloc */ #define DM_WIPE_BUFFER 0x0010 /* Wipe input buffer before returning from ioctl */ static void free_params(struct dm_ioctl *param, size_t param_size, int param_flags) @@ -1568,66 +1569,80 @@ static void free_params(struct dm_ioctl *param, size_t param_size, int param_fla if (param_flags & DM_WIPE_BUFFER) memset(param, 0, param_size); + if (param_flags & DM_PARAMS_KMALLOC) + kfree(param); if (param_flags & DM_PARAMS_VMALLOC) vfree(param); - else - kfree(param); } -static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl **param, int *param_flags) +static int copy_params(struct dm_ioctl __user *user, struct dm_ioctl *param_kernel, + int ioctl_flags, + struct dm_ioctl **param, int *param_flags) { - struct dm_ioctl tmp, *dmi; + struct dm_ioctl *dmi; int secure_data; + const size_t minimum_data_size = sizeof(*param_kernel) - sizeof(param_kernel->data); - if (copy_from_user(&tmp, user, sizeof(tmp) - sizeof(tmp.data))) + if (copy_from_user(param_kernel, user, minimum_data_size)) return -EFAULT; - if (tmp.data_size < (sizeof(tmp) - sizeof(tmp.data))) + if (param_kernel->data_size < minimum_data_size) return -EINVAL; - secure_data = tmp.flags & DM_SECURE_DATA_FLAG; + secure_data = param_kernel->flags & DM_SECURE_DATA_FLAG; *param_flags = secure_data ? DM_WIPE_BUFFER : 0; + if (ioctl_flags & IOCTL_FLAGS_NO_PARAMS) { + dmi = param_kernel; + dmi->data_size = minimum_data_size; + goto data_copied; + } + /* * Try to avoid low memory issues when a device is suspended. * Use kmalloc() rather than vmalloc() when we can. */ dmi = NULL; - if (tmp.data_size <= KMALLOC_MAX_SIZE) - dmi = kmalloc(tmp.data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + if (param_kernel->data_size <= KMALLOC_MAX_SIZE) { + dmi = kmalloc(param_kernel->data_size, GFP_NOIO | __GFP_NORETRY | __GFP_NOMEMALLOC | __GFP_NOWARN); + if (dmi) + *param_flags |= DM_PARAMS_KMALLOC; + } if (!dmi) { - dmi = __vmalloc(tmp.data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); - *param_flags |= DM_PARAMS_VMALLOC; + dmi = __vmalloc(param_kernel->data_size, GFP_NOIO | __GFP_REPEAT | __GFP_HIGH, PAGE_KERNEL); + if (dmi) + *param_flags |= DM_PARAMS_VMALLOC; } if (!dmi) { - if (secure_data && clear_user(user, tmp.data_size)) + if (secure_data && clear_user(user, param_kernel->data_size)) return -EFAULT; return -ENOMEM; } - if (copy_from_user(dmi, user, tmp.data_size)) + if (copy_from_user(dmi, user, param_kernel->data_size)) goto bad; +data_copied: /* * Abort if something changed the ioctl data while it was being copied. */ - if (dmi->data_size != tmp.data_size) { + if (dmi->data_size != param_kernel->data_size) { DMERR("rejecting ioctl: data size modified while processing parameters"); goto bad; } /* Wipe the user buffer so we do not return it to userspace */ - if (secure_data && clear_user(user, tmp.data_size)) + if (secure_data && clear_user(user, param_kernel->data_size)) goto bad; *param = dmi; return 0; bad: - free_params(dmi, tmp.data_size, *param_flags); + free_params(dmi, param_kernel->data_size, *param_flags); return -EFAULT; } @@ -1671,6 +1686,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) struct dm_ioctl *uninitialized_var(param); ioctl_fn fn = NULL; size_t input_param_size; + struct dm_ioctl param_kernel; /* only root can play with this */ if (!capable(CAP_SYS_ADMIN)) @@ -1704,7 +1720,7 @@ static int ctl_ioctl(uint command, struct dm_ioctl __user *user) /* * Copy the parameters into kernel space. */ - r = copy_params(user, ¶m, ¶m_flags); + r = copy_params(user, ¶m_kernel, ioctl_flags, ¶m, ¶m_flags); if (r) return r; diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index 539b179b349c..b8a6bddf0727 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -267,9 +267,9 @@ enum { #define DM_DEV_SET_GEOMETRY _IOWR(DM_IOCTL, DM_DEV_SET_GEOMETRY_CMD, struct dm_ioctl) #define DM_VERSION_MAJOR 4 -#define DM_VERSION_MINOR 23 -#define DM_VERSION_PATCHLEVEL 1 -#define DM_VERSION_EXTRA "-ioctl (2012-12-18)" +#define DM_VERSION_MINOR 24 +#define DM_VERSION_PATCHLEVEL 0 +#define DM_VERSION_EXTRA "-ioctl (2013-01-15)" /* Status bits */ #define DM_READONLY_FLAG (1 << 0) /* In/Out */ -- cgit v1.2.3-70-g09d2 From a26062416ef8add48f16fbadded2b5f6fb84d024 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 1 Mar 2013 22:45:49 +0000 Subject: dm ioctl: allow message to return data This patch introduces enhanced message support that allows the device-mapper core to recognise messages that are common to all devices, and for messages to return data to userspace. Core messages are processed by the function "message_for_md". If the device mapper doesn't support the message, it is passed to the target driver. If the message returns data, the kernel sets the flag DM_MESSAGE_OUT_FLAG. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-ioctl.c | 36 +++++++++++++++++++++++++++++++++++- include/uapi/linux/dm-ioctl.h | 5 +++++ 2 files changed, 40 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c index 7eb0682d574f..aa04f0224642 100644 --- a/drivers/md/dm-ioctl.c +++ b/drivers/md/dm-ioctl.c @@ -1414,6 +1414,22 @@ static int table_status(struct dm_ioctl *param, size_t param_size) return 0; } +static bool buffer_test_overflow(char *result, unsigned maxlen) +{ + return !maxlen || strlen(result) + 1 >= maxlen; +} + +/* + * Process device-mapper dependent messages. + * Returns a number <= 1 if message was processed by device mapper. + * Returns 2 if message should be delivered to the target. + */ +static int message_for_md(struct mapped_device *md, unsigned argc, char **argv, + char *result, unsigned maxlen) +{ + return 2; +} + /* * Pass a message to the target that's at the supplied device offset. */ @@ -1425,6 +1441,8 @@ static int target_message(struct dm_ioctl *param, size_t param_size) struct dm_table *table; struct dm_target *ti; struct dm_target_msg *tmsg = (void *) param + param->data_start; + size_t maxlen; + char *result = get_result_buffer(param, param_size, &maxlen); md = find_device(param); if (!md) @@ -1448,6 +1466,10 @@ static int target_message(struct dm_ioctl *param, size_t param_size) goto out_argv; } + r = message_for_md(md, argc, argv, result, maxlen); + if (r <= 1) + goto out_argv; + table = dm_get_live_table(md); if (!table) goto out_argv; @@ -1473,7 +1495,18 @@ static int target_message(struct dm_ioctl *param, size_t param_size) out_argv: kfree(argv); out: - param->data_size = 0; + if (r >= 0) + __dev_status(md, param); + + if (r == 1) { + param->flags |= DM_DATA_OUT_FLAG; + if (buffer_test_overflow(result, maxlen)) + param->flags |= DM_BUFFER_FULL_FLAG; + else + param->data_size = param->data_start + strlen(result) + 1; + r = 0; + } + dm_put(md); return r; } @@ -1653,6 +1686,7 @@ static int validate_params(uint cmd, struct dm_ioctl *param) param->flags &= ~DM_BUFFER_FULL_FLAG; param->flags &= ~DM_UEVENT_GENERATED_FLAG; param->flags &= ~DM_SECURE_DATA_FLAG; + param->flags &= ~DM_DATA_OUT_FLAG; /* Ignores parameters */ if (cmd == DM_REMOVE_ALL_CMD || diff --git a/include/uapi/linux/dm-ioctl.h b/include/uapi/linux/dm-ioctl.h index b8a6bddf0727..7e75b6fd8d45 100644 --- a/include/uapi/linux/dm-ioctl.h +++ b/include/uapi/linux/dm-ioctl.h @@ -336,4 +336,9 @@ enum { */ #define DM_SECURE_DATA_FLAG (1 << 15) /* In */ +/* + * If set, a message generated output data. + */ +#define DM_DATA_OUT_FLAG (1 << 16) /* Out */ + #endif /* _LINUX_DM_IOCTL_H */ -- cgit v1.2.3-70-g09d2 From df5d2e9089c7d5b8c46f767e4278610ea3e815b9 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 1 Mar 2013 22:45:49 +0000 Subject: dm kcopyd: introduce configurable throttling This patch allows the administrator to reduce the rate at which kcopyd issues I/O. Each module that uses kcopyd acquires a throttle parameter that can be set in /sys/module/*/parameters. We maintain a history of kcopyd usage by each module in the variables io_period and total_period in struct dm_kcopyd_throttle. The actual kcopyd activity is calculated as a percentage of time equal to "(100 * io_period / total_period)". This is compared with the user-defined throttle percentage threshold and if it is exceeded, we sleep. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm-kcopyd.c | 121 +++++++++++++++++++++++++++++++++++++++++++++- drivers/md/dm-raid1.c | 5 +- drivers/md/dm-snap.c | 5 +- drivers/md/dm-thin.c | 5 +- include/linux/dm-kcopyd.h | 25 +++++++++- 5 files changed, 156 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index 68c02673263b..d581fe5d2faf 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,8 @@ struct dm_kcopyd_client { struct workqueue_struct *kcopyd_wq; struct work_struct kcopyd_work; + struct dm_kcopyd_throttle *throttle; + /* * We maintain three lists of jobs: * @@ -68,6 +71,117 @@ struct dm_kcopyd_client { static struct page_list zero_page_list; +static DEFINE_SPINLOCK(throttle_spinlock); + +/* + * IO/IDLE accounting slowly decays after (1 << ACCOUNT_INTERVAL_SHIFT) period. + * When total_period >= (1 << ACCOUNT_INTERVAL_SHIFT) the counters are divided + * by 2. + */ +#define ACCOUNT_INTERVAL_SHIFT SHIFT_HZ + +/* + * Sleep this number of milliseconds. + * + * The value was decided experimentally. + * Smaller values seem to cause an increased copy rate above the limit. + * The reason for this is unknown but possibly due to jiffies rounding errors + * or read/write cache inside the disk. + */ +#define SLEEP_MSEC 100 + +/* + * Maximum number of sleep events. There is a theoretical livelock if more + * kcopyd clients do work simultaneously which this limit avoids. + */ +#define MAX_SLEEPS 10 + +static void io_job_start(struct dm_kcopyd_throttle *t) +{ + unsigned throttle, now, difference; + int slept = 0, skew; + + if (unlikely(!t)) + return; + +try_again: + spin_lock_irq(&throttle_spinlock); + + throttle = ACCESS_ONCE(t->throttle); + + if (likely(throttle >= 100)) + goto skip_limit; + + now = jiffies; + difference = now - t->last_jiffies; + t->last_jiffies = now; + if (t->num_io_jobs) + t->io_period += difference; + t->total_period += difference; + + /* + * Maintain sane values if we got a temporary overflow. + */ + if (unlikely(t->io_period > t->total_period)) + t->io_period = t->total_period; + + if (unlikely(t->total_period >= (1 << ACCOUNT_INTERVAL_SHIFT))) { + int shift = fls(t->total_period >> ACCOUNT_INTERVAL_SHIFT); + t->total_period >>= shift; + t->io_period >>= shift; + } + + skew = t->io_period - throttle * t->total_period / 100; + + if (unlikely(skew > 0) && slept < MAX_SLEEPS) { + slept++; + spin_unlock_irq(&throttle_spinlock); + msleep(SLEEP_MSEC); + goto try_again; + } + +skip_limit: + t->num_io_jobs++; + + spin_unlock_irq(&throttle_spinlock); +} + +static void io_job_finish(struct dm_kcopyd_throttle *t) +{ + unsigned long flags; + + if (unlikely(!t)) + return; + + spin_lock_irqsave(&throttle_spinlock, flags); + + t->num_io_jobs--; + + if (likely(ACCESS_ONCE(t->throttle) >= 100)) + goto skip_limit; + + if (!t->num_io_jobs) { + unsigned now, difference; + + now = jiffies; + difference = now - t->last_jiffies; + t->last_jiffies = now; + + t->io_period += difference; + t->total_period += difference; + + /* + * Maintain sane values if we got a temporary overflow. + */ + if (unlikely(t->io_period > t->total_period)) + t->io_period = t->total_period; + } + +skip_limit: + spin_unlock_irqrestore(&throttle_spinlock, flags); +} + + static void wake(struct dm_kcopyd_client *kc) { queue_work(kc->kcopyd_wq, &kc->kcopyd_work); @@ -348,6 +462,8 @@ static void complete_io(unsigned long error, void *context) struct kcopyd_job *job = (struct kcopyd_job *) context; struct dm_kcopyd_client *kc = job->kc; + io_job_finish(kc->throttle); + if (error) { if (job->rw & WRITE) job->write_err |= error; @@ -389,6 +505,8 @@ static int run_io_job(struct kcopyd_job *job) .client = job->kc->io_client, }; + io_job_start(job->kc->throttle); + if (job->rw == READ) r = dm_io(&io_req, 1, &job->source, NULL); else @@ -695,7 +813,7 @@ int kcopyd_cancel(struct kcopyd_job *job, int block) /*----------------------------------------------------------------- * Client setup *---------------------------------------------------------------*/ -struct dm_kcopyd_client *dm_kcopyd_client_create(void) +struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle) { int r = -ENOMEM; struct dm_kcopyd_client *kc; @@ -708,6 +826,7 @@ struct dm_kcopyd_client *dm_kcopyd_client_create(void) INIT_LIST_HEAD(&kc->complete_jobs); INIT_LIST_HEAD(&kc->io_jobs); INIT_LIST_HEAD(&kc->pages_jobs); + kc->throttle = throttle; kc->job_pool = mempool_create_slab_pool(MIN_JOBS, _job_cache); if (!kc->job_pool) diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index e2ea97723e10..d053098c6a91 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -82,6 +82,9 @@ struct mirror_set { struct mirror mirror[0]; }; +DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(raid1_resync_throttle, + "A percentage of time allocated for raid resynchronization"); + static void wakeup_mirrord(void *context) { struct mirror_set *ms = context; @@ -1111,7 +1114,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto err_destroy_wq; } - ms->kcopyd_client = dm_kcopyd_client_create(); + ms->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(ms->kcopyd_client)) { r = PTR_ERR(ms->kcopyd_client); goto err_destroy_wq; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 58c2b5881377..c0e07026a8d1 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -124,6 +124,9 @@ struct dm_snapshot { #define RUNNING_MERGE 0 #define SHUTDOWN_MERGE 1 +DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, + "A percentage of time allocated for copy on write"); + struct dm_dev *dm_snap_origin(struct dm_snapshot *s) { return s->origin; @@ -1108,7 +1111,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad_hash_tables; } - s->kcopyd_client = dm_kcopyd_client_create(); + s->kcopyd_client = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(s->kcopyd_client)) { r = PTR_ERR(s->kcopyd_client); ti->error = "Could not create kcopyd client"; diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 303e11da7c2a..35d9d0396cc2 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -26,6 +26,9 @@ #define PRISON_CELLS 1024 #define COMMIT_PERIOD HZ +DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(snapshot_copy_throttle, + "A percentage of time allocated for copy on write"); + /* * The block size of the device holding pool data must be * between 64KB and 1GB. @@ -1642,7 +1645,7 @@ static struct pool *pool_create(struct mapped_device *pool_md, goto bad_prison; } - pool->copier = dm_kcopyd_client_create(); + pool->copier = dm_kcopyd_client_create(&dm_kcopyd_throttle); if (IS_ERR(pool->copier)) { r = PTR_ERR(pool->copier); *error = "Error creating pool's kcopyd client"; diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index 47d9d376e4e7..f486d636b82e 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -21,11 +21,34 @@ #define DM_KCOPYD_IGNORE_ERROR 1 +struct dm_kcopyd_throttle { + unsigned throttle; + unsigned num_io_jobs; + unsigned io_period; + unsigned total_period; + unsigned last_jiffies; +}; + +/* + * kcopyd clients that want to support throttling must pass an initialised + * dm_kcopyd_throttle struct into dm_kcopyd_client_create(). + * Two or more clients may share the same instance of this struct between + * them if they wish to be throttled as a group. + * + * This macro also creates a corresponding module parameter to configure + * the amount of throttling. + */ +#define DECLARE_DM_KCOPYD_THROTTLE_WITH_MODULE_PARM(name, description) \ +static struct dm_kcopyd_throttle dm_kcopyd_throttle = { 100, 0, 0, 0, 0 }; \ +module_param_named(name, dm_kcopyd_throttle.throttle, uint, 0644); \ +MODULE_PARM_DESC(name, description) + /* * To use kcopyd you must first create a dm_kcopyd_client object. + * throttle can be NULL if you don't want any throttling. */ struct dm_kcopyd_client; -struct dm_kcopyd_client *dm_kcopyd_client_create(void); +struct dm_kcopyd_client *dm_kcopyd_client_create(struct dm_kcopyd_throttle *throttle); void dm_kcopyd_client_destroy(struct dm_kcopyd_client *kc); /* -- cgit v1.2.3-70-g09d2 From b0d8ed4d96a26ef3ac54a4aa8911c9413070662e Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Fri, 1 Mar 2013 22:45:49 +0000 Subject: dm: add target num_write_bios fn Add a num_write_bios function to struct target. If an instance of a target sets this, it will be queried before the target's mapping function is called on a write bio, and the response controls the number of copies of the write bio that the target will receive. This provides a convenient way for a target to send the same data to more than one device. The new cache target uses this in writethrough mode, to send the data both to the cache and the backing device. Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 22 +++++++++++++++------- include/linux/device-mapper.h | 15 +++++++++++++++ 2 files changed, 30 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e417cf0a69ef..7e469260fe5e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1152,15 +1152,23 @@ static void __clone_and_map_data_bio(struct clone_info *ci, struct dm_target *ti { struct bio *bio = ci->bio; struct dm_target_io *tio; + unsigned target_bio_nr; + unsigned num_target_bios = 1; - tio = alloc_tio(ci, ti, nr_iovecs, 0); - - if (split_bvec) - clone_split_bio(tio, bio, sector, idx, offset, len); - else - clone_bio(tio, bio, sector, idx, bv_count, len); + /* + * Does the target want to receive duplicate copies of the bio? + */ + if (bio_data_dir(bio) == WRITE && ti->num_write_bios) + num_target_bios = ti->num_write_bios(ti, bio); - __map_bio(tio); + for (target_bio_nr = 0; target_bio_nr < num_target_bios; target_bio_nr++) { + tio = alloc_tio(ci, ti, nr_iovecs, target_bio_nr); + if (split_bvec) + clone_split_bio(tio, bio, sector, idx, offset, len); + else + clone_bio(tio, bio, sector, idx, bv_count, len); + __map_bio(tio); + } } typedef unsigned (*get_num_bios_fn)(struct dm_target *ti); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index d5f984b07466..1e483fa7afb4 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -175,6 +175,14 @@ struct target_type { #define DM_TARGET_IMMUTABLE 0x00000004 #define dm_target_is_immutable(type) ((type)->features & DM_TARGET_IMMUTABLE) +/* + * Some targets need to be sent the same WRITE bio severals times so + * that they can send copies of it to different devices. This function + * examines any supplied bio and returns the number of copies of it the + * target requires. + */ +typedef unsigned (*dm_num_write_bios_fn) (struct dm_target *ti, struct bio *bio); + struct dm_target { struct dm_table *table; struct target_type *type; @@ -214,6 +222,13 @@ struct dm_target { */ unsigned per_bio_data_size; + /* + * If defined, this function is called to find out how many + * duplicate bios should be sent to the target when writing + * data. + */ + dm_num_write_bios_fn num_write_bios; + /* target specific data */ void *private; -- cgit v1.2.3-70-g09d2