From 63a7138671c50a6f2c27bbd1a308dc75967062a3 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 8 Feb 2008 12:41:03 +0100 Subject: block: fixup rq_init() a bit Rearrange fields in cache order and initialize some fields that we didn't previously init. Remove init of ->completion_data, it's part of a union with ->hash. Luckily clearing the rb node is the same as setting it to null! Signed-off-by: Jens Axboe --- block/blk-core.c | 23 +++++++++++++++++------ include/linux/blkdev.h | 4 +++- 2 files changed, 20 insertions(+), 7 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 4afb39c82339..fba4ca7c6086 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -102,27 +102,38 @@ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) } EXPORT_SYMBOL(blk_get_backing_dev_info); +/* + * We can't just memset() the structure, since the allocation path + * already stored some information in the request. + */ void rq_init(struct request_queue *q, struct request *rq) { INIT_LIST_HEAD(&rq->queuelist); INIT_LIST_HEAD(&rq->donelist); - - rq->errors = 0; + rq->q = q; + rq->sector = rq->hard_sector = (sector_t) -1; + rq->nr_sectors = rq->hard_nr_sectors = 0; + rq->current_nr_sectors = rq->hard_cur_sectors = 0; rq->bio = rq->biotail = NULL; INIT_HLIST_NODE(&rq->hash); RB_CLEAR_NODE(&rq->rb_node); + rq->rq_disk = NULL; + rq->nr_phys_segments = 0; + rq->nr_hw_segments = 0; rq->ioprio = 0; + rq->special = NULL; rq->buffer = NULL; + rq->tag = -1; + rq->errors = 0; rq->ref_count = 1; - rq->q = q; - rq->special = NULL; + rq->cmd_len = 0; + memset(rq->cmd, 0, sizeof(rq->cmd)); rq->data_len = 0; + rq->sense_len = 0; rq->data = NULL; - rq->nr_phys_segments = 0; rq->sense = NULL; rq->end_io = NULL; rq->end_io_data = NULL; - rq->completion_data = NULL; rq->next_rq = NULL; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 90392a9d7a9c..e1888cc5b8ae 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -137,7 +137,9 @@ enum 
rq_flag_bits { #define BLK_MAX_CDB 16 /* - * try to put the fields that are referenced together in the same cacheline + * try to put the fields that are referenced together in the same cacheline. + * if you modify this structure, be sure to check block/blk-core.c:rq_init() + * as well! */ struct request { struct list_head queuelist; -- cgit v1.2.3-70-g09d2 From ea5c48ab2a76559d4af39e1f7de137c0851ac0a5 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Fri, 8 Feb 2008 11:04:09 +0100 Subject: Enhanced partition statistics: core statistics This patch contain the core infrastructure of enhanced partition statistics. It adds to struct hd_struct the same stats data as struct gendisk and define basics function to manipulate them. Signed-off-by: Jerome Marchand Signed-off-by: Jens Axboe --- include/linux/genhd.h | 151 ++++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 141 insertions(+), 10 deletions(-) diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 1dbea0ac5693..589830aca99d 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -91,6 +91,15 @@ struct partition { __le32 nr_sects; /* nr of sectors in partition */ } __attribute__((packed)); +struct disk_stats { + unsigned long sectors[2]; /* READs and WRITEs */ + unsigned long ios[2]; + unsigned long merges[2]; + unsigned long ticks[2]; + unsigned long io_ticks; + unsigned long time_in_queue; +}; + struct hd_struct { sector_t start_sect; sector_t nr_sects; @@ -100,6 +109,13 @@ struct hd_struct { int policy, partno; #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; +#endif + unsigned long stamp; + int in_flight; +#ifdef CONFIG_SMP + struct disk_stats *dkstats; +#else + struct disk_stats dkstats; #endif }; @@ -111,15 +127,7 @@ struct hd_struct { #define GENHD_FL_SUPPRESS_PARTITION_INFO 32 #define GENHD_FL_FAIL 64 -struct disk_stats { - unsigned long sectors[2]; /* READs and WRITEs */ - unsigned long ios[2]; - unsigned long merges[2]; - unsigned long ticks[2]; - unsigned long 
io_ticks; - unsigned long time_in_queue; -}; - + struct gendisk { int major; /* major number of driver */ int first_minor; @@ -158,6 +166,20 @@ struct gendisk { * The __ variants should only be called in critical sections. The full * variants disable/enable preemption. */ +static inline struct hd_struct *get_part(struct gendisk *gendiskp, + sector_t sector) +{ + struct hd_struct *part; + int i; + for (i = 0; i < gendiskp->minors - 1; i++) { + part = gendiskp->part[i]; + if (part && part->start_sect <= sector + && sector < part->start_sect + part->nr_sects) + return part; + } + return NULL; +} + #ifdef CONFIG_SMP #define __disk_stat_add(gendiskp, field, addnd) \ (per_cpu_ptr(gendiskp->dkstats, smp_processor_id())->field += addnd) @@ -177,15 +199,62 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { memset(per_cpu_ptr(gendiskp->dkstats, i), value, sizeof (struct disk_stats)); } + +#define __part_stat_add(part, field, addnd) \ + (per_cpu_ptr(part->dkstats, smp_processor_id())->field += addnd) + +#define __all_stat_add(gendiskp, field, addnd, sector) \ +({ \ + struct hd_struct *part = get_part(gendiskp, sector); \ + if (part) \ + __part_stat_add(part, field, addnd); \ + __disk_stat_add(gendiskp, field, addnd); \ +}) + +#define part_stat_read(part, field) \ +({ \ + typeof(part->dkstats->field) res = 0; \ + int i; \ + for_each_possible_cpu(i) \ + res += per_cpu_ptr(part->dkstats, i)->field; \ + res; \ +}) + +static inline void part_stat_set_all(struct hd_struct *part, int value) { + int i; + for_each_possible_cpu(i) + memset(per_cpu_ptr(part->dkstats, i), value, + sizeof(struct disk_stats)); +} #else #define __disk_stat_add(gendiskp, field, addnd) \ (gendiskp->dkstats.field += addnd) #define disk_stat_read(gendiskp, field) (gendiskp->dkstats.field) -static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { +static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) +{ memset(&gendiskp->dkstats, value, sizeof 
(struct disk_stats)); } + +#define __part_stat_add(part, field, addnd) \ + (part->dkstats.field += addnd) + +#define __all_stat_add(gendiskp, field, addnd, sector) \ +({ \ + struct hd_struct *part = get_part(gendiskp, sector); \ + if (part) \ + part->dkstats.field += addnd; \ + __disk_stat_add(gendiskp, field, addnd); \ +}) + +#define part_stat_read(part, field) (part->dkstats.field) + +static inline void part_stat_set_all(struct hd_struct *part, int value) +{ + memset(&part->dkstats, value, sizeof(struct disk_stats)); +} + #endif #define disk_stat_add(gendiskp, field, addnd) \ @@ -206,6 +275,45 @@ static inline void disk_stat_set_all(struct gendisk *gendiskp, int value) { #define disk_stat_sub(gendiskp, field, subnd) \ disk_stat_add(gendiskp, field, -subnd) +#define part_stat_add(gendiskp, field, addnd) \ + do { \ + preempt_disable(); \ + __part_stat_add(gendiskp, field, addnd);\ + preempt_enable(); \ + } while (0) + +#define __part_stat_dec(gendiskp, field) __part_stat_add(gendiskp, field, -1) +#define part_stat_dec(gendiskp, field) part_stat_add(gendiskp, field, -1) + +#define __part_stat_inc(gendiskp, field) __part_stat_add(gendiskp, field, 1) +#define part_stat_inc(gendiskp, field) part_stat_add(gendiskp, field, 1) + +#define __part_stat_sub(gendiskp, field, subnd) \ + __part_stat_add(gendiskp, field, -subnd) +#define part_stat_sub(gendiskp, field, subnd) \ + part_stat_add(gendiskp, field, -subnd) + +#define all_stat_add(gendiskp, field, addnd, sector) \ + do { \ + preempt_disable(); \ + __all_stat_add(gendiskp, field, addnd, sector); \ + preempt_enable(); \ + } while (0) + +#define __all_stat_dec(gendiskp, field, sector) \ + __all_stat_add(gendiskp, field, -1, sector) +#define all_stat_dec(gendiskp, field, sector) \ + all_stat_add(gendiskp, field, -1, sector) + +#define __all_stat_inc(gendiskp, field, sector) \ + __all_stat_add(gendiskp, field, 1, sector) +#define all_stat_inc(gendiskp, field, sector) \ + all_stat_add(gendiskp, field, 1, sector) + +#define 
__all_stat_sub(gendiskp, field, subnd, sector) \ + __all_stat_add(gendiskp, field, -subnd, sector) +#define all_stat_sub(gendiskp, field, subnd, sector) \ + all_stat_add(gendiskp, field, -subnd, sector) /* Inlines to alloc and free disk stats in struct gendisk */ #ifdef CONFIG_SMP @@ -221,6 +329,20 @@ static inline void free_disk_stats(struct gendisk *disk) { free_percpu(disk->dkstats); } + +static inline int init_part_stats(struct hd_struct *part) +{ + part->dkstats = alloc_percpu(struct disk_stats); + if (!part->dkstats) + return 0; + return 1; +} + +static inline void free_part_stats(struct hd_struct *part) +{ + free_percpu(part->dkstats); +} + #else /* CONFIG_SMP */ static inline int init_disk_stats(struct gendisk *disk) { @@ -230,6 +352,15 @@ static inline int init_disk_stats(struct gendisk *disk) static inline void free_disk_stats(struct gendisk *disk) { } + +static inline int init_part_stats(struct hd_struct *part) +{ + return 1; +} + +static inline void free_part_stats(struct hd_struct *part) +{ +} #endif /* CONFIG_SMP */ /* drivers/block/ll_rw_blk.c */ -- cgit v1.2.3-70-g09d2 From 6f2576af5ba5913538fda7dfb7c6a17771025477 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Fri, 8 Feb 2008 11:04:35 +0100 Subject: Enhanced partition statistics: update partition statitics Updates the enhanced partition statistics in generic block layer besides the disk statistics. 
Signed-off-by: Jerome Marchand Signed-off-by: Jens Axboe --- block/blk-core.c | 34 ++++++++++++++++++++++++++++++---- block/blk-merge.c | 6 ++++++ fs/partitions/check.c | 7 +++++++ include/linux/genhd.h | 1 + 4 files changed, 44 insertions(+), 4 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index fba4ca7c6086..2358fc5de5a4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -60,10 +60,15 @@ static void drive_stat_acct(struct request *rq, int new_io) return; if (!new_io) { - __disk_stat_inc(rq->rq_disk, merges[rw]); + __all_stat_inc(rq->rq_disk, merges[rw], rq->sector); } else { + struct hd_struct *part = get_part(rq->rq_disk, rq->sector); disk_round_stats(rq->rq_disk); rq->rq_disk->in_flight++; + if (part) { + part_round_stats(part); + part->in_flight++; + } } } @@ -997,6 +1002,21 @@ void disk_round_stats(struct gendisk *disk) } EXPORT_SYMBOL_GPL(disk_round_stats); +void part_round_stats(struct hd_struct *part) +{ + unsigned long now = jiffies; + + if (now == part->stamp) + return; + + if (part->in_flight) { + __part_stat_add(part, time_in_queue, + part->in_flight * (now - part->stamp)); + __part_stat_add(part, io_ticks, (now - part->stamp)); + } + part->stamp = now; +} + /* * queue lock must be held */ @@ -1530,7 +1550,8 @@ static int __end_that_request_first(struct request *req, int error, if (blk_fs_request(req) && req->rq_disk) { const int rw = rq_data_dir(req); - disk_stat_add(req->rq_disk, sectors[rw], nr_bytes >> 9); + all_stat_add(req->rq_disk, sectors[rw], + nr_bytes >> 9, req->sector); } total_bytes = bio_nbytes = 0; @@ -1715,11 +1736,16 @@ static void end_that_request_last(struct request *req, int error) if (disk && blk_fs_request(req) && req != &req->q->bar_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); + struct hd_struct *part = get_part(disk, req->sector); - __disk_stat_inc(disk, ios[rw]); - __disk_stat_add(disk, ticks[rw], duration); + __all_stat_inc(disk, ios[rw], req->sector); + 
__all_stat_add(disk, ticks[rw], duration, req->sector); disk_round_stats(disk); disk->in_flight--; + if (part) { + part_round_stats(part); + part->in_flight--; + } } if (req->end_io) diff --git a/block/blk-merge.c b/block/blk-merge.c index 845ef8131108..d3b84bbb776a 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -454,8 +454,14 @@ static int attempt_merge(struct request_queue *q, struct request *req, elv_merge_requests(q, req, next); if (req->rq_disk) { + struct hd_struct *part + = get_part(req->rq_disk, req->sector); disk_round_stats(req->rq_disk); req->rq_disk->in_flight--; + if (part) { + part_round_stats(part); + part->in_flight--; + } } req->ioprio = ioprio_best(req->ioprio, next->ioprio); diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 9a64045ff845..f2ec7f1b0ec5 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "check.h" @@ -273,6 +274,7 @@ static struct attribute_group *part_attr_groups[] = { static void part_release(struct device *dev) { struct hd_struct *p = dev_to_part(dev); + free_part_stats(p); kfree(p); } @@ -314,6 +316,7 @@ void delete_partition(struct gendisk *disk, int part) p->nr_sects = 0; p->ios[0] = p->ios[1] = 0; p->sectors[0] = p->sectors[1] = 0; + part_stat_set_all(p, 0); kobject_put(p->holder_dir); device_del(&p->dev); put_device(&p->dev); @@ -336,6 +339,10 @@ void add_partition(struct gendisk *disk, int part, sector_t start, sector_t len, if (!p) return; + if (!init_part_stats(p)) { + kfree(p); + return; + } p->start_sect = start; p->nr_sects = len; p->partno = part; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 589830aca99d..4cf25a5f4159 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -365,6 +365,7 @@ static inline void free_part_stats(struct hd_struct *part) /* drivers/block/ll_rw_blk.c */ extern void disk_round_stats(struct gendisk *disk); +extern void part_round_stats(struct hd_struct *part); /* 
drivers/block/genhd.c */ extern int get_blkdev_list(char *, int); -- cgit v1.2.3-70-g09d2 From a890d62b9e8743341f62548104d1ac29fa8a5a88 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Fri, 8 Feb 2008 11:04:53 +0100 Subject: Enhanced partition statistics: aoe fix Updates the enhanced partition statistics in ATA over Ethernet driver (not tested). Signed-off-by: Jerome Marchand --- drivers/block/aoe/aoecmd.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/block/aoe/aoecmd.c b/drivers/block/aoe/aoecmd.c index 4d59d5057734..9e5a37fb36cf 100644 --- a/drivers/block/aoe/aoecmd.c +++ b/drivers/block/aoe/aoecmd.c @@ -648,10 +648,10 @@ aoecmd_ata_rsp(struct sk_buff *skb) struct gendisk *disk = d->gd; const int rw = bio_data_dir(buf->bio); - disk_stat_inc(disk, ios[rw]); - disk_stat_add(disk, ticks[rw], duration); - disk_stat_add(disk, sectors[rw], n_sect); - disk_stat_add(disk, io_ticks, duration); + all_stat_inc(disk, ios[rw], buf->sector); + all_stat_add(disk, ticks[rw], duration, buf->sector); + all_stat_add(disk, sectors[rw], n_sect, buf->sector); + all_stat_add(disk, io_ticks, duration, buf->sector); n = (buf->flags & BUFFL_FAIL) ? -EIO : 0; bio_endio(buf->bio, n); mempool_free(buf, d->bufpool); -- cgit v1.2.3-70-g09d2 From 34e8beac92c27d292938065f8375842d2840767c Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Fri, 8 Feb 2008 11:04:55 +0100 Subject: Enhanced partition statistics: sysfs Reports enhanced partition statistics in sysfs. 
Signed-off-by: Jerome Marchand --- fs/partitions/check.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/fs/partitions/check.c b/fs/partitions/check.c index f2ec7f1b0ec5..950bdb4b8f53 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -216,9 +216,25 @@ static ssize_t part_stat_show(struct device *dev, { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%8u %8llu %8u %8llu\n", - p->ios[0], (unsigned long long)p->sectors[0], - p->ios[1], (unsigned long long)p->sectors[1]); + preempt_disable(); + part_round_stats(p); + preempt_enable(); + return sprintf(buf, + "%8lu %8lu %8llu %8u " + "%8lu %8lu %8llu %8u " + "%8u %8u %8u" + "\n", + part_stat_read(p, ios[READ]), + part_stat_read(p, merges[READ]), + (unsigned long long)part_stat_read(p, sectors[READ]), + jiffies_to_msecs(part_stat_read(p, ticks[READ])), + part_stat_read(p, ios[WRITE]), + part_stat_read(p, merges[WRITE]), + (unsigned long long)part_stat_read(p, sectors[WRITE]), + jiffies_to_msecs(part_stat_read(p, ticks[WRITE])), + p->in_flight, + jiffies_to_msecs(part_stat_read(p, io_ticks)), + jiffies_to_msecs(part_stat_read(p, time_in_queue))); } #ifdef CONFIG_FAIL_MAKE_REQUEST -- cgit v1.2.3-70-g09d2 From 28f39d553ee242000e62f6c589ee3dc6de3f9aaa Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Fri, 8 Feb 2008 11:04:56 +0100 Subject: Enhanced partition statistics: procfs Reports enhanced partition statistics in /proc/diskstats. 
Signed-off-by: Jerome Marchand --- block/genhd.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/block/genhd.c b/block/genhd.c index de2ebb2fab43..53f2238e69c8 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -584,12 +584,28 @@ static int diskstats_show(struct seq_file *s, void *v) for (n = 0; n < gp->minors - 1; n++) { struct hd_struct *hd = gp->part[n]; - if (hd && hd->nr_sects) - seq_printf(s, "%4d %4d %s %u %u %u %u\n", - gp->major, n + gp->first_minor + 1, - disk_name(gp, n + 1, buf), - hd->ios[0], hd->sectors[0], - hd->ios[1], hd->sectors[1]); + if (!hd || !hd->nr_sects) + continue; + + preempt_disable(); + part_round_stats(hd); + preempt_enable(); + seq_printf(s, "%4d %4d %s %lu %lu %llu " + "%u %lu %lu %llu %u %u %u %u\n", + gp->major, n + gp->first_minor + 1, + disk_name(gp, n + 1, buf), + part_stat_read(hd, ios[0]), + part_stat_read(hd, merges[0]), + (unsigned long long)part_stat_read(hd, sectors[0]), + jiffies_to_msecs(part_stat_read(hd, ticks[0])), + part_stat_read(hd, ios[1]), + part_stat_read(hd, merges[1]), + (unsigned long long)part_stat_read(hd, sectors[1]), + jiffies_to_msecs(part_stat_read(hd, ticks[1])), + hd->in_flight, + jiffies_to_msecs(part_stat_read(hd, io_ticks)), + jiffies_to_msecs(part_stat_read(hd, time_in_queue)) + ); } return 0; -- cgit v1.2.3-70-g09d2 From c3c930d93365c495fbc1df28649da7cd4b97f4af Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Fri, 8 Feb 2008 12:06:21 +0100 Subject: Enhanced partition statistics: remove old partition statistics Removes the now unused old partition statistic code. 
Signed-off-by: Jerome Marchand Signed-off-by: Jens Axboe --- block/blk-core.c | 4 ---- fs/partitions/check.c | 2 -- include/linux/genhd.h | 1 - 3 files changed, 7 deletions(-) diff --git a/block/blk-core.c b/block/blk-core.c index 2358fc5de5a4..e9754dc98ec4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1219,10 +1219,6 @@ static inline void blk_partition_remap(struct bio *bio) if (bio_sectors(bio) && bdev != bdev->bd_contains) { struct hd_struct *p = bdev->bd_part; - const int rw = bio_data_dir(bio); - - p->sectors[rw] += bio_sectors(bio); - p->ios[rw]++; bio->bi_sector += p->start_sect; bio->bi_bdev = bdev->bd_contains; diff --git a/fs/partitions/check.c b/fs/partitions/check.c index 950bdb4b8f53..03f808c5b79d 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -330,8 +330,6 @@ void delete_partition(struct gendisk *disk, int part) disk->part[part-1] = NULL; p->start_sect = 0; p->nr_sects = 0; - p->ios[0] = p->ios[1] = 0; - p->sectors[0] = p->sectors[1] = 0; part_stat_set_all(p, 0); kobject_put(p->holder_dir); device_del(&p->dev); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 4cf25a5f4159..09a3b18918c7 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -105,7 +105,6 @@ struct hd_struct { sector_t nr_sects; struct device dev; struct kobject *holder_dir; - unsigned ios[2], sectors[2]; /* READs and WRITEs */ int policy, partno; #ifdef CONFIG_FAIL_MAKE_REQUEST int make_it_fail; -- cgit v1.2.3-70-g09d2 From 0e53c2be0495afa97c6b0d06397adcbff9c65347 Mon Sep 17 00:00:00 2001 From: Jerome Marchand Date: Fri, 8 Feb 2008 11:10:56 +0100 Subject: Enhanced partition statistics: documentation update Update the documentation to reflect the change in userspace interface. 
Signed-off-by: Jerome Marchand Signed-off-by: Jens Axboe --- Documentation/ABI/testing/procfs-diskstats | 22 ++++++++++++++++++++++ Documentation/ABI/testing/sysfs-block | 28 ++++++++++++++++++++++++++++ Documentation/iostats.txt | 15 ++++++++++++++- 3 files changed, 64 insertions(+), 1 deletion(-) create mode 100644 Documentation/ABI/testing/procfs-diskstats create mode 100644 Documentation/ABI/testing/sysfs-block diff --git a/Documentation/ABI/testing/procfs-diskstats b/Documentation/ABI/testing/procfs-diskstats new file mode 100644 index 000000000000..99233902e09e --- /dev/null +++ b/Documentation/ABI/testing/procfs-diskstats @@ -0,0 +1,22 @@ +What: /proc/diskstats +Date: February 2008 +Contact: Jerome Marchand +Description: + The /proc/diskstats file displays the I/O statistics + of block devices. Each line contains the following 14 + fields: + 1 - major number + 2 - minor number + 3 - device name + 4 - reads completed successfully + 5 - reads merged + 6 - sectors read + 7 - time spent reading (ms) + 8 - writes completed + 9 - writes merged + 10 - sectors written + 11 - time spent writing (ms) + 12 - I/Os currently in progress + 13 - time spent doing I/Os (ms) + 14 - weighted time spent doing I/Os (ms) + For more details refer to Documentation/iostats.txt diff --git a/Documentation/ABI/testing/sysfs-block b/Documentation/ABI/testing/sysfs-block new file mode 100644 index 000000000000..4bd9ea539129 --- /dev/null +++ b/Documentation/ABI/testing/sysfs-block @@ -0,0 +1,28 @@ +What: /sys/block/<disk>/stat +Date: February 2008 +Contact: Jerome Marchand +Description: + The /sys/block/<disk>/stat files display the I/O + statistics of disk <disk>.
They contain 11 fields: + 1 - reads completed successfully + 2 - reads merged + 3 - sectors read + 4 - time spent reading (ms) + 5 - writes completed + 6 - writes merged + 7 - sectors written + 8 - time spent writing (ms) + 9 - I/Os currently in progress + 10 - time spent doing I/Os (ms) + 11 - weighted time spent doing I/Os (ms) + For more details refer to Documentation/iostats.txt + + +What: /sys/block/<disk>/<part>/stat +Date: February 2008 +Contact: Jerome Marchand +Description: + The /sys/block/<disk>/<part>/stat files display the + I/O statistics of partition <part>. The format is the + same as the above-written /sys/block/<disk>/stat + format. diff --git a/Documentation/iostats.txt b/Documentation/iostats.txt index b963c3b4afa5..5925c3cd030d 100644 --- a/Documentation/iostats.txt +++ b/Documentation/iostats.txt @@ -58,7 +58,7 @@ they should not wrap twice before you notice them. Each set of stats only applies to the indicated device; if you want system-wide stats you'll have to find all the devices and sum them all up. -Field 1 -- # of reads issued +Field 1 -- # of reads completed This is the total number of reads completed successfully. Field 2 -- # of reads merged, field 6 -- # of writes merged Reads and writes which are adjacent to each other may be merged for @@ -132,6 +132,19 @@ words, the number of reads for partitions is counted slightly before time of queuing for partitions, and at completion for whole disks. This is a subtle distinction that is probably uninteresting for most cases. +More significant is the error induced by counting the numbers of +reads/writes before merges for partitions and after for disks. Since a +typical workload usually contains a lot of successive and adjacent requests, +the number of reads/writes issued can be several times higher than the +number of reads/writes completed. + +In 2.6.25, the full statistic set is again available for partitions and +disk and partition statistics are consistent again.
Since we still don't +keep record of the partition-relative address, an operation is attributed to +the partition which contains the first sector of the request after the +eventual merges. As requests can be merged across partitions, this could lead +to some (probably insignificant) inaccuracy. + Additional notes ---------------- -- cgit v1.2.3-70-g09d2