summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorIlya Dryomov <idryomov@gmail.com>2017-06-21 17:27:18 +0200
committerIlya Dryomov <idryomov@gmail.com>2017-07-07 17:25:18 +0200
commit6f428df47dae2c8ea31fd4c0c74a12a8a5ac2d1d (patch)
tree00076a72eda7738433726f6e6bc4ce1644d370f6
parent278b1d709c6acc6f7d138fed775c76695b068e43 (diff)
libceph: pg_upmap[_items] infrastructure
pg_temp and pg_upmap encodings are the same (PG -> array of osds), except for the incremental remove: it's an empty mapping in new_pg_temp for pg_temp and a separate old_pg_upmap set for pg_upmap. (This isn't to allow for empty pg_upmap mappings -- apparently, pg_temp just wasn't looked at as an example for pg_upmap encoding.) Reuse __decode_pg_temp() for decoding pg_upmap and new_pg_upmap. __decode_pg_temp() stores into pg_temp union member, but since pg_upmap union member is identical, reading through pg_upmap later is OK. Signed-off-by: Ilya Dryomov <idryomov@gmail.com>
-rw-r--r--include/linux/ceph/osdmap.h10
-rw-r--r--net/ceph/debugfs.c23
-rw-r--r--net/ceph/osdmap.c135
3 files changed, 164 insertions, 4 deletions
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index fe6d189bdd30..c612cff81f5c 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -143,10 +143,14 @@ struct ceph_pg_mapping {
struct {
int len;
int osds[];
- } pg_temp;
+ } pg_temp, pg_upmap;
struct {
int osd;
} primary_temp;
+ struct {
+ int len;
+ int from_to[][2];
+ } pg_upmap_items;
};
};
@@ -165,6 +169,10 @@ struct ceph_osdmap {
struct rb_root pg_temp;
struct rb_root primary_temp;
+ /* remap (post-CRUSH, pre-up) */
+ struct rb_root pg_upmap; /* PG := raw set */
+ struct rb_root pg_upmap_items; /* from -> to within raw set */
+
u32 *osd_primary_affinity;
struct rb_root pg_pools;
diff --git a/net/ceph/debugfs.c b/net/ceph/debugfs.c
index 017f15c575f8..4f57d5bcaba2 100644
--- a/net/ceph/debugfs.c
+++ b/net/ceph/debugfs.c
@@ -104,6 +104,29 @@ static int osdmap_show(struct seq_file *s, void *p)
seq_printf(s, "primary_temp %llu.%x %d\n", pg->pgid.pool,
pg->pgid.seed, pg->primary_temp.osd);
}
+ for (n = rb_first(&map->pg_upmap); n; n = rb_next(n)) {
+ struct ceph_pg_mapping *pg =
+ rb_entry(n, struct ceph_pg_mapping, node);
+
+ seq_printf(s, "pg_upmap %llu.%x [", pg->pgid.pool,
+ pg->pgid.seed);
+ for (i = 0; i < pg->pg_upmap.len; i++)
+ seq_printf(s, "%s%d", (i == 0 ? "" : ","),
+ pg->pg_upmap.osds[i]);
+ seq_printf(s, "]\n");
+ }
+ for (n = rb_first(&map->pg_upmap_items); n; n = rb_next(n)) {
+ struct ceph_pg_mapping *pg =
+ rb_entry(n, struct ceph_pg_mapping, node);
+
+ seq_printf(s, "pg_upmap_items %llu.%x [", pg->pgid.pool,
+ pg->pgid.seed);
+ for (i = 0; i < pg->pg_upmap_items.len; i++)
+ seq_printf(s, "%s%d->%d", (i == 0 ? "" : ","),
+ pg->pg_upmap_items.from_to[i][0],
+ pg->pg_upmap_items.from_to[i][1]);
+ seq_printf(s, "]\n");
+ }
up_read(&osdc->lock);
return 0;
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index f6d561edd511..a3f60d0bfd13 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -735,6 +735,8 @@ struct ceph_osdmap *ceph_osdmap_alloc(void)
map->pool_max = -1;
map->pg_temp = RB_ROOT;
map->primary_temp = RB_ROOT;
+ map->pg_upmap = RB_ROOT;
+ map->pg_upmap_items = RB_ROOT;
mutex_init(&map->crush_workspace_mutex);
return map;
@@ -759,6 +761,20 @@ void ceph_osdmap_destroy(struct ceph_osdmap *map)
erase_pg_mapping(&map->primary_temp, pg);
free_pg_mapping(pg);
}
+ while (!RB_EMPTY_ROOT(&map->pg_upmap)) {
+ struct ceph_pg_mapping *pg =
+ rb_entry(rb_first(&map->pg_upmap),
+ struct ceph_pg_mapping, node);
+ rb_erase(&pg->node, &map->pg_upmap);
+ kfree(pg);
+ }
+ while (!RB_EMPTY_ROOT(&map->pg_upmap_items)) {
+ struct ceph_pg_mapping *pg =
+ rb_entry(rb_first(&map->pg_upmap_items),
+ struct ceph_pg_mapping, node);
+ rb_erase(&pg->node, &map->pg_upmap_items);
+ kfree(pg);
+ }
while (!RB_EMPTY_ROOT(&map->pg_pools)) {
struct ceph_pg_pool_info *pi =
rb_entry(rb_first(&map->pg_pools),
@@ -1161,6 +1177,75 @@ e_inval:
return -EINVAL;
}
+static struct ceph_pg_mapping *__decode_pg_upmap(void **p, void *end,
+ bool __unused)
+{
+ return __decode_pg_temp(p, end, false);
+}
+
+static int decode_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
+{
+ return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap,
+ false);
+}
+
+static int decode_new_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
+{
+ return decode_pg_mapping(p, end, &map->pg_upmap, __decode_pg_upmap,
+ true);
+}
+
+static int decode_old_pg_upmap(void **p, void *end, struct ceph_osdmap *map)
+{
+ return decode_pg_mapping(p, end, &map->pg_upmap, NULL, true);
+}
+
+static struct ceph_pg_mapping *__decode_pg_upmap_items(void **p, void *end,
+ bool __unused)
+{
+ struct ceph_pg_mapping *pg;
+ u32 len, i;
+
+ ceph_decode_32_safe(p, end, len, e_inval);
+ if (len > (SIZE_MAX - sizeof(*pg)) / (2 * sizeof(u32)))
+ return ERR_PTR(-EINVAL);
+
+ ceph_decode_need(p, end, 2 * len * sizeof(u32), e_inval);
+ pg = kzalloc(sizeof(*pg) + 2 * len * sizeof(u32), GFP_NOIO);
+ if (!pg)
+ return ERR_PTR(-ENOMEM);
+
+ pg->pg_upmap_items.len = len;
+ for (i = 0; i < len; i++) {
+ pg->pg_upmap_items.from_to[i][0] = ceph_decode_32(p);
+ pg->pg_upmap_items.from_to[i][1] = ceph_decode_32(p);
+ }
+
+ return pg;
+
+e_inval:
+ return ERR_PTR(-EINVAL);
+}
+
+static int decode_pg_upmap_items(void **p, void *end, struct ceph_osdmap *map)
+{
+ return decode_pg_mapping(p, end, &map->pg_upmap_items,
+ __decode_pg_upmap_items, false);
+}
+
+static int decode_new_pg_upmap_items(void **p, void *end,
+ struct ceph_osdmap *map)
+{
+ return decode_pg_mapping(p, end, &map->pg_upmap_items,
+ __decode_pg_upmap_items, true);
+}
+
+static int decode_old_pg_upmap_items(void **p, void *end,
+ struct ceph_osdmap *map)
+{
+ return decode_pg_mapping(p, end, &map->pg_upmap_items, NULL, true);
+}
+
/*
* decode a full map.
*/
@@ -1250,9 +1335,7 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
if (err)
goto bad;
} else {
- /* XXX can this happen? */
- kfree(map->osd_primary_affinity);
- map->osd_primary_affinity = NULL;
+ WARN_ON(map->osd_primary_affinity);
}
/* crush */
@@ -1261,6 +1344,26 @@ static int osdmap_decode(void **p, void *end, struct ceph_osdmap *map)
if (err)
goto bad;
+ *p += len;
+ if (struct_v >= 3) {
+ /* erasure_code_profiles */
+ ceph_decode_skip_map_of_map(p, end, string, string, string,
+ bad);
+ }
+
+ if (struct_v >= 4) {
+ err = decode_pg_upmap(p, end, map);
+ if (err)
+ goto bad;
+
+ err = decode_pg_upmap_items(p, end, map);
+ if (err)
+ goto bad;
+ } else {
+ WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap));
+ WARN_ON(!RB_EMPTY_ROOT(&map->pg_upmap_items));
+ }
+
/* ignore the rest */
*p = end;
@@ -1520,6 +1623,32 @@ struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end,
goto bad;
}
+ if (struct_v >= 3) {
+ /* new_erasure_code_profiles */
+ ceph_decode_skip_map_of_map(p, end, string, string, string,
+ bad);
+ /* old_erasure_code_profiles */
+ ceph_decode_skip_set(p, end, string, bad);
+ }
+
+ if (struct_v >= 4) {
+ err = decode_new_pg_upmap(p, end, map);
+ if (err)
+ goto bad;
+
+ err = decode_old_pg_upmap(p, end, map);
+ if (err)
+ goto bad;
+
+ err = decode_new_pg_upmap_items(p, end, map);
+ if (err)
+ goto bad;
+
+ err = decode_old_pg_upmap_items(p, end, map);
+ if (err)
+ goto bad;
+ }
+
/* ignore the rest */
*p = end;