summaryrefslogtreecommitdiff
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2020-06-08 12:49:18 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2020-06-08 12:49:18 -0700
commit95288a9b3beee8dd69d73b7691e36f2f231b7903 (patch)
treeb7bb598a516e5d7fa7dba040ac34bb0608b6388e /include
parentca687877e05ad1bf5b4cefd9cdd091044626deac (diff)
parentdc1dad8e1a612650b1e786e992cb0c6e101e226a (diff)
Merge tag 'ceph-for-5.8-rc1' of git://github.com/ceph/ceph-client
Pull ceph updates from Ilya Dryomov: "The highlights are: - OSD/MDS latency and caps cache metrics infrastructure for the filesytem (Xiubo Li). Currently available through debugfs and will be periodically sent to the MDS in the future. - support for replica reads (balanced and localized reads) for rbd and the filesystem (myself). The default remains to always read from primary, users can opt-in with the new crush_location and read_from_replica options. Note that reading from replica is safe for general use only since Octopus. - support for RADOS allocation hint flags (myself). Currently used by rbd to propagate the compressible/incompressible hint given with the new compression_hint map option and ready for passing on more advanced hints, e.g. based on fadvise() from the filesystem. - support for efficient cross-quota-realm renames (Luis Henriques) - assorted cap handling improvements and cleanups, particularly untangling some of the locking (Jeff Layton)" * tag 'ceph-for-5.8-rc1' of git://github.com/ceph/ceph-client: (29 commits) rbd: compression_hint option libceph: support for alloc hint flags libceph: read_from_replica option libceph: support for balanced and localized reads libceph: crush_location infrastructure libceph: decode CRUSH device/bucket types and names libceph: add non-asserting rbtree insertion helper ceph: skip checking caps when session reconnecting and releasing reqs ceph: make sure mdsc->mutex is nested in s->s_mutex to fix dead lock ceph: don't return -ESTALE if there's still an open file libceph, rbd: replace zero-length array with flexible-array ceph: allow rename operation under different quota realms ceph: normalize 'delta' parameter usage in check_quota_exceeded ceph: ceph_kick_flushing_caps needs the s_mutex ceph: request expedited service on session's last cap flush ceph: convert mdsc->cap_dirty to a per-session list ceph: reset i_requested_max_size if file write is not wanted ceph: throw a warning if we destroy session with mutex still locked ceph: fix potential race in ceph_check_caps ceph: document what protects i_dirty_item and i_flushing_item ...
Diffstat (limited to 'include')
-rw-r--r--include/linux/ceph/libceph.h13
-rw-r--r--include/linux/ceph/mon_client.h2
-rw-r--r--include/linux/ceph/osd_client.h8
-rw-r--r--include/linux/ceph/osdmap.h19
-rw-r--r--include/linux/ceph/rados.h14
-rw-r--r--include/linux/crush/crush.h14
6 files changed, 64 insertions, 6 deletions
diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h
index 525b7c3f1c81..2247e71beb83 100644
--- a/include/linux/ceph/libceph.h
+++ b/include/linux/ceph/libceph.h
@@ -53,6 +53,8 @@ struct ceph_options {
unsigned long osd_keepalive_timeout; /* jiffies */
unsigned long osd_request_timeout; /* jiffies */
+ u32 osd_req_flags; /* CEPH_OSD_FLAG_*, applied to each OSD request */
+
/*
* any type that can't be simply compared or doesn't need
* to be compared should go beyond this point,
@@ -64,6 +66,7 @@ struct ceph_options {
int num_mon;
char *name;
struct ceph_crypto_key *key;
+ struct rb_root crush_locs;
};
/*
@@ -188,7 +191,7 @@ static inline int calc_pages_for(u64 off, u64 len)
#define RB_CMP3WAY(a, b) ((a) < (b) ? -1 : (a) > (b))
#define DEFINE_RB_INSDEL_FUNCS2(name, type, keyfld, cmpexp, keyexp, nodefld) \
-static void insert_##name(struct rb_root *root, type *t) \
+static bool __insert_##name(struct rb_root *root, type *t) \
{ \
struct rb_node **n = &root->rb_node; \
struct rb_node *parent = NULL; \
@@ -206,11 +209,17 @@ static void insert_##name(struct rb_root *root, type *t) \
else if (cmp > 0) \
n = &(*n)->rb_right; \
else \
- BUG(); \
+ return false; \
} \
\
rb_link_node(&t->nodefld, parent, n); \
rb_insert_color(&t->nodefld, root); \
+ return true; \
+} \
+static void __maybe_unused insert_##name(struct rb_root *root, type *t) \
+{ \
+ if (!__insert_##name(root, t)) \
+ BUG(); \
} \
static void erase_##name(struct rb_root *root, type *t) \
{ \
diff --git a/include/linux/ceph/mon_client.h b/include/linux/ceph/mon_client.h
index dbb8a6959a73..ce4ffeb384d7 100644
--- a/include/linux/ceph/mon_client.h
+++ b/include/linux/ceph/mon_client.h
@@ -19,7 +19,7 @@ struct ceph_monmap {
struct ceph_fsid fsid;
u32 epoch;
u32 num_mon;
- struct ceph_entity_inst mon_inst[0];
+ struct ceph_entity_inst mon_inst[];
};
struct ceph_mon_client;
diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h
index 9d9f745b98a1..c60b59e9291b 100644
--- a/include/linux/ceph/osd_client.h
+++ b/include/linux/ceph/osd_client.h
@@ -8,6 +8,7 @@
#include <linux/mempool.h>
#include <linux/rbtree.h>
#include <linux/refcount.h>
+#include <linux/ktime.h>
#include <linux/ceph/types.h>
#include <linux/ceph/osdmap.h>
@@ -135,6 +136,7 @@ struct ceph_osd_req_op {
struct {
u64 expected_object_size;
u64 expected_write_size;
+ u32 flags; /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */
} alloc_hint;
struct {
u64 snapid;
@@ -164,6 +166,7 @@ struct ceph_osd_request_target {
bool recovery_deletes;
unsigned int flags; /* CEPH_OSD_FLAG_* */
+ bool used_replica;
bool paused;
u32 epoch;
@@ -213,6 +216,8 @@ struct ceph_osd_request {
/* internal */
unsigned long r_stamp; /* jiffies, send or check time */
unsigned long r_start_stamp; /* jiffies */
+ ktime_t r_start_latency; /* ktime_t */
+ ktime_t r_end_latency; /* ktime_t */
int r_attempts;
u32 r_map_dne_bound;
@@ -468,7 +473,8 @@ extern int osd_req_op_xattr_init(struct ceph_osd_request *osd_req, unsigned int
extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req,
unsigned int which,
u64 expected_object_size,
- u64 expected_write_size);
+ u64 expected_write_size,
+ u32 flags);
extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc,
struct ceph_snap_context *snapc,
diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h
index 5e601975745f..3f4498fef6ad 100644
--- a/include/linux/ceph/osdmap.h
+++ b/include/linux/ceph/osdmap.h
@@ -302,9 +302,26 @@ bool ceph_pg_to_primary_shard(struct ceph_osdmap *osdmap,
int ceph_pg_to_acting_primary(struct ceph_osdmap *osdmap,
const struct ceph_pg *raw_pgid);
+struct crush_loc {
+ char *cl_type_name;
+ char *cl_name;
+};
+
+struct crush_loc_node {
+ struct rb_node cl_node;
+ struct crush_loc cl_loc; /* pointers into cl_data */
+ char cl_data[];
+};
+
+int ceph_parse_crush_location(char *crush_location, struct rb_root *locs);
+int ceph_compare_crush_locs(struct rb_root *locs1, struct rb_root *locs2);
+void ceph_clear_crush_locs(struct rb_root *locs);
+
+int ceph_get_crush_locality(struct ceph_osdmap *osdmap, int id,
+ struct rb_root *locs);
+
extern struct ceph_pg_pool_info *ceph_pg_pool_by_id(struct ceph_osdmap *map,
u64 id);
-
extern const char *ceph_pg_pool_name_by_id(struct ceph_osdmap *map, u64 id);
extern int ceph_pg_poolid_by_name(struct ceph_osdmap *map, const char *name);
u64 ceph_pg_pool_flags(struct ceph_osdmap *map, u64 id);
diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h
index 88ed3c5c04c5..3a518fd0eaad 100644
--- a/include/linux/ceph/rados.h
+++ b/include/linux/ceph/rados.h
@@ -465,6 +465,19 @@ enum {
const char *ceph_osd_watch_op_name(int o);
enum {
+ CEPH_OSD_ALLOC_HINT_FLAG_SEQUENTIAL_WRITE = 1,
+ CEPH_OSD_ALLOC_HINT_FLAG_RANDOM_WRITE = 2,
+ CEPH_OSD_ALLOC_HINT_FLAG_SEQUENTIAL_READ = 4,
+ CEPH_OSD_ALLOC_HINT_FLAG_RANDOM_READ = 8,
+ CEPH_OSD_ALLOC_HINT_FLAG_APPEND_ONLY = 16,
+ CEPH_OSD_ALLOC_HINT_FLAG_IMMUTABLE = 32,
+ CEPH_OSD_ALLOC_HINT_FLAG_SHORTLIVED = 64,
+ CEPH_OSD_ALLOC_HINT_FLAG_LONGLIVED = 128,
+ CEPH_OSD_ALLOC_HINT_FLAG_COMPRESSIBLE = 256,
+ CEPH_OSD_ALLOC_HINT_FLAG_INCOMPRESSIBLE = 512,
+};
+
+enum {
CEPH_OSD_BACKOFF_OP_BLOCK = 1,
CEPH_OSD_BACKOFF_OP_ACK_BLOCK = 2,
CEPH_OSD_BACKOFF_OP_UNBLOCK = 3,
@@ -517,6 +530,7 @@ struct ceph_osd_op {
struct {
__le64 expected_object_size;
__le64 expected_write_size;
+ __le32 flags; /* CEPH_OSD_OP_ALLOC_HINT_FLAG_* */
} __attribute__ ((packed)) alloc_hint;
struct {
__le64 snapid;
diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h
index 54741295c70b..33c16f2de7f6 100644
--- a/include/linux/crush/crush.h
+++ b/include/linux/crush/crush.h
@@ -87,7 +87,7 @@ struct crush_rule_mask {
struct crush_rule {
__u32 len;
struct crush_rule_mask mask;
- struct crush_rule_step steps[0];
+ struct crush_rule_step steps[];
};
#define crush_rule_size(len) (sizeof(struct crush_rule) + \
@@ -301,6 +301,12 @@ struct crush_map {
__u32 *choose_tries;
#else
+ /* device/bucket type id -> type name (CrushWrapper::type_map) */
+ struct rb_root type_names;
+
+ /* device/bucket id -> name (CrushWrapper::name_map) */
+ struct rb_root names;
+
/* CrushWrapper::choose_args */
struct rb_root choose_args;
#endif
@@ -342,4 +348,10 @@ struct crush_work {
struct crush_work_bucket **work; /* Per-bucket working store */
};
+#ifdef __KERNEL__
+/* osdmap.c */
+void clear_crush_names(struct rb_root *root);
+void clear_choose_args(struct crush_map *c);
+#endif
+
#endif