diff options
author | Alexander Aring <aahringo@redhat.com> | 2024-03-28 11:48:41 -0400 |
---|---|---|
committer | David Teigland <teigland@redhat.com> | 2024-04-01 13:31:12 -0500 |
commit | 986ae3c2a8dfc1e229cabe9cc2e0b01b721c8980 (patch) | |
tree | 6c036bb2a5ba03325b2c6c4ecca07d74c85bf958 /fs/dlm/dlm_internal.h | |
parent | 0175e51b5134b55c89364aae68ec16271c67e472 (diff) |
dlm: fix race between final callback and remove
This patch fixes the following issue:
node 1 is dir
node 2 is master
node 3 is other
1->2: unlock
2: put final lkb, rsb moved to toss
2->1: unlock_reply
1: queue lkb callback with EUNLOCK
2->1: remove
1: receive_remove ignored (rsb on keep because of queued lkb callback)
1: complete lkb callback, put_lkb, move rsb to toss
3->1: lookup
1->3: lookup_reply master=2
3->2: request
2->3: request_reply EBADR
In summary:
An unexpected lkb reference causes the rsb to remain on the wrong list.
The rsb being on the wrong list causes receive_remove to be ignored.
An ignored receive_remove causes inconsistent dir and master state.
This sequence requires an unusually long delay in delivering the unlock
callback, because the remove message from 2->1 usually happens after
some seconds. So, it's not known exactly how frequently this sequence
occurs in pratice. It's possible that the same end result could also
have another unknown cause.
The solution for this issue is to further separate callback state
from the lkb, so that an lkb reference (and from that, an rsb ref)
are not held while a callback remains queued. Then, within the
unlock_reply, the lkb will be freed and the rsb moved to the toss
list. So, the receive_remove will not be ignored.
Signed-off-by: Alexander Aring <aahringo@redhat.com>
Signed-off-by: David Teigland <teigland@redhat.com>
Diffstat (limited to 'fs/dlm/dlm_internal.h')
-rw-r--r-- | fs/dlm/dlm_internal.h | 60 |
1 files changed, 36 insertions, 24 deletions
diff --git a/fs/dlm/dlm_internal.h b/fs/dlm/dlm_internal.h index a9137c90f348..cda1526fd15b 100644 --- a/fs/dlm/dlm_internal.h +++ b/fs/dlm/dlm_internal.h @@ -16,6 +16,7 @@ * This is the main header file to be included in each DLM source file. */ +#include <uapi/linux/dlm_device.h> #include <linux/slab.h> #include <linux/sched.h> #include <linux/types.h> @@ -204,8 +205,7 @@ struct dlm_args { #define DLM_IFL_OVERLAP_CANCEL_BIT 20 #define DLM_IFL_ENDOFLIFE_BIT 21 #define DLM_IFL_DEADLOCK_CANCEL_BIT 24 -#define DLM_IFL_CB_PENDING_BIT 25 -#define __DLM_IFL_MAX_BIT DLM_IFL_CB_PENDING_BIT +#define __DLM_IFL_MAX_BIT DLM_IFL_DEADLOCK_CANCEL_BIT /* lkb_dflags */ @@ -217,12 +217,45 @@ struct dlm_args { #define DLM_CB_CAST 0x00000001 #define DLM_CB_BAST 0x00000002 +/* much of this is just saving user space pointers associated with the + * lock that we pass back to the user lib with an ast + */ + +struct dlm_user_args { + struct dlm_user_proc *proc; /* each process that opens the lockspace + * device has private data + * (dlm_user_proc) on the struct file, + * the process's locks point back to it + */ + struct dlm_lksb lksb; + struct dlm_lksb __user *user_lksb; + void __user *castparam; + void __user *castaddr; + void __user *bastparam; + void __user *bastaddr; + uint64_t xid; +}; + struct dlm_callback { uint32_t flags; /* DLM_CBF_ */ int sb_status; /* copy to lksb status */ uint8_t sb_flags; /* copy to lksb flags */ int8_t mode; /* rq mode of bast, gr mode of cast */ - int copy_lvb; + bool copy_lvb; + struct dlm_lksb *lkb_lksb; + unsigned char lvbptr[DLM_USER_LVB_LEN]; + + union { + void *astparam; /* caller's ast arg */ + struct dlm_user_args ua; + }; + struct work_struct work; + void (*bastfn)(void *astparam, int mode); + void (*astfn)(void *astparam); + char res_name[DLM_RESNAME_MAXLEN]; + size_t res_length; + uint32_t ls_id; + uint32_t lkb_id; struct list_head list; struct kref ref; @@ -256,10 +289,6 @@ struct dlm_lkb { struct list_head lkb_ownqueue; /* list of locks for a process */ ktime_t lkb_timestamp; - spinlock_t lkb_cb_lock; - struct work_struct lkb_cb_work; - struct list_head lkb_cb_list; /* for ls_cb_delay or proc->asts */ - struct list_head lkb_callbacks; struct dlm_callback *lkb_last_cast; struct dlm_callback *lkb_last_cb; int lkb_last_bast_mode; @@ -688,23 +717,6 @@ struct dlm_ls { #define LSFL_CB_DELAY 9 #define LSFL_NODIR 10 -/* much of this is just saving user space pointers associated with the - lock that we pass back to the user lib with an ast */ - -struct dlm_user_args { - struct dlm_user_proc *proc; /* each process that opens the lockspace - device has private data - (dlm_user_proc) on the struct file, - the process's locks point back to it*/ - struct dlm_lksb lksb; - struct dlm_lksb __user *user_lksb; - void __user *castparam; - void __user *castaddr; - void __user *bastparam; - void __user *bastaddr; - uint64_t xid; -}; - #define DLM_PROC_FLAGS_CLOSING 1 #define DLM_PROC_FLAGS_COMPAT 2 |