From ebd50e2947de9d2675b800a6a29748d0ed7d7fd4 Mon Sep 17 00:00:00 2001 From: Alan Stern Date: Wed, 16 Nov 2022 15:48:01 -0500 Subject: tools: memory-model: Add rmw-sequences to the LKMM Viktor (as relayed by Jonas) has pointed out a weakness in the Linux Kernel Memory Model. Namely, the memory ordering properties of atomic operations are not monotonic: An atomic op with full-barrier semantics does not always provide ordering as strong as one with release-barrier semantics. The following litmus test illustrates the problem: -------------------------------------------------- C atomics-not-monotonic {} P0(int *x, atomic_t *y) { WRITE_ONCE(*x, 1); smp_wmb(); atomic_set(y, 1); } P1(atomic_t *y) { int r1; r1 = atomic_inc_return(y); } P2(int *x, atomic_t *y) { int r2; int r3; r2 = atomic_read(y); smp_rmb(); r3 = READ_ONCE(*x); } exists (2:r2=2 /\ 2:r3=0) -------------------------------------------------- The litmus test is allowed as shown with atomic_inc_return(), which has full-barrier semantics. But if the operation is changed to atomic_inc_return_release(), which only has release-barrier semantics, the litmus test is forbidden. Clearly this violates monotonicity. The reason is because the LKMM treats full-barrier atomic ops as if they were written: mb(); load(); store(); mb(); (where the load() and store() are the two parts of an atomic RMW op), whereas it treats release-barrier atomic ops as if they were written: load(); release_barrier(); store(); The difference is that here the release barrier orders the load part of the atomic op before the store part with A-cumulativity, whereas the mb()'s above do not. This means that release-barrier atomics can effectively extend the cumul-fence relation but full-barrier atomics cannot. To resolve this problem we introduce the rmw-sequence relation, representing an arbitrarily long sequence of atomic RMW operations in which each operation reads from the previous one, and explicitly allow it to extend cumul-fence. This modification of the memory model is sound; it holds for PPC because of B-cumulativity, it holds for TSO and ARM64 because of other-multicopy atomicity, and we can assume that atomic ops on all other architectures will be implemented so as to make it hold for them. For similar reasons we also allow rmw-sequence to extend the w-post-bounded relation, which is analogous to cumul-fence in some ways. Reported-by: Viktor Vafeiadis Signed-off-by: Alan Stern Reviewed-by: Jonas Oberhauser Signed-off-by: Paul E. McKenney --- tools/memory-model/Documentation/explanation.txt | 30 ++++++++++++++++++++++++ tools/memory-model/linux-kernel.cat | 5 ++-- 2 files changed, 33 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt index 11a1d2d4f681..e901b47236c3 100644 --- a/tools/memory-model/Documentation/explanation.txt +++ b/tools/memory-model/Documentation/explanation.txt @@ -1007,6 +1007,36 @@ order. Equivalently, where the rmw relation links the read and write events making up each atomic update. This is what the LKMM's "atomic" axiom says. +Atomic rmw updates play one more role in the LKMM: They can form "rmw +sequences". An rmw sequence is simply a bunch of atomic updates where +each update reads from the previous one. Written using events, it +looks like this: + + Z0 ->rf Y1 ->rmw Z1 ->rf ... ->rf Yn ->rmw Zn, + +where Z0 is some store event and n can be any number (even 0, in the +degenerate case). We write this relation as: Z0 ->rmw-sequence Zn. +Note that this implies Z0 and Zn are stores to the same variable. + +Rmw sequences have a special property in the LKMM: They can extend the +cumul-fence relation. That is, if we have: + + U ->cumul-fence X -> rmw-sequence Y + +then also U ->cumul-fence Y. Thinking about this in terms of the +operational model, U ->cumul-fence X says that the store U propagates +to each CPU before the store X does. Then the fact that X and Y are +linked by an rmw sequence means that U also propagates to each CPU +before Y does. In an analogous way, rmw sequences can also extend +the w-post-bounded relation defined below in the PLAIN ACCESSES AND +DATA RACES section. + +(The notion of rmw sequences in the LKMM is similar to, but not quite +the same as, that of release sequences in the C11 memory model. They +were added to the LKMM to fix an obscure bug; without them, atomic +updates with full-barrier semantics did not always guarantee ordering +at least as strong as atomic updates with release-barrier semantics.) + THE PRESERVED PROGRAM ORDER RELATION: ppo ----------------------------------------- diff --git a/tools/memory-model/linux-kernel.cat b/tools/memory-model/linux-kernel.cat index d70315fddef6..07f884f9b2bf 100644 --- a/tools/memory-model/linux-kernel.cat +++ b/tools/memory-model/linux-kernel.cat @@ -74,8 +74,9 @@ let ppo = to-r | to-w | fence | (po-unlock-lock-po & int) (* Propagation: Ordering from release operations and strong fences. *) let A-cumul(r) = (rfe ; [Marked])? ; r +let rmw-sequence = (rf ; rmw)* let cumul-fence = [Marked] ; (A-cumul(strong-fence | po-rel) | wmb | - po-unlock-lock-po) ; [Marked] + po-unlock-lock-po) ; [Marked] ; rmw-sequence let prop = [Marked] ; (overwrite & ext)? ; cumul-fence* ; [Marked] ; rfe? ; [Marked] @@ -174,7 +175,7 @@ let vis = cumul-fence* ; rfe? ; [Marked] ; let w-pre-bounded = [Marked] ; (addr | fence)? let r-pre-bounded = [Marked] ; (addr | nonrw-fence | ([R4rmb] ; fencerel(Rmb) ; [~Noreturn]))? -let w-post-bounded = fence? ; [Marked] +let w-post-bounded = fence? ; [Marked] ; rmw-sequence let r-post-bounded = (nonrw-fence | ([~Noreturn] ; fencerel(Rmb) ; [R4rmb]))? ; [Marked] -- cgit v1.2.3-70-g09d2 From 9ba7d3b3b826ef47c1b7b8dbc2d57da868168128 Mon Sep 17 00:00:00 2001 From: Jonas Oberhauser Date: Fri, 2 Dec 2022 13:51:00 +0100 Subject: tools: memory-model: Make plain accesses carry dependencies As reported by Viktor, plain accesses in LKMM are weaker than accesses to registers: the latter carry dependencies but the former do not. This is exemplified in the following snippet: int r = READ_ONCE(*x); WRITE_ONCE(*y, r); Here a data dependency links the READ_ONCE() to the WRITE_ONCE(), preserving their order, because the model treats r as a register. If r is turned into a memory location accessed by plain accesses, however, the link is broken and the order between READ_ONCE() and WRITE_ONCE() is no longer preserved. This is too conservative, since any optimizations on plain accesses that might break dependencies are also possible on registers; it also contradicts the intuitive notion of "dependency" as the data stored by the WRITE_ONCE() does depend on the data read by the READ_ONCE(), independently of whether r is a register or a memory location. This is resolved by redefining all dependencies to include dependencies carried by memory accesses; a dependency is said to be carried by memory accesses (in the model: carry-dep) from one load to another load if the initial load is followed by an arbitrarily long sequence alternating between stores and loads of the same thread, where the data of each store depends on the previous load, and is read by the next load. Any dependency linking the final load in the sequence to another access also links the initial load in the sequence to that access. More deep details can be found in this LKML discussion: https://lore.kernel.org/lkml/d86295788ad14a02874ab030ddb8a6f8@huawei.com/ Reported-by: Viktor Vafeiadis Signed-off-by: Jonas Oberhauser Reviewed-by: Alan Stern Signed-off-by: Paul E. McKenney --- tools/memory-model/Documentation/explanation.txt | 9 ++++++- tools/memory-model/linux-kernel.bell | 6 +++++ tools/memory-model/litmus-tests/dep+plain.litmus | 31 ++++++++++++++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 tools/memory-model/litmus-tests/dep+plain.litmus (limited to 'tools') diff --git a/tools/memory-model/Documentation/explanation.txt b/tools/memory-model/Documentation/explanation.txt index e901b47236c3..8e7085238470 100644 --- a/tools/memory-model/Documentation/explanation.txt +++ b/tools/memory-model/Documentation/explanation.txt @@ -2575,7 +2575,7 @@ smp_store_release() -- which is basically how the Linux kernel treats them. Although we said that plain accesses are not linked by the ppo -relation, they do contribute to it indirectly. Namely, when there is +relation, they do contribute to it indirectly. Firstly, when there is an address dependency from a marked load R to a plain store W, followed by smp_wmb() and then a marked store W', the LKMM creates a ppo link from R to W'. The reasoning behind this is perhaps a little @@ -2584,6 +2584,13 @@ for this source code in which W' could execute before R. Just as with pre-bounding by address dependencies, it is possible for the compiler to undermine this relation if sufficient care is not taken. +Secondly, plain accesses can carry dependencies: If a data dependency +links a marked load R to a store W, and the store is read by a load R' +from the same thread, then the data loaded by R' depends on the data +loaded originally by R. Thus, if R' is linked to any access X by a +dependency, R is also linked to access X by the same dependency, even +if W' or R' (or both!) are plain. + There are a few oddball fences which need special treatment: smp_mb__before_atomic(), smp_mb__after_atomic(), and smp_mb__after_spinlock(). The LKMM uses fence events with special diff --git a/tools/memory-model/linux-kernel.bell b/tools/memory-model/linux-kernel.bell index 5be86b1025e8..70a9073dec3e 100644 --- a/tools/memory-model/linux-kernel.bell +++ b/tools/memory-model/linux-kernel.bell @@ -82,3 +82,9 @@ flag ~empty different-values(srcu-rscs) as srcu-bad-nesting let Marked = (~M) | IW | Once | Release | Acquire | domain(rmw) | range(rmw) | LKR | LKW | UL | LF | RL | RU let Plain = M \ Marked + +(* Redefine dependencies to include those carried through plain accesses *) +let carry-dep = (data ; rfi)* +let addr = carry-dep ; addr +let ctrl = carry-dep ; ctrl +let data = carry-dep ; data diff --git a/tools/memory-model/litmus-tests/dep+plain.litmus b/tools/memory-model/litmus-tests/dep+plain.litmus new file mode 100644 index 000000000000..ebf84daa9a59 --- /dev/null +++ b/tools/memory-model/litmus-tests/dep+plain.litmus @@ -0,0 +1,31 @@ +C dep+plain + +(* + * Result: Never + * + * This litmus test demonstrates that in LKMM, plain accesses + * carry dependencies much like accesses to registers: + * The data stored to *z1 and *z2 by P0() originates from P0()'s + * READ_ONCE(), and therefore using that data to compute the + * conditional of P0()'s if-statement creates a control dependency + * from that READ_ONCE() to P0()'s WRITE_ONCE(). + *) + +{} + +P0(int *x, int *y, int *z1, int *z2) +{ + int a = READ_ONCE(*x); + *z1 = a; + *z2 = *z1; + if (*z2 == 1) + WRITE_ONCE(*y, 1); +} + +P1(int *x, int *y) +{ + int r = smp_load_acquire(y); + smp_store_release(x, r); +} + +exists (x=1 /\ y=1) -- cgit v1.2.3-70-g09d2