summary refs log tree commit diff
diff options
context:
space:
mode:
author Paolo Abeni <pabeni@redhat.com> 2023-02-02 12:47:28 +0100
committer Paolo Abeni <pabeni@redhat.com> 2023-02-02 12:47:28 +0100
commit a8248fc4ad9b815c9345deb73873cc72a543d148 (patch)
tree 6cc0fda009facb8017f1c6842fb205bafb00fd2f
parent 609aa68d60965f70485655def733d533f99b341b (diff)
parent 550130a0ce303f7cd754c7067b0a971ca179db63 (diff)
Merge tag 'rxrpc-next-20230131' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs
David Howells says: ==================== Here's the fifth part of patches in the process of moving rxrpc from doing a lot of its stuff in softirq context to doing it in an I/O thread in process context and thereby making it easier to support a larger SACK table. The full description is in the description for the first part[1] which is now upstream. The second and third parts are also upstream[2]. A subset of the original fourth part[3] got applied as a fix for a race[4]. The fifth part includes some cleanups: (1) Miscellaneous trace header cleanups: fix a trace string, display the security index in rx_packet rather than displaying the type twice, remove some whitespace to make checkpatch happier and remove some excess tabulation. (2) Convert ->recvmsg_lock to a spinlock as it's only ever locked exclusively. (3) Make ->ackr_window and ->ackr_nr_unacked non-atomic as they're only used in the I/O thread. (4) Don't use call->tx_lock to access ->tx_buffer as that is only accessed inside the I/O thread. sendmsg() loads onto ->tx_sendmsg and the I/O thread decants from that to the buffer. (5) Remove local->defrag_sem as DATA packets are transmitted serially by the I/O thread. (6) Remove the service connection bundle as it was only used for its channel_lock - which has now gone. And some more significant changes: (7) Add a debugging option to allow a delay to be injected into packet reception to help investigate the behaviour over longer links than just a few cm. (8) Generate occasional PING ACKs to probe for RTT information during a receive-heavy call. (9) Simplify the SACK table maintenance and ACK generation. Now that both parts are done in the same thread, there's no possibility of a race and no need to try and be cunning to avoid taking a BH spinlock whilst copying the SACK table (which in the future will be up to 2K) and no need to rotate the copy to fit the ACK packet table. (10) Use SKB_CONSUMED when freeing received DATA packets (stop dropwatch complaining).
* tag 'rxrpc-next-20230131' of git://git.kernel.org/pub/scm/linux/kernel/git/dhowells/linux-fs: rxrpc: Kill service bundle rxrpc: Change rx_packet tracepoint to display securityIndex not type twice rxrpc: Show consumed and freed packets as non-dropped in dropwatch rxrpc: Remove local->defrag_sem rxrpc: Don't lock call->tx_lock to access call->tx_buffer rxrpc: Simplify ACK handling rxrpc: De-atomic call->ackr_window and call->ackr_nr_unacked rxrpc: Generate extra pings for RTT during heavy-receive call rxrpc: Allow a delay to be injected into packet reception rxrpc: Convert call->recvmsg_lock to a spinlock rxrpc: Shrink the tabulation in the rxrpc trace header a bit rxrpc: Remove whitespace before ')' in trace header rxrpc: Fix trace string ==================== Link: https://lore.kernel.org/all/20230131171227.3912130-1-dhowells@redhat.com/ Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-rw-r--r-- include/trace/events/rxrpc.h 480
-rw-r--r-- net/rxrpc/Kconfig 9
-rw-r--r-- net/rxrpc/af_rxrpc.c 2
-rw-r--r-- net/rxrpc/ar-internal.h 15
-rw-r--r-- net/rxrpc/call_accept.c 2
-rw-r--r-- net/rxrpc/call_event.c 15
-rw-r--r-- net/rxrpc/call_object.c 7
-rw-r--r-- net/rxrpc/conn_service.c 7
-rw-r--r-- net/rxrpc/input.c 60
-rw-r--r-- net/rxrpc/io_thread.c 48
-rw-r--r-- net/rxrpc/local_object.c 7
-rw-r--r-- net/rxrpc/misc.c 7
-rw-r--r-- net/rxrpc/output.c 69
-rw-r--r-- net/rxrpc/proc.c 4
-rw-r--r-- net/rxrpc/recvmsg.c 18
-rw-r--r-- net/rxrpc/skbuff.c 4
-rw-r--r-- net/rxrpc/sysctl.c 17
-rw-r--r-- net/rxrpc/txbuf.c 12
18 files changed, 438 insertions, 345 deletions
diff --git a/include/trace/events/rxrpc.h b/include/trace/events/rxrpc.h
index 283db0ea3db4..d7bb4acf4580 100644
--- a/include/trace/events/rxrpc.h
+++ b/include/trace/events/rxrpc.h
@@ -163,7 +163,7 @@
EM(rxrpc_local_put_for_use, "PUT for-use ") \
EM(rxrpc_local_put_kill_conn, "PUT conn-kil") \
EM(rxrpc_local_put_peer, "PUT peer ") \
- EM(rxrpc_local_put_prealloc_conn, "PUT conn-pre") \
+ EM(rxrpc_local_put_prealloc_peer, "PUT peer-pre") \
EM(rxrpc_local_put_release_sock, "PUT rel-sock") \
EM(rxrpc_local_stop, "STOP ") \
EM(rxrpc_local_stopped, "STOPPED ") \
@@ -360,11 +360,12 @@
EM(rxrpc_propose_ack_client_tx_end, "ClTxEnd") \
EM(rxrpc_propose_ack_input_data, "DataIn ") \
EM(rxrpc_propose_ack_input_data_hole, "DataInH") \
- EM(rxrpc_propose_ack_ping_for_check_life, "ChkLife") \
EM(rxrpc_propose_ack_ping_for_keepalive, "KeepAlv") \
EM(rxrpc_propose_ack_ping_for_lost_ack, "LostAck") \
EM(rxrpc_propose_ack_ping_for_lost_reply, "LostRpl") \
+ EM(rxrpc_propose_ack_ping_for_old_rtt, "OldRtt ") \
EM(rxrpc_propose_ack_ping_for_params, "Params ") \
+ EM(rxrpc_propose_ack_ping_for_rtt, "Rtt ") \
EM(rxrpc_propose_ack_processing_op, "ProcOp ") \
EM(rxrpc_propose_ack_respond_to_ack, "Rsp2Ack") \
EM(rxrpc_propose_ack_respond_to_ping, "Rsp2Png") \
@@ -421,6 +422,13 @@
EM(RXRPC_ACK_IDLE, "IDL") \
E_(RXRPC_ACK__INVALID, "-?-")
+#define rxrpc_sack_traces \
+ EM(rxrpc_sack_advance, "ADV") \
+ EM(rxrpc_sack_fill, "FIL") \
+ EM(rxrpc_sack_nack, "NAK") \
+ EM(rxrpc_sack_none, "---") \
+ E_(rxrpc_sack_oos, "OOS")
+
#define rxrpc_completions \
EM(RXRPC_CALL_SUCCEEDED, "Succeeded") \
EM(RXRPC_CALL_REMOTELY_ABORTED, "RemoteAbort") \
@@ -496,6 +504,7 @@ enum rxrpc_recvmsg_trace { rxrpc_recvmsg_traces } __mode(byte);
enum rxrpc_req_ack_trace { rxrpc_req_ack_traces } __mode(byte);
enum rxrpc_rtt_rx_trace { rxrpc_rtt_rx_traces } __mode(byte);
enum rxrpc_rtt_tx_trace { rxrpc_rtt_tx_traces } __mode(byte);
+enum rxrpc_sack_trace { rxrpc_sack_traces } __mode(byte);
enum rxrpc_skb_trace { rxrpc_skb_traces } __mode(byte);
enum rxrpc_timer_trace { rxrpc_timer_traces } __mode(byte);
enum rxrpc_tx_point { rxrpc_tx_points } __mode(byte);
@@ -530,6 +539,7 @@ rxrpc_recvmsg_traces;
rxrpc_req_ack_traces;
rxrpc_rtt_rx_traces;
rxrpc_rtt_tx_traces;
+rxrpc_sack_traces;
rxrpc_skb_traces;
rxrpc_timer_traces;
rxrpc_tx_points;
@@ -552,10 +562,10 @@ TRACE_EVENT(rxrpc_local,
TP_ARGS(local_debug_id, op, ref, usage),
TP_STRUCT__entry(
- __field(unsigned int, local )
- __field(int, op )
- __field(int, ref )
- __field(int, usage )
+ __field(unsigned int, local)
+ __field(int, op)
+ __field(int, ref)
+ __field(int, usage)
),
TP_fast_assign(
@@ -578,9 +588,9 @@ TRACE_EVENT(rxrpc_peer,
TP_ARGS(peer_debug_id, ref, why),
TP_STRUCT__entry(
- __field(unsigned int, peer )
- __field(int, ref )
- __field(enum rxrpc_peer_trace, why )
+ __field(unsigned int, peer)
+ __field(int, ref)
+ __field(enum rxrpc_peer_trace, why)
),
TP_fast_assign(
@@ -601,9 +611,9 @@ TRACE_EVENT(rxrpc_bundle,
TP_ARGS(bundle_debug_id, ref, why),
TP_STRUCT__entry(
- __field(unsigned int, bundle )
- __field(int, ref )
- __field(int, why )
+ __field(unsigned int, bundle)
+ __field(int, ref)
+ __field(int, why)
),
TP_fast_assign(
@@ -624,9 +634,9 @@ TRACE_EVENT(rxrpc_conn,
TP_ARGS(conn_debug_id, ref, why),
TP_STRUCT__entry(
- __field(unsigned int, conn )
- __field(int, ref )
- __field(int, why )
+ __field(unsigned int, conn)
+ __field(int, ref)
+ __field(int, why)
),
TP_fast_assign(
@@ -648,11 +658,11 @@ TRACE_EVENT(rxrpc_client,
TP_ARGS(conn, channel, op),
TP_STRUCT__entry(
- __field(unsigned int, conn )
- __field(u32, cid )
- __field(int, channel )
- __field(int, usage )
- __field(enum rxrpc_client_trace, op )
+ __field(unsigned int, conn)
+ __field(u32, cid)
+ __field(int, channel)
+ __field(int, usage)
+ __field(enum rxrpc_client_trace, op)
),
TP_fast_assign(
@@ -678,10 +688,10 @@ TRACE_EVENT(rxrpc_call,
TP_ARGS(call_debug_id, ref, aux, why),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(int, ref )
- __field(int, why )
- __field(unsigned long, aux )
+ __field(unsigned int, call)
+ __field(int, ref)
+ __field(int, why)
+ __field(unsigned long, aux)
),
TP_fast_assign(
@@ -705,10 +715,10 @@ TRACE_EVENT(rxrpc_skb,
TP_ARGS(skb, usage, mod_count, why),
TP_STRUCT__entry(
- __field(struct sk_buff *, skb )
- __field(int, usage )
- __field(int, mod_count )
- __field(enum rxrpc_skb_trace, why )
+ __field(struct sk_buff *, skb)
+ __field(int, usage)
+ __field(int, mod_count)
+ __field(enum rxrpc_skb_trace, why)
),
TP_fast_assign(
@@ -731,7 +741,7 @@ TRACE_EVENT(rxrpc_rx_packet,
TP_ARGS(sp),
TP_STRUCT__entry(
- __field_struct(struct rxrpc_host_header, hdr )
+ __field_struct(struct rxrpc_host_header, hdr)
),
TP_fast_assign(
@@ -742,9 +752,8 @@ TRACE_EVENT(rxrpc_rx_packet,
__entry->hdr.epoch, __entry->hdr.cid,
__entry->hdr.callNumber, __entry->hdr.serviceId,
__entry->hdr.serial, __entry->hdr.seq,
- __entry->hdr.type, __entry->hdr.flags,
- __entry->hdr.type <= 15 ?
- __print_symbolic(__entry->hdr.type, rxrpc_pkts) : "?UNK")
+ __entry->hdr.securityIndex, __entry->hdr.flags,
+ __print_symbolic(__entry->hdr.type, rxrpc_pkts))
);
TRACE_EVENT(rxrpc_rx_done,
@@ -753,8 +762,8 @@ TRACE_EVENT(rxrpc_rx_done,
TP_ARGS(result, abort_code),
TP_STRUCT__entry(
- __field(int, result )
- __field(int, abort_code )
+ __field(int, result)
+ __field(int, abort_code)
),
TP_fast_assign(
@@ -772,13 +781,13 @@ TRACE_EVENT(rxrpc_abort,
TP_ARGS(call_nr, why, cid, call_id, seq, abort_code, error),
TP_STRUCT__entry(
- __field(unsigned int, call_nr )
- __field(enum rxrpc_abort_reason, why )
- __field(u32, cid )
- __field(u32, call_id )
- __field(rxrpc_seq_t, seq )
- __field(int, abort_code )
- __field(int, error )
+ __field(unsigned int, call_nr)
+ __field(enum rxrpc_abort_reason, why)
+ __field(u32, cid)
+ __field(u32, call_id)
+ __field(rxrpc_seq_t, seq)
+ __field(int, abort_code)
+ __field(int, error)
),
TP_fast_assign(
@@ -804,10 +813,10 @@ TRACE_EVENT(rxrpc_call_complete,
TP_ARGS(call),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum rxrpc_call_completion, compl )
- __field(int, error )
- __field(u32, abort_code )
+ __field(unsigned int, call)
+ __field(enum rxrpc_call_completion, compl)
+ __field(int, error)
+ __field(u32, abort_code)
),
TP_fast_assign(
@@ -830,13 +839,13 @@ TRACE_EVENT(rxrpc_txqueue,
TP_ARGS(call, why),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum rxrpc_txqueue_trace, why )
- __field(rxrpc_seq_t, acks_hard_ack )
- __field(rxrpc_seq_t, tx_bottom )
- __field(rxrpc_seq_t, tx_top )
- __field(rxrpc_seq_t, tx_prepared )
- __field(int, tx_winsize )
+ __field(unsigned int, call)
+ __field(enum rxrpc_txqueue_trace, why)
+ __field(rxrpc_seq_t, acks_hard_ack)
+ __field(rxrpc_seq_t, tx_bottom)
+ __field(rxrpc_seq_t, tx_top)
+ __field(rxrpc_seq_t, tx_prepared)
+ __field(int, tx_winsize)
),
TP_fast_assign(
@@ -867,10 +876,10 @@ TRACE_EVENT(rxrpc_rx_data,
TP_ARGS(call, seq, serial, flags),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(rxrpc_seq_t, seq )
- __field(rxrpc_serial_t, serial )
- __field(u8, flags )
+ __field(unsigned int, call)
+ __field(rxrpc_seq_t, seq)
+ __field(rxrpc_serial_t, serial)
+ __field(u8, flags)
),
TP_fast_assign(
@@ -895,13 +904,13 @@ TRACE_EVENT(rxrpc_rx_ack,
TP_ARGS(call, serial, ack_serial, first, prev, reason, n_acks),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(rxrpc_serial_t, serial )
- __field(rxrpc_serial_t, ack_serial )
- __field(rxrpc_seq_t, first )
- __field(rxrpc_seq_t, prev )
- __field(u8, reason )
- __field(u8, n_acks )
+ __field(unsigned int, call)
+ __field(rxrpc_serial_t, serial)
+ __field(rxrpc_serial_t, ack_serial)
+ __field(rxrpc_seq_t, first)
+ __field(rxrpc_seq_t, prev)
+ __field(u8, reason)
+ __field(u8, n_acks)
),
TP_fast_assign(
@@ -931,9 +940,9 @@ TRACE_EVENT(rxrpc_rx_abort,
TP_ARGS(call, serial, abort_code),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(rxrpc_serial_t, serial )
- __field(u32, abort_code )
+ __field(unsigned int, call)
+ __field(rxrpc_serial_t, serial)
+ __field(u32, abort_code)
),
TP_fast_assign(
@@ -955,11 +964,11 @@ TRACE_EVENT(rxrpc_rx_challenge,
TP_ARGS(conn, serial, version, nonce, min_level),
TP_STRUCT__entry(
- __field(unsigned int, conn )
- __field(rxrpc_serial_t, serial )
- __field(u32, version )
- __field(u32, nonce )
- __field(u32, min_level )
+ __field(unsigned int, conn)
+ __field(rxrpc_serial_t, serial)
+ __field(u32, version)
+ __field(u32, nonce)
+ __field(u32, min_level)
),
TP_fast_assign(
@@ -985,11 +994,11 @@ TRACE_EVENT(rxrpc_rx_response,
TP_ARGS(conn, serial, version, kvno, ticket_len),
TP_STRUCT__entry(
- __field(unsigned int, conn )
- __field(rxrpc_serial_t, serial )
- __field(u32, version )
- __field(u32, kvno )
- __field(u32, ticket_len )
+ __field(unsigned int, conn)
+ __field(rxrpc_serial_t, serial)
+ __field(u32, version)
+ __field(u32, kvno)
+ __field(u32, ticket_len)
),
TP_fast_assign(
@@ -1015,10 +1024,10 @@ TRACE_EVENT(rxrpc_rx_rwind_change,
TP_ARGS(call, serial, rwind, wake),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(rxrpc_serial_t, serial )
- __field(u32, rwind )
- __field(bool, wake )
+ __field(unsigned int, call)
+ __field(rxrpc_serial_t, serial)
+ __field(u32, rwind)
+ __field(bool, wake)
),
TP_fast_assign(
@@ -1042,9 +1051,9 @@ TRACE_EVENT(rxrpc_tx_packet,
TP_ARGS(call_id, whdr, where),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum rxrpc_tx_point, where )
- __field_struct(struct rxrpc_wire_header, whdr )
+ __field(unsigned int, call)
+ __field(enum rxrpc_tx_point, where)
+ __field_struct(struct rxrpc_wire_header, whdr)
),
TP_fast_assign(
@@ -1074,14 +1083,14 @@ TRACE_EVENT(rxrpc_tx_data,
TP_ARGS(call, seq, serial, flags, retrans, lose),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(rxrpc_seq_t, seq )
- __field(rxrpc_serial_t, serial )
- __field(u32, cid )
- __field(u32, call_id )
- __field(u8, flags )
- __field(bool, retrans )
- __field(bool, lose )
+ __field(unsigned int, call)
+ __field(rxrpc_seq_t, seq)
+ __field(rxrpc_serial_t, serial)
+ __field(u32, cid)
+ __field(u32, call_id)
+ __field(u8, flags)
+ __field(bool, retrans)
+ __field(bool, lose)
),
TP_fast_assign(
@@ -1114,12 +1123,12 @@ TRACE_EVENT(rxrpc_tx_ack,
TP_ARGS(call, serial, ack_first, ack_serial, reason, n_acks),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(rxrpc_serial_t, serial )
- __field(rxrpc_seq_t, ack_first )
- __field(rxrpc_serial_t, ack_serial )
- __field(u8, reason )
- __field(u8, n_acks )
+ __field(unsigned int, call)
+ __field(rxrpc_serial_t, serial)
+ __field(rxrpc_seq_t, ack_first)
+ __field(rxrpc_serial_t, ack_serial)
+ __field(u8, reason)
+ __field(u8, n_acks)
),
TP_fast_assign(
@@ -1147,11 +1156,12 @@ TRACE_EVENT(rxrpc_receive,
TP_ARGS(call, why, serial, seq),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum rxrpc_receive_trace, why )
- __field(rxrpc_serial_t, serial )
- __field(rxrpc_seq_t, seq )
- __field(u64, window )
+ __field(unsigned int, call)
+ __field(enum rxrpc_receive_trace, why)
+ __field(rxrpc_serial_t, serial)
+ __field(rxrpc_seq_t, seq)
+ __field(rxrpc_seq_t, window)
+ __field(rxrpc_seq_t, wtop)
),
TP_fast_assign(
@@ -1159,7 +1169,8 @@ TRACE_EVENT(rxrpc_receive,
__entry->why = why;
__entry->serial = serial;
__entry->seq = seq;
- __entry->window = atomic64_read(&call->ackr_window);
+ __entry->window = call->ackr_window;
+ __entry->wtop = call->ackr_wtop;
),
TP_printk("c=%08x %s r=%08x q=%08x w=%08x-%08x",
@@ -1167,8 +1178,8 @@ TRACE_EVENT(rxrpc_receive,
__print_symbolic(__entry->why, rxrpc_receive_traces),
__entry->serial,
__entry->seq,
- lower_32_bits(__entry->window),
- upper_32_bits(__entry->window))
+ __entry->window,
+ __entry->wtop)
);
TRACE_EVENT(rxrpc_recvmsg,
@@ -1178,9 +1189,9 @@ TRACE_EVENT(rxrpc_recvmsg,
TP_ARGS(call_debug_id, why, ret),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum rxrpc_recvmsg_trace, why )
- __field(int, ret )
+ __field(unsigned int, call)
+ __field(enum rxrpc_recvmsg_trace, why)
+ __field(int, ret)
),
TP_fast_assign(
@@ -1203,12 +1214,12 @@ TRACE_EVENT(rxrpc_recvdata,
TP_ARGS(call, why, seq, offset, len, ret),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum rxrpc_recvmsg_trace, why )
- __field(rxrpc_seq_t, seq )
- __field(unsigned int, offset )
- __field(unsigned int, len )
- __field(int, ret )
+ __field(unsigned int, call)
+ __field(enum rxrpc_recvmsg_trace, why)
+ __field(rxrpc_seq_t, seq)
+ __field(unsigned int, offset)
+ __field(unsigned int, len)
+ __field(int, ret)
),
TP_fast_assign(
@@ -1236,10 +1247,10 @@ TRACE_EVENT(rxrpc_rtt_tx,
TP_ARGS(call, why, slot, send_serial),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum rxrpc_rtt_tx_trace, why )
- __field(int, slot )
- __field(rxrpc_serial_t, send_serial )
+ __field(unsigned int, call)
+ __field(enum rxrpc_rtt_tx_trace, why)
+ __field(int, slot)
+ __field(rxrpc_serial_t, send_serial)
),
TP_fast_assign(
@@ -1265,13 +1276,13 @@ TRACE_EVENT(rxrpc_rtt_rx,
TP_ARGS(call, why, slot, send_serial, resp_serial, rtt, rto),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum rxrpc_rtt_rx_trace, why )
- __field(int, slot )
- __field(rxrpc_serial_t, send_serial )
- __field(rxrpc_serial_t, resp_serial )
- __field(u32, rtt )
- __field(u32, rto )
+ __field(unsigned int, call)
+ __field(enum rxrpc_rtt_rx_trace, why)
+ __field(int, slot)
+ __field(rxrpc_serial_t, send_serial)
+ __field(rxrpc_serial_t, resp_serial)
+ __field(u32, rtt)
+ __field(u32, rto)
),
TP_fast_assign(
@@ -1301,17 +1312,17 @@ TRACE_EVENT(rxrpc_timer,
TP_ARGS(call, why, now),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum rxrpc_timer_trace, why )
- __field(long, now )
- __field(long, ack_at )
- __field(long, ack_lost_at )
- __field(long, resend_at )
- __field(long, ping_at )
- __field(long, expect_rx_by )
- __field(long, expect_req_by )
- __field(long, expect_term_by )
- __field(long, timer )
+ __field(unsigned int, call)
+ __field(enum rxrpc_timer_trace, why)
+ __field(long, now)
+ __field(long, ack_at)
+ __field(long, ack_lost_at)
+ __field(long, resend_at)
+ __field(long, ping_at)
+ __field(long, expect_rx_by)
+ __field(long, expect_req_by)
+ __field(long, expect_term_by)
+ __field(long, timer)
),
TP_fast_assign(
@@ -1345,16 +1356,16 @@ TRACE_EVENT(rxrpc_timer_expired,
TP_ARGS(call, now),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(long, now )
- __field(long, ack_at )
- __field(long, ack_lost_at )
- __field(long, resend_at )
- __field(long, ping_at )
- __field(long, expect_rx_by )
- __field(long, expect_req_by )
- __field(long, expect_term_by )
- __field(long, timer )
+ __field(unsigned int, call)
+ __field(long, now)
+ __field(long, ack_at)
+ __field(long, ack_lost_at)
+ __field(long, resend_at)
+ __field(long, ping_at)
+ __field(long, expect_rx_by)
+ __field(long, expect_req_by)
+ __field(long, expect_term_by)
+ __field(long, timer)
),
TP_fast_assign(
@@ -1386,7 +1397,7 @@ TRACE_EVENT(rxrpc_rx_lose,
TP_ARGS(sp),
TP_STRUCT__entry(
- __field_struct(struct rxrpc_host_header, hdr )
+ __field_struct(struct rxrpc_host_header, hdr)
),
TP_fast_assign(
@@ -1409,10 +1420,10 @@ TRACE_EVENT(rxrpc_propose_ack,
TP_ARGS(call, why, ack_reason, serial),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum rxrpc_propose_ack_trace, why )
- __field(rxrpc_serial_t, serial )
- __field(u8, ack_reason )
+ __field(unsigned int, call)
+ __field(enum rxrpc_propose_ack_trace, why)
+ __field(rxrpc_serial_t, serial)
+ __field(u8, ack_reason)
),
TP_fast_assign(
@@ -1436,10 +1447,10 @@ TRACE_EVENT(rxrpc_send_ack,
TP_ARGS(call, why, ack_reason, serial),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum rxrpc_propose_ack_trace, why )
- __field(rxrpc_serial_t, serial )
- __field(u8, ack_reason )
+ __field(unsigned int, call)
+ __field(enum rxrpc_propose_ack_trace, why)
+ __field(rxrpc_serial_t, serial)
+ __field(u8, ack_reason)
),
TP_fast_assign(
@@ -1463,11 +1474,11 @@ TRACE_EVENT(rxrpc_drop_ack,
TP_ARGS(call, why, ack_reason, serial, nobuf),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum rxrpc_propose_ack_trace, why )
- __field(rxrpc_serial_t, serial )
- __field(u8, ack_reason )
- __field(bool, nobuf )
+ __field(unsigned int, call)
+ __field(enum rxrpc_propose_ack_trace, why)
+ __field(rxrpc_serial_t, serial)
+ __field(u8, ack_reason)
+ __field(bool, nobuf)
),
TP_fast_assign(
@@ -1491,9 +1502,9 @@ TRACE_EVENT(rxrpc_retransmit,
TP_ARGS(call, seq, expiry),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(rxrpc_seq_t, seq )
- __field(s64, expiry )
+ __field(unsigned int, call)
+ __field(rxrpc_seq_t, seq)
+ __field(s64, expiry)
),
TP_fast_assign(
@@ -1515,13 +1526,13 @@ TRACE_EVENT(rxrpc_congest,
TP_ARGS(call, summary, ack_serial, change),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum rxrpc_congest_change, change )
- __field(rxrpc_seq_t, hard_ack )
- __field(rxrpc_seq_t, top )
- __field(rxrpc_seq_t, lowest_nak )
- __field(rxrpc_serial_t, ack_serial )
- __field_struct(struct rxrpc_ack_summary, sum )
+ __field(unsigned int, call)
+ __field(enum rxrpc_congest_change, change)
+ __field(rxrpc_seq_t, hard_ack)
+ __field(rxrpc_seq_t, top)
+ __field(rxrpc_seq_t, lowest_nak)
+ __field(rxrpc_serial_t, ack_serial)
+ __field_struct(struct rxrpc_ack_summary, sum)
),
TP_fast_assign(
@@ -1559,14 +1570,14 @@ TRACE_EVENT(rxrpc_reset_cwnd,
TP_ARGS(call, now),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(enum rxrpc_congest_mode, mode )
- __field(unsigned short, cwnd )
- __field(unsigned short, extra )
- __field(rxrpc_seq_t, hard_ack )
- __field(rxrpc_seq_t, prepared )
- __field(ktime_t, since_last_tx )
- __field(bool, has_data )
+ __field(unsigned int, call)
+ __field(enum rxrpc_congest_mode, mode)
+ __field(unsigned short, cwnd)
+ __field(unsigned short, extra)
+ __field(rxrpc_seq_t, hard_ack)
+ __field(rxrpc_seq_t, prepared)
+ __field(ktime_t, since_last_tx)
+ __field(bool, has_data)
),
TP_fast_assign(
@@ -1597,8 +1608,8 @@ TRACE_EVENT(rxrpc_disconnect_call,
TP_ARGS(call),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(u32, abort_code )
+ __field(unsigned int, call)
+ __field(u32, abort_code)
),
TP_fast_assign(
@@ -1617,8 +1628,8 @@ TRACE_EVENT(rxrpc_improper_term,
TP_ARGS(call),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(u32, abort_code )
+ __field(unsigned int, call)
+ __field(u32, abort_code)
),
TP_fast_assign(
@@ -1637,11 +1648,11 @@ TRACE_EVENT(rxrpc_connect_call,
TP_ARGS(call),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(unsigned long, user_call_ID )
- __field(u32, cid )
- __field(u32, call_id )
- __field_struct(struct sockaddr_rxrpc, srx )
+ __field(unsigned int, call)
+ __field(unsigned long, user_call_ID)
+ __field(u32, cid)
+ __field(u32, call_id)
+ __field_struct(struct sockaddr_rxrpc, srx)
),
TP_fast_assign(
@@ -1666,10 +1677,10 @@ TRACE_EVENT(rxrpc_resend,
TP_ARGS(call, ack),
TP_STRUCT__entry(
- __field(unsigned int, call )
- __field(rxrpc_seq_t, seq )
- __field(rxrpc_seq_t, transmitted )
- __field(rxrpc_serial_t, ack_serial )
+ __field(unsigned int, call)
+ __field(rxrpc_seq_t, seq)
+ __field(rxrpc_seq_t, transmitted)
+ __field(rxrpc_serial_t, ack_serial)
),
TP_fast_assign(
@@ -1694,9 +1705,9 @@ TRACE_EVENT(rxrpc_rx_icmp,
TP_ARGS(peer, ee, srx),
TP_STRUCT__entry(
- __field(unsigned int, peer )
- __field_struct(struct sock_extended_err, ee )
- __field_struct(struct sockaddr_rxrpc, srx )
+ __field(unsigned int, peer)
+ __field_struct(struct sock_extended_err, ee)
+ __field_struct(struct sockaddr_rxrpc, srx)
),
TP_fast_assign(
@@ -1723,10 +1734,10 @@ TRACE_EVENT(rxrpc_tx_fail,
TP_ARGS(debug_id, serial, ret, where),
TP_STRUCT__entry(
- __field(unsigned int, debug_id )
- __field(rxrpc_serial_t, serial )
- __field(int, ret )
- __field(enum rxrpc_tx_point, where )
+ __field(unsigned int, debug_id)
+ __field(rxrpc_serial_t, serial)
+ __field(int, ret)
+ __field(enum rxrpc_tx_point, where)
),
TP_fast_assign(
@@ -1749,13 +1760,13 @@ TRACE_EVENT(rxrpc_call_reset,
TP_ARGS(call),
TP_STRUCT__entry(
- __field(unsigned int, debug_id )
- __field(u32, cid )
- __field(u32, call_id )
- __field(rxrpc_serial_t, call_serial )
- __field(rxrpc_serial_t, conn_serial )
- __field(rxrpc_seq_t, tx_seq )
- __field(rxrpc_seq_t, rx_seq )
+ __field(unsigned int, debug_id)
+ __field(u32, cid)
+ __field(u32, call_id)
+ __field(rxrpc_serial_t, call_serial)
+ __field(rxrpc_serial_t, conn_serial)
+ __field(rxrpc_seq_t, tx_seq)
+ __field(rxrpc_seq_t, rx_seq)
),
TP_fast_assign(
@@ -1781,8 +1792,8 @@ TRACE_EVENT(rxrpc_notify_socket,
TP_ARGS(debug_id, serial),
TP_STRUCT__entry(
- __field(unsigned int, debug_id )
- __field(rxrpc_serial_t, serial )
+ __field(unsigned int, debug_id)
+ __field(rxrpc_serial_t, serial)
),
TP_fast_assign(
@@ -1804,8 +1815,8 @@ TRACE_EVENT(rxrpc_rx_discard_ack,
prev_pkt, call_ackr_prev),
TP_STRUCT__entry(
- __field(unsigned int, debug_id )
- __field(rxrpc_serial_t, serial )
+ __field(unsigned int, debug_id)
+ __field(rxrpc_serial_t, serial)
__field(rxrpc_seq_t, first_soft_ack)
__field(rxrpc_seq_t, call_ackr_first)
__field(rxrpc_seq_t, prev_pkt)
@@ -1837,9 +1848,9 @@ TRACE_EVENT(rxrpc_req_ack,
TP_ARGS(call_debug_id, seq, why),
TP_STRUCT__entry(
- __field(unsigned int, call_debug_id )
- __field(rxrpc_seq_t, seq )
- __field(enum rxrpc_req_ack_trace, why )
+ __field(unsigned int, call_debug_id)
+ __field(rxrpc_seq_t, seq)
+ __field(enum rxrpc_req_ack_trace, why)
),
TP_fast_assign(
@@ -1862,11 +1873,11 @@ TRACE_EVENT(rxrpc_txbuf,
TP_ARGS(debug_id, call_debug_id, seq, ref, what),
TP_STRUCT__entry(
- __field(unsigned int, debug_id )
- __field(unsigned int, call_debug_id )
- __field(rxrpc_seq_t, seq )
- __field(int, ref )
- __field(enum rxrpc_txbuf_trace, what )
+ __field(unsigned int, debug_id)
+ __field(unsigned int, call_debug_id)
+ __field(rxrpc_seq_t, seq)
+ __field(int, ref)
+ __field(enum rxrpc_txbuf_trace, what)
),
TP_fast_assign(
@@ -1892,9 +1903,9 @@ TRACE_EVENT(rxrpc_poke_call,
TP_ARGS(call, busy, what),
TP_STRUCT__entry(
- __field(unsigned int, call_debug_id )
- __field(bool, busy )
- __field(enum rxrpc_call_poke_trace, what )
+ __field(unsigned int, call_debug_id)
+ __field(bool, busy)
+ __field(enum rxrpc_call_poke_trace, what)
),
TP_fast_assign(
@@ -1915,7 +1926,7 @@ TRACE_EVENT(rxrpc_call_poked,
TP_ARGS(call),
TP_STRUCT__entry(
- __field(unsigned int, call_debug_id )
+ __field(unsigned int, call_debug_id)
),
TP_fast_assign(
@@ -1926,6 +1937,33 @@ TRACE_EVENT(rxrpc_call_poked,
__entry->call_debug_id)
);
+TRACE_EVENT(rxrpc_sack,
+ TP_PROTO(struct rxrpc_call *call, rxrpc_seq_t seq,
+ unsigned int sack, enum rxrpc_sack_trace what),
+
+ TP_ARGS(call, seq, sack, what),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, call_debug_id)
+ __field(rxrpc_seq_t, seq)
+ __field(unsigned int, sack)
+ __field(enum rxrpc_sack_trace, what)
+ ),
+
+ TP_fast_assign(
+ __entry->call_debug_id = call->debug_id;
+ __entry->seq = seq;
+ __entry->sack = sack;
+ __entry->what = what;
+ ),
+
+ TP_printk("c=%08x q=%08x %s k=%x",
+ __entry->call_debug_id,
+ __entry->seq,
+ __print_symbolic(__entry->what, rxrpc_sack_traces),
+ __entry->sack)
+ );
+
#undef EM
#undef E_
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index 7ae023b37a83..a20986806fea 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -36,6 +36,15 @@ config AF_RXRPC_INJECT_LOSS
Say Y here to inject packet loss by discarding some received and some
transmitted packets.
+config AF_RXRPC_INJECT_RX_DELAY
+ bool "Inject delay into packet reception"
+ depends on SYSCTL
+ help
+ Say Y here to inject a delay into packet reception, allowing an
+ extended RTT time to be modelled. The delay can be configured using
+ /proc/sys/net/rxrpc/rxrpc_inject_rx_delay, setting a number of
+ milliseconds up to 0.5s (note that the granularity is actually in
+ jiffies).
config AF_RXRPC_DEBUG
bool "RxRPC dynamic debugging"
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index ebbd4a1c3f86..102f5cbff91a 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -786,7 +786,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
INIT_LIST_HEAD(&rx->sock_calls);
INIT_LIST_HEAD(&rx->to_be_accepted);
INIT_LIST_HEAD(&rx->recvmsg_q);
- rwlock_init(&rx->recvmsg_lock);
+ spin_lock_init(&rx->recvmsg_lock);
rwlock_init(&rx->call_lock);
memset(&rx->srx, 0, sizeof(rx->srx));
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 433060cade03..9e19688b0e06 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -149,7 +149,7 @@ struct rxrpc_sock {
struct list_head sock_calls; /* List of calls owned by this socket */
struct list_head to_be_accepted; /* calls awaiting acceptance */
struct list_head recvmsg_q; /* Calls awaiting recvmsg's attention */
- rwlock_t recvmsg_lock; /* Lock for recvmsg_q */
+ spinlock_t recvmsg_lock; /* Lock for recvmsg_q */
struct key *key; /* security for this socket */
struct key *securities; /* list of server security descriptors */
struct rb_root calls; /* User ID -> call mapping */
@@ -284,7 +284,9 @@ struct rxrpc_local {
struct task_struct *io_thread;
struct completion io_thread_ready; /* Indication that the I/O thread started */
struct rxrpc_sock *service; /* Service(s) listening on this endpoint */
- struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ struct sk_buff_head rx_delay_queue; /* Delay injection queue */
+#endif
struct sk_buff_head rx_queue; /* Received packets */
struct list_head conn_attend_q; /* Conns requiring immediate attention */
struct list_head call_attend_q; /* Calls requiring immediate attention */
@@ -688,9 +690,11 @@ struct rxrpc_call {
/* Receive-phase ACK management (ACKs we send). */
u8 ackr_reason; /* reason to ACK */
+ u16 ackr_sack_base; /* Starting slot in SACK table ring */
rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
- atomic64_t ackr_window; /* Base (in LSW) and top (in MSW) of SACK window */
- atomic_t ackr_nr_unacked; /* Number of unacked packets */
+ rxrpc_seq_t ackr_window; /* Base of SACK window */
+ rxrpc_seq_t ackr_wtop; /* Top of SACK window */
+ unsigned int ackr_nr_unacked; /* Number of unacked packets */
atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */
struct {
#define RXRPC_SACK_SIZE 256
@@ -1109,6 +1113,9 @@ extern unsigned long rxrpc_idle_ack_delay;
extern unsigned int rxrpc_rx_window_size;
extern unsigned int rxrpc_rx_mtu;
extern unsigned int rxrpc_rx_jumbo_max;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+extern unsigned long rxrpc_inject_rx_delay;
+#endif
/*
* net_ns.c
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index 3e8689fdc437..0f5a1d77b890 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -195,7 +195,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
tail = b->peer_backlog_tail;
while (CIRC_CNT(head, tail, size) > 0) {
struct rxrpc_peer *peer = b->peer_backlog[tail];
- rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_conn);
+ rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_peer);
kfree(peer);
tail = (tail + 1) & (size - 1);
}
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index 1abdef15debc..e363f21a2014 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -498,9 +498,18 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
rxrpc_propose_ack_rx_idle);
- if (atomic_read(&call->ackr_nr_unacked) > 2)
- rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
- rxrpc_propose_ack_input_data);
+ if (call->ackr_nr_unacked > 2) {
+ if (call->peer->rtt_count < 3)
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_rtt);
+ else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
+ ktime_get_real()))
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_old_rtt);
+ else
+ rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
+ rxrpc_propose_ack_input_data);
+ }
/* Make sure the timer is restarted */
if (!__rxrpc_call_is_complete(call)) {
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index f3c9f0201c15..6eaffb0d8fdc 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -167,7 +167,8 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
call->tx_total_len = -1;
call->next_rx_timo = 20 * HZ;
call->next_req_timo = 1 * HZ;
- atomic64_set(&call->ackr_window, 0x100000001ULL);
+ call->ackr_window = 1;
+ call->ackr_wtop = 1;
memset(&call->sock_node, 0xed, sizeof(call->sock_node));
@@ -560,7 +561,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
rxrpc_put_call_slot(call);
/* Make sure we don't get any more notifications */
- write_lock(&rx->recvmsg_lock);
+ spin_lock(&rx->recvmsg_lock);
if (!list_empty(&call->recvmsg_link)) {
_debug("unlinking once-pending call %p { e=%lx f=%lx }",
@@ -573,7 +574,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
call->recvmsg_link.next = NULL;
call->recvmsg_link.prev = NULL;
- write_unlock(&rx->recvmsg_lock);
+ spin_unlock(&rx->recvmsg_lock);
if (put)
rxrpc_put_call(call, rxrpc_call_put_unnotify);
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index f30323de82bd..89ac05a711a4 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -8,11 +8,6 @@
#include <linux/slab.h>
#include "ar-internal.h"
-static struct rxrpc_bundle rxrpc_service_dummy_bundle = {
- .ref = REFCOUNT_INIT(1),
- .debug_id = UINT_MAX,
-};
-
/*
* Find a service connection under RCU conditions.
*
@@ -132,8 +127,6 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
*/
conn->state = RXRPC_CONN_SERVICE_PREALLOC;
refcount_set(&conn->ref, 2);
- conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle,
- rxrpc_bundle_get_service_conn);
atomic_inc(&rxnet->nr_conns);
write_lock(&rxnet->conn_lock);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 367927a99881..d68848fce51f 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -338,7 +338,8 @@ static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
static void rxrpc_input_update_ack_window(struct rxrpc_call *call,
rxrpc_seq_t window, rxrpc_seq_t wtop)
{
- atomic64_set_release(&call->ackr_window, ((u64)wtop) << 32 | window);
+ call->ackr_window = window;
+ call->ackr_wtop = wtop;
}
/*
@@ -367,9 +368,9 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct sk_buff *oos;
rxrpc_serial_t serial = sp->hdr.serial;
- u64 win = atomic64_read(&call->ackr_window);
- rxrpc_seq_t window = lower_32_bits(win);
- rxrpc_seq_t wtop = upper_32_bits(win);
+ unsigned int sack = call->ackr_sack_base;
+ rxrpc_seq_t window = call->ackr_window;
+ rxrpc_seq_t wtop = call->ackr_wtop;
rxrpc_seq_t wlimit = window + call->rx_winsize - 1;
rxrpc_seq_t seq = sp->hdr.seq;
bool last = sp->hdr.flags & RXRPC_LAST_PACKET;
@@ -410,20 +411,23 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
/* Queue the packet. */
if (seq == window) {
- rxrpc_seq_t reset_from;
- bool reset_sack = false;
-
if (sp->hdr.flags & RXRPC_REQUEST_ACK)
ack_reason = RXRPC_ACK_REQUESTED;
/* Send an immediate ACK if we fill in a hole */
else if (!skb_queue_empty(&call->rx_oos_queue))
ack_reason = RXRPC_ACK_DELAY;
else
- atomic_inc_return(&call->ackr_nr_unacked);
+ call->ackr_nr_unacked++;
window++;
- if (after(window, wtop))
+ if (after(window, wtop)) {
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_none);
wtop = window;
+ } else {
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_advance);
+ sack = (sack + 1) % RXRPC_SACK_SIZE;
+ }
+
rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg);
@@ -440,43 +444,39 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
__skb_unlink(oos, &call->rx_oos_queue);
last = osp->hdr.flags & RXRPC_LAST_PACKET;
seq = osp->hdr.seq;
- if (!reset_sack) {
- reset_from = seq;
- reset_sack = true;
- }
+ call->ackr_sack_table[sack] = 0;
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_fill);
+ sack = (sack + 1) % RXRPC_SACK_SIZE;
window++;
rxrpc_input_queue_data(call, oos, window, wtop,
- rxrpc_receive_queue_oos);
+ rxrpc_receive_queue_oos);
}
spin_unlock(&call->recvmsg_queue.lock);
- if (reset_sack) {
- do {
- call->ackr_sack_table[reset_from % RXRPC_SACK_SIZE] = 0;
- } while (reset_from++, before(reset_from, window));
- }
+ call->ackr_sack_base = sack;
} else {
- bool keep = false;
+ unsigned int slot;
ack_reason = RXRPC_ACK_OUT_OF_SEQUENCE;
- if (!call->ackr_sack_table[seq % RXRPC_SACK_SIZE]) {
- call->ackr_sack_table[seq % RXRPC_SACK_SIZE] = 1;
- keep = 1;
+ slot = seq - window;
+ sack = (sack + slot) % RXRPC_SACK_SIZE;
+
+ if (call->ackr_sack_table[sack % RXRPC_SACK_SIZE]) {
+ ack_reason = RXRPC_ACK_DUPLICATE;
+ goto send_ack;
}
+ call->ackr_sack_table[sack % RXRPC_SACK_SIZE] |= 1;
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_oos);
+
if (after(seq + 1, wtop)) {
wtop = seq + 1;
rxrpc_input_update_ack_window(call, window, wtop);
}
- if (!keep) {
- ack_reason = RXRPC_ACK_DUPLICATE;
- goto send_ack;
- }
-
skb_queue_walk(&call->rx_oos_queue, oos) {
struct rxrpc_skb_priv *osp = rxrpc_skb(oos);
@@ -567,8 +567,8 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_serial_t serial = sp->hdr.serial;
rxrpc_seq_t seq0 = sp->hdr.seq;
- _enter("{%llx,%x},{%u,%x}",
- atomic64_read(&call->ackr_window), call->rx_highest_seq,
+ _enter("{%x,%x,%x},{%u,%x}",
+ call->ackr_window, call->ackr_wtop, call->rx_highest_seq,
skb->len, seq0);
if (__rxrpc_call_is_complete(call))
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 9e9dfb2fc559..4a3a08a0e2cd 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -25,6 +25,7 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
*/
int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb)
{
+ struct sk_buff_head *rx_queue;
struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk);
if (unlikely(!local)) {
@@ -36,7 +37,16 @@ int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb)
skb->mark = RXRPC_SKB_MARK_PACKET;
rxrpc_new_skb(skb, rxrpc_skb_new_encap_rcv);
- skb_queue_tail(&local->rx_queue, skb);
+ rx_queue = &local->rx_queue;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ if (rxrpc_inject_rx_delay ||
+ !skb_queue_empty(&local->rx_delay_queue)) {
+ skb->tstamp = ktime_add_ms(skb->tstamp, rxrpc_inject_rx_delay);
+ rx_queue = &local->rx_delay_queue;
+ }
+#endif
+
+ skb_queue_tail(rx_queue, skb);
rxrpc_wake_up_io_thread(local);
return 0;
}
@@ -407,6 +417,9 @@ int rxrpc_io_thread(void *data)
struct rxrpc_local *local = data;
struct rxrpc_call *call;
struct sk_buff *skb;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ ktime_t now;
+#endif
bool should_stop;
complete(&local->io_thread_ready);
@@ -481,6 +494,17 @@ int rxrpc_io_thread(void *data)
continue;
}
+ /* Inject a delay into packets if requested. */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ now = ktime_get_real();
+ while ((skb = skb_peek(&local->rx_delay_queue))) {
+ if (ktime_before(now, skb->tstamp))
+ break;
+ skb = skb_dequeue(&local->rx_delay_queue);
+ skb_queue_tail(&local->rx_queue, skb);
+ }
+#endif
+
if (!skb_queue_empty(&local->rx_queue)) {
spin_lock_irq(&local->rx_queue.lock);
skb_queue_splice_tail_init(&local->rx_queue, &rx_queue);
@@ -502,6 +526,28 @@ int rxrpc_io_thread(void *data)
if (should_stop)
break;
+
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ skb = skb_peek(&local->rx_delay_queue);
+ if (skb) {
+ unsigned long timeout;
+ ktime_t tstamp = skb->tstamp;
+ ktime_t now = ktime_get_real();
+ s64 delay_ns = ktime_to_ns(ktime_sub(tstamp, now));
+
+ if (delay_ns <= 0) {
+ __set_current_state(TASK_RUNNING);
+ continue;
+ }
+
+ timeout = nsecs_to_jiffies(delay_ns);
+ timeout = max(timeout, 1UL);
+ schedule_timeout(timeout);
+ __set_current_state(TASK_RUNNING);
+ continue;
+ }
+#endif
+
schedule();
}
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index b8eaca5d9f22..7d910aee4f8c 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -108,8 +108,10 @@ static struct rxrpc_local *rxrpc_alloc_local(struct net *net,
local->net = net;
local->rxnet = rxrpc_net(net);
INIT_HLIST_NODE(&local->link);
- init_rwsem(&local->defrag_sem);
init_completion(&local->io_thread_ready);
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ skb_queue_head_init(&local->rx_delay_queue);
+#endif
skb_queue_head_init(&local->rx_queue);
INIT_LIST_HEAD(&local->conn_attend_q);
INIT_LIST_HEAD(&local->call_attend_q);
@@ -434,6 +436,9 @@ void rxrpc_destroy_local(struct rxrpc_local *local)
/* At this point, there should be no more packets coming in to the
* local endpoint.
*/
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ rxrpc_purge_queue(&local->rx_delay_queue);
+#endif
rxrpc_purge_queue(&local->rx_queue);
rxrpc_purge_client_connections(local);
}
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index 056c428d8bf3..825b81183046 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -53,3 +53,10 @@ unsigned int rxrpc_rx_mtu = 5692;
* sender that we're willing to handle.
*/
unsigned int rxrpc_rx_jumbo_max = 4;
+
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+/*
+ * The delay to inject into packet reception.
+ */
+unsigned long rxrpc_inject_rx_delay;
+#endif
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index a9746be29634..6b2022240076 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -83,59 +83,36 @@ static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
struct rxrpc_txbuf *txb)
{
struct rxrpc_ackinfo ackinfo;
- unsigned int qsize;
- rxrpc_seq_t window, wtop, wrap_point, ix, first;
+ unsigned int qsize, sack, wrap, to;
+ rxrpc_seq_t window, wtop;
int rsize;
- u64 wtmp;
u32 mtu, jmax;
u8 *ackp = txb->acks;
- u8 sack_buffer[sizeof(call->ackr_sack_table)] __aligned(8);
- atomic_set(&call->ackr_nr_unacked, 0);
+ call->ackr_nr_unacked = 0;
atomic_set(&call->ackr_nr_consumed, 0);
rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill);
+ clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
- /* Barrier against rxrpc_input_data(). */
-retry:
- wtmp = atomic64_read_acquire(&call->ackr_window);
- window = lower_32_bits(wtmp);
- wtop = upper_32_bits(wtmp);
+ window = call->ackr_window;
+ wtop = call->ackr_wtop;
+ sack = call->ackr_sack_base % RXRPC_SACK_SIZE;
txb->ack.firstPacket = htonl(window);
- txb->ack.nAcks = 0;
+ txb->ack.nAcks = wtop - window;
if (after(wtop, window)) {
- /* Try to copy the SACK ring locklessly. We can use the copy,
- * only if the now-current top of the window didn't go past the
- * previously read base - otherwise we can't know whether we
- * have old data or new data.
- */
- memcpy(sack_buffer, call->ackr_sack_table, sizeof(sack_buffer));
- wrap_point = window + RXRPC_SACK_SIZE - 1;
- wtmp = atomic64_read_acquire(&call->ackr_window);
- window = lower_32_bits(wtmp);
- wtop = upper_32_bits(wtmp);
- if (after(wtop, wrap_point)) {
- cond_resched();
- goto retry;
- }
-
- /* The buffer is maintained as a ring with an invariant mapping
- * between bit position and sequence number, so we'll probably
- * need to rotate it.
- */
- txb->ack.nAcks = wtop - window;
- ix = window % RXRPC_SACK_SIZE;
- first = sizeof(sack_buffer) - ix;
+ wrap = RXRPC_SACK_SIZE - sack;
+ to = min_t(unsigned int, txb->ack.nAcks, RXRPC_SACK_SIZE);
- if (ix + txb->ack.nAcks <= RXRPC_SACK_SIZE) {
- memcpy(txb->acks, sack_buffer + ix, txb->ack.nAcks);
+ if (sack + txb->ack.nAcks <= RXRPC_SACK_SIZE) {
+ memcpy(txb->acks, call->ackr_sack_table + sack, txb->ack.nAcks);
} else {
- memcpy(txb->acks, sack_buffer + ix, first);
- memcpy(txb->acks + first, sack_buffer,
- txb->ack.nAcks - first);
+ memcpy(txb->acks, call->ackr_sack_table + sack, wrap);
+ memcpy(txb->acks + wrap, call->ackr_sack_table,
+ to - wrap);
}
- ackp += txb->ack.nAcks;
+ ackp += to;
} else if (before(wtop, window)) {
pr_warn("ack window backward %x %x", window, wtop);
} else if (txb->ack.reason == RXRPC_ACK_DELAY) {
@@ -253,12 +230,15 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
ret = do_udp_sendmsg(conn->local->socket, &msg, len);
call->peer->last_tx_at = ktime_get_seconds();
- if (ret < 0)
+ if (ret < 0) {
trace_rxrpc_tx_fail(call->debug_id, serial, ret,
rxrpc_tx_point_call_ack);
- else
+ } else {
trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
rxrpc_tx_point_call_ack);
+ if (txb->wire.flags & RXRPC_REQUEST_ACK)
+ call->peer->rtt_last_req = ktime_get_real();
+ }
rxrpc_tx_backoff(call, ret);
if (!__rxrpc_call_is_complete(call)) {
@@ -429,8 +409,6 @@ dont_set_request_ack:
if (txb->len >= call->peer->maxdata)
goto send_fragmentable;
- down_read(&conn->local->defrag_sem);
-
txb->last_sent = ktime_get_real();
if (txb->wire.flags & RXRPC_REQUEST_ACK)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
@@ -445,7 +423,6 @@ dont_set_request_ack:
ret = do_udp_sendmsg(conn->local->socket, &msg, len);
conn->peer->last_tx_at = ktime_get_seconds();
- up_read(&conn->local->defrag_sem);
if (ret < 0) {
rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
@@ -506,8 +483,6 @@ send_fragmentable:
/* attempt to send this message with fragmentation enabled */
_debug("send fragment");
- down_write(&conn->local->defrag_sem);
-
txb->last_sent = ktime_get_real();
if (txb->wire.flags & RXRPC_REQUEST_ACK)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
@@ -539,8 +514,6 @@ send_fragmentable:
rxrpc_tx_point_call_data_frag);
}
rxrpc_tx_backoff(call, ret);
-
- up_write(&conn->local->defrag_sem);
goto done;
}
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 750158a085cd..682636d3b060 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -55,7 +55,6 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
unsigned long timeout = 0;
rxrpc_seq_t acks_hard_ack;
char lbuff[50], rbuff[50];
- u64 wtmp;
if (v == &rxnet->calls) {
seq_puts(seq,
@@ -83,7 +82,6 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
}
acks_hard_ack = READ_ONCE(call->acks_hard_ack);
- wtmp = atomic64_read_acquire(&call->ackr_window);
seq_printf(seq,
"UDP %-47.47s %-47.47s %4x %08x %08x %s %3u"
" %-8.8s %08x %08x %08x %02x %08x %02x %08x %02x %06lx\n",
@@ -98,7 +96,7 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
call->abort_code,
call->debug_id,
acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack,
- lower_32_bits(wtmp), upper_32_bits(wtmp) - lower_32_bits(wtmp),
+ call->ackr_window, call->ackr_wtop - call->ackr_window,
call->rx_serial,
call->cong_cwnd,
timeout);
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index dd54ceee7bcc..50d263a6359d 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -40,12 +40,12 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
call->notify_rx(sk, call, call->user_call_ID);
spin_unlock(&call->notify_lock);
} else {
- write_lock(&rx->recvmsg_lock);
+ spin_lock(&rx->recvmsg_lock);
if (list_empty(&call->recvmsg_link)) {
rxrpc_get_call(call, rxrpc_call_get_notify_socket);
list_add_tail(&call->recvmsg_link, &rx->recvmsg_q);
}
- write_unlock(&rx->recvmsg_lock);
+ spin_unlock(&rx->recvmsg_lock);
if (!sock_flag(sk, SOCK_DEAD)) {
_debug("call %ps", sk->sk_data_ready);
@@ -95,7 +95,7 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
}
trace_rxrpc_recvdata(call, rxrpc_recvmsg_terminal,
- lower_32_bits(atomic64_read(&call->ackr_window)) - 1,
+ call->ackr_window - 1,
call->rx_pkt_offset, call->rx_pkt_len, ret);
return ret;
}
@@ -175,13 +175,13 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
rx_pkt_len = call->rx_pkt_len;
if (rxrpc_call_has_failed(call)) {
- seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1;
+ seq = call->ackr_window - 1;
ret = -EIO;
goto done;
}
if (test_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags)) {
- seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1;
+ seq = call->ackr_window - 1;
ret = 1;
goto done;
}
@@ -335,14 +335,14 @@ try_again:
/* Find the next call and dequeue it if we're not just peeking. If we
* do dequeue it, that comes with a ref that we will need to release.
*/
- write_lock(&rx->recvmsg_lock);
+ spin_lock(&rx->recvmsg_lock);
l = rx->recvmsg_q.next;
call = list_entry(l, struct rxrpc_call, recvmsg_link);
if (!(flags & MSG_PEEK))
list_del_init(&call->recvmsg_link);
else
rxrpc_get_call(call, rxrpc_call_get_recvmsg);
- write_unlock(&rx->recvmsg_lock);
+ spin_unlock(&rx->recvmsg_lock);
call_debug_id = call->debug_id;
trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_dequeue, 0);
@@ -431,9 +431,9 @@ error_unlock_call:
error_requeue_call:
if (!(flags & MSG_PEEK)) {
- write_lock(&rx->recvmsg_lock);
+ spin_lock(&rx->recvmsg_lock);
list_add(&call->recvmsg_link, &rx->recvmsg_q);
- write_unlock(&rx->recvmsg_lock);
+ spin_unlock(&rx->recvmsg_lock);
trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0);
} else {
rxrpc_put_call(call, rxrpc_call_put_recvmsg);
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index ebe0c75e7b07..944320e65ea8 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -63,7 +63,7 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
if (skb) {
int n = atomic_dec_return(select_skb_count(skb));
trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_CONSUMED);
}
}
@@ -78,6 +78,6 @@ void rxrpc_purge_queue(struct sk_buff_head *list)
int n = atomic_dec_return(select_skb_count(skb));
trace_rxrpc_skb(skb, refcount_read(&skb->users), n,
rxrpc_skb_put_purge);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_CONSUMED);
}
}
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index cde3224a5cd2..ecaeb4ecfb58 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -17,6 +17,9 @@ static const unsigned int n_65535 = 65535;
static const unsigned int n_max_acks = 255;
static const unsigned long one_jiffy = 1;
static const unsigned long max_jiffies = MAX_JIFFY_OFFSET;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+static const unsigned long max_500 = 500;
+#endif
/*
* RxRPC operating parameters.
@@ -63,6 +66,19 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.extra2 = (void *)&max_jiffies,
},
+ /* Values used in milliseconds */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ {
+ .procname = "inject_rx_delay",
+ .data = &rxrpc_inject_rx_delay,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = (void *)SYSCTL_LONG_ZERO,
+ .extra2 = (void *)&max_500,
+ },
+#endif
+
/* Non-time values */
{
.procname = "reap_client_conns",
@@ -109,7 +125,6 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&four,
},
-
{ }
};
diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c
index d2cf2aac3adb..d43be8512386 100644
--- a/net/rxrpc/txbuf.c
+++ b/net/rxrpc/txbuf.c
@@ -110,12 +110,8 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
_enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top);
- for (;;) {
- spin_lock(&call->tx_lock);
- txb = list_first_entry_or_null(&call->tx_buffer,
- struct rxrpc_txbuf, call_link);
- if (!txb)
- break;
+ while ((txb = list_first_entry_or_null(&call->tx_buffer,
+ struct rxrpc_txbuf, call_link))) {
hard_ack = smp_load_acquire(&call->acks_hard_ack);
if (before(hard_ack, txb->seq))
break;
@@ -128,15 +124,11 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue);
- spin_unlock(&call->tx_lock);
-
rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated);
if (after(call->acks_hard_ack, call->tx_bottom + 128))
wake = true;
}
- spin_unlock(&call->tx_lock);
-
if (wake)
wake_up(&call->waitq);
}