summaryrefslogtreecommitdiff
path: root/net/ipv4
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4')
-rw-r--r--net/ipv4/fib_semantics.c1
-rw-r--r--net/ipv4/fib_trie.c4
-rw-r--r--net/ipv4/route.c6
-rw-r--r--net/ipv4/tcp.c10
-rw-r--r--net/ipv4/tcp_bpf.c4
-rw-r--r--net/ipv4/tcp_input.c13
-rw-r--r--net/ipv4/tcp_output.c8
7 files changed, 32 insertions, 14 deletions
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index eafa4a033515..1ea82bc33ef1 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1887,6 +1887,7 @@ int fib_sync_down_addr(struct net_device *dev, __be32 local)
continue;
if (fi->fib_prefsrc == local) {
fi->fib_flags |= RTNH_F_DEAD;
+ fi->pfsrc_removed = true;
ret++;
}
}
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index d13fb9e76b97..9bdfdab906fe 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -2027,6 +2027,7 @@ void fib_table_flush_external(struct fib_table *tb)
int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
{
struct trie *t = (struct trie *)tb->tb_data;
+ struct nl_info info = { .nl_net = net };
struct key_vector *pn = t->kv;
unsigned long cindex = 1;
struct hlist_node *tmp;
@@ -2089,6 +2090,9 @@ int fib_table_flush(struct net *net, struct fib_table *tb, bool flush_all)
fib_notify_alias_delete(net, n->key, &n->leaf, fa,
NULL);
+ if (fi->pfsrc_removed)
+ rtmsg_fib(RTM_DELROUTE, htonl(n->key), fa,
+ KEYLENGTH - fa->fa_slen, tb->tb_id, &info, 0);
hlist_del_rcu(&fa->fa_list);
fib_release_info(fa->fa_info);
alias_free_mem_rcu(fa);
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 66f419e7f9a7..b214b5a2e045 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1213,6 +1213,7 @@ EXPORT_INDIRECT_CALLABLE(ipv4_dst_check);
static void ipv4_send_dest_unreach(struct sk_buff *skb)
{
+ struct net_device *dev;
struct ip_options opt;
int res;
@@ -1230,7 +1231,8 @@ static void ipv4_send_dest_unreach(struct sk_buff *skb)
opt.optlen = ip_hdr(skb)->ihl * 4 - sizeof(struct iphdr);
rcu_read_lock();
- res = __ip_options_compile(dev_net(skb->dev), &opt, skb, NULL);
+ dev = skb->dev ? skb->dev : skb_rtable(skb)->dst.dev;
+ res = __ip_options_compile(dev_net(dev), &opt, skb, NULL);
rcu_read_unlock();
if (res)
@@ -3415,6 +3417,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
fa->fa_type == fri.type) {
fri.offload = READ_ONCE(fa->offload);
fri.trap = READ_ONCE(fa->trap);
+ fri.offload_failed =
+ READ_ONCE(fa->offload_failed);
break;
}
}
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0c3040a63ebd..3f66cdeef7de 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -1621,16 +1621,13 @@ EXPORT_SYMBOL(tcp_read_sock);
int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
{
- struct tcp_sock *tp = tcp_sk(sk);
- u32 seq = tp->copied_seq;
struct sk_buff *skb;
int copied = 0;
- u32 offset;
if (sk->sk_state == TCP_LISTEN)
return -ENOTCONN;
- while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) {
+ while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) {
u8 tcp_flags;
int used;
@@ -1643,13 +1640,10 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor)
copied = used;
break;
}
- seq += used;
copied += used;
- if (tcp_flags & TCPHDR_FIN) {
- ++seq;
+ if (tcp_flags & TCPHDR_FIN)
break;
- }
}
return copied;
}
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 81f0dff69e0b..327268203001 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -222,6 +222,7 @@ static int tcp_bpf_recvmsg_parser(struct sock *sk,
int *addr_len)
{
struct tcp_sock *tcp = tcp_sk(sk);
+ int peek = flags & MSG_PEEK;
u32 seq = tcp->copied_seq;
struct sk_psock *psock;
int copied = 0;
@@ -311,7 +312,8 @@ msg_bytes_ready:
copied = -EAGAIN;
}
out:
- WRITE_ONCE(tcp->copied_seq, seq);
+ if (!peek)
+ WRITE_ONCE(tcp->copied_seq, seq);
tcp_rcv_space_adjust(sk);
if (copied > 0)
__tcp_cleanup_rbuf(sk, copied);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 06fe1cf645d5..8afb0950a697 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -253,6 +253,19 @@ static void tcp_measure_rcv_mss(struct sock *sk, const struct sk_buff *skb)
if (unlikely(len > icsk->icsk_ack.rcv_mss +
MAX_TCP_OPTION_SPACE))
tcp_gro_dev_warn(sk, skb, len);
+ /* If the skb has a len of exactly 1*MSS and has the PSH bit
+ * set then it is likely the end of an application write. So
+ * more data may not be arriving soon, and yet the data sender
+ * may be waiting for an ACK if cwnd-bound or using TX zero
+ * copy. So we set ICSK_ACK_PUSHED here so that
+ * tcp_cleanup_rbuf() will send an ACK immediately if the app
+ * reads all of the data and is not ping-pong. If len > MSS
+ * then this logic does not matter (and does not hurt) because
+ * tcp_cleanup_rbuf() will always ACK immediately if the app
+ * reads data and there is more than an MSS of unACKed data.
+ */
+ if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_PSH)
+ icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
} else {
/* Otherwise, we make more careful check taking into account,
* that SACKs block is variable.
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ccfc8bbf7455..9c8c42c280b7 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -177,8 +177,7 @@ static void tcp_event_data_sent(struct tcp_sock *tp,
}
/* Account for an ACK we sent. */
-static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
- u32 rcv_nxt)
+static inline void tcp_event_ack_sent(struct sock *sk, u32 rcv_nxt)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -192,7 +191,7 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
if (unlikely(rcv_nxt != tp->rcv_nxt))
return; /* Special ACK sent by DCTCP to reflect ECN */
- tcp_dec_quickack_mode(sk, pkts);
+ tcp_dec_quickack_mode(sk);
inet_csk_clear_xmit_timer(sk, ICSK_TIME_DACK);
}
@@ -1387,7 +1386,7 @@ static int __tcp_transmit_skb(struct sock *sk, struct sk_buff *skb,
sk, skb);
if (likely(tcb->tcp_flags & TCPHDR_ACK))
- tcp_event_ack_sent(sk, tcp_skb_pcount(skb), rcv_nxt);
+ tcp_event_ack_sent(sk, rcv_nxt);
if (skb->len != tcp_header_size) {
tcp_event_data_sent(tp, sk);
@@ -2457,6 +2456,7 @@ static int tcp_mtu_probe(struct sock *sk)
/* build the payload, and be prepared to abort if this fails. */
if (tcp_clone_payload(sk, nskb, probe_size)) {
+ tcp_skb_tsorted_anchor_cleanup(nskb);
consume_skb(nskb);
return -1;
}