diff options
author | Eric Dumazet <edumazet@google.com> | 2024-08-29 14:46:39 +0000 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2024-08-30 11:14:06 -0700 |
commit | 8c2bd38b95f75f3d2a08c93e35303e26d480d24e (patch) | |
tree | 76e4e48e5fe7d64b56eb5cc25f4dac1c03a653e3 /net/ipv4 | |
parent | b26b64493343659cce8bbffa358bf39e4f68bdec (diff) |
icmp: change the order of rate limits
ICMP messages are ratelimited :
After the blamed commits, the two rate limiters are applied in this order:
1) host wide ratelimit (icmp_global_allow())
2) Per destination ratelimit (inetpeer based)
In order to avoid side-channels attacks, we need to apply
the per destination check first.
This patch makes the following change :
1) icmp_global_allow() checks if the host wide limit is reached.
But credits are not yet consumed. This is deferred to 3)
2) The per destination limit is checked/updated.
This might add a new node in inetpeer tree.
3) icmp_global_consume() consumes tokens if prior operations succeeded.
This means that host wide ratelimit is still effective
in keeping inetpeer tree small even under DDOS.
As a bonus, I removed icmp_global.lock as the fast path
can use a lock-free operation.
Fixes: c0303efeab73 ("net: reduce cycles spend on ICMP replies that gets rate limited")
Fixes: 4cdf507d5452 ("icmp: add a global rate limitation")
Reported-by: Keyu Man <keyu.man@email.ucr.edu>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: David Ahern <dsahern@kernel.org>
Cc: Jesper Dangaard Brouer <hawk@kernel.org>
Cc: stable@vger.kernel.org
Link: https://patch.msgid.link/20240829144641.3880376-2-edumazet@google.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'net/ipv4')
-rw-r--r-- | net/ipv4/icmp.c | 103 |
1 files changed, 56 insertions, 47 deletions
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index b8f56d03fcbb..0078e8fb2e86 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -224,57 +224,59 @@ int sysctl_icmp_msgs_per_sec __read_mostly = 1000; int sysctl_icmp_msgs_burst __read_mostly = 50; static struct { - spinlock_t lock; - u32 credit; + atomic_t credit; u32 stamp; -} icmp_global = { - .lock = __SPIN_LOCK_UNLOCKED(icmp_global.lock), -}; +} icmp_global; /** * icmp_global_allow - Are we allowed to send one more ICMP message ? * * Uses a token bucket to limit our ICMP messages to ~sysctl_icmp_msgs_per_sec. * Returns false if we reached the limit and can not send another packet. - * Note: called with BH disabled + * Works in tandem with icmp_global_consume(). */ bool icmp_global_allow(void) { - u32 credit, delta, incr = 0, now = (u32)jiffies; - bool rc = false; + u32 delta, now, oldstamp; + int incr, new, old; - /* Check if token bucket is empty and cannot be refilled - * without taking the spinlock. The READ_ONCE() are paired - * with the following WRITE_ONCE() in this same function. + /* Note: many cpus could find this condition true. + * Then later icmp_global_consume() could consume more credits, + * this is an acceptable race. */ - if (!READ_ONCE(icmp_global.credit)) { - delta = min_t(u32, now - READ_ONCE(icmp_global.stamp), HZ); - if (delta < HZ / 50) - return false; - } + if (atomic_read(&icmp_global.credit) > 0) + return true; - spin_lock(&icmp_global.lock); - delta = min_t(u32, now - icmp_global.stamp, HZ); - if (delta >= HZ / 50) { - incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ; - if (incr) - WRITE_ONCE(icmp_global.stamp, now); - } - credit = min_t(u32, icmp_global.credit + incr, - READ_ONCE(sysctl_icmp_msgs_burst)); - if (credit) { - /* We want to use a credit of one in average, but need to randomize - * it for security reasons. - */ - credit = max_t(int, credit - get_random_u32_below(3), 0); - rc = true; + now = jiffies; + oldstamp = READ_ONCE(icmp_global.stamp); + delta = min_t(u32, now - oldstamp, HZ); + if (delta < HZ / 50) + return false; + + incr = READ_ONCE(sysctl_icmp_msgs_per_sec) * delta / HZ; + if (!incr) + return false; + + if (cmpxchg(&icmp_global.stamp, oldstamp, now) == oldstamp) { + old = atomic_read(&icmp_global.credit); + do { + new = min(old + incr, READ_ONCE(sysctl_icmp_msgs_burst)); + } while (!atomic_try_cmpxchg(&icmp_global.credit, &old, new)); } - WRITE_ONCE(icmp_global.credit, credit); - spin_unlock(&icmp_global.lock); - return rc; + return true; } EXPORT_SYMBOL(icmp_global_allow); +void icmp_global_consume(void) +{ + int credits = get_random_u32_below(3); + + /* Note: this might make icmp_global.credit negative. */ + if (credits) + atomic_sub(credits, &icmp_global.credit); +} +EXPORT_SYMBOL(icmp_global_consume); + static bool icmpv4_mask_allow(struct net *net, int type, int code) { if (type > NR_ICMP_TYPES) @@ -291,14 +293,16 @@ static bool icmpv4_mask_allow(struct net *net, int type, int code) return false; } -static bool icmpv4_global_allow(struct net *net, int type, int code) +static bool icmpv4_global_allow(struct net *net, int type, int code, + bool *apply_ratelimit) { if (icmpv4_mask_allow(net, type, code)) return true; - if (icmp_global_allow()) + if (icmp_global_allow()) { + *apply_ratelimit = true; return true; - + } __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL); return false; } @@ -308,15 +312,16 @@ static bool icmpv4_global_allow(struct net *net, int type, int code) */ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, - struct flowi4 *fl4, int type, int code) + struct flowi4 *fl4, int type, int code, + bool apply_ratelimit) { struct dst_entry *dst = &rt->dst; struct inet_peer *peer; bool rc = true; int vif; - if (icmpv4_mask_allow(net, type, code)) - goto out; + if (!apply_ratelimit) + return true; /* No rate limit on loopback */ if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) @@ -331,6 +336,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt, out: if (!rc) __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST); + else + icmp_global_consume(); return rc; } @@ -402,6 +409,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct ipcm_cookie ipc; struct rtable *rt = skb_rtable(skb); struct net *net = dev_net(rt->dst.dev); + bool apply_ratelimit = false; struct flowi4 fl4; struct sock *sk; struct inet_sock *inet; @@ -413,11 +421,11 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) if (ip_options_echo(net, &icmp_param->replyopts.opt.opt, skb)) return; - /* Needed by both icmp_global_allow and icmp_xmit_lock */ + /* Needed by both icmpv4_global_allow and icmp_xmit_lock */ local_bh_disable(); - /* global icmp_msgs_per_sec */ - if (!icmpv4_global_allow(net, type, code)) + /* is global icmp_msgs_per_sec exhausted ? */ + if (!icmpv4_global_allow(net, type, code, &apply_ratelimit)) goto out_bh_enable; sk = icmp_xmit_lock(net); @@ -450,7 +458,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) rt = ip_route_output_key(net, &fl4); if (IS_ERR(rt)) goto out_unlock; - if (icmpv4_xrlim_allow(net, rt, &fl4, type, code)) + if (icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit)) icmp_push_reply(sk, icmp_param, &fl4, &ipc, &rt); ip_rt_put(rt); out_unlock: @@ -596,6 +604,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, int room; struct icmp_bxm icmp_param; struct rtable *rt = skb_rtable(skb_in); + bool apply_ratelimit = false; struct ipcm_cookie ipc; struct flowi4 fl4; __be32 saddr; @@ -677,7 +686,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, } } - /* Needed by both icmp_global_allow and icmp_xmit_lock */ + /* Needed by both icmpv4_global_allow and icmp_xmit_lock */ local_bh_disable(); /* Check global sysctl_icmp_msgs_per_sec ratelimit, unless @@ -685,7 +694,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, * loopback, then peer ratelimit still work (in icmpv4_xrlim_allow) */ if (!(skb_in->dev && (skb_in->dev->flags&IFF_LOOPBACK)) && - !icmpv4_global_allow(net, type, code)) + !icmpv4_global_allow(net, type, code, &apply_ratelimit)) goto out_bh_enable; sk = icmp_xmit_lock(net); @@ -744,7 +753,7 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, goto out_unlock; /* peer icmp_ratelimit */ - if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code)) + if (!icmpv4_xrlim_allow(net, rt, &fl4, type, code, apply_ratelimit)) goto ende; /* RFC says return as much as we can without exceeding 576 bytes. */ |