From 21df56c6e2372e09c916111efb6c14c372a5ab2e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 18 Nov 2007 18:48:08 -0800 Subject: [TCP]: Fix TCP header misalignment Indeed my previous change to alloc_pskb has made it possible for the TCP header to be misaligned iff the MTU is not a multiple of 4 (and less than a page). So I suspect the optimised IPsec MTU calculation is giving you just such an MTU :) This patch fixes it by changing alloc_pskb to make sure that the size is at least 32-bit aligned. This does not cause the problem fixed by the previous patch because max_header is always 32-bit aligned which means that in the SG/NOTSO case this will be a no-op. I thought about putting this in the callers but all the current callers are from TCP. If and when we get a non-TCP caller we can always create a TCP wrapper for this function and move the alignment over there. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/sock.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 567e468d7492..67e35c7e230c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1236,6 +1236,9 @@ static inline struct sk_buff *sk_stream_alloc_pskb(struct sock *sk, { struct sk_buff *skb; + /* The TCP header must be at least 32-bit aligned. */ + size = ALIGN(size, 4); + skb = alloc_skb_fclone(size + sk->sk_prot->max_header, gfp); if (skb) { skb->truesize += mem; -- cgit v1.2.3-70-g09d2 From 611cd55b155a89d9a0ce5f92a9cbabc5e284d0d4 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 19 Nov 2007 21:49:25 -0800 Subject: [IPVS]: Fix sysctl warnings about missing strategy Running the latest git code I get the following messages during boot: sysctl table check failed: /net/ipv4/vs/drop_entry .3.5.21.4 Missing strategy [...] sysctl table check failed: /net/ipv4/vs/drop_packet .3.5.21.5 Missing strategy [...] 
sysctl table check failed: /net/ipv4/vs/secure_tcp .3.5.21.6 Missing strategy [...] sysctl table check failed: /net/ipv4/vs/sync_threshold .3.5.21.24 Missing strategy I removed the binary sysctl handler for those messages and also removed the definitions in ip_vs.h. The alternative would be to implement a proper strategy handler, but syscall sysctl is deprecated. There are other sysctl definitions that are commented out or work with the default sysctl_data strategy. I did not touch these. Signed-off-by: Christian Borntraeger Acked-by: Simon Horman Signed-off-by: David S. Miller --- include/net/ip_vs.h | 4 ---- kernel/sysctl_check.c | 4 ---- net/ipv4/ipvs/ip_vs_ctl.c | 4 ---- 3 files changed, 12 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 67ea2c0c0ab7..b362c8c14979 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -336,9 +336,6 @@ enum { NET_IPV4_VS_DEBUG_LEVEL=1, NET_IPV4_VS_AMEMTHRESH=2, NET_IPV4_VS_AMDROPRATE=3, - NET_IPV4_VS_DROP_ENTRY=4, - NET_IPV4_VS_DROP_PACKET=5, - NET_IPV4_VS_SECURE_TCP=6, NET_IPV4_VS_TO_ES=7, NET_IPV4_VS_TO_SS=8, NET_IPV4_VS_TO_SR=9, @@ -355,7 +352,6 @@ enum { NET_IPV4_VS_LBLCR_EXPIRE=20, NET_IPV4_VS_CACHE_BYPASS=22, NET_IPV4_VS_EXPIRE_NODEST_CONN=23, - NET_IPV4_VS_SYNC_THRESHOLD=24, NET_IPV4_VS_NAT_ICMP_SEND=25, NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE=26, NET_IPV4_VS_LAST diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index 4abc6d2306f4..9e1749760371 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -242,9 +242,6 @@ static struct trans_ctl_table trans_net_ipv4_vs_table[] = { { NET_IPV4_VS_AMEMTHRESH, "amemthresh" }, { NET_IPV4_VS_DEBUG_LEVEL, "debug_level" }, { NET_IPV4_VS_AMDROPRATE, "am_droprate" }, - { NET_IPV4_VS_DROP_ENTRY, "drop_entry" }, - { NET_IPV4_VS_DROP_PACKET, "drop_packet" }, - { NET_IPV4_VS_SECURE_TCP, "secure_tcp" }, { NET_IPV4_VS_TO_ES, "timeout_established" }, { NET_IPV4_VS_TO_SS, "timeout_synsent" }, { NET_IPV4_VS_TO_SR, 
"timeout_synrecv" }, @@ -260,7 +257,6 @@ static struct trans_ctl_table trans_net_ipv4_vs_table[] = { { NET_IPV4_VS_CACHE_BYPASS, "cache_bypass" }, { NET_IPV4_VS_EXPIRE_NODEST_CONN, "expire_nodest_conn" }, { NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE, "expire_quiescent_template" }, - { NET_IPV4_VS_SYNC_THRESHOLD, "sync_threshold" }, { NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send" }, { NET_IPV4_VS_LBLC_EXPIRE, "lblc_expiration" }, { NET_IPV4_VS_LBLCR_EXPIRE, "lblcr_expiration" }, diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index b64cf45a9ead..06b8ae0e9c50 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -1450,7 +1450,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = NET_IPV4_VS_DROP_ENTRY, .procname = "drop_entry", .data = &sysctl_ip_vs_drop_entry, .maxlen = sizeof(int), @@ -1458,7 +1457,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_do_defense_mode, }, { - .ctl_name = NET_IPV4_VS_DROP_PACKET, .procname = "drop_packet", .data = &sysctl_ip_vs_drop_packet, .maxlen = sizeof(int), @@ -1466,7 +1464,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_do_defense_mode, }, { - .ctl_name = NET_IPV4_VS_SECURE_TCP, .procname = "secure_tcp", .data = &sysctl_ip_vs_secure_tcp, .maxlen = sizeof(int), @@ -1596,7 +1593,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = NET_IPV4_VS_SYNC_THRESHOLD, .procname = "sync_threshold", .data = &sysctl_ip_vs_sync_threshold, .maxlen = sizeof(sysctl_ip_vs_sync_threshold), -- cgit v1.2.3-70-g09d2 From 9e103fa6bd53147e228e941256803a6b8927cdb9 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 19 Nov 2007 21:50:21 -0800 Subject: [IPVS]: Fix sysctl warnings about missing strategy in schedulers sysctl table check failed: /net/ipv4/vs/lblc_expiration .3.5.21.19 Missing strategy [...] 
sysctl table check failed: /net/ipv4/vs/lblcr_expiration .3.5.21.20 Missing strategy Switch these entries over to use CTL_UNNUMBERED as clearly the sys_sysctl portion wasn't working. This is along the same lines as Christian Borntraeger's patch that fixes up entries with no strategy in net/ipv4/ipvs/ip_vs_ctl.c Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/net/ip_vs.h | 2 -- kernel/sysctl_check.c | 2 -- net/ipv4/ipvs/ip_vs_lblc.c | 1 - net/ipv4/ipvs/ip_vs_lblcr.c | 1 - 4 files changed, 6 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index b362c8c14979..f1c41eede993 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -348,8 +348,6 @@ enum { NET_IPV4_VS_TO_SA=16, NET_IPV4_VS_TO_UDP=17, NET_IPV4_VS_TO_ICMP=18, - NET_IPV4_VS_LBLC_EXPIRE=19, - NET_IPV4_VS_LBLCR_EXPIRE=20, NET_IPV4_VS_CACHE_BYPASS=22, NET_IPV4_VS_EXPIRE_NODEST_CONN=23, NET_IPV4_VS_NAT_ICMP_SEND=25, diff --git a/kernel/sysctl_check.c b/kernel/sysctl_check.c index 9e1749760371..cffb4adf138a 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -258,8 +258,6 @@ static struct trans_ctl_table trans_net_ipv4_vs_table[] = { { NET_IPV4_VS_EXPIRE_NODEST_CONN, "expire_nodest_conn" }, { NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE, "expire_quiescent_template" }, { NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send" }, - { NET_IPV4_VS_LBLC_EXPIRE, "lblc_expiration" }, - { NET_IPV4_VS_LBLCR_EXPIRE, "lblcr_expiration" }, {} }; diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 052f4ed59174..7159f9c18f72 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -114,7 +114,6 @@ struct ip_vs_lblc_table { static ctl_table vs_vars_table[] = { { - .ctl_name = NET_IPV4_VS_LBLC_EXPIRE, .procname = "lblc_expiration", .data = &sysctl_ip_vs_lblc_expiration, .maxlen = sizeof(int), diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 427b593c1069..96d9b818d99d 100644 --- 
a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -302,7 +302,6 @@ struct ip_vs_lblcr_table { static ctl_table vs_vars_table[] = { { - .ctl_name = NET_IPV4_VS_LBLCR_EXPIRE, .procname = "lblcr_expiration", .data = &sysctl_ip_vs_lblcr_expiration, .maxlen = sizeof(int), -- cgit v1.2.3-70-g09d2 From 9055fa1f3ded5ad858a55ae18439ed55227ee7eb Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 19 Nov 2007 21:51:13 -0800 Subject: [IPVS]: Move remaining sysctl handlers over to CTL_UNNUMBERED Switch the remaining IPVS sysctl entries over to use CTL_UNNUMBERED; I strongly doubt that anyone is using the sys_sysctl interface to these variables. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/net/ip_vs.h | 28 ---------------------------- kernel/sysctl_check.c | 25 ------------------------- net/ipv4/ipvs/ip_vs_ctl.c | 20 -------------------- net/ipv4/ipvs/ip_vs_lblc.c | 1 - net/ipv4/ipvs/ip_vs_lblcr.c | 1 - 5 files changed, 75 deletions(-) (limited to 'include') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index f1c41eede993..8a7d59be8a0d 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -327,34 +327,6 @@ extern int ip_vs_get_debug_level(void); #define FTPPORT __constant_htons(21) #define FTPDATA __constant_htons(20) -/* - * IPVS sysctl variables under the /proc/sys/net/ipv4/vs/ - */ -#define NET_IPV4_VS 21 - -enum { - NET_IPV4_VS_DEBUG_LEVEL=1, - NET_IPV4_VS_AMEMTHRESH=2, - NET_IPV4_VS_AMDROPRATE=3, - NET_IPV4_VS_TO_ES=7, - NET_IPV4_VS_TO_SS=8, - NET_IPV4_VS_TO_SR=9, - NET_IPV4_VS_TO_FW=10, - NET_IPV4_VS_TO_TW=11, - NET_IPV4_VS_TO_CL=12, - NET_IPV4_VS_TO_CW=13, - NET_IPV4_VS_TO_LA=14, - NET_IPV4_VS_TO_LI=15, - NET_IPV4_VS_TO_SA=16, - NET_IPV4_VS_TO_UDP=17, - NET_IPV4_VS_TO_ICMP=18, - NET_IPV4_VS_CACHE_BYPASS=22, - NET_IPV4_VS_EXPIRE_NODEST_CONN=23, - NET_IPV4_VS_NAT_ICMP_SEND=25, - NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE=26, - NET_IPV4_VS_LAST -}; - /* * TCP State Values */ diff --git a/kernel/sysctl_check.c 
b/kernel/sysctl_check.c index cffb4adf138a..fdfca0dd9905 100644 --- a/kernel/sysctl_check.c +++ b/kernel/sysctl_check.c @@ -237,30 +237,6 @@ static struct trans_ctl_table trans_net_ipv4_conf_table[] = { {} }; - -static struct trans_ctl_table trans_net_ipv4_vs_table[] = { - { NET_IPV4_VS_AMEMTHRESH, "amemthresh" }, - { NET_IPV4_VS_DEBUG_LEVEL, "debug_level" }, - { NET_IPV4_VS_AMDROPRATE, "am_droprate" }, - { NET_IPV4_VS_TO_ES, "timeout_established" }, - { NET_IPV4_VS_TO_SS, "timeout_synsent" }, - { NET_IPV4_VS_TO_SR, "timeout_synrecv" }, - { NET_IPV4_VS_TO_FW, "timeout_finwait" }, - { NET_IPV4_VS_TO_TW, "timeout_timewait" }, - { NET_IPV4_VS_TO_CL, "timeout_close" }, - { NET_IPV4_VS_TO_CW, "timeout_closewait" }, - { NET_IPV4_VS_TO_LA, "timeout_lastack" }, - { NET_IPV4_VS_TO_LI, "timeout_listen" }, - { NET_IPV4_VS_TO_SA, "timeout_synack" }, - { NET_IPV4_VS_TO_UDP, "timeout_udp" }, - { NET_IPV4_VS_TO_ICMP, "timeout_icmp" }, - { NET_IPV4_VS_CACHE_BYPASS, "cache_bypass" }, - { NET_IPV4_VS_EXPIRE_NODEST_CONN, "expire_nodest_conn" }, - { NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE, "expire_quiescent_template" }, - { NET_IPV4_VS_NAT_ICMP_SEND, "nat_icmp_send" }, - {} -}; - static struct trans_ctl_table trans_net_neigh_vars_table[] = { { NET_NEIGH_MCAST_SOLICIT, "mcast_solicit" }, { NET_NEIGH_UCAST_SOLICIT, "ucast_solicit" }, @@ -335,7 +311,6 @@ static struct trans_ctl_table trans_net_ipv4_table[] = { { NET_IPV4_ROUTE, "route", trans_net_ipv4_route_table }, /* NET_IPV4_FIB_HASH unused */ { NET_IPV4_NETFILTER, "netfilter", trans_net_ipv4_netfilter_table }, - { NET_IPV4_VS, "vs", trans_net_ipv4_vs_table }, { NET_IPV4_TCP_TIMESTAMPS, "tcp_timestamps" }, { NET_IPV4_TCP_WINDOW_SCALING, "tcp_window_scaling" }, diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 06b8ae0e9c50..693d92490c11 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -1424,7 +1424,6 @@ proc_do_sync_threshold(ctl_table *table, int write, struct file *filp, static struct 
ctl_table vs_vars[] = { { - .ctl_name = NET_IPV4_VS_AMEMTHRESH, .procname = "amemthresh", .data = &sysctl_ip_vs_amemthresh, .maxlen = sizeof(int), @@ -1433,7 +1432,6 @@ static struct ctl_table vs_vars[] = { }, #ifdef CONFIG_IP_VS_DEBUG { - .ctl_name = NET_IPV4_VS_DEBUG_LEVEL, .procname = "debug_level", .data = &sysctl_ip_vs_debug_level, .maxlen = sizeof(int), @@ -1442,7 +1440,6 @@ static struct ctl_table vs_vars[] = { }, #endif { - .ctl_name = NET_IPV4_VS_AMDROPRATE, .procname = "am_droprate", .data = &sysctl_ip_vs_am_droprate, .maxlen = sizeof(int), @@ -1472,7 +1469,6 @@ static struct ctl_table vs_vars[] = { }, #if 0 { - .ctl_name = NET_IPV4_VS_TO_ES, .procname = "timeout_established", .data = &vs_timeout_table_dos.timeout[IP_VS_S_ESTABLISHED], .maxlen = sizeof(int), @@ -1480,7 +1476,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_SS, .procname = "timeout_synsent", .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_SENT], .maxlen = sizeof(int), @@ -1488,7 +1483,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_SR, .procname = "timeout_synrecv", .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYN_RECV], .maxlen = sizeof(int), @@ -1496,7 +1490,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_FW, .procname = "timeout_finwait", .data = &vs_timeout_table_dos.timeout[IP_VS_S_FIN_WAIT], .maxlen = sizeof(int), @@ -1504,7 +1497,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_TW, .procname = "timeout_timewait", .data = &vs_timeout_table_dos.timeout[IP_VS_S_TIME_WAIT], .maxlen = sizeof(int), @@ -1512,7 +1504,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_CL, .procname = "timeout_close", .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE], 
.maxlen = sizeof(int), @@ -1520,7 +1511,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_CW, .procname = "timeout_closewait", .data = &vs_timeout_table_dos.timeout[IP_VS_S_CLOSE_WAIT], .maxlen = sizeof(int), @@ -1528,7 +1518,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_LA, .procname = "timeout_lastack", .data = &vs_timeout_table_dos.timeout[IP_VS_S_LAST_ACK], .maxlen = sizeof(int), @@ -1536,7 +1525,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_LI, .procname = "timeout_listen", .data = &vs_timeout_table_dos.timeout[IP_VS_S_LISTEN], .maxlen = sizeof(int), @@ -1544,7 +1532,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_SA, .procname = "timeout_synack", .data = &vs_timeout_table_dos.timeout[IP_VS_S_SYNACK], .maxlen = sizeof(int), @@ -1552,7 +1539,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_UDP, .procname = "timeout_udp", .data = &vs_timeout_table_dos.timeout[IP_VS_S_UDP], .maxlen = sizeof(int), @@ -1560,7 +1546,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec_jiffies, }, { - .ctl_name = NET_IPV4_VS_TO_ICMP, .procname = "timeout_icmp", .data = &vs_timeout_table_dos.timeout[IP_VS_S_ICMP], .maxlen = sizeof(int), @@ -1569,7 +1554,6 @@ static struct ctl_table vs_vars[] = { }, #endif { - .ctl_name = NET_IPV4_VS_CACHE_BYPASS, .procname = "cache_bypass", .data = &sysctl_ip_vs_cache_bypass, .maxlen = sizeof(int), @@ -1577,7 +1561,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_dointvec, }, { - .ctl_name = NET_IPV4_VS_EXPIRE_NODEST_CONN, .procname = "expire_nodest_conn", .data = &sysctl_ip_vs_expire_nodest_conn, .maxlen = sizeof(int), @@ -1585,7 +1568,6 @@ static struct ctl_table vs_vars[] 
= { .proc_handler = &proc_dointvec, }, { - .ctl_name = NET_IPV4_VS_EXPIRE_QUIESCENT_TEMPLATE, .procname = "expire_quiescent_template", .data = &sysctl_ip_vs_expire_quiescent_template, .maxlen = sizeof(int), @@ -1600,7 +1582,6 @@ static struct ctl_table vs_vars[] = { .proc_handler = &proc_do_sync_threshold, }, { - .ctl_name = NET_IPV4_VS_NAT_ICMP_SEND, .procname = "nat_icmp_send", .data = &sysctl_ip_vs_nat_icmp_send, .maxlen = sizeof(int), @@ -1612,7 +1593,6 @@ static struct ctl_table vs_vars[] = { static ctl_table vs_table[] = { { - .ctl_name = NET_IPV4_VS, .procname = "vs", .mode = 0555, .child = vs_vars diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 7159f9c18f72..b843a11d7cf7 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -125,7 +125,6 @@ static ctl_table vs_vars_table[] = { static ctl_table vs_table[] = { { - .ctl_name = NET_IPV4_VS, .procname = "vs", .mode = 0555, .child = vs_vars_table diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 96d9b818d99d..e5b323a6b2f7 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -313,7 +313,6 @@ static ctl_table vs_vars_table[] = { static ctl_table vs_table[] = { { - .ctl_name = NET_IPV4_VS, .procname = "vs", .mode = 0555, .child = vs_vars_table -- cgit v1.2.3-70-g09d2 From 6e42141009ff18297fe19d19296738b742f861db Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Mon, 19 Nov 2007 23:24:09 -0800 Subject: [TCP] MTUprobe: fix potential sk_send_head corruption MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When the abstraction functions got added, conversion here was made incorrectly. As a result, the skb may end up pointing to skb which got included to the probe skb and then was freed. For it to trigger, however, skb_transmit must fail sending as well. Signed-off-by: Ilpo Järvinen Signed-off-by: David S. 
Miller --- include/net/tcp.h | 3 +++ net/ipv4/tcp_output.c | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index d695cea7730d..cb5b033e0e59 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1288,6 +1288,9 @@ static inline void tcp_insert_write_queue_before(struct sk_buff *new, struct sock *sk) { __skb_insert(new, skb->prev, skb, &sk->sk_write_queue); + + if (sk->sk_send_head == skb) + sk->sk_send_head = new; } static inline void tcp_unlink_write_queue(struct sk_buff *skb, struct sock *sk) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 324b4207254a..e48e28e7d539 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1352,7 +1352,6 @@ static int tcp_mtu_probe(struct sock *sk) skb = tcp_send_head(sk); tcp_insert_write_queue_before(nskb, skb, sk); - tcp_advance_send_head(sk, skb); TCP_SKB_CB(nskb)->seq = TCP_SKB_CB(skb)->seq; TCP_SKB_CB(nskb)->end_seq = TCP_SKB_CB(skb)->seq + probe_size; -- cgit v1.2.3-70-g09d2 From 92468c53cf5af0aea06caec7b7d416c18e973685 Mon Sep 17 00:00:00 2001 From: Guillaume Chazarain Date: Mon, 19 Nov 2007 10:07:00 +0100 Subject: ieee80211: Stop net_ratelimit/IEEE80211_DEBUG_DROP log pollution if (net_ratelimit()) IEEE80211_DEBUG_DROP(...) can pollute the logs with messages like: printk: 1 messages suppressed. printk: 2 messages suppressed. printk: 7 messages suppressed. if debugging information is disabled. These messages are printed by net_ratelimit(). Add a wrapper to net_ratelimit() that takes into account the log level, so that net_ratelimit() is called only when we really want to print something. Signed-off-by: Guillaume Chazarain Signed-off-by: John W. 
Linville --- include/net/ieee80211.h | 8 ++++++++ net/ieee80211/ieee80211_crypt_ccmp.c | 2 +- net/ieee80211/ieee80211_crypt_tkip.c | 4 ++-- 3 files changed, 11 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index 164d13211165..d8ae48439f12 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -115,8 +115,16 @@ extern u32 ieee80211_debug_level; do { if (ieee80211_debug_level & (level)) \ printk(KERN_DEBUG "ieee80211: %c %s " fmt, \ in_interrupt() ? 'I' : 'U', __FUNCTION__ , ## args); } while (0) +static inline bool ieee80211_ratelimit_debug(u32 level) +{ + return (ieee80211_debug_level & level) && net_ratelimit(); +} #else #define IEEE80211_DEBUG(level, fmt, args...) do {} while (0) +static inline bool ieee80211_ratelimit_debug(u32 level) +{ + return false; +} #endif /* CONFIG_IEEE80211_DEBUG */ /* escape_essid() is intended to be used in debug (and possibly error) diff --git a/net/ieee80211/ieee80211_crypt_ccmp.c b/net/ieee80211/ieee80211_crypt_ccmp.c index c6d760d9fbbe..208bf35b5546 100644 --- a/net/ieee80211/ieee80211_crypt_ccmp.c +++ b/net/ieee80211/ieee80211_crypt_ccmp.c @@ -338,7 +338,7 @@ static int ieee80211_ccmp_decrypt(struct sk_buff *skb, int hdr_len, void *priv) pos += 8; if (ccmp_replay_check(pn, key->rx_pn)) { - if (net_ratelimit()) { + if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) { IEEE80211_DEBUG_DROP("CCMP: replay detected: STA=%s " "previous PN %02x%02x%02x%02x%02x%02x " "received PN %02x%02x%02x%02x%02x%02x\n", diff --git a/net/ieee80211/ieee80211_crypt_tkip.c b/net/ieee80211/ieee80211_crypt_tkip.c index 58b22619ab15..8e146949fc6f 100644 --- a/net/ieee80211/ieee80211_crypt_tkip.c +++ b/net/ieee80211/ieee80211_crypt_tkip.c @@ -464,7 +464,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) pos += 8; if (tkip_replay_check(iv32, iv16, tkey->rx_iv32, tkey->rx_iv16)) { - if (net_ratelimit()) { + if 
(ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) { IEEE80211_DEBUG_DROP("TKIP: replay detected: STA=%s" " previous TSC %08x%04x received TSC " "%08x%04x\n", print_mac(mac, hdr->addr2), @@ -504,7 +504,7 @@ static int ieee80211_tkip_decrypt(struct sk_buff *skb, int hdr_len, void *priv) * it needs to be recalculated for the next packet. */ tkey->rx_phase1_done = 0; } - if (net_ratelimit()) { + if (ieee80211_ratelimit_debug(IEEE80211_DL_DROP)) { IEEE80211_DEBUG_DROP("TKIP: ICV error detected: STA=" "%s\n", print_mac(mac, hdr->addr2)); } -- cgit v1.2.3-70-g09d2 From 5fe4a33430d90243ff93a77ea31e20f7557bca8a Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Thu, 22 Nov 2007 19:38:25 +0800 Subject: [SUNRPC]: Make xprtsock.c:xs_setup_{udp,tcp}() static xs_setup_{udp,tcp}() can now become static. Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: Herbert Xu --- include/linux/sunrpc/xprtsock.h | 6 ------ net/sunrpc/xprtsock.c | 4 ++-- 2 files changed, 2 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 2c6c2c2783d8..c2a46c45c8f7 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -9,12 +9,6 @@ #ifdef __KERNEL__ -/* - * Socket transport setup operations - */ -struct rpc_xprt *xs_setup_udp(struct xprt_create *args); -struct rpc_xprt *xs_setup_tcp(struct xprt_create *args); - int init_socket_xprt(void); void cleanup_socket_xprt(void); diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 02298f529dad..2f630a512ab7 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -1828,7 +1828,7 @@ static struct rpc_xprt *xs_setup_xprt(struct xprt_create *args, * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_udp(struct xprt_create *args) +static struct rpc_xprt *xs_setup_udp(struct xprt_create *args) { struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; @@ -1894,7 +1894,7 @@ struct rpc_xprt 
*xs_setup_udp(struct xprt_create *args) * @args: rpc transport creation arguments * */ -struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) +static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) { struct sockaddr *addr = args->dstaddr; struct rpc_xprt *xprt; -- cgit v1.2.3-70-g09d2 From 218ad12f42e0b6207105cde8fd13017d1ed449e4 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Mon, 26 Nov 2007 20:23:31 +0800 Subject: [IPV4]: Fix memory leak in inet_hashtables.h when NUMA is on The inet_ehash_locks_alloc() looks like this: #ifdef CONFIG_NUMA if (size > PAGE_SIZE) x = vmalloc(...); else #endif x = kmalloc(...); Unlike it, the inet_ehash_locks_free() looks like this: #ifdef CONFIG_NUMA if (size > PAGE_SIZE) vfree(x); else #else kfree(x); #endif The error is obvious - if the NUMA is on and the size is less than the PAGE_SIZE we leak the pointer (kfree is inside the #else branch). Compiler doesn't warn us because after the kfree(x) there's a "x = NULL" assignment, so here's another (minor?) bug: we don't set x to NULL under certain circumstances. Boring explanation, I know... Patch explains it better. 
Signed-off-by: Pavel Emelyanov Signed-off-by: Herbert Xu --- include/net/inet_hashtables.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 469216d93663..37f6cb112127 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -186,9 +186,8 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo) if (size > PAGE_SIZE) vfree(hashinfo->ehash_locks); else -#else - kfree(hashinfo->ehash_locks); #endif + kfree(hashinfo->ehash_locks); hashinfo->ehash_locks = NULL; } } -- cgit v1.2.3-70-g09d2 From 2d4baff8da06f424a6fca10e26434c4926a7c3df Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Mon, 26 Nov 2007 23:11:19 +0800 Subject: [SKBUFF]: Free old skb properly in skb_morph The skb_morph function only freed the data part of the dst skb, but leaked the auxiliary data such as the netfilter fields. This patch fixes this by moving the relevant parts from __kfree_skb to skb_release_all and calling it in skb_morph. It also makes kfree_skbmem static since it's no longer called anywhere else and it now no longer does skb_release_data. Thanks to Yasuyuki KOZAKAI for finding this problem and posting a patch for it. 
Signed-off-by: Herbert Xu --- include/linux/skbuff.h | 1 - net/core/skbuff.c | 31 ++++++++++++++++++------------- 2 files changed, 18 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 91140fe8c119..bddd50bd6878 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -356,7 +356,6 @@ static inline struct sk_buff *alloc_skb_fclone(unsigned int size, return __alloc_skb(size, priority, 1, -1); } -extern void kfree_skbmem(struct sk_buff *skb); extern struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src); extern struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t priority); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 32d5826b7177..5b4ce9b4dd20 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -275,12 +275,11 @@ static void skb_release_data(struct sk_buff *skb) /* * Free an skbuff by memory without cleaning the state. */ -void kfree_skbmem(struct sk_buff *skb) +static void kfree_skbmem(struct sk_buff *skb) { struct sk_buff *other; atomic_t *fclone_ref; - skb_release_data(skb); switch (skb->fclone) { case SKB_FCLONE_UNAVAILABLE: kmem_cache_free(skbuff_head_cache, skb); @@ -307,16 +306,8 @@ void kfree_skbmem(struct sk_buff *skb) } } -/** - * __kfree_skb - private function - * @skb: buffer - * - * Free an sk_buff. Release anything attached to the buffer. - * Clean the state. This is an internal helper function. Users should - * always call kfree_skb - */ - -void __kfree_skb(struct sk_buff *skb) +/* Free everything but the sk_buff shell. */ +static void skb_release_all(struct sk_buff *skb) { dst_release(skb->dst); #ifdef CONFIG_XFRM @@ -340,7 +331,21 @@ void __kfree_skb(struct sk_buff *skb) skb->tc_verd = 0; #endif #endif + skb_release_data(skb); +} + +/** + * __kfree_skb - private function + * @skb: buffer + * + * Free an sk_buff. Release anything attached to the buffer. + * Clean the state. This is an internal helper function. 
Users should + * always call kfree_skb + */ +void __kfree_skb(struct sk_buff *skb) +{ + skb_release_all(skb); kfree_skbmem(skb); } @@ -441,7 +446,7 @@ static struct sk_buff *__skb_clone(struct sk_buff *n, struct sk_buff *skb) */ struct sk_buff *skb_morph(struct sk_buff *dst, struct sk_buff *src) { - skb_release_data(dst); + skb_release_all(dst); return __skb_clone(dst, src); } EXPORT_SYMBOL_GPL(skb_morph); -- cgit v1.2.3-70-g09d2