diff options
Diffstat (limited to 'net')
45 files changed, 388 insertions, 252 deletions
diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 2e692afdc55d..43923811bd6a 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -2188,12 +2188,13 @@ static inline int f_pick(struct pktgen_dev *pkt_dev) /* If there was already an IPSEC SA, we keep it as is, else * we go look for it ... */ +#define DUMMY_MARK 0 static void get_ipsec_sa(struct pktgen_dev *pkt_dev, int flow) { struct xfrm_state *x = pkt_dev->flows[flow].x; if (!x) { /*slow path: we dont already have xfrm_state*/ - x = xfrm_stateonly_find(&init_net, + x = xfrm_stateonly_find(&init_net, DUMMY_MARK, (xfrm_address_t *)&pkt_dev->cur_daddr, (xfrm_address_t *)&pkt_dev->cur_saddr, AF_INET, diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index db9f5b39388f..813e399220a7 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -54,7 +54,7 @@ MODULE_LICENSE("GPL"); /**************** DCB attribute policies *************************************/ /* DCB netlink attributes policy */ -static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { [DCB_ATTR_IFNAME] = {.type = NLA_NUL_STRING, .len = IFNAMSIZ - 1}, [DCB_ATTR_STATE] = {.type = NLA_U8}, [DCB_ATTR_PFC_CFG] = {.type = NLA_NESTED}, @@ -68,7 +68,7 @@ static struct nla_policy dcbnl_rtnl_policy[DCB_ATTR_MAX + 1] = { }; /* DCB priority flow control to User Priority nested attributes */ -static struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = { [DCB_PFC_UP_ATTR_0] = {.type = NLA_U8}, [DCB_PFC_UP_ATTR_1] = {.type = NLA_U8}, [DCB_PFC_UP_ATTR_2] = {.type = NLA_U8}, @@ -81,7 +81,7 @@ static struct nla_policy dcbnl_pfc_up_nest[DCB_PFC_UP_ATTR_MAX + 1] = { }; /* DCB priority grouping nested attributes */ -static struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = { [DCB_PG_ATTR_TC_0] = {.type = NLA_NESTED}, [DCB_PG_ATTR_TC_1] = {.type = NLA_NESTED}, [DCB_PG_ATTR_TC_2] = {.type = NLA_NESTED}, @@ -103,7 +103,7 @@ static struct nla_policy dcbnl_pg_nest[DCB_PG_ATTR_MAX + 1] = { }; /* DCB traffic class nested attributes. */ -static struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = { +static const struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = { [DCB_TC_ATTR_PARAM_PGID] = {.type = NLA_U8}, [DCB_TC_ATTR_PARAM_UP_MAPPING] = {.type = NLA_U8}, [DCB_TC_ATTR_PARAM_STRICT_PRIO] = {.type = NLA_U8}, @@ -112,7 +112,7 @@ static struct nla_policy dcbnl_tc_param_nest[DCB_TC_ATTR_PARAM_MAX + 1] = { }; /* DCB capabilities nested attributes. */ -static struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = { [DCB_CAP_ATTR_ALL] = {.type = NLA_FLAG}, [DCB_CAP_ATTR_PG] = {.type = NLA_U8}, [DCB_CAP_ATTR_PFC] = {.type = NLA_U8}, @@ -124,14 +124,14 @@ static struct nla_policy dcbnl_cap_nest[DCB_CAP_ATTR_MAX + 1] = { }; /* DCB capabilities nested attributes. */ -static struct nla_policy dcbnl_numtcs_nest[DCB_NUMTCS_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_numtcs_nest[DCB_NUMTCS_ATTR_MAX + 1] = { [DCB_NUMTCS_ATTR_ALL] = {.type = NLA_FLAG}, [DCB_NUMTCS_ATTR_PG] = {.type = NLA_U8}, [DCB_NUMTCS_ATTR_PFC] = {.type = NLA_U8}, }; /* DCB BCN nested attributes. */ -static struct nla_policy dcbnl_bcn_nest[DCB_BCN_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_bcn_nest[DCB_BCN_ATTR_MAX + 1] = { [DCB_BCN_ATTR_RP_0] = {.type = NLA_U8}, [DCB_BCN_ATTR_RP_1] = {.type = NLA_U8}, [DCB_BCN_ATTR_RP_2] = {.type = NLA_U8}, @@ -160,7 +160,7 @@ static struct nla_policy dcbnl_bcn_nest[DCB_BCN_ATTR_MAX + 1] = { }; /* DCB APP nested attributes. */ -static struct nla_policy dcbnl_app_nest[DCB_APP_ATTR_MAX + 1] = { +static const struct nla_policy dcbnl_app_nest[DCB_APP_ATTR_MAX + 1] = { [DCB_APP_ATTR_IDTYPE] = {.type = NLA_U8}, [DCB_APP_ATTR_ID] = {.type = NLA_U16}, [DCB_APP_ATTR_PRIORITY] = {.type = NLA_U8}, diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 7ed3e4ae93ae..987b47dc69ad 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -393,7 +393,7 @@ static void ah4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA AH/%08x/%08x\n", diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 1948895beb6d..14ca1f1c3fb0 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -422,7 +422,7 @@ static void esp4_err(struct sk_buff *skb, u32 info) icmp_hdr(skb)->code != ICMP_FRAG_NEEDED) return; - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET); if (!x) return; NETDEBUG(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%08x\n", diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index a2a5983dbf03..c0c5274d0271 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -793,7 +793,7 @@ static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev } if (mtu >= IPV6_MIN_MTU && mtu < skb->len - tunnel->hlen + gre_hlen) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; } diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 83ed71500898..629067571f02 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -36,7 +36,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info) return; spi = htonl(ntohs(ipch->cpi)); - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET); if (!x) return; @@ -63,6 +63,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->props.mode = x->props.mode; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; + memcpy(&t->mark, &x->mark, sizeof(t->mark)); if (xfrm_init_state(t)) goto error; @@ -87,8 +88,9 @@ static int ipcomp_tunnel_attach(struct xfrm_state *x) struct net *net = xs_net(x); int err = 0; struct xfrm_state *t; + u32 mark = x->mark.v & x->mark.m; - t = xfrm_state_lookup(net, (xfrm_address_t *)&x->id.daddr.a4, + t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr.a4, x->props.saddr.a4, IPPROTO_IPIP, AF_INET); if (!t) { t = ipcomp_tunnel_create(x); diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 7e3712ce3994..c1bc074f61b7 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -576,6 +576,20 @@ static struct ctl_table ipv4_table[] = { .proc_handler = proc_dointvec }, { + .procname = "tcp_thin_linear_timeouts", + .data = &sysctl_tcp_thin_linear_timeouts, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "tcp_thin_dupack", + .data = &sysctl_tcp_thin_dupack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { .procname = "udp_mem", .data = &sysctl_udp_mem, .maxlen = sizeof(sysctl_udp_mem), diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e471d037fcc9..5901010fad55 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2229,6 +2229,20 @@ static int do_tcp_setsockopt(struct sock *sk, int level, } break; + case TCP_THIN_LINEAR_TIMEOUTS: + if (val < 0 || val > 1) + err = -EINVAL; + else + tp->thin_lto = val; + break; + + case TCP_THIN_DUPACK: + if (val < 0 || val > 1) + err = -EINVAL; + else + tp->thin_dupack = val; + break; + case TCP_CORK: /* When set indicates to always queue non-full frames. * Later the user clears this option and we transmit diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 3fddc69ccccc..788851ca8c5d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -89,6 +89,8 @@ int sysctl_tcp_frto __read_mostly = 2; int sysctl_tcp_frto_response __read_mostly; int sysctl_tcp_nometrics_save __read_mostly; +int sysctl_tcp_thin_dupack __read_mostly; + int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_abc __read_mostly; @@ -2447,6 +2449,16 @@ static int tcp_time_to_recover(struct sock *sk) return 1; } + /* If a thin stream is detected, retransmit after first + * received dupack. Employ only if SACK is supported in order + * to avoid possible corner-case series of spurious retransmissions + * Use only if there are no unsent data. + */ + if ((tp->thin_dupack || sysctl_tcp_thin_dupack) && + tcp_stream_is_thin(tp) && tcp_dupack_heuristics(tp) > 1 && + tcp_is_sack(tp) && !tcp_send_head(sk)) + return 1; + return 0; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index de7d1bf9114f..a17629b8912e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -29,6 +29,7 @@ int sysctl_tcp_keepalive_intvl __read_mostly = TCP_KEEPALIVE_INTVL; int sysctl_tcp_retries1 __read_mostly = TCP_RETR1; int sysctl_tcp_retries2 __read_mostly = TCP_RETR2; int sysctl_tcp_orphan_retries __read_mostly; +int sysctl_tcp_thin_linear_timeouts __read_mostly; static void tcp_write_timer(unsigned long); static void tcp_delack_timer(unsigned long); @@ -415,7 +416,25 @@ void tcp_retransmit_timer(struct sock *sk) icsk->icsk_retransmits++; out_reset_timer: - icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); + /* If stream is thin, use linear timeouts. Since 'icsk_backoff' is + * used to reset timer, set to 0. Recalculate 'icsk_rto' as this + * might be increased if the stream oscillates between thin and thick, + * thus the old value might already be too high compared to the value + * set by 'tcp_set_rto' in tcp_input.c which resets the rto without + * backoff. Limit to TCP_THIN_LINEAR_RETRIES before initiating + * exponential backoff behaviour to avoid continue hammering + * linear-timeout retransmissions into a black hole + */ + if (sk->sk_state == TCP_ESTABLISHED && + (tp->thin_lto || sysctl_tcp_thin_linear_timeouts) && + tcp_stream_is_thin(tp) && + icsk->icsk_retransmits <= TCP_THIN_LINEAR_RETRIES) { + icsk->icsk_backoff = 0; + icsk->icsk_rto = min(__tcp_set_rto(tp), TCP_RTO_MAX); + } else { + /* Use normal (exponential) backoff */ + icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); + } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) __sk_dst_reset(sk); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index c2f300c314be..5ac89025f9de 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -614,7 +614,7 @@ static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, ah->spi, IPPROTO_AH, AF_INET6); if (!x) return; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 668a46b655e6..ee9b93bdd6a2 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -365,7 +365,7 @@ static void esp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, type != ICMPV6_PKT_TOOBIG) return; - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, esph->spi, IPPROTO_ESP, AF_INET6); if (!x) return; printk(KERN_DEBUG "pmtu discovery on SA ESP/%08x/%pI6\n", diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 4bac362b1335..074f2c084f9f 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -481,7 +481,7 @@ looped_back: IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_INHDRERRORS); icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, - 0, skb->dev); + 0); kfree_skb(skb); return -1; } diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 217dbc2e28d4..eb9abe24bdf0 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -67,11 +67,6 @@ #include <asm/uaccess.h> #include <asm/system.h> -DEFINE_SNMP_STAT(struct icmpv6_mib, icmpv6_statistics) __read_mostly; -EXPORT_SYMBOL(icmpv6_statistics); -DEFINE_SNMP_STAT(struct icmpv6msg_mib, icmpv6msg_statistics) __read_mostly; -EXPORT_SYMBOL(icmpv6msg_statistics); - /* * The ICMP socket(s). This is the most convenient way to flow control * our ICMP output as well as maintain a clean interface throughout @@ -119,7 +114,7 @@ static __inline__ void icmpv6_xmit_unlock(struct sock *sk) */ void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos) { - icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); + icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos); kfree_skb(skb); } @@ -305,8 +300,7 @@ static inline void mip6_addr_swap(struct sk_buff *skb) {} /* * Send an ICMP message in response to a packet in error */ -void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info, - struct net_device *dev) +void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) { struct net *net = dev_net(skb->dev); struct inet6_dev *idev = NULL; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 77e122f53ea6..2f9847924fa5 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -93,29 +93,20 @@ static __u32 rt_sernum; static void fib6_gc_timer_cb(unsigned long arg); -static struct fib6_walker_t fib6_walker_list = { - .prev = &fib6_walker_list, - .next = &fib6_walker_list, -}; - -#define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next) +static LIST_HEAD(fib6_walkers); +#define FOR_WALKERS(w) list_for_each_entry(w, &fib6_walkers, lh) static inline void fib6_walker_link(struct fib6_walker_t *w) { write_lock_bh(&fib6_walker_lock); - w->next = fib6_walker_list.next; - w->prev = &fib6_walker_list; - w->next->prev = w; - w->prev->next = w; + list_add(&w->lh, &fib6_walkers); write_unlock_bh(&fib6_walker_lock); } static inline void fib6_walker_unlink(struct fib6_walker_t *w) { write_lock_bh(&fib6_walker_lock); - w->next->prev = w->prev; - w->prev->next = w->next; - w->prev = w->next = w; + list_del(&w->lh); write_unlock_bh(&fib6_walker_lock); } static __inline__ u32 fib6_new_sernum(void) diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 237e2dba6e94..e28f9203deca 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -216,8 +216,7 @@ resubmit: IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INUNKNOWNPROTOS); icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_UNK_NEXTHDR, nhoff, - skb->dev); + ICMPV6_UNK_NEXTHDR, nhoff); } } else IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INDELIVERS); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index eb6d09728633..1a5fe9ad1947 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -267,7 +267,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl, if (net_ratelimit()) printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n"); skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); return -EMSGSIZE; @@ -441,8 +441,7 @@ int ip6_forward(struct sk_buff *skb) if (hdr->hop_limit <= 1) { /* Force OUTPUT device used as source address */ skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, - 0, skb->dev); + icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0); IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS); @@ -504,7 +503,7 @@ int ip6_forward(struct sk_buff *skb) goto error; if (addrtype & IPV6_ADDR_LINKLOCAL) { icmpv6_send(skb, ICMPV6_DEST_UNREACH, - ICMPV6_NOT_NEIGHBOUR, 0, skb->dev); + ICMPV6_NOT_NEIGHBOUR, 0); goto error; } } @@ -512,7 +511,7 @@ int ip6_forward(struct sk_buff *skb) if (skb->len > dst_mtu(dst)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst)); IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS); IP6_INC_STATS_BH(net, @@ -627,7 +626,7 @@ static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) */ if (!skb->local_df) { skb->dev = skb_dst(skb)->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); kfree_skb(skb); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9b02492d8706..138980eec214 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -622,7 +622,7 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (rt && rt->rt6i_dev) skb2->dev = rt->rt6i_dev; - icmpv6_send(skb2, rel_type, rel_code, rel_info, skb2->dev); + icmpv6_send(skb2, rel_type, rel_code, rel_info); if (rt) dst_release(&rt->u.dst); @@ -1014,7 +1014,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; if (tel->encap_limit == 0) { icmpv6_send(skb, ICMPV6_PARAMPROB, - ICMPV6_HDR_FIELD, offset + 2, skb->dev); + ICMPV6_HDR_FIELD, offset + 2); return -1; } encap_limit = tel->encap_limit - 1; @@ -1033,7 +1033,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) err = ip6_tnl_xmit2(skb, dev, dsfield, &fl, encap_limit, &mtu); if (err != 0) { if (err == -EMSGSIZE) - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); return -1; } diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index bb42f39c1db8..85cccd6ed0b7 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -64,7 +64,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return; spi = htonl(ntohs(ipcomph->cpi)); - x = xfrm_state_lookup(net, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); + x = xfrm_state_lookup(net, skb->mark, (xfrm_address_t *)&iph->daddr, spi, IPPROTO_COMP, AF_INET6); if (!x) return; @@ -92,6 +92,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) t->props.family = AF_INET6; t->props.mode = x->props.mode; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); + memcpy(&t->mark, &x->mark, sizeof(t->mark)); if (xfrm_init_state(t)) goto error; @@ -114,10 +115,11 @@ static int ipcomp6_tunnel_attach(struct xfrm_state *x) int err = 0; struct xfrm_state *t = NULL; __be32 spi; + u32 mark = x->mark.m & x->mark.v; spi = xfrm6_tunnel_spi_lookup(net, (xfrm_address_t *)&x->props.saddr); if (spi) - t = xfrm_state_lookup(net, (xfrm_address_t *)&x->id.daddr, + t = xfrm_state_lookup(net, mark, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); if (!t) { t = ipcomp6_tunnel_create(x); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index f797e8c6f3b3..2794b6002836 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -56,7 +56,7 @@ static inline void *mip6_padn(__u8 *data, __u8 padlen) static inline void mip6_param_prob(struct sk_buff *skb, u8 code, int pos) { - icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); + icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos); } static int mip6_mh_len(int type) diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 8311ca31816a..dd8afbaf00a8 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -169,7 +169,7 @@ send_unreach(struct net *net, struct sk_buff *skb_in, unsigned char code, if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) skb_in->dev = net->loopback_dev; - icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0, NULL); + icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); } static unsigned int diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index b2847ed6a7d9..a555156e9779 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -228,7 +228,7 @@ static void ip6_frag_expire(unsigned long data) pointer directly, device might already disappeared. */ fq->q.fragments->dev = dev; - icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0, dev); + icmpv6_send(fq->q.fragments, ICMPV6_TIME_EXCEED, ICMPV6_EXC_FRAGTIME, 0); out_rcu_unlock: rcu_read_unlock(); out: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8500156f2637..88c0a5c49ae8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -909,7 +909,7 @@ static void ip6_link_failure(struct sk_buff *skb) { struct rt6_info *rt; - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0, skb->dev); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0); rt = (struct rt6_info *) skb_dst(skb); if (rt) { @@ -1884,7 +1884,7 @@ static int ip6_pkt_drop(struct sk_buff *skb, u8 code, int ipstats_mib_noroutes) ipstats_mib_noroutes); break; } - icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0, skb->dev); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, code, 0); kfree_skb(skb); return 0; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 96eb2d4641c4..b1eea811be48 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -743,7 +743,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); if (skb->len > mtu) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ip_rt_put(rt); goto tx_error; } diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c index 51e2832d13a6..e17bc1dfc1a4 100644 --- a/net/ipv6/tunnel6.c +++ b/net/ipv6/tunnel6.c @@ -98,7 +98,7 @@ static int tunnel6_rcv(struct sk_buff *skb) if (!handler->handler(skb)) return 0; - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, skb->dev); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: kfree_skb(skb); @@ -116,7 +116,7 @@ static int tunnel46_rcv(struct sk_buff *skb) if (!handler->handler(skb)) return 0; - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, skb->dev); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: kfree_skb(skb); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index a7af9d68cd6c..52b8347ae3b2 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -680,12 +680,11 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { + struct net *net = dev_net(skb->dev); struct sock *sk; struct udphdr *uh; - struct net_device *dev = skb->dev; struct in6_addr *saddr, *daddr; u32 ulen = 0; - struct net *net = dev_net(skb->dev); if (!pskb_may_pull(skb, sizeof(struct udphdr))) goto short_packet; @@ -744,7 +743,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, UDP6_INC_STATS_BH(net, UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE); - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); kfree_skb(skb); return 0; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 9084582d236b..2bc98ede1235 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -101,7 +101,7 @@ int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, break; } - x = xfrm_state_lookup_byaddr(net, dst, src, proto, AF_INET6); + x = xfrm_state_lookup_byaddr(net, skb->mark, dst, src, proto, AF_INET6); if (!x) continue; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index c4f4eef032a3..0c92112dcba3 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -38,7 +38,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb) if (!skb->local_df && skb->len > mtu) { skb->dev = dst->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); ret = -EMSGSIZE; } diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 476b307bd801..69b5b75f5431 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -124,7 +124,7 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) return ret; } -static struct nla_policy irda_nl_policy[IRDA_NL_ATTR_MAX + 1] = { +static const struct nla_policy irda_nl_policy[IRDA_NL_ATTR_MAX + 1] = { [IRDA_NL_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 }, [IRDA_NL_ATTR_MODE] = { .type = NLA_U32 }, diff --git a/net/key/af_key.c b/net/key/af_key.c index 79d2c0f3c334..368707882647 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -41,10 +41,10 @@ struct netns_pfkey { struct hlist_head table; atomic_t socks_nr; }; -static DECLARE_WAIT_QUEUE_HEAD(pfkey_table_wait); -static DEFINE_RWLOCK(pfkey_table_lock); -static atomic_t pfkey_table_users = ATOMIC_INIT(0); +static DEFINE_MUTEX(pfkey_mutex); +#define DUMMY_MARK 0 +static struct xfrm_mark dummy_mark = {0, 0}; struct pfkey_sock { /* struct sock must be the first member of struct pfkey_sock */ struct sock sk; @@ -108,50 +108,6 @@ static void pfkey_sock_destruct(struct sock *sk) atomic_dec(&net_pfkey->socks_nr); } -static void pfkey_table_grab(void) -{ - write_lock_bh(&pfkey_table_lock); - - if (atomic_read(&pfkey_table_users)) { - DECLARE_WAITQUEUE(wait, current); - - add_wait_queue_exclusive(&pfkey_table_wait, &wait); - for(;;) { - set_current_state(TASK_UNINTERRUPTIBLE); - if (atomic_read(&pfkey_table_users) == 0) - break; - write_unlock_bh(&pfkey_table_lock); - schedule(); - write_lock_bh(&pfkey_table_lock); - } - - __set_current_state(TASK_RUNNING); - remove_wait_queue(&pfkey_table_wait, &wait); - } -} - -static __inline__ void pfkey_table_ungrab(void) -{ - write_unlock_bh(&pfkey_table_lock); - wake_up(&pfkey_table_wait); -} - -static __inline__ void pfkey_lock_table(void) -{ - /* read_lock() synchronizes us to pfkey_table_grab */ - - read_lock(&pfkey_table_lock); - atomic_inc(&pfkey_table_users); - read_unlock(&pfkey_table_lock); -} - -static __inline__ void pfkey_unlock_table(void) -{ - if (atomic_dec_and_test(&pfkey_table_users)) - wake_up(&pfkey_table_wait); -} - - static const struct proto_ops pfkey_ops; static void pfkey_insert(struct sock *sk) @@ -159,16 +115,16 @@ static void pfkey_insert(struct sock *sk) struct net *net = sock_net(sk); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); - pfkey_table_grab(); - sk_add_node(sk, &net_pfkey->table); - pfkey_table_ungrab(); + mutex_lock(&pfkey_mutex); + sk_add_node_rcu(sk, &net_pfkey->table); + mutex_unlock(&pfkey_mutex); } static void pfkey_remove(struct sock *sk) { - pfkey_table_grab(); - sk_del_node_init(sk); - pfkey_table_ungrab(); + mutex_lock(&pfkey_mutex); + sk_del_node_init_rcu(sk); + mutex_unlock(&pfkey_mutex); } static struct proto key_proto = { @@ -223,6 +179,8 @@ static int pfkey_release(struct socket *sock) sock_orphan(sk); sock->sk = NULL; skb_queue_purge(&sk->sk_write_queue); + + synchronize_rcu(); sock_put(sk); return 0; @@ -277,8 +235,8 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, if (!skb) return -ENOMEM; - pfkey_lock_table(); - sk_for_each(sk, node, &net_pfkey->table) { + rcu_read_lock(); + sk_for_each_rcu(sk, node, &net_pfkey->table) { struct pfkey_sock *pfk = pfkey_sk(sk); int err2; @@ -309,7 +267,7 @@ static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, if ((broadcast_flags & BROADCAST_REGISTERED) && err) err = err2; } - pfkey_unlock_table(); + rcu_read_unlock(); if (one_sk != NULL) err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); @@ -691,7 +649,7 @@ static struct xfrm_state *pfkey_xfrm_state_lookup(struct net *net, struct sadb_ if (!xaddr) return NULL; - return xfrm_state_lookup(net, xaddr, sa->sadb_sa_spi, proto, family); + return xfrm_state_lookup(net, DUMMY_MARK, xaddr, sa->sadb_sa_spi, proto, family); } #define PFKEY_ALIGN8(a) (1 + (((a) - 1) | (8 - 1))) @@ -1360,7 +1318,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h } if (hdr->sadb_msg_seq) { - x = xfrm_find_acq_byseq(net, hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); if (x && xfrm_addr_cmp(&x->id.daddr, xdaddr, family)) { xfrm_state_put(x); x = NULL; @@ -1368,7 +1326,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h } if (!x) - x = xfrm_find_acq(net, mode, reqid, proto, xdaddr, xsaddr, 1, family); + x = xfrm_find_acq(net, &dummy_mark, mode, reqid, proto, xdaddr, xsaddr, 1, family); if (x == NULL) return -ENOENT; @@ -1417,7 +1375,7 @@ static int pfkey_acquire(struct sock *sk, struct sk_buff *skb, struct sadb_msg * if (hdr->sadb_msg_seq == 0 || hdr->sadb_msg_errno == 0) return 0; - x = xfrm_find_acq_byseq(net, hdr->sadb_msg_seq); + x = xfrm_find_acq_byseq(net, DUMMY_MARK, hdr->sadb_msg_seq); if (x == NULL) return 0; @@ -1712,6 +1670,23 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, struct sadb_msg return 0; } +static int unicast_flush_resp(struct sock *sk, struct sadb_msg *ihdr) +{ + struct sk_buff *skb; + struct sadb_msg *hdr; + + skb = alloc_skb(sizeof(struct sadb_msg) + 16, GFP_ATOMIC); + if (!skb) + return -ENOBUFS; + + hdr = (struct sadb_msg *) skb_put(skb, sizeof(struct sadb_msg)); + memcpy(hdr, ihdr, sizeof(struct sadb_msg)); + hdr->sadb_msg_errno = (uint8_t) 0; + hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); + + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); +} + static int key_notify_sa_flush(struct km_event *c) { struct sk_buff *skb; @@ -1740,7 +1715,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd unsigned proto; struct km_event c; struct xfrm_audit audit_info; - int err; + int err, err2; proto = pfkey_satype2proto(hdr->sadb_msg_satype); if (proto == 0) @@ -1750,8 +1725,13 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, struct sadb_msg *hd audit_info.sessionid = audit_get_sessionid(current); audit_info.secid = 0; err = xfrm_state_flush(net, proto, &audit_info); - if (err) - return err; + err2 = unicast_flush_resp(sk, hdr); + if (err || err2) { + if (err == -ESRCH) /* empty table - go quietly */ + err = 0; + return err ? err : err2; + } + c.data.proto = proto; c.seq = hdr->sadb_msg_seq; c.pid = hdr->sadb_msg_pid; @@ -2346,7 +2326,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg return err; } - xp = xfrm_policy_bysel_ctx(net, XFRM_POLICY_TYPE_MAIN, + xp = xfrm_policy_bysel_ctx(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir - 1, &sel, pol_ctx, 1, &err); security_xfrm_policy_free(pol_ctx); @@ -2594,8 +2574,8 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h return -EINVAL; delete = (hdr->sadb_msg_type == SADB_X_SPDDELETE2); - xp = xfrm_policy_byid(net, XFRM_POLICY_TYPE_MAIN, dir, - pol->sadb_x_policy_id, delete, &err); + xp = xfrm_policy_byid(net, DUMMY_MARK, XFRM_POLICY_TYPE_MAIN, + dir, pol->sadb_x_policy_id, delete, &err); if (xp == NULL) return -ENOENT; @@ -2706,14 +2686,19 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg struct net *net = sock_net(sk); struct km_event c; struct xfrm_audit audit_info; - int err; + int err, err2; audit_info.loginuid = audit_get_loginuid(current); audit_info.sessionid = audit_get_sessionid(current); audit_info.secid = 0; err = xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); - if (err) + err2 = unicast_flush_resp(sk, hdr); + if (err || err2) { + if (err == -ESRCH) /* empty table - old silent behavior */ + return 0; return err; + } + c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.pid = hdr->sadb_msg_pid; @@ -3675,8 +3660,8 @@ static void *pfkey_seq_start(struct seq_file *f, loff_t *ppos) struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); - read_lock(&pfkey_table_lock); - return seq_hlist_start_head(&net_pfkey->table, *ppos); + rcu_read_lock(); + return seq_hlist_start_head_rcu(&net_pfkey->table, *ppos); } static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos) @@ -3684,12 +3669,12 @@ static void *pfkey_seq_next(struct seq_file *f, void *v, loff_t *ppos) struct net *net = seq_file_net(f); struct netns_pfkey *net_pfkey = net_generic(net, pfkey_net_id); - return seq_hlist_next(v, &net_pfkey->table, ppos); + return seq_hlist_next_rcu(v, &net_pfkey->table, ppos); } static void pfkey_seq_stop(struct seq_file *f, void *v) { - read_unlock(&pfkey_table_lock); + rcu_read_unlock(); } static const struct seq_operations pfkey_seq_ops = { diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 72e96d823ebf..44590887a92c 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -515,8 +515,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, */ #ifdef CONFIG_IP_VS_IPV6 if (svc->af == AF_INET6) - icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, - skb->dev); + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); else #endif icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); @@ -1048,7 +1047,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, - 0, skb->dev); + 0); else #endif icmp_send(skb, diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 30b3189bd29c..223b5018c7dc 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -311,7 +311,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, mtu = dst_mtu(&rt->u.dst); if (skb->len > mtu) { dst_release(&rt->u.dst); - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } @@ -454,7 +454,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, mtu = dst_mtu(&rt->u.dst); if (skb->len > mtu) { dst_release(&rt->u.dst); - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit_v6(): frag needed for"); goto tx_error; @@ -672,7 +672,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); dst_release(&rt->u.dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -814,7 +814,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* MTU checking */ mtu = dst_mtu(&rt->u.dst); if (skb->len > mtu) { - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); dst_release(&rt->u.dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; @@ -965,7 +965,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, mtu = dst_mtu(&rt->u.dst); if (skb->len > mtu) { dst_release(&rt->u.dst); - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev); + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); goto tx_error; } diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 10f7295bcefb..2f0369367ee0 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -1262,24 +1262,22 @@ static int packet_release(struct socket *sock) net = sock_net(sk); po = pkt_sk(sk); - write_lock_bh(&net->packet.sklist_lock); - sk_del_node_init(sk); + spin_lock_bh(&net->packet.sklist_lock); + sk_del_node_init_rcu(sk); sock_prot_inuse_add(net, sk->sk_prot, -1); - write_unlock_bh(&net->packet.sklist_lock); - - /* - * Unhook packet receive handler. - */ + spin_unlock_bh(&net->packet.sklist_lock); + spin_lock(&po->bind_lock); if (po->running) { /* - * Remove the protocol hook + * Remove from protocol table */ - dev_remove_pack(&po->prot_hook); po->running = 0; po->num = 0; + __dev_remove_pack(&po->prot_hook); __sock_put(sk); } + spin_unlock(&po->bind_lock); packet_flush_mclist(sk); @@ -1291,10 +1289,10 @@ static int packet_release(struct socket *sock) if (po->tx_ring.pg_vec) packet_set_ring(sk, &req, 1, 1); + synchronize_net(); /* * Now the socket is dead. No more input will appear. */ - sock_orphan(sk); sock->sk = NULL; @@ -1478,10 +1476,11 @@ static int packet_create(struct net *net, struct socket *sock, int protocol, po->running = 1; } - write_lock_bh(&net->packet.sklist_lock); - sk_add_node(sk, &net->packet.sklist); + spin_lock_bh(&net->packet.sklist_lock); + sk_add_node_rcu(sk, &net->packet.sklist); sock_prot_inuse_add(net, &packet_proto, 1); - write_unlock_bh(&net->packet.sklist_lock); + spin_unlock_bh(&net->packet.sklist_lock); + return 0; out: return err; @@ -2075,8 +2074,8 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void struct net_device *dev = data; struct net *net = dev_net(dev); - read_lock(&net->packet.sklist_lock); - sk_for_each(sk, node, &net->packet.sklist) { + rcu_read_lock(); + sk_for_each_rcu(sk, node, &net->packet.sklist) { struct packet_sock *po = pkt_sk(sk); switch (msg) { @@ -2104,18 +2103,19 @@ static int packet_notifier(struct notifier_block *this, unsigned long msg, void } break; case NETDEV_UP: - spin_lock(&po->bind_lock); - if (dev->ifindex == po->ifindex && po->num && - !po->running) { - dev_add_pack(&po->prot_hook); - sock_hold(sk); - po->running = 1; + if (dev->ifindex == po->ifindex) { + spin_lock(&po->bind_lock); + if (po->num && !po->running) { + dev_add_pack(&po->prot_hook); + sock_hold(sk); + po->running = 1; + } + spin_unlock(&po->bind_lock); } - spin_unlock(&po->bind_lock); break; } } - read_unlock(&net->packet.sklist_lock); + rcu_read_unlock(); return NOTIFY_DONE; } @@ -2512,24 +2512,24 @@ static struct notifier_block packet_netdev_notifier = { #ifdef CONFIG_PROC_FS static void *packet_seq_start(struct seq_file *seq, loff_t *pos) - __acquires(seq_file_net(seq)->packet.sklist_lock) + __acquires(RCU) { struct net *net = seq_file_net(seq); - read_lock(&net->packet.sklist_lock); - return seq_hlist_start_head(&net->packet.sklist, *pos); + + rcu_read_lock(); + return seq_hlist_start_head_rcu(&net->packet.sklist, *pos); } static void *packet_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct net *net = seq_file_net(seq); - return seq_hlist_next(v, &net->packet.sklist, pos); + return seq_hlist_next_rcu(v, &net->packet.sklist, pos); } static void packet_seq_stop(struct seq_file *seq, void *v) - __releases(seq_file_net(seq)->packet.sklist_lock) + __releases(RCU) { - struct net *net = seq_file_net(seq); - read_unlock(&net->packet.sklist_lock); + rcu_read_unlock(); } static int packet_seq_show(struct seq_file *seq, void *v) @@ -2581,7 +2581,7 @@ static const struct file_operations packet_seq_fops = { static int __net_init packet_net_init(struct net *net) { - rwlock_init(&net->packet.sklist_lock); + spin_lock_init(&net->packet.sklist_lock); INIT_HLIST_HEAD(&net->packet.sklist); if (!proc_net_fops_create(net, "packet", 0, &packet_seq_fops)) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 9bc9b92bc099..3d9122e78f41 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -144,7 +144,7 @@ static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) /* * SMP locking strategy: * hash table is protected with spinlock unix_table_lock - * each socket state is protected by separate rwlock. + * each socket state is protected by separate spin lock. */ static inline unsigned unix_hash_fold(__wsum n) diff --git a/net/wimax/op-msg.c b/net/wimax/op-msg.c index d3bfb6ef13ae..7718657e93dc 100644 --- a/net/wimax/op-msg.c +++ b/net/wimax/op-msg.c @@ -320,8 +320,7 @@ int wimax_msg(struct wimax_dev *wimax_dev, const char *pipe_name, EXPORT_SYMBOL_GPL(wimax_msg); -static const -struct nla_policy wimax_gnl_msg_policy[WIMAX_GNL_ATTR_MAX + 1] = { +static const struct nla_policy wimax_gnl_msg_policy[WIMAX_GNL_ATTR_MAX + 1] = { [WIMAX_GNL_MSG_IFIDX] = { .type = NLA_U32, }, diff --git a/net/wimax/op-reset.c b/net/wimax/op-reset.c index 35f370091f4f..4dc82a54ba30 100644 --- a/net/wimax/op-reset.c +++ b/net/wimax/op-reset.c @@ -91,8 +91,7 @@ int wimax_reset(struct wimax_dev *wimax_dev) EXPORT_SYMBOL(wimax_reset); -static const -struct nla_policy wimax_gnl_reset_policy[WIMAX_GNL_ATTR_MAX + 1] = { +static const struct nla_policy wimax_gnl_reset_policy[WIMAX_GNL_ATTR_MAX + 1] = { [WIMAX_GNL_RESET_IFIDX] = { .type = NLA_U32, }, diff --git a/net/wimax/op-rfkill.c b/net/wimax/op-rfkill.c index ae752a64d920..e978c7136c97 100644 --- a/net/wimax/op-rfkill.c +++ b/net/wimax/op-rfkill.c @@ -410,8 +410,7 @@ void wimax_rfkill_rm(struct wimax_dev *wimax_dev) * just query). */ -static const -struct nla_policy wimax_gnl_rfkill_policy[WIMAX_GNL_ATTR_MAX + 1] = { +static const struct nla_policy wimax_gnl_rfkill_policy[WIMAX_GNL_ATTR_MAX + 1] = { [WIMAX_GNL_RFKILL_IFIDX] = { .type = NLA_U32, }, diff --git a/net/wimax/op-state-get.c b/net/wimax/op-state-get.c index a76b8fcb056d..11ad3356eb56 100644 --- a/net/wimax/op-state-get.c +++ b/net/wimax/op-state-get.c @@ -33,8 +33,7 @@ #include "debug-levels.h" -static const -struct nla_policy wimax_gnl_state_get_policy[WIMAX_GNL_ATTR_MAX + 1] = { +static const struct nla_policy wimax_gnl_state_get_policy[WIMAX_GNL_ATTR_MAX + 1] = { [WIMAX_GNL_STGET_IFIDX] = { .type = NLA_U32, }, diff --git a/net/wimax/stack.c b/net/wimax/stack.c index c8866412f830..813e1eaea29b 100644 --- a/net/wimax/stack.c +++ b/net/wimax/stack.c @@ -75,8 +75,7 @@ MODULE_PARM_DESC(debug, * close to where the data is generated. */ /* -static const -struct nla_policy wimax_gnl_re_status_change[WIMAX_GNL_ATTR_MAX + 1] = { +static const struct nla_policy wimax_gnl_re_status_change[WIMAX_GNL_ATTR_MAX + 1] = { [WIMAX_GNL_STCH_STATE_OLD] = { .type = NLA_U8 }, [WIMAX_GNL_STCH_STATE_NEW] = { .type = NLA_U8 }, }; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5b79ecf17bea..a001ea32cb7d 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -58,7 +58,7 @@ static int get_rdev_dev_by_info_ifindex(struct genl_info *info, } /* policy for the attributes */ -static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { +static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING, .len = 20-1 }, @@ -148,8 +148,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { }; /* policy for the attributes */ -static struct nla_policy -nl80211_key_policy[NL80211_KEY_MAX + 1] __read_mostly = { +static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = { [NL80211_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, [NL80211_KEY_IDX] = { .type = NLA_U8 }, [NL80211_KEY_CIPHER] = { .type = NLA_U32 }, @@ -2501,8 +2500,7 @@ static int nl80211_set_bss(struct sk_buff *skb, struct genl_info *info) return err; } -static const struct nla_policy - reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = { +static const struct nla_policy reg_rule_policy[NL80211_REG_RULE_ATTR_MAX + 1] = { [NL80211_ATTR_REG_RULE_FLAGS] = { .type = NLA_U32 }, [NL80211_ATTR_FREQ_RANGE_START] = { .type = NLA_U32 }, [NL80211_ATTR_FREQ_RANGE_END] = { .type = NLA_U32 }, @@ -2671,8 +2669,7 @@ do {\ } \ } while (0);\ -static struct nla_policy -nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] __read_mostly = { +static const struct nla_policy nl80211_meshconf_params_policy[NL80211_MESHCONF_ATTR_MAX+1] = { [NL80211_MESHCONF_RETRY_TIMEOUT] = { .type = NLA_U16 }, [NL80211_MESHCONF_CONFIRM_TIMEOUT] = { .type = NLA_U16 }, [NL80211_MESHCONF_HOLDING_TIMEOUT] = { .type = NLA_U16 }, @@ -4470,8 +4467,7 @@ static u32 rateset_to_mask(struct ieee80211_supported_band *sband, return mask; } -static struct nla_policy -nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] __read_mostly = { +static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = { [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, .len = NL80211_MAX_SUPP_RATES }, }; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index e0009c17d809..45f1c98d4fce 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -152,7 +152,7 @@ int xfrm_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type) goto drop; } - x = xfrm_state_lookup(net, daddr, spi, nexthdr, family); + x = xfrm_state_lookup(net, skb->mark, daddr, spi, nexthdr, family); if (x == NULL) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOSTATES); xfrm_audit_state_notfound(skb, family, spi, seq); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 2c5d93181f13..34a5ef8316e7 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -556,6 +556,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct hlist_head *chain; struct hlist_node *entry, *newpos; struct dst_entry *gc_list; + u32 mark = policy->mark.v & policy->mark.m; write_lock_bh(&xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); @@ -564,6 +565,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) hlist_for_each_entry(pol, entry, chain, bydst) { if (pol->type == policy->type && !selector_cmp(&pol->selector, &policy->selector) && + (mark & pol->mark.m) == pol->mark.v && xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) { @@ -635,8 +637,8 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir, - struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type, + int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete, int *err) { @@ -650,6 +652,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir, ret = NULL; hlist_for_each_entry(pol, entry, chain, bydst) { if (pol->type == type && + (mark & pol->mark.m) == pol->mark.v && !selector_cmp(sel, &pol->selector) && xfrm_sec_ctx_match(ctx, pol->security)) { xfrm_pol_hold(pol); @@ -676,8 +679,8 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u8 type, int dir, } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(struct net *net, u8 type, int dir, u32 id, - int delete, int *err) +struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type, + int dir, u32 id, int delete, int *err) { struct xfrm_policy *pol, *ret; struct hlist_head *chain; @@ -692,7 +695,8 @@ struct xfrm_policy *xfrm_policy_byid(struct net *net, u8 type, int dir, u32 id, chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; hlist_for_each_entry(pol, entry, chain, byidx) { - if (pol->type == type && pol->index == id) { + if (pol->type == type && pol->index == id && + (mark & pol->mark.m) == pol->mark.v) { xfrm_pol_hold(pol); if (delete) { *err = security_xfrm_policy_delete( @@ -771,7 +775,8 @@ xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audi int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) { - int dir, err = 0; + int dir, err = 0, cnt = 0; + struct xfrm_policy *dp; write_lock_bh(&xfrm_policy_lock); @@ -789,8 +794,10 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) &net->xfrm.policy_inexact[dir], bydst) { if (pol->type != type) continue; - __xfrm_policy_unlink(pol, dir); + dp = __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); + if (dp) + cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, audit_info->sessionid, @@ -809,8 +816,10 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) bydst) { if (pol->type != type) continue; - __xfrm_policy_unlink(pol, dir); + dp = __xfrm_policy_unlink(pol, dir); write_unlock_bh(&xfrm_policy_lock); + if (dp) + cnt++; xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, @@ -824,6 +833,8 @@ int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) } } + if (!cnt) + err = -ESRCH; atomic_inc(&flow_cache_genid); out: write_unlock_bh(&xfrm_policy_lock); @@ -909,6 +920,7 @@ static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, int match, ret = -ESRCH; if (pol->family != family || + (fl->mark & pol->mark.m) != pol->mark.v || pol->type != type) return ret; @@ -1033,6 +1045,10 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc int err = 0; if (match) { + if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { + pol = NULL; + goto out; + } err = security_xfrm_policy_lookup(pol->security, fl->secid, policy_to_flow_dir(dir)); @@ -1045,6 +1061,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc } else pol = NULL; } +out: read_unlock_bh(&xfrm_policy_lock); return pol; } @@ -1137,6 +1154,7 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) } newp->lft = old->lft; newp->curlft = old->curlft; + newp->mark = old->mark; newp->action = old->action; newp->flags = old->flags; newp->xfrm_nr = old->xfrm_nr; @@ -2045,8 +2063,7 @@ int __xfrm_route_forward(struct sk_buff *skb, unsigned short family) int res; if (xfrm_decode_session(skb, &fl, family) < 0) { - /* XXX: we should have something like FWDHDRERROR here. */ - XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR); + XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR); return 0; } diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index 003f2c437ac3..58d9ae005597 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -41,6 +41,7 @@ static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmOutPolBlock", LINUX_MIB_XFRMOUTPOLBLOCK), SNMP_MIB_ITEM("XfrmOutPolDead", LINUX_MIB_XFRMOUTPOLDEAD), SNMP_MIB_ITEM("XfrmOutPolError", LINUX_MIB_XFRMOUTPOLERROR), + SNMP_MIB_ITEM("XfrmFwdHdrError", LINUX_MIB_XFRMFWDHDRERROR), SNMP_MIB_SENTINEL }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index c9d6a5f1348d..17d5b96f2fc8 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -603,13 +603,14 @@ xfrm_state_flush_secctx_check(struct net *net, u8 proto, struct xfrm_audit *audi int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info) { - int i, err = 0; + int i, err = 0, cnt = 0; spin_lock_bh(&xfrm_state_lock); err = xfrm_state_flush_secctx_check(net, proto, audit_info); if (err) goto out; + err = -ESRCH; for (i = 0; i <= net->xfrm.state_hmask; i++) { struct hlist_node *entry; struct xfrm_state *x; @@ -626,13 +627,16 @@ restart: audit_info->sessionid, audit_info->secid); xfrm_state_put(x); + if (!err) + cnt++; spin_lock_bh(&xfrm_state_lock); goto restart; } } } - err = 0; + if (cnt) + err = 0; out: spin_unlock_bh(&xfrm_state_lock); @@ -665,7 +669,7 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, return 0; } -static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { unsigned int h = xfrm_spi_hash(net, daddr, spi, proto, family); struct xfrm_state *x; @@ -678,6 +682,8 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *d xfrm_addr_cmp(&x->id.daddr, daddr, family)) continue; + if ((mark & x->mark.m) != x->mark.v) + continue; xfrm_state_hold(x); return x; } @@ -685,7 +691,7 @@ static struct xfrm_state *__xfrm_state_lookup(struct net *net, xfrm_address_t *d return NULL; } -static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { unsigned int h = xfrm_src_hash(net, daddr, saddr, family); struct xfrm_state *x; @@ -698,6 +704,8 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(struct net *net, xfrm_addre xfrm_addr_cmp(&x->props.saddr, saddr, family)) continue; + if ((mark & x->mark.m) != x->mark.v) + continue; xfrm_state_hold(x); return x; } @@ -709,12 +717,14 @@ static inline struct xfrm_state * __xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) { struct net *net = xs_net(x); + u32 mark = x->mark.v & x->mark.m; if (use_spi) - return __xfrm_state_lookup(net, &x->id.daddr, x->id.spi, - x->id.proto, family); + return __xfrm_state_lookup(net, mark, &x->id.daddr, + x->id.spi, x->id.proto, family); else - return __xfrm_state_lookup_byaddr(net, &x->id.daddr, + return __xfrm_state_lookup_byaddr(net, mark, + &x->id.daddr, &x->props.saddr, x->id.proto, family); } @@ -779,6 +789,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, int acquire_in_progress = 0; int error = 0; struct xfrm_state *best = NULL; + u32 mark = pol->mark.v & pol->mark.m; to_put = NULL; @@ -787,6 +798,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && + (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_state_addr_check(x, daddr, saddr, family) && tmpl->mode == x->props.mode && @@ -802,6 +814,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h_wildcard, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && + (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_state_addr_check(x, daddr, saddr, family) && tmpl->mode == x->props.mode && @@ -815,7 +828,7 @@ found: x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = __xfrm_state_lookup(net, daddr, tmpl->id.spi, + (x0 = __xfrm_state_lookup(net, mark, daddr, tmpl->id.spi, tmpl->id.proto, family)) != NULL) { to_put = x0; error = -EEXIST; @@ -829,6 +842,7 @@ found: /* Initialize temporary selector matching only * to current session. */ xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + memcpy(&x->mark, &pol->mark, sizeof(x->mark)); error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); if (error) { @@ -871,7 +885,7 @@ out: } struct xfrm_state * -xfrm_stateonly_find(struct net *net, +xfrm_stateonly_find(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, u8 mode, u8 proto, u32 reqid) { @@ -884,6 +898,7 @@ xfrm_stateonly_find(struct net *net, hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && + (mark & x->mark.m) == x->mark.v && !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_state_addr_check(x, daddr, saddr, family) && mode == x->props.mode && @@ -946,11 +961,13 @@ static void __xfrm_state_bump_genids(struct xfrm_state *xnew) struct xfrm_state *x; struct hlist_node *entry; unsigned int h; + u32 mark = xnew->mark.v & xnew->mark.m; h = xfrm_dst_hash(net, &xnew->id.daddr, &xnew->props.saddr, reqid, family); hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == reqid && + (mark & x->mark.m) == x->mark.v && !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family) && !xfrm_addr_cmp(&x->props.saddr, &xnew->props.saddr, family)) x->genid = xfrm_state_genid; @@ -967,11 +984,12 @@ void xfrm_state_insert(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_insert); /* xfrm_state_lock is held */ -static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) +static struct xfrm_state *__find_acq_core(struct net *net, struct xfrm_mark *m, unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { unsigned int h = xfrm_dst_hash(net, daddr, saddr, reqid, family); struct hlist_node *entry; struct xfrm_state *x; + u32 mark = m->v & m->m; hlist_for_each_entry(x, entry, net->xfrm.state_bydst+h, bydst) { if (x->props.reqid != reqid || @@ -980,6 +998,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family x->km.state != XFRM_STATE_ACQ || x->id.spi != 0 || x->id.proto != proto || + (mark & x->mark.m) != x->mark.v || xfrm_addr_cmp(&x->id.daddr, daddr, family) || xfrm_addr_cmp(&x->props.saddr, saddr, family)) continue; @@ -1022,6 +1041,8 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family x->props.family = family; x->props.mode = mode; x->props.reqid = reqid; + x->mark.v = m->v; + x->mark.m = m->m; x->lft.hard_add_expires_seconds = net->xfrm.sysctl_acq_expires; xfrm_state_hold(x); tasklet_hrtimer_start(&x->mtimer, ktime_set(net->xfrm.sysctl_acq_expires, 0), HRTIMER_MODE_REL); @@ -1038,7 +1059,7 @@ static struct xfrm_state *__find_acq_core(struct net *net, unsigned short family return x; } -static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq); +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq); int xfrm_state_add(struct xfrm_state *x) { @@ -1046,6 +1067,7 @@ int xfrm_state_add(struct xfrm_state *x) struct xfrm_state *x1, *to_put; int family; int err; + u32 mark = x->mark.v & x->mark.m; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; @@ -1063,7 +1085,7 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && x->km.seq) { - x1 = __xfrm_find_acq_byseq(net, x->km.seq); + x1 = __xfrm_find_acq_byseq(net, mark, x->km.seq); if (x1 && ((x1->id.proto != x->id.proto) || xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family))) { to_put = x1; @@ -1072,8 +1094,8 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && !x1) - x1 = __find_acq_core(net, family, x->props.mode, x->props.reqid, - x->id.proto, + x1 = __find_acq_core(net, &x->mark, family, x->props.mode, + x->props.reqid, x->id.proto, &x->id.daddr, &x->props.saddr, 0); __xfrm_state_bump_genids(x); @@ -1147,6 +1169,8 @@ static struct xfrm_state *xfrm_state_clone(struct xfrm_state *orig, int *errp) goto error; } + memcpy(&x->mark, &orig->mark, sizeof(x->mark)); + err = xfrm_init_state(x); if (err) goto error; @@ -1338,41 +1362,41 @@ int xfrm_state_check_expire(struct xfrm_state *x) EXPORT_SYMBOL(xfrm_state_check_expire); struct xfrm_state * -xfrm_state_lookup(struct net *net, xfrm_address_t *daddr, __be32 spi, u8 proto, - unsigned short family) +xfrm_state_lookup(struct net *net, u32 mark, xfrm_address_t *daddr, __be32 spi, + u8 proto, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup(net, daddr, spi, proto, family); + x = __xfrm_state_lookup(net, mark, daddr, spi, proto, family); spin_unlock_bh(&xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup); struct xfrm_state * -xfrm_state_lookup_byaddr(struct net *net, +xfrm_state_lookup_byaddr(struct net *net, u32 mark, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_state_lookup_byaddr(net, daddr, saddr, proto, family); + x = __xfrm_state_lookup_byaddr(net, mark, daddr, saddr, proto, family); spin_unlock_bh(&xfrm_state_lock); return x; } EXPORT_SYMBOL(xfrm_state_lookup_byaddr); struct xfrm_state * -xfrm_find_acq(struct net *net, u8 mode, u32 reqid, u8 proto, +xfrm_find_acq(struct net *net, struct xfrm_mark *mark, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __find_acq_core(net, family, mode, reqid, proto, daddr, saddr, create); + x = __find_acq_core(net, mark, family, mode, reqid, proto, daddr, saddr, create); spin_unlock_bh(&xfrm_state_lock); return x; @@ -1419,7 +1443,7 @@ EXPORT_SYMBOL(xfrm_state_sort); /* Silly enough, but I'm lazy to build resolution list */ -static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq) +static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq) { int i; @@ -1429,6 +1453,7 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq) hlist_for_each_entry(x, entry, net->xfrm.state_bydst+i, bydst) { if (x->km.seq == seq && + (mark & x->mark.m) == x->mark.v && x->km.state == XFRM_STATE_ACQ) { xfrm_state_hold(x); return x; @@ -1438,12 +1463,12 @@ static struct xfrm_state *__xfrm_find_acq_byseq(struct net *net, u32 seq) return NULL; } -struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq) +struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 mark, u32 seq) { struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); - x = __xfrm_find_acq_byseq(net, seq); + x = __xfrm_find_acq_byseq(net, mark, seq); spin_unlock_bh(&xfrm_state_lock); return x; } @@ -1470,6 +1495,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) int err = -ENOENT; __be32 minspi = htonl(low); __be32 maxspi = htonl(high); + u32 mark = x->mark.v & x->mark.m; spin_lock_bh(&x->lock); if (x->km.state == XFRM_STATE_DEAD) @@ -1482,7 +1508,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) err = -ENOENT; if (minspi == maxspi) { - x0 = xfrm_state_lookup(net, &x->id.daddr, minspi, x->id.proto, x->props.family); + x0 = xfrm_state_lookup(net, mark, &x->id.daddr, minspi, x->id.proto, x->props.family); if (x0) { xfrm_state_put(x0); goto unlock; @@ -1492,7 +1518,7 @@ int xfrm_alloc_spi(struct xfrm_state *x, u32 low, u32 high) u32 spi = 0; for (h=0; h<high-low+1; h++) { spi = low + net_random()%(high-low+1); - x0 = xfrm_state_lookup(net, &x->id.daddr, htonl(spi), x->id.proto, x->props.family); + x0 = xfrm_state_lookup(net, mark, &x->id.daddr, htonl(spi), x->id.proto, x->props.family); if (x0 == NULL) { x->id.spi = htonl(spi); break; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 943c8712bd97..6106b72826d3 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -446,6 +446,8 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, goto error; } + xfrm_mark_get(attrs, &x->mark); + err = xfrm_init_state(x); if (err) goto error; @@ -526,11 +528,13 @@ static struct xfrm_state *xfrm_user_state_lookup(struct net *net, int *errp) { struct xfrm_state *x = NULL; + struct xfrm_mark m; int err; + u32 mark = xfrm_mark_get(attrs, &m); if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { err = -ESRCH; - x = xfrm_state_lookup(net, &p->daddr, p->spi, p->proto, p->family); + x = xfrm_state_lookup(net, mark, &p->daddr, p->spi, p->proto, p->family); } else { xfrm_address_t *saddr = NULL; @@ -541,7 +545,8 @@ static struct xfrm_state *xfrm_user_state_lookup(struct net *net, } err = -ESRCH; - x = xfrm_state_lookup_byaddr(net, &p->daddr, saddr, + x = xfrm_state_lookup_byaddr(net, mark, + &p->daddr, saddr, p->proto, p->family); } @@ -683,6 +688,9 @@ static int copy_to_user_state_extra(struct xfrm_state *x, if (x->encap) NLA_PUT(skb, XFRMA_ENCAP, sizeof(*x->encap), x->encap); + if (xfrm_mark_put(skb, &x->mark)) + goto nla_put_failure; + if (x->security && copy_sec_ctx(x->security, skb) < 0) goto nla_put_failure; @@ -947,6 +955,8 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, xfrm_address_t *daddr; int family; int err; + u32 mark; + struct xfrm_mark m; p = nlmsg_data(nlh); err = verify_userspi_info(p); @@ -957,8 +967,10 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, daddr = &p->info.id.daddr; x = NULL; + + mark = xfrm_mark_get(attrs, &m); if (p->info.seq) { - x = xfrm_find_acq_byseq(net, p->info.seq); + x = xfrm_find_acq_byseq(net, mark, p->info.seq); if (x && xfrm_addr_cmp(&x->id.daddr, daddr, family)) { xfrm_state_put(x); x = NULL; @@ -966,7 +978,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, } if (!x) - x = xfrm_find_acq(net, p->info.mode, p->info.reqid, + x = xfrm_find_acq(net, &m, p->info.mode, p->info.reqid, p->info.id.proto, daddr, &p->info.saddr, 1, family); @@ -1220,6 +1232,8 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us if (err) goto error; + xfrm_mark_get(attrs, &xp->mark); + return xp; error: *errp = err; @@ -1366,10 +1380,13 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr goto nlmsg_failure; if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; + if (xfrm_mark_put(skb, &xp->mark)) + goto nla_put_failure; nlmsg_end(skb, nlh); return 0; +nla_put_failure: nlmsg_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; @@ -1441,6 +1458,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, int err; struct km_event c; int delete; + struct xfrm_mark m; + u32 mark = xfrm_mark_get(attrs, &m); p = nlmsg_data(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; @@ -1454,7 +1473,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, return err; if (p->index) - xp = xfrm_policy_byid(net, type, p->dir, p->index, delete, &err); + xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, delete, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1471,8 +1490,8 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, type, p->dir, &p->sel, ctx, - delete, &err); + xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir, &p->sel, + ctx, delete, &err); security_xfrm_policy_free(ctx); } if (xp == NULL) @@ -1524,8 +1543,11 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.sessionid = NETLINK_CB(skb).sessionid; audit_info.secid = NETLINK_CB(skb).sid; err = xfrm_state_flush(net, p->proto, &audit_info); - if (err) + if (err) { + if (err == -ESRCH) /* empty table */ + return 0; return err; + } c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; @@ -1541,6 +1563,7 @@ static inline size_t xfrm_aevent_msgsize(void) return NLMSG_ALIGN(sizeof(struct xfrm_aevent_id)) + nla_total_size(sizeof(struct xfrm_replay_state)) + nla_total_size(sizeof(struct xfrm_lifetime_cur)) + + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(4) /* XFRM_AE_RTHR */ + nla_total_size(4); /* XFRM_AE_ETHR */ } @@ -1573,6 +1596,9 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, struct km_eve NLA_PUT_U32(skb, XFRMA_ETIMER_THRESH, x->replay_maxage * 10 / HZ); + if (xfrm_mark_put(skb, &x->mark)) + goto nla_put_failure; + return nlmsg_end(skb, nlh); nla_put_failure: @@ -1588,6 +1614,8 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct sk_buff *r_skb; int err; struct km_event c; + u32 mark; + struct xfrm_mark m; struct xfrm_aevent_id *p = nlmsg_data(nlh); struct xfrm_usersa_id *id = &p->sa_id; @@ -1595,7 +1623,9 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - x = xfrm_state_lookup(net, &id->daddr, id->spi, id->proto, id->family); + mark = xfrm_mark_get(attrs, &m); + + x = xfrm_state_lookup(net, mark, &id->daddr, id->spi, id->proto, id->family); if (x == NULL) { kfree_skb(r_skb); return -ESRCH; @@ -1626,6 +1656,8 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_state *x; struct km_event c; int err = - EINVAL; + u32 mark = 0; + struct xfrm_mark m; struct xfrm_aevent_id *p = nlmsg_data(nlh); struct nlattr *rp = attrs[XFRMA_REPLAY_VAL]; struct nlattr *lt = attrs[XFRMA_LTIME_VAL]; @@ -1637,7 +1669,9 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, if (!(nlh->nlmsg_flags&NLM_F_REPLACE)) return err; - x = xfrm_state_lookup(net, &p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); + mark = xfrm_mark_get(attrs, &m); + + x = xfrm_state_lookup(net, mark, &p->sa_id.daddr, p->sa_id.spi, p->sa_id.proto, p->sa_id.family); if (x == NULL) return -ESRCH; @@ -1676,8 +1710,12 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, audit_info.sessionid = NETLINK_CB(skb).sessionid; audit_info.secid = NETLINK_CB(skb).sid; err = xfrm_policy_flush(net, type, &audit_info); - if (err) + if (err) { + if (err == -ESRCH) /* empty table */ + return 0; return err; + } + c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; @@ -1696,13 +1734,15 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_userpolicy_info *p = &up->pol; u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; + struct xfrm_mark m; + u32 mark = xfrm_mark_get(attrs, &m); err = copy_from_user_policy_type(&type, attrs); if (err) return err; if (p->index) - xp = xfrm_policy_byid(net, type, p->dir, p->index, 0, &err); + xp = xfrm_policy_byid(net, mark, type, p->dir, p->index, 0, &err); else { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; @@ -1719,7 +1759,8 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, if (err) return err; } - xp = xfrm_policy_bysel_ctx(net, type, p->dir, &p->sel, ctx, 0, &err); + xp = xfrm_policy_bysel_ctx(net, mark, type, p->dir, + &p->sel, ctx, 0, &err); security_xfrm_policy_free(ctx); } if (xp == NULL) @@ -1759,8 +1800,10 @@ static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, int err; struct xfrm_user_expire *ue = nlmsg_data(nlh); struct xfrm_usersa_info *p = &ue->state; + struct xfrm_mark m; + u32 mark = xfrm_mark_get(attrs, &m);; - x = xfrm_state_lookup(net, &p->id.daddr, p->id.spi, p->id.proto, p->family); + x = xfrm_state_lookup(net, mark, &p->id.daddr, p->id.spi, p->id.proto, p->family); err = -ENOENT; if (x == NULL) @@ -1794,6 +1837,7 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_user_tmpl *ut; int i; struct nlattr *rt = attrs[XFRMA_TMPL]; + struct xfrm_mark mark; struct xfrm_user_acquire *ua = nlmsg_data(nlh); struct xfrm_state *x = xfrm_state_alloc(net); @@ -1802,6 +1846,8 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, if (!x) goto nomem; + xfrm_mark_get(attrs, &mark); + err = verify_newpolicy_info(&ua->policy); if (err) goto bad_policy; @@ -1814,7 +1860,8 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, memcpy(&x->id, &ua->id, sizeof(ua->id)); memcpy(&x->props.saddr, &ua->saddr, sizeof(ua->saddr)); memcpy(&x->sel, &ua->sel, sizeof(ua->sel)); - + xp->mark.m = x->mark.m = mark.m; + xp->mark.v = x->mark.v = mark.v; ut = nla_data(rt); /* extract the templates and for each call km_key */ for (i = 0; i < xp->xfrm_nr; i++, ut++) { @@ -2074,6 +2121,7 @@ static const struct nla_policy xfrma_policy[XFRMA_MAX+1] = { [XFRMA_POLICY_TYPE] = { .len = sizeof(struct xfrm_userpolicy_type)}, [XFRMA_MIGRATE] = { .len = sizeof(struct xfrm_user_migrate) }, [XFRMA_KMADDRESS] = { .len = sizeof(struct xfrm_user_kmaddress) }, + [XFRMA_MARK] = { .len = sizeof(struct xfrm_mark) }, }; static struct xfrm_link { @@ -2153,7 +2201,8 @@ static void xfrm_netlink_rcv(struct sk_buff *skb) static inline size_t xfrm_expire_msgsize(void) { - return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)); + return NLMSG_ALIGN(sizeof(struct xfrm_user_expire)) + + nla_total_size(sizeof(struct xfrm_mark)); } static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_event *c) @@ -2169,7 +2218,13 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, struct km_eve copy_to_user_state(x, &ue->state); ue->hard = (c->data.hard != 0) ? 1 : 0; + if (xfrm_mark_put(skb, &x->mark)) + goto nla_put_failure; + return nlmsg_end(skb, nlh); + +nla_put_failure: + return -EMSGSIZE; } static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) @@ -2181,8 +2236,10 @@ static int xfrm_exp_state_notify(struct xfrm_state *x, struct km_event *c) if (skb == NULL) return -ENOMEM; - if (build_expire(skb, x, c) < 0) - BUG(); + if (build_expire(skb, x, c) < 0) { + kfree_skb(skb); + return -EMSGSIZE; + } return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_EXPIRE, GFP_ATOMIC); } @@ -2270,6 +2327,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, struct km_event *c) if (c->event == XFRM_MSG_DELSA) { len += nla_total_size(headlen); headlen = sizeof(*id); + len += nla_total_size(sizeof(struct xfrm_mark)); } len += NLMSG_ALIGN(headlen); @@ -2340,6 +2398,7 @@ static inline size_t xfrm_acquire_msgsize(struct xfrm_state *x, { return NLMSG_ALIGN(sizeof(struct xfrm_user_acquire)) + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + + nla_total_size(sizeof(struct xfrm_mark)) + nla_total_size(xfrm_user_sec_ctx_size(x->security)) + userpolicy_type_attrsize(); } @@ -2372,9 +2431,12 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, goto nlmsg_failure; if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; + if (xfrm_mark_put(skb, &xp->mark)) + goto nla_put_failure; return nlmsg_end(skb, nlh); +nla_put_failure: nlmsg_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; @@ -2461,6 +2523,7 @@ static inline size_t xfrm_polexpire_msgsize(struct xfrm_policy *xp) return NLMSG_ALIGN(sizeof(struct xfrm_user_polexpire)) + nla_total_size(sizeof(struct xfrm_user_tmpl) * xp->xfrm_nr) + nla_total_size(xfrm_user_sec_ctx_size(xp->security)) + + nla_total_size(sizeof(struct xfrm_mark)) + userpolicy_type_attrsize(); } @@ -2483,10 +2546,13 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, goto nlmsg_failure; if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; + if (xfrm_mark_put(skb, &xp->mark)) + goto nla_put_failure; upe->hard = !!hard; return nlmsg_end(skb, nlh); +nla_put_failure: nlmsg_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; @@ -2523,6 +2589,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * headlen = sizeof(*id); } len += userpolicy_type_attrsize(); + len += nla_total_size(sizeof(struct xfrm_mark)); len += NLMSG_ALIGN(headlen); skb = nlmsg_new(len, GFP_ATOMIC); @@ -2558,10 +2625,14 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * if (copy_to_user_policy_type(xp->type, skb) < 0) goto nlmsg_failure; + if (xfrm_mark_put(skb, &xp->mark)) + goto nla_put_failure; + nlmsg_end(skb, nlh); return nlmsg_multicast(net->xfrm.nlsk, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); +nla_put_failure: nlmsg_failure: kfree_skb(skb); return -1; |