Diffstat (limited to 'net/core')
-rw-r--r--  net/core/dev.c       | 232
-rw-r--r--  net/core/neighbour.c |  44
-rw-r--r--  net/core/rtnetlink.c |   6
-rw-r--r--  net/core/skbuff.c    |   1
4 files changed, 169 insertions(+), 114 deletions(-)
diff --git a/net/core/dev.c b/net/core/dev.c
index f411c28d0a66..7fe82929f509 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1694,6 +1694,7 @@ int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
 
 	skb_scrub_packet(skb, true);
 	skb->protocol = eth_type_trans(skb, dev);
+	skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
 
 	return 0;
 }
@@ -2351,7 +2352,6 @@ EXPORT_SYMBOL(skb_checksum_help);
 
 __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
 {
-	unsigned int vlan_depth = skb->mac_len;
 	__be16 type = skb->protocol;
 
 	/* Tunnel gso handlers can set protocol to ethernet. */
@@ -2365,35 +2365,7 @@ __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
 		type = eth->h_proto;
 	}
 
-	/* if skb->protocol is 802.1Q/AD then the header should already be
-	 * present at mac_len - VLAN_HLEN (if mac_len > 0), or at
-	 * ETH_HLEN otherwise
-	 */
-	if (type == htons(ETH_P_8021Q) || type == htons(ETH_P_8021AD)) {
-		if (vlan_depth) {
-			if (WARN_ON(vlan_depth < VLAN_HLEN))
-				return 0;
-			vlan_depth -= VLAN_HLEN;
-		} else {
-			vlan_depth = ETH_HLEN;
-		}
-		do {
-			struct vlan_hdr *vh;
-
-			if (unlikely(!pskb_may_pull(skb,
-						    vlan_depth + VLAN_HLEN)))
-				return 0;
-
-			vh = (struct vlan_hdr *)(skb->data + vlan_depth);
-			type = vh->h_vlan_encapsulated_proto;
-			vlan_depth += VLAN_HLEN;
-		} while (type == htons(ETH_P_8021Q) ||
-			 type == htons(ETH_P_8021AD));
-	}
-
-	*depth = vlan_depth;
-
-	return type;
+	return __vlan_get_protocol(skb, type, depth);
 }
 
 /**
@@ -2522,7 +2494,7 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
 /* If MPLS offload request, verify we are testing hardware MPLS features
  * instead of standard features for the netdev.
  */
-#ifdef CONFIG_NET_MPLS_GSO
+#if IS_ENABLED(CONFIG_NET_MPLS_GSO)
 static netdev_features_t net_mpls_features(struct sk_buff *skb,
 					   netdev_features_t features,
 					   __be16 type)
@@ -2562,7 +2534,7 @@ static netdev_features_t harmonize_features(struct sk_buff *skb,
 
 netdev_features_t netif_skb_features(struct sk_buff *skb)
 {
-	const struct net_device *dev = skb->dev;
+	struct net_device *dev = skb->dev;
 	netdev_features_t features = dev->features;
 	u16 gso_segs = skb_shinfo(skb)->gso_segs;
 	__be16 protocol = skb->protocol;
@@ -2570,11 +2542,21 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 	if (gso_segs > dev->gso_max_segs || gso_segs < dev->gso_min_segs)
 		features &= ~NETIF_F_GSO_MASK;
 
-	if (protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD)) {
-		struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
-		protocol = veh->h_vlan_encapsulated_proto;
-	} else if (!vlan_tx_tag_present(skb)) {
-		return harmonize_features(skb, features);
+	/* If encapsulation offload request, verify we are testing
+	 * hardware encapsulation features instead of standard
+	 * features for the netdev
+	 */
+	if (skb->encapsulation)
+		features &= dev->hw_enc_features;
+
+	if (!vlan_tx_tag_present(skb)) {
+		if (unlikely(protocol == htons(ETH_P_8021Q) ||
+			     protocol == htons(ETH_P_8021AD))) {
+			struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data;
+			protocol = veh->h_vlan_encapsulated_proto;
+		} else {
+			goto finalize;
+		}
 	}
 
 	features = netdev_intersect_features(features,
@@ -2591,6 +2573,11 @@ netdev_features_t netif_skb_features(struct sk_buff *skb)
 					       NETIF_F_HW_VLAN_CTAG_TX |
 					       NETIF_F_HW_VLAN_STAG_TX);
 
+finalize:
+	if (dev->netdev_ops->ndo_features_check)
+		features &= dev->netdev_ops->ndo_features_check(skb, dev,
+								features);
+
 	return harmonize_features(skb, features);
 }
 EXPORT_SYMBOL(netif_skb_features);
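
The new finalize label routes every packet through the ndo_features_check hook, giving the driver a final veto over offload features. A minimal sketch of a driver-side implementation, where my_drv_features_check and MY_DRV_MAX_HDR_LEN are hypothetical names, not part of this patch:

/* Hypothetical driver callback for the ndo_features_check hook used
 * above: disable checksum and GSO offload for packets whose headers
 * sit deeper than a (made-up) hardware parser limit.
 */
#define MY_DRV_MAX_HDR_LEN	192

static netdev_features_t my_drv_features_check(struct sk_buff *skb,
					       struct net_device *dev,
					       netdev_features_t features)
{
	if (skb_transport_offset(skb) > MY_DRV_MAX_HDR_LEN)
		features &= ~(NETIF_F_ALL_CSUM | NETIF_F_GSO_MASK);
	return features;
}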
@@ -2661,19 +2648,12 @@ static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device
 	if (unlikely(!skb))
 		goto out_null;
 
-	/* If encapsulation offload request, verify we are testing
-	 * hardware encapsulation features instead of standard
-	 * features for the netdev
-	 */
-	if (skb->encapsulation)
-		features &= dev->hw_enc_features;
-
 	if (netif_needs_gso(dev, skb, features)) {
 		struct sk_buff *segs;
 
 		segs = skb_gso_segment(skb, features);
 		if (IS_ERR(segs)) {
-			segs = NULL;
+			goto out_kfree_skb;
 		} else if (segs) {
 			consume_skb(skb);
 			skb = segs;
@@ -4557,6 +4537,68 @@ void netif_napi_del(struct napi_struct *napi)
 }
 EXPORT_SYMBOL(netif_napi_del);
 
+static int napi_poll(struct napi_struct *n, struct list_head *repoll)
+{
+	void *have;
+	int work, weight;
+
+	list_del_init(&n->poll_list);
+
+	have = netpoll_poll_lock(n);
+
+	weight = n->weight;
+
+	/* This NAPI_STATE_SCHED test is for avoiding a race
+	 * with netpoll's poll_napi(). Only the entity which
+	 * obtains the lock and sees NAPI_STATE_SCHED set will
+	 * actually make the ->poll() call. Therefore we avoid
+	 * accidentally calling ->poll() when NAPI is not scheduled.
+	 */
+	work = 0;
+	if (test_bit(NAPI_STATE_SCHED, &n->state)) {
+		work = n->poll(n, weight);
+		trace_napi_poll(n);
+	}
+
+	WARN_ON_ONCE(work > weight);
+
+	if (likely(work < weight))
+		goto out_unlock;
+
+	/* Drivers must not modify the NAPI state if they
+	 * consume the entire weight. In such cases this code
+	 * still "owns" the NAPI instance and therefore can
+	 * move the instance around on the list at-will.
+	 */
+	if (unlikely(napi_disable_pending(n))) {
+		napi_complete(n);
+		goto out_unlock;
+	}
+
+	if (n->gro_list) {
+		/* flush too old packets
+		 * If HZ < 1000, flush all packets.
+		 */
+		napi_gro_flush(n, HZ >= 1000);
+	}
+
+	/* Some drivers may have called napi_schedule
+	 * prior to exhausting their budget.
+	 */
+	if (unlikely(!list_empty(&n->poll_list))) {
+		pr_warn_once("%s: Budget exhausted after napi rescheduled\n",
+			     n->dev ? n->dev->name : "backlog");
+		goto out_unlock;
+	}
+
+	list_add_tail(&n->poll_list, repoll);
+
+out_unlock:
+	netpoll_poll_unlock(have);
+
+	return work;
+}
+
 static void net_rx_action(struct softirq_action *h)
 {
 	struct softnet_data *sd = this_cpu_ptr(&softnet_data);
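
The comments factored into napi_poll() spell out the driver contract: a ->poll() callback may return less than its budget, and complete NAPI, only when the ring is drained; returning the full weight means the core still "owns" the instance and will re-poll it. A schematic callback honouring that contract (struct my_ring, my_ring_clean() and my_irq_enable() are illustrative stand-ins, not from this patch):

static int my_poll(struct napi_struct *napi, int budget)
{
	struct my_ring *ring = container_of(napi, struct my_ring, napi);
	int work_done;

	/* my_ring_clean() stands in for the driver's rx processing */
	work_done = my_ring_clean(ring, budget);

	if (work_done < budget) {
		/* ring drained: hand the instance back and unmask irqs */
		napi_complete(napi);
		my_irq_enable(ring);
	}
	/* never return more than the budget (see WARN_ON_ONCE above) */
	return work_done;
}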
@@ -4564,74 +4606,34 @@ static void net_rx_action(struct softirq_action *h)
 	int budget = netdev_budget;
 	LIST_HEAD(list);
 	LIST_HEAD(repoll);
-	void *have;
 
 	local_irq_disable();
 	list_splice_init(&sd->poll_list, &list);
 	local_irq_enable();
 
-	while (!list_empty(&list)) {
+	for (;;) {
 		struct napi_struct *n;
-		int work, weight;
-
-		/* If softirq window is exhausted then punt.
-		 * Allow this to run for 2 jiffies since which will allow
-		 * an average latency of 1.5/HZ.
-		 */
-		if (unlikely(budget <= 0 || time_after_eq(jiffies, time_limit)))
-			goto softnet_break;
-
-		n = list_first_entry(&list, struct napi_struct, poll_list);
-		list_del_init(&n->poll_list);
-
-		have = netpoll_poll_lock(n);
-		weight = n->weight;
-
-		/* This NAPI_STATE_SCHED test is for avoiding a race
-		 * with netpoll's poll_napi(). Only the entity which
-		 * obtains the lock and sees NAPI_STATE_SCHED set will
-		 * actually make the ->poll() call. Therefore we avoid
-		 * accidentally calling ->poll() when NAPI is not scheduled.
-		 */
-		work = 0;
-		if (test_bit(NAPI_STATE_SCHED, &n->state)) {
-			work = n->poll(n, weight);
-			trace_napi_poll(n);
+		if (list_empty(&list)) {
+			if (!sd_has_rps_ipi_waiting(sd) && list_empty(&repoll))
+				return;
+			break;
 		}
 
-		WARN_ON_ONCE(work > weight);
-
-		budget -= work;
+		n = list_first_entry(&list, struct napi_struct, poll_list);
+		budget -= napi_poll(n, &repoll);
 
-		/* Drivers must not modify the NAPI state if they
-		 * consume the entire weight. In such cases this code
-		 * still "owns" the NAPI instance and therefore can
-		 * move the instance around on the list at-will.
+		/* If softirq window is exhausted then punt.
+		 * Allow this to run for 2 jiffies since which will allow
+		 * an average latency of 1.5/HZ.
 		 */
-		if (unlikely(work == weight)) {
-			if (unlikely(napi_disable_pending(n))) {
-				napi_complete(n);
-			} else {
-				if (n->gro_list) {
-					/* flush too old packets
-					 * If HZ < 1000, flush all packets.
-					 */
-					napi_gro_flush(n, HZ >= 1000);
-				}
-				list_add_tail(&n->poll_list, &repoll);
-			}
+		if (unlikely(budget <= 0 ||
+			     time_after_eq(jiffies, time_limit))) {
+			sd->time_squeeze++;
+			break;
 		}
-
-		netpoll_poll_unlock(have);
 	}
 
-	if (!sd_has_rps_ipi_waiting(sd) &&
-	    list_empty(&list) &&
-	    list_empty(&repoll))
-		return;
-out:
 	local_irq_disable();
 
 	list_splice_tail_init(&sd->poll_list, &list);
@@ -4641,12 +4643,6 @@ out:
 	__raise_softirq_irqoff(NET_RX_SOFTIRQ);
 
 	net_rps_action_and_irq_enable(sd);
-
-	return;
-
-softnet_break:
-	sd->time_squeeze++;
-	goto out;
 }
 
 struct netdev_adjacent {
@@ -5298,7 +5294,7 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_upper_dev_unlink);
 
-void netdev_adjacent_add_links(struct net_device *dev)
+static void netdev_adjacent_add_links(struct net_device *dev)
 {
 	struct netdev_adjacent *iter;
 
@@ -5323,7 +5319,7 @@ void netdev_adjacent_add_links(struct net_device *dev)
 	}
 }
 
-void netdev_adjacent_del_links(struct net_device *dev)
+static void netdev_adjacent_del_links(struct net_device *dev)
 {
 	struct netdev_adjacent *iter;
 
@@ -6631,7 +6627,7 @@ struct netdev_queue *dev_ingress_queue_create(struct net_device *dev)
 	if (!queue)
 		return NULL;
 	netdev_init_one_queue(dev, queue, NULL);
-	queue->qdisc = &noop_qdisc;
+	RCU_INIT_POINTER(queue->qdisc, &noop_qdisc);
 	queue->qdisc_sleeping = &noop_qdisc;
 	rcu_assign_pointer(dev->ingress_queue, queue);
 #endif
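
In the dev_ingress_queue_create() change, RCU_INIT_POINTER() is sufficient (no write barrier) because the queue is not yet reachable by readers: it only becomes visible through the subsequent rcu_assign_pointer() on dev->ingress_queue. A generic sketch of the distinction, on made-up types:

/* Made-up types; rcu_assign_pointer() is the publishing store that
 * readers may race with, RCU_INIT_POINTER() skips the ordering
 * barrier and is safe only while no reader can reach the pointer.
 */
struct foo { int val; };
struct bar { struct foo __rcu *fp; };

static void publish(struct bar *b, struct foo *f)
{
	f->val = 42;			/* initialise first ... */
	rcu_assign_pointer(b->fp, f);	/* ... then publish with a barrier */
}

static void init_unreachable(struct bar *b, struct foo *f)
{
	RCU_INIT_POINTER(b->fp, f);	/* no readers yet: no barrier needed */
}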
@@ -7047,10 +7043,20 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 		oldsd->output_queue = NULL;
 		oldsd->output_queue_tailp = &oldsd->output_queue;
 	}
-	/* Append NAPI poll list from offline CPU. */
-	if (!list_empty(&oldsd->poll_list)) {
-		list_splice_init(&oldsd->poll_list, &sd->poll_list);
-		raise_softirq_irqoff(NET_RX_SOFTIRQ);
+	/* Append NAPI poll list from offline CPU, with one exception :
+	 * process_backlog() must be called by cpu owning percpu backlog.
+	 * We properly handle process_queue & input_pkt_queue later.
+	 */
+	while (!list_empty(&oldsd->poll_list)) {
+		struct napi_struct *napi = list_first_entry(&oldsd->poll_list,
+							    struct napi_struct,
+							    poll_list);
+
+		list_del_init(&napi->poll_list);
+		if (napi->poll == process_backlog)
+			napi->state = 0;
+		else
+			____napi_schedule(sd, napi);
 	}
 
 	raise_softirq_irqoff(NET_TX_SOFTIRQ);
@@ -7061,7 +7067,7 @@ static int dev_cpu_callback(struct notifier_block *nfb,
 		netif_rx_internal(skb);
 		input_queue_head_incr(oldsd);
 	}
-	while ((skb = __skb_dequeue(&oldsd->input_pkt_queue))) {
+	while ((skb = skb_dequeue(&oldsd->input_pkt_queue))) {
 		netif_rx_internal(skb);
 		input_queue_head_incr(oldsd);
 	}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 8e38f17288d3..8d614c93f86a 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -2043,6 +2043,12 @@ static int neightbl_set(struct sk_buff *skb, struct nlmsghdr *nlh)
 			case NDTPA_BASE_REACHABLE_TIME:
 				NEIGH_VAR_SET(p, BASE_REACHABLE_TIME,
 					      nla_get_msecs(tbp[i]));
+				/* update reachable_time as well, otherwise, the change will
+				 * only be effective after the next time neigh_periodic_work
+				 * decides to recompute it (can be multiple minutes)
+				 */
+				p->reachable_time =
+					neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
 				break;
 			case NDTPA_GC_STALETIME:
 				NEIGH_VAR_SET(p, GC_STALETIME,
@@ -2921,6 +2927,31 @@ static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
 	return ret;
 }
 
+static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
+					  void __user *buffer,
+					  size_t *lenp, loff_t *ppos)
+{
+	struct neigh_parms *p = ctl->extra2;
+	int ret;
+
+	if (strcmp(ctl->procname, "base_reachable_time") == 0)
+		ret = neigh_proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
+	else if (strcmp(ctl->procname, "base_reachable_time_ms") == 0)
+		ret = neigh_proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
+	else
+		ret = -1;
+
+	if (write && ret == 0) {
+		/* update reachable_time as well, otherwise, the change will
+		 * only be effective after the next time neigh_periodic_work
+		 * decides to recompute it
+		 */
+		p->reachable_time =
+			neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
+	}
+	return ret;
+}
+
 #define NEIGH_PARMS_DATA_OFFSET(index)	\
 	(&((struct neigh_parms *) 0)->data[index])
 
@@ -3047,6 +3078,19 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
 		t->neigh_vars[NEIGH_VAR_RETRANS_TIME_MS].proc_handler = handler;
 		/* ReachableTime (in milliseconds) */
 		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler = handler;
+	} else {
+		/* Those handlers will update p->reachable_time after
+		 * base_reachable_time(_ms) is set to ensure the new timer starts being
+		 * applied after the next neighbour update instead of waiting for
+		 * neigh_periodic_work to update its value (can be multiple minutes)
+		 * So any handler that replaces them should do this as well
+		 */
+		/* ReachableTime */
+		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME].proc_handler =
+			neigh_proc_base_reachable_time;
+		/* ReachableTime (in milliseconds) */
+		t->neigh_vars[NEIGH_VAR_BASE_REACHABLE_TIME_MS].proc_handler =
+			neigh_proc_base_reachable_time;
 	}
 
 	/* Don't export sysctls to unprivileged users */
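
Both neighbour.c hunks resync p->reachable_time immediately because it is not the sysctl value itself but a randomized derivative of it, which neigh_periodic_work() would otherwise only refresh much later. From memory (not part of this diff), the helper is roughly:

/* approximate shape of neigh_rand_reach_time(), quoted from memory:
 * a value uniformly distributed in [base/2, 3*base/2)
 */
unsigned long neigh_rand_reach_time(unsigned long base)
{
	return base ? (prandom_u32() % base) + (base >> 1) : 0;
}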
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 9cf6fe9ddc0c..446cbaf81185 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -2895,12 +2895,16 @@ static int rtnl_bridge_notify(struct net_device *dev, u16 flags)
 		goto errout;
 	}
 
+	if (!skb->len)
+		goto errout;
+
 	rtnl_notify(skb, net, 0, RTNLGRP_LINK, NULL, GFP_ATOMIC);
 	return 0;
 errout:
 	WARN_ON(err == -EMSGSIZE);
 	kfree_skb(skb);
-	rtnl_set_sk_err(net, RTNLGRP_LINK, err);
+	if (err)
+		rtnl_set_sk_err(net, RTNLGRP_LINK, err);
 	return err;
 }
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index ae13ef6b3ea7..395c15b82087 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -4148,6 +4148,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet)
 	skb->ignore_df = 0;
 	skb_dst_drop(skb);
 	skb->mark = 0;
+	skb_init_secmark(skb);
 	secpath_reset(skb);
 	nf_reset(skb);
 	nf_reset_trace(skb);
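
The skbuff.c hunk makes skb_scrub_packet() drop the security mark along with the mark, dst and netfilter state. For reference, skb_init_secmark() is a trivial one-field reset; its approximate shape in include/linux/skbuff.h (quoted from memory, not part of this diff):

#ifdef CONFIG_NETWORK_SECMARK
static inline void skb_init_secmark(struct sk_buff *skb)
{
	skb->secmark = 0;
}
#else
static inline void skb_init_secmark(struct sk_buff *skb)
{
}
#endif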