Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 732
1 file changed, 355 insertions, 377 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 6666b28b6815..6deba68ad9e4 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -139,7 +139,6 @@
 #include <linux/errqueue.h>
 #include <linux/hrtimer.h>
 #include <linux/netfilter_ingress.h>
-#include <linux/sctp.h>
 #include <linux/crash_dump.h>
 
 #include "net-sysfs.h"
@@ -1944,37 +1943,80 @@ static void netif_setup_tc(struct net_device *dev, unsigned int txq)
 	}
 }
 
+int netdev_txq_to_tc(struct net_device *dev, unsigned int txq)
+{
+	if (dev->num_tc) {
+		struct netdev_tc_txq *tc = &dev->tc_to_txq[0];
+		int i;
+
+		for (i = 0; i < TC_MAX_QUEUE; i++, tc++) {
+			if ((txq - tc->offset) < tc->count)
+				return i;
+		}
+
+		return -1;
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_XPS
 static DEFINE_MUTEX(xps_map_mutex);
 #define xmap_dereference(P)		\
 	rcu_dereference_protected((P), lockdep_is_held(&xps_map_mutex))
 
-static struct xps_map *remove_xps_queue(struct xps_dev_maps *dev_maps,
-					int cpu, u16 index)
+static bool remove_xps_queue(struct xps_dev_maps *dev_maps,
+			     int tci, u16 index)
 {
 	struct xps_map *map = NULL;
 	int pos;
 
 	if (dev_maps)
-		map = xmap_dereference(dev_maps->cpu_map[cpu]);
+		map = xmap_dereference(dev_maps->cpu_map[tci]);
+	if (!map)
+		return false;
 
-	for (pos = 0; map && pos < map->len; pos++) {
-		if (map->queues[pos] == index) {
-			if (map->len > 1) {
-				map->queues[pos] = map->queues[--map->len];
-			} else {
-				RCU_INIT_POINTER(dev_maps->cpu_map[cpu], NULL);
-				kfree_rcu(map, rcu);
-				map = NULL;
-			}
+	for (pos = map->len; pos--;) {
+		if (map->queues[pos] != index)
+			continue;
+
+		if (map->len > 1) {
+			map->queues[pos] = map->queues[--map->len];
 			break;
 		}
+
+		RCU_INIT_POINTER(dev_maps->cpu_map[tci], NULL);
+		kfree_rcu(map, rcu);
+		return false;
 	}
 
-	return map;
+	return true;
 }
 
-static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
+static bool remove_xps_queue_cpu(struct net_device *dev,
+				 struct xps_dev_maps *dev_maps,
+				 int cpu, u16 offset, u16 count)
+{
+	int num_tc = dev->num_tc ? : 1;
+	bool active = false;
+	int tci;
+
+	for (tci = cpu * num_tc; num_tc--; tci++) {
+		int i, j;
+
+		for (i = count, j = offset; i--; j++) {
+			if (!remove_xps_queue(dev_maps, cpu, j))
+				break;
+		}
+
+		active |= i < 0;
+	}
+
+	return active;
+}
+
+static void netif_reset_xps_queues(struct net_device *dev, u16 offset,
+				   u16 count)
 {
 	struct xps_dev_maps *dev_maps;
 	int cpu, i;
@@ -1986,21 +2028,16 @@ static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
 	if (!dev_maps)
 		goto out_no_maps;
 
-	for_each_possible_cpu(cpu) {
-		for (i = index; i < dev->num_tx_queues; i++) {
-			if (!remove_xps_queue(dev_maps, cpu, i))
-				break;
-		}
-		if (i == dev->num_tx_queues)
-			active = true;
-	}
+	for_each_possible_cpu(cpu)
+		active |= remove_xps_queue_cpu(dev, dev_maps, cpu,
+					       offset, count);
 
 	if (!active) {
 		RCU_INIT_POINTER(dev->xps_maps, NULL);
 		kfree_rcu(dev_maps, rcu);
 	}
 
-	for (i = index; i < dev->num_tx_queues; i++)
+	for (i = offset + (count - 1); count--; i--)
 		netdev_queue_numa_node_write(netdev_get_tx_queue(dev, i),
 					     NUMA_NO_NODE);
 
@@ -2008,6 +2045,11 @@ out_no_maps:
 	mutex_unlock(&xps_map_mutex);
 }
 
+static void netif_reset_xps_queues_gt(struct net_device *dev, u16 index)
+{
+	netif_reset_xps_queues(dev, index, dev->num_tx_queues - index);
+}
+
 static struct xps_map *expand_xps_map(struct xps_map *map,
 				      int cpu, u16 index)
 {
@@ -2047,20 +2089,28 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			u16 index)
 {
 	struct xps_dev_maps *dev_maps, *new_dev_maps = NULL;
+	int i, cpu, tci, numa_node_id = -2;
+	int maps_sz, num_tc = 1, tc = 0;
 	struct xps_map *map, *new_map;
-	int maps_sz = max_t(unsigned int, XPS_DEV_MAPS_SIZE, L1_CACHE_BYTES);
-	int cpu, numa_node_id = -2;
 	bool active = false;
 
+	if (dev->num_tc) {
+		num_tc = dev->num_tc;
+		tc = netdev_txq_to_tc(dev, index);
+		if (tc < 0)
+			return -EINVAL;
+	}
+
+	maps_sz = XPS_DEV_MAPS_SIZE(num_tc);
+	if (maps_sz < L1_CACHE_BYTES)
+		maps_sz = L1_CACHE_BYTES;
+
 	mutex_lock(&xps_map_mutex);
 
 	dev_maps = xmap_dereference(dev->xps_maps);
 
 	/* allocate memory for queue storage */
-	for_each_online_cpu(cpu) {
-		if (!cpumask_test_cpu(cpu, mask))
-			continue;
-
+	for_each_cpu_and(cpu, cpu_online_mask, mask) {
 		if (!new_dev_maps)
 			new_dev_maps = kzalloc(maps_sz, GFP_KERNEL);
 		if (!new_dev_maps) {
@@ -2068,25 +2118,38 @@ int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask,
 			return -ENOMEM;
 		}
 
-		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
+		tci = cpu * num_tc + tc;
+		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[tci]) :
 				 NULL;
 
 		map = expand_xps_map(map, cpu, index);
 		if (!map)
 			goto error;
 
-		RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+		RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
 	}
 
 	if (!new_dev_maps)
 		goto out_no_new_maps;
 
 	for_each_possible_cpu(cpu) {
+		/* copy maps belonging to foreign traffic classes */
+		for (i = tc, tci = cpu * num_tc; dev_maps && i--; tci++) {
+			/* fill in the new device map from the old device map */
+			map = xmap_dereference(dev_maps->cpu_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+		}
+
+		/* We need to explicitly update tci as previous loop
+		 * could break out early if dev_maps is NULL.
+		 */
+		tci = cpu * num_tc + tc;
+
 		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu)) {
 			/* add queue to CPU maps */
 			int pos = 0;
 
-			map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
+			map = xmap_dereference(new_dev_maps->cpu_map[tci]);
 			while ((pos < map->len) && (map->queues[pos] != index))
 				pos++;
 
@@ -2100,26 +2163,36 @@
 #endif
 		} else if (dev_maps) {
 			/* fill in the new device map from the old device map */
-			map = xmap_dereference(dev_maps->cpu_map[cpu]);
-			RCU_INIT_POINTER(new_dev_maps->cpu_map[cpu], map);
+			map = xmap_dereference(dev_maps->cpu_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
 		}
 
+		/* copy maps belonging to foreign traffic classes */
+		for (i = num_tc - tc, tci++; dev_maps && --i; tci++) {
+			/* fill in the new device map from the old device map */
+			map = xmap_dereference(dev_maps->cpu_map[tci]);
+			RCU_INIT_POINTER(new_dev_maps->cpu_map[tci], map);
+		}
 	}
 
 	rcu_assign_pointer(dev->xps_maps, new_dev_maps);
 
 	/* Cleanup old maps */
-	if (dev_maps) {
-		for_each_possible_cpu(cpu) {
-			new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
-			map = xmap_dereference(dev_maps->cpu_map[cpu]);
+	if (!dev_maps)
+		goto out_no_old_maps;
+
+	for_each_possible_cpu(cpu) {
+		for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
+			new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+			map = xmap_dereference(dev_maps->cpu_map[tci]);
 			if (map && map != new_map)
 				kfree_rcu(map, rcu);
 		}
-
-		kfree_rcu(dev_maps, rcu);
 	}
 
+	kfree_rcu(dev_maps, rcu);
+
+out_no_old_maps:
 	dev_maps = new_dev_maps;
 	active = true;
 
@@ -2134,11 +2207,12 @@ out_no_new_maps:
 
 	/* removes queue from unused CPUs */
 	for_each_possible_cpu(cpu) {
-		if (cpumask_test_cpu(cpu, mask) && cpu_online(cpu))
-			continue;
-
-		if (remove_xps_queue(dev_maps, cpu, index))
-			active = true;
+		for (i = tc, tci = cpu * num_tc; i--; tci++)
+			active |= remove_xps_queue(dev_maps, tci, index);
+		if (!cpumask_test_cpu(cpu, mask) || !cpu_online(cpu))
+			active |= remove_xps_queue(dev_maps, tci, index);
+		for (i = num_tc - tc, tci++; --i; tci++)
+			active |= remove_xps_queue(dev_maps, tci, index);
 	}
 
 	/* free map if not active */
@@ -2154,11 +2228,14 @@ out_no_maps:
 error:
 	/* remove any maps that we added */
 	for_each_possible_cpu(cpu) {
-		new_map = xmap_dereference(new_dev_maps->cpu_map[cpu]);
-		map = dev_maps ? xmap_dereference(dev_maps->cpu_map[cpu]) :
-				 NULL;
-		if (new_map && new_map != map)
-			kfree(new_map);
+		for (i = num_tc, tci = cpu * num_tc; i--; tci++) {
+			new_map = xmap_dereference(new_dev_maps->cpu_map[tci]);
+			map = dev_maps ?
+			      xmap_dereference(dev_maps->cpu_map[tci]) :
+			      NULL;
+			if (new_map && new_map != map)
+				kfree(new_map);
+		}
 	}
 
 	mutex_unlock(&xps_map_mutex);
@@ -2169,6 +2246,44 @@ error:
 EXPORT_SYMBOL(netif_set_xps_queue);
 #endif
 
+void netdev_reset_tc(struct net_device *dev)
+{
+#ifdef CONFIG_XPS
+	netif_reset_xps_queues_gt(dev, 0);
+#endif
+	dev->num_tc = 0;
+	memset(dev->tc_to_txq, 0, sizeof(dev->tc_to_txq));
+	memset(dev->prio_tc_map, 0, sizeof(dev->prio_tc_map));
+}
+EXPORT_SYMBOL(netdev_reset_tc);
+
+int netdev_set_tc_queue(struct net_device *dev, u8 tc, u16 count, u16 offset)
+{
+	if (tc >= dev->num_tc)
+		return -EINVAL;
+
+#ifdef CONFIG_XPS
+	netif_reset_xps_queues(dev, offset, count);
+#endif
+	dev->tc_to_txq[tc].count = count;
+	dev->tc_to_txq[tc].offset = offset;
+	return 0;
+}
+EXPORT_SYMBOL(netdev_set_tc_queue);
+
+int netdev_set_num_tc(struct net_device *dev, u8 num_tc)
+{
+	if (num_tc > TC_MAX_QUEUE)
+		return -EINVAL;
+
+#ifdef CONFIG_XPS
+	netif_reset_xps_queues_gt(dev, 0);
+#endif
+	dev->num_tc = num_tc;
+	return 0;
+}
+EXPORT_SYMBOL(netdev_set_num_tc);
+
 /*
  * Routine to help set real_num_tx_queues. To avoid skbs mapped to queues
  * greater than real_num_tx_queues stale skbs on the qdisc must be flushed.
@@ -2487,141 +2602,6 @@ out:
 }
 EXPORT_SYMBOL(skb_checksum_help);
 
-/* skb_csum_offload_check - Driver helper function to determine if a device
- * with limited checksum offload capabilities is able to offload the checksum
- * for a given packet.
- *
- * Arguments:
- *   skb - sk_buff for the packet in question
- *   spec - contains the description of what device can offload
- *   csum_encapped - returns true if the checksum being offloaded is
- *     encpasulated. That is it is checksum for the transport header
- *     in the inner headers.
- *   checksum_help - when set indicates that helper function should
- *     call skb_checksum_help if offload checks fail
- *
- * Returns:
- *   true: Packet has passed the checksum checks and should be offloadable to
- *     the device (a driver may still need to check for additional
- *     restrictions of its device)
- *   false: Checksum is not offloadable. If checksum_help was set then
- *     skb_checksum_help was called to resolve checksum for non-GSO
- *     packets and when IP protocol is not SCTP
- */
-bool __skb_csum_offload_chk(struct sk_buff *skb,
-			    const struct skb_csum_offl_spec *spec,
-			    bool *csum_encapped,
-			    bool csum_help)
-{
-	struct iphdr *iph;
-	struct ipv6hdr *ipv6;
-	void *nhdr;
-	int protocol;
-	u8 ip_proto;
-
-	if (skb->protocol == htons(ETH_P_8021Q) ||
-	    skb->protocol == htons(ETH_P_8021AD)) {
-		if (!spec->vlan_okay)
-			goto need_help;
-	}
-
-	/* We check whether the checksum refers to a transport layer checksum in
-	 * the outermost header or an encapsulated transport layer checksum that
-	 * corresponds to the inner headers of the skb. If the checksum is for
-	 * something else in the packet we need help.
-	 */
-	if (skb_checksum_start_offset(skb) == skb_transport_offset(skb)) {
-		/* Non-encapsulated checksum */
-		protocol = eproto_to_ipproto(vlan_get_protocol(skb));
-		nhdr = skb_network_header(skb);
-		*csum_encapped = false;
-		if (spec->no_not_encapped)
-			goto need_help;
-	} else if (skb->encapsulation && spec->encap_okay &&
-		   skb_checksum_start_offset(skb) ==
-		   skb_inner_transport_offset(skb)) {
-		/* Encapsulated checksum */
-		*csum_encapped = true;
-		switch (skb->inner_protocol_type) {
-		case ENCAP_TYPE_ETHER:
-			protocol = eproto_to_ipproto(skb->inner_protocol);
-			break;
-		case ENCAP_TYPE_IPPROTO:
-			protocol = skb->inner_protocol;
-			break;
-		}
-		nhdr = skb_inner_network_header(skb);
-	} else {
-		goto need_help;
-	}
-
-	switch (protocol) {
-	case IPPROTO_IP:
-		if (!spec->ipv4_okay)
-			goto need_help;
-		iph = nhdr;
-		ip_proto = iph->protocol;
-		if (iph->ihl != 5 && !spec->ip_options_okay)
-			goto need_help;
-		break;
-	case IPPROTO_IPV6:
-		if (!spec->ipv6_okay)
-			goto need_help;
-		if (spec->no_encapped_ipv6 && *csum_encapped)
-			goto need_help;
-		ipv6 = nhdr;
-		nhdr += sizeof(*ipv6);
-		ip_proto = ipv6->nexthdr;
-		break;
-	default:
-		goto need_help;
-	}
-
-ip_proto_again:
-	switch (ip_proto) {
-	case IPPROTO_TCP:
-		if (!spec->tcp_okay ||
-		    skb->csum_offset != offsetof(struct tcphdr, check))
-			goto need_help;
-		break;
-	case IPPROTO_UDP:
-		if (!spec->udp_okay ||
-		    skb->csum_offset != offsetof(struct udphdr, check))
-			goto need_help;
-		break;
-	case IPPROTO_SCTP:
-		if (!spec->sctp_okay ||
-		    skb->csum_offset != offsetof(struct sctphdr, checksum))
-			goto cant_help;
-		break;
-	case NEXTHDR_HOP:
-	case NEXTHDR_ROUTING:
-	case NEXTHDR_DEST: {
-		u8 *opthdr = nhdr;
-
-		if (protocol != IPPROTO_IPV6 || !spec->ext_hdrs_okay)
-			goto need_help;
-
-		ip_proto = opthdr[0];
-		nhdr += (opthdr[1] + 1) << 3;
-
-		goto ip_proto_again;
-	}
-	default:
-		goto need_help;
-	}
-
-	/* Passed the tests for offloading checksum */
-	return true;
-
-need_help:
-	if (csum_help && !skb_shinfo(skb)->gso_size)
-		skb_checksum_help(skb);
-cant_help:
-	return false;
-}
-EXPORT_SYMBOL(__skb_csum_offload_chk);
-
 __be16 skb_network_protocol(struct sk_buff *skb, int *depth)
 {
 	__be16 type = skb->protocol;
@@ -3216,8 +3196,14 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb)
 		rcu_read_lock();
 		dev_maps = rcu_dereference(dev->xps_maps);
 		if (dev_maps) {
-			map = rcu_dereference(
-			    dev_maps->cpu_map[skb->sender_cpu - 1]);
+			unsigned int tci = skb->sender_cpu - 1;
+
+			if (dev->num_tc) {
+				tci *= dev->num_tc;
+				tci += netdev_get_prio_tc_map(dev, skb->priority);
+			}
+
+			map = rcu_dereference(dev_maps->cpu_map[tci]);
 			if (map) {
 				if (map->len == 1)
 					queue_index = map->queues[0];
@@ -4491,7 +4477,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 	if (!(skb->dev->features & NETIF_F_GRO))
 		goto normal;
 
-	if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad)
+	if (skb->csum_bad)
 		goto normal;
 
 	gro_list_prepare(napi, skb);
@@ -4504,7 +4490,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb)
 		skb_set_network_header(skb, skb_gro_offset(skb));
 		skb_reset_mac_len(skb);
 		NAPI_GRO_CB(skb)->same_flow = 0;
-		NAPI_GRO_CB(skb)->flush = 0;
+		NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb);
 		NAPI_GRO_CB(skb)->free = 0;
 		NAPI_GRO_CB(skb)->encap_mark = 0;
 		NAPI_GRO_CB(skb)->recursion_counter = 0;
@@ -5026,7 +5012,7 @@ EXPORT_SYMBOL(sk_busy_loop);
 
 #endif /* CONFIG_NET_RX_BUSY_POLL */
 
-void napi_hash_add(struct napi_struct *napi)
+static void napi_hash_add(struct napi_struct *napi)
 {
 	if (test_bit(NAPI_STATE_NO_BUSY_POLL, &napi->state) ||
 	    test_and_set_bit(NAPI_STATE_HASHED, &napi->state))
@@ -5046,7 +5032,6 @@ void napi_hash_add(struct napi_struct *napi)
 
 	spin_unlock(&napi_hash_lock);
 }
-EXPORT_SYMBOL_GPL(napi_hash_add);
 
 /* Warning : caller is responsible to make sure rcu grace period
  * is respected before freeing memory containing @napi
@@ -5270,6 +5255,13 @@ static struct netdev_adjacent *__netdev_find_adj(struct net_device *adj_dev,
 	return NULL;
 }
 
+static int __netdev_has_upper_dev(struct net_device *upper_dev, void *data)
+{
+	struct net_device *dev = data;
+
+	return upper_dev == dev;
+}
+
 /**
  * netdev_has_upper_dev - Check if device is linked to an upper device
 * @dev: device
@@ -5284,11 +5276,30 @@ bool netdev_has_upper_dev(struct net_device *dev,
 {
 	ASSERT_RTNL();
 
-	return __netdev_find_adj(upper_dev, &dev->all_adj_list.upper);
+	return netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
+					     upper_dev);
 }
 EXPORT_SYMBOL(netdev_has_upper_dev);
 
 /**
+ * netdev_has_upper_dev_all_rcu - Check if device is linked to an upper device
+ * @dev: device
+ * @upper_dev: upper device to check
+ *
+ * Find out if a device is linked to specified upper device and return true
+ * in case it is. Note that this checks the entire upper device chain.
+ * The caller must hold rcu lock.
+ */
+
+bool netdev_has_upper_dev_all_rcu(struct net_device *dev,
+				  struct net_device *upper_dev)
+{
+	return !!netdev_walk_all_upper_dev_rcu(dev, __netdev_has_upper_dev,
+					       upper_dev);
+}
+EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
+
+/**
  * netdev_has_any_upper_dev - Check if device is linked to some device
 * @dev: device
 *
@@ -5299,7 +5310,7 @@ static bool netdev_has_any_upper_dev(struct net_device *dev)
 {
 	ASSERT_RTNL();
 
-	return !list_empty(&dev->all_adj_list.upper);
+	return !list_empty(&dev->adj_list.upper);
 }
 
 /**
@@ -5326,6 +5337,20 @@ struct net_device *netdev_master_upper_dev_get(struct net_device *dev)
 }
 EXPORT_SYMBOL(netdev_master_upper_dev_get);
 
+/**
+ * netdev_has_any_lower_dev - Check if device is linked to some device
+ * @dev: device
+ *
+ * Find out if a device is linked to a lower device and return true in case
+ * it is. The caller must hold the RTNL lock.
+ */
+static bool netdev_has_any_lower_dev(struct net_device *dev)
+{
+	ASSERT_RTNL();
+
+	return !list_empty(&dev->adj_list.lower);
+}
+
 void *netdev_adjacent_get_private(struct list_head *adj_list)
 {
 	struct netdev_adjacent *adj;
@@ -5362,16 +5387,8 @@ struct net_device *netdev_upper_get_next_dev_rcu(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_upper_get_next_dev_rcu);
 
-/**
- * netdev_all_upper_get_next_dev_rcu - Get the next dev from upper list
- * @dev: device
- * @iter: list_head ** of the current position
- *
- * Gets the next device from the dev's upper list, starting from iter
- * position. The caller must hold RCU read lock.
- */
-struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
-						     struct list_head **iter)
+static struct net_device *netdev_next_upper_dev_rcu(struct net_device *dev,
+						    struct list_head **iter)
 {
 	struct netdev_adjacent *upper;
 
@@ -5379,14 +5396,41 @@ struct net_device *netdev_all_upper_get_next_dev_rcu(struct net_device *dev,
 	upper = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
 
-	if (&upper->list == &dev->all_adj_list.upper)
+	if (&upper->list == &dev->adj_list.upper)
 		return NULL;
 
 	*iter = &upper->list;
 
 	return upper->dev;
 }
-EXPORT_SYMBOL(netdev_all_upper_get_next_dev_rcu);
+
+int netdev_walk_all_upper_dev_rcu(struct net_device *dev,
+				  int (*fn)(struct net_device *dev,
+					    void *data),
+				  void *data)
+{
+	struct net_device *udev;
+	struct list_head *iter;
+	int ret;
+
+	for (iter = &dev->adj_list.upper,
+	     udev = netdev_next_upper_dev_rcu(dev, &iter);
+	     udev;
+	     udev = netdev_next_upper_dev_rcu(dev, &iter)) {
+		/* first is the upper device itself */
+		ret = fn(udev, data);
+		if (ret)
+			return ret;
+
+		/* then look at all of its upper devices */
+		ret = netdev_walk_all_upper_dev_rcu(udev, fn, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_walk_all_upper_dev_rcu);
 
 /**
  * netdev_lower_get_next_private - Get the next ->private from the
@@ -5469,55 +5513,90 @@ void *netdev_lower_get_next(struct net_device *dev, struct list_head **iter)
 }
 EXPORT_SYMBOL(netdev_lower_get_next);
 
-/**
- * netdev_all_lower_get_next - Get the next device from all lower neighbour list
- * @dev: device
- * @iter: list_head ** of the current position
- *
- * Gets the next netdev_adjacent from the dev's all lower neighbour
- * list, starting from iter position. The caller must hold RTNL lock or
- * its own locking that guarantees that the neighbour all lower
- * list will remain unchanged.
- */
-struct net_device *netdev_all_lower_get_next(struct net_device *dev, struct list_head **iter)
+static struct net_device *netdev_next_lower_dev(struct net_device *dev,
+						struct list_head **iter)
 {
 	struct netdev_adjacent *lower;
 
-	lower = list_entry(*iter, struct netdev_adjacent, list);
+	lower = list_entry((*iter)->next, struct netdev_adjacent, list);
 
-	if (&lower->list == &dev->all_adj_list.lower)
+	if (&lower->list == &dev->adj_list.lower)
 		return NULL;
 
-	*iter = lower->list.next;
+	*iter = &lower->list;
 
 	return lower->dev;
 }
-EXPORT_SYMBOL(netdev_all_lower_get_next);
 
-/**
- * netdev_all_lower_get_next_rcu - Get the next device from all
- *				   lower neighbour list, RCU variant
- * @dev: device
- * @iter: list_head ** of the current position
- *
- * Gets the next netdev_adjacent from the dev's all lower neighbour
- * list, starting from iter position. The caller must hold RCU read lock.
- */
-struct net_device *netdev_all_lower_get_next_rcu(struct net_device *dev,
-						 struct list_head **iter)
+int netdev_walk_all_lower_dev(struct net_device *dev,
+			      int (*fn)(struct net_device *dev,
+					void *data),
+			      void *data)
+{
+	struct net_device *ldev;
+	struct list_head *iter;
+	int ret;
+
+	for (iter = &dev->adj_list.lower,
+	     ldev = netdev_next_lower_dev(dev, &iter);
+	     ldev;
+	     ldev = netdev_next_lower_dev(dev, &iter)) {
+		/* first is the lower device itself */
+		ret = fn(ldev, data);
+		if (ret)
+			return ret;
+
+		/* then look at all of its lower devices */
+		ret = netdev_walk_all_lower_dev(ldev, fn, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev);
+
+static struct net_device *netdev_next_lower_dev_rcu(struct net_device *dev,
+						    struct list_head **iter)
 {
 	struct netdev_adjacent *lower;
 
 	lower = list_entry_rcu((*iter)->next, struct netdev_adjacent, list);
-
-	if (&lower->list == &dev->all_adj_list.lower)
+	if (&lower->list == &dev->adj_list.lower)
 		return NULL;
 
 	*iter = &lower->list;
 
 	return lower->dev;
 }
-EXPORT_SYMBOL(netdev_all_lower_get_next_rcu);
+
+int netdev_walk_all_lower_dev_rcu(struct net_device *dev,
+				  int (*fn)(struct net_device *dev,
+					    void *data),
+				  void *data)
+{
+	struct net_device *ldev;
+	struct list_head *iter;
+	int ret;
+
+	for (iter = &dev->adj_list.lower,
+	     ldev = netdev_next_lower_dev_rcu(dev, &iter);
+	     ldev;
+	     ldev = netdev_next_lower_dev_rcu(dev, &iter)) {
+		/* first is the lower device itself */
+		ret = fn(ldev, data);
+		if (ret)
+			return ret;
+
+		/* then look at all of its lower devices */
+		ret = netdev_walk_all_lower_dev_rcu(ldev, fn, data);
+		if (ret)
+			return ret;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(netdev_walk_all_lower_dev_rcu);
 
 /**
  * netdev_lower_get_first_private_rcu - Get the first ->private from the
@@ -5590,7 +5669,6 @@ static inline bool netdev_adjacent_is_neigh_list(struct net_device *dev,
 
 static int __netdev_adjacent_dev_insert(struct net_device *dev,
 					struct net_device *adj_dev,
-					u16 ref_nr,
 					struct list_head *dev_list,
 					void *private, bool master)
 {
@@ -5600,7 +5678,10 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 	adj = __netdev_find_adj(adj_dev, dev_list);
 
 	if (adj) {
-		adj->ref_nr += ref_nr;
+		adj->ref_nr += 1;
+		pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d\n",
+			 dev->name, adj_dev->name, adj->ref_nr);
+
 		return 0;
 	}
 
@@ -5610,12 +5691,12 @@ static int __netdev_adjacent_dev_insert(struct net_device *dev,
 	adj->dev = adj_dev;
 	adj->master = master;
-	adj->ref_nr = ref_nr;
+	adj->ref_nr = 1;
 	adj->private = private;
 	dev_hold(adj_dev);
 
-	pr_debug("dev_hold for %s, because of link added from %s to %s\n",
-		 adj_dev->name, dev->name, adj_dev->name);
+	pr_debug("Insert adjacency: dev %s adj_dev %s adj->ref_nr %d; dev_hold on %s\n",
+		 dev->name, adj_dev->name, adj->ref_nr, adj_dev->name);
 
 	if (netdev_adjacent_is_neigh_list(dev, adj_dev, dev_list)) {
 		ret = netdev_adjacent_sysfs_add(dev, adj_dev, dev_list);
@@ -5654,17 +5735,22 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev,
 {
 	struct netdev_adjacent *adj;
 
+	pr_debug("Remove adjacency: dev %s adj_dev %s ref_nr %d\n",
+		 dev->name, adj_dev->name, ref_nr);
+
 	adj = __netdev_find_adj(adj_dev, dev_list);
 
 	if (!adj) {
-		pr_err("tried to remove device %s from %s\n",
+		pr_err("Adjacency does not exist for device %s from %s\n",
 		       dev->name, adj_dev->name);
-		BUG();
+		WARN_ON(1);
+		return;
 	}
 
 	if (adj->ref_nr > ref_nr) {
-		pr_debug("%s to %s ref_nr-%d = %d\n", dev->name, adj_dev->name,
-			 ref_nr, adj->ref_nr-ref_nr);
pr_debug("adjacency: %s to %s ref_nr - %d = %d\n", + dev->name, adj_dev->name, ref_nr, + adj->ref_nr - ref_nr); adj->ref_nr -= ref_nr; return; } @@ -5676,7 +5762,7 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, netdev_adjacent_sysfs_del(dev, adj_dev->name, dev_list); list_del_rcu(&adj->list); - pr_debug("dev_put for %s, because link removed from %s to %s\n", + pr_debug("adjacency: dev_put for %s, because link removed from %s to %s\n", adj_dev->name, dev->name, adj_dev->name); dev_put(adj_dev); kfree_rcu(adj, rcu); @@ -5684,38 +5770,27 @@ static void __netdev_adjacent_dev_remove(struct net_device *dev, static int __netdev_adjacent_dev_link_lists(struct net_device *dev, struct net_device *upper_dev, - u16 ref_nr, struct list_head *up_list, struct list_head *down_list, void *private, bool master) { int ret; - ret = __netdev_adjacent_dev_insert(dev, upper_dev, ref_nr, up_list, + ret = __netdev_adjacent_dev_insert(dev, upper_dev, up_list, private, master); if (ret) return ret; - ret = __netdev_adjacent_dev_insert(upper_dev, dev, ref_nr, down_list, + ret = __netdev_adjacent_dev_insert(upper_dev, dev, down_list, private, false); if (ret) { - __netdev_adjacent_dev_remove(dev, upper_dev, ref_nr, up_list); + __netdev_adjacent_dev_remove(dev, upper_dev, 1, up_list); return ret; } return 0; } -static int __netdev_adjacent_dev_link(struct net_device *dev, - struct net_device *upper_dev, - u16 ref_nr) -{ - return __netdev_adjacent_dev_link_lists(dev, upper_dev, ref_nr, - &dev->all_adj_list.upper, - &upper_dev->all_adj_list.lower, - NULL, false); -} - static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, struct net_device *upper_dev, u16 ref_nr, @@ -5726,40 +5801,19 @@ static void __netdev_adjacent_dev_unlink_lists(struct net_device *dev, __netdev_adjacent_dev_remove(upper_dev, dev, ref_nr, down_list); } -static void __netdev_adjacent_dev_unlink(struct net_device *dev, - struct net_device *upper_dev, - u16 ref_nr) -{ - __netdev_adjacent_dev_unlink_lists(dev, upper_dev, ref_nr, - &dev->all_adj_list.upper, - &upper_dev->all_adj_list.lower); -} - static int __netdev_adjacent_dev_link_neighbour(struct net_device *dev, struct net_device *upper_dev, void *private, bool master) { - int ret = __netdev_adjacent_dev_link(dev, upper_dev, 1); - - if (ret) - return ret; - - ret = __netdev_adjacent_dev_link_lists(dev, upper_dev, 1, - &dev->adj_list.upper, - &upper_dev->adj_list.lower, - private, master); - if (ret) { - __netdev_adjacent_dev_unlink(dev, upper_dev, 1); - return ret; - } - - return 0; + return __netdev_adjacent_dev_link_lists(dev, upper_dev, + &dev->adj_list.upper, + &upper_dev->adj_list.lower, + private, master); } static void __netdev_adjacent_dev_unlink_neighbour(struct net_device *dev, struct net_device *upper_dev) { - __netdev_adjacent_dev_unlink(dev, upper_dev, 1); __netdev_adjacent_dev_unlink_lists(dev, upper_dev, 1, &dev->adj_list.upper, &upper_dev->adj_list.lower); @@ -5770,7 +5824,6 @@ static int __netdev_upper_dev_link(struct net_device *dev, void *upper_priv, void *upper_info) { struct netdev_notifier_changeupper_info changeupper_info; - struct netdev_adjacent *i, *j, *to_i, *to_j; int ret = 0; ASSERT_RTNL(); @@ -5779,10 +5832,10 @@ static int __netdev_upper_dev_link(struct net_device *dev, return -EBUSY; /* To prevent loops, check if dev is not upper device to upper_dev. 
	 */
-	if (__netdev_find_adj(dev, &upper_dev->all_adj_list.upper))
+	if (netdev_has_upper_dev(upper_dev, dev))
 		return -EBUSY;
 
-	if (__netdev_find_adj(upper_dev, &dev->adj_list.upper))
+	if (netdev_has_upper_dev(dev, upper_dev))
 		return -EEXIST;
 
 	if (master && netdev_master_upper_dev_get(dev))
@@ -5804,80 +5857,15 @@ static int __netdev_upper_dev_link(struct net_device *dev,
 	if (ret)
 		return ret;
 
-	/* Now that we linked these devs, make all the upper_dev's
-	 * all_adj_list.upper visible to every dev's all_adj_list.lower an
-	 * versa, and don't forget the devices itself. All of these
-	 * links are non-neighbours.
-	 */
-	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
-		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
-			pr_debug("Interlinking %s with %s, non-neighbour\n",
-				 i->dev->name, j->dev->name);
-			ret = __netdev_adjacent_dev_link(i->dev, j->dev, i->ref_nr);
-			if (ret)
-				goto rollback_mesh;
-		}
-	}
-
-	/* add dev to every upper_dev's upper device */
-	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
-		pr_debug("linking %s's upper device %s with %s\n",
-			 upper_dev->name, i->dev->name, dev->name);
-		ret = __netdev_adjacent_dev_link(dev, i->dev, i->ref_nr);
-		if (ret)
-			goto rollback_upper_mesh;
-	}
-
-	/* add upper_dev to every dev's lower device */
-	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
-		pr_debug("linking %s's lower device %s with %s\n", dev->name,
-			 i->dev->name, upper_dev->name);
-		ret = __netdev_adjacent_dev_link(i->dev, upper_dev, i->ref_nr);
-		if (ret)
-			goto rollback_lower_mesh;
-	}
-
 	ret = call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
 					    &changeupper_info.info);
 	ret = notifier_to_errno(ret);
 	if (ret)
-		goto rollback_lower_mesh;
+		goto rollback;
 
 	return 0;
 
-rollback_lower_mesh:
-	to_i = i;
-	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
-		if (i == to_i)
-			break;
-		__netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
-	}
-
-	i = NULL;
-
-rollback_upper_mesh:
-	to_i = i;
-	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list) {
-		if (i == to_i)
-			break;
-		__netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
-	}
-
-	i = j = NULL;
-
-rollback_mesh:
-	to_i = i;
-	to_j = j;
-	list_for_each_entry(i, &dev->all_adj_list.lower, list) {
-		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list) {
-			if (i == to_i && j == to_j)
-				break;
-			__netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
-		}
-		if (i == to_i)
-			break;
-	}
-
+rollback:
 	__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
 
 	return ret;
@@ -5934,7 +5922,6 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 			     struct net_device *upper_dev)
 {
 	struct netdev_notifier_changeupper_info changeupper_info;
-	struct netdev_adjacent *i, *j;
 
 	ASSERT_RTNL();
 
 	changeupper_info.upper_dev = upper_dev;
@@ -5946,23 +5933,6 @@ void netdev_upper_dev_unlink(struct net_device *dev,
 
 	__netdev_adjacent_dev_unlink_neighbour(dev, upper_dev);
 
-	/* Here is the tricky part. We must remove all dev's lower
-	 * devices from all upper_dev's upper devices and vice
-	 * versa, to maintain the graph relationship.
-	 */
-	list_for_each_entry(i, &dev->all_adj_list.lower, list)
-		list_for_each_entry(j, &upper_dev->all_adj_list.upper, list)
-			__netdev_adjacent_dev_unlink(i->dev, j->dev, i->ref_nr);
-
-	/* remove also the devices itself from lower/upper device
-	 * list
-	 */
-	list_for_each_entry(i, &dev->all_adj_list.lower, list)
-		__netdev_adjacent_dev_unlink(i->dev, upper_dev, i->ref_nr);
-
-	list_for_each_entry(i, &upper_dev->all_adj_list.upper, list)
-		__netdev_adjacent_dev_unlink(dev, i->dev, i->ref_nr);
-
 	call_netdevice_notifiers_info(NETDEV_CHANGEUPPER, dev,
 				      &changeupper_info.info);
 }
@@ -6500,9 +6470,18 @@ int dev_set_mtu(struct net_device *dev, int new_mtu)
 	if (new_mtu == dev->mtu)
 		return 0;
 
-	/* MTU must be positive. */
-	if (new_mtu < 0)
+	/* MTU must be positive, and in range */
+	if (new_mtu < 0 || new_mtu < dev->min_mtu) {
+		net_err_ratelimited("%s: Invalid MTU %d requested, hw min %d\n",
+				    dev->name, new_mtu, dev->min_mtu);
 		return -EINVAL;
+	}
+
+	if (dev->max_mtu > 0 && new_mtu > dev->max_mtu) {
+		net_err_ratelimited("%s: Invalid MTU %d requested, hw max %d\n",
+				    dev->name, new_mtu, dev->max_mtu);
+		return -EINVAL;
+	}
 
 	if (!netif_device_present(dev))
 		return -ENODEV;
@@ -6777,6 +6756,7 @@ static void rollback_registered_many(struct list_head *head)
 
 		/* Notifier chain MUST detach us all upper devices. */
 		WARN_ON(netdev_has_any_upper_dev(dev));
+		WARN_ON(netdev_has_any_lower_dev(dev));
 
 		/* Remove entries from kobject tree */
 		netdev_unregister_kobject(dev);
@@ -7655,8 +7635,6 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 	INIT_LIST_HEAD(&dev->link_watch_list);
 	INIT_LIST_HEAD(&dev->adj_list.upper);
 	INIT_LIST_HEAD(&dev->adj_list.lower);
-	INIT_LIST_HEAD(&dev->all_adj_list.upper);
-	INIT_LIST_HEAD(&dev->all_adj_list.lower);
 	INIT_LIST_HEAD(&dev->ptype_all);
 	INIT_LIST_HEAD(&dev->ptype_specific);
 #ifdef CONFIG_NET_SCHED
@@ -7667,7 +7645,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	if (!dev->tx_queue_len) {
 		dev->priv_flags |= IFF_NO_QUEUE;
-		dev->tx_queue_len = 1;
+		dev->tx_queue_len = DEFAULT_TX_QUEUE_LEN;
 	}
 
 	dev->num_tx_queues = txqs;
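
Editor's note on the new exports. The netdev_set_num_tc()/netdev_set_tc_queue()/netdev_reset_tc() helpers added above move traffic-class bookkeeping into the core so that netif_set_xps_queue() and get_xps_queue() can translate a CPU into a per-class map index (tci = cpu * num_tc + tc). A minimal sketch of how a driver might call them follows; the function name, the 4-queues-per-class layout, and the error handling are hypothetical illustrations, not part of this patch:

/* Hypothetical driver setup: num_tc classes, 4 queues per class,
 * class N on queues [N*4, N*4+3]. Sketch only. */
static int example_setup_tc(struct net_device *dev, u8 num_tc)
{
	int tc, err;

	if (!num_tc) {
		/* also drops stale XPS maps via netif_reset_xps_queues_gt() */
		netdev_reset_tc(dev);
		return 0;
	}

	err = netdev_set_num_tc(dev, num_tc);	/* -EINVAL if num_tc > TC_MAX_QUEUE */
	if (err)
		return err;

	for (tc = 0; tc < num_tc; tc++) {
		/* each class gets a contiguous block of 4 queues */
		err = netdev_set_tc_queue(dev, tc, 4, tc * 4);
		if (err) {
			netdev_reset_tc(dev);
			return err;
		}
	}

	return 0;
}

Likewise, netdev_walk_all_upper_dev_rcu() and its lower-device counterparts replace the removed all_adj_list mesh with on-demand recursion: the callback runs once per device found, and a non-zero return stops the walk. A minimal, hypothetical caller might look like the sketch below; note that because the walk simply recurses, a device reachable along two paths is visited twice in this version:

/* Hypothetical walker use: count devices stacked above "dev". */
static int example_count_upper(struct net_device *upper, void *data)
{
	int *count = data;

	(*count)++;
	return 0;	/* non-zero would abort the walk */
}

static int example_num_uppers(struct net_device *dev)
{
	int count = 0;

	rcu_read_lock();	/* the _rcu walker requires the RCU read lock */
	netdev_walk_all_upper_dev_rcu(dev, example_count_upper, &count);
	rcu_read_unlock();

	return count;
}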