summaryrefslogtreecommitdiff
path: root/net/core/dev.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/core/dev.c')
-rw-r--r--net/core/dev.c249
1 files changed, 112 insertions, 137 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index 5d59155adf2a..d70e4a3a49f2 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -208,7 +208,8 @@ static inline void dev_base_seq_inc(struct net *net)
static inline struct hlist_head *dev_name_hash(struct net *net, const char *name)
{
- unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+ unsigned int hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
+
return &net->dev_name_head[hash_32(hash, NETDEV_HASHBITS)];
}
@@ -299,10 +300,9 @@ static const unsigned short netdev_lock_type[] =
ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
- ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
- ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET,
- ARPHRD_PHONET_PIPE, ARPHRD_IEEE802154,
- ARPHRD_VOID, ARPHRD_NONE};
+ ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
+ ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
+ ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
static const char *const netdev_lock_name[] =
{"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
@@ -317,10 +317,9 @@ static const char *const netdev_lock_name[] =
"_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
"_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
"_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
- "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
- "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET",
- "_xmit_PHONET_PIPE", "_xmit_IEEE802154",
- "_xmit_VOID", "_xmit_NONE"};
+ "_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
+ "_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
+ "_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
static struct lock_class_key netdev_addr_lock_key[ARRAY_SIZE(netdev_lock_type)];
@@ -1137,8 +1136,8 @@ void dev_load(struct net *net, const char *name)
no_module = request_module("netdev-%s", name);
if (no_module && capable(CAP_SYS_MODULE)) {
if (!request_module("%s", name))
- pr_err("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
- name);
+ pr_warn("Loading kernel module for a network device with CAP_SYS_MODULE (deprecated). Use CAP_NET_ADMIN and alias netdev-%s instead.\n",
+ name);
}
}
EXPORT_SYMBOL(dev_load);
@@ -1409,14 +1408,34 @@ EXPORT_SYMBOL(register_netdevice_notifier);
* register_netdevice_notifier(). The notifier is unlinked into the
* kernel structures and may then be reused. A negative errno code
* is returned on a failure.
+ *
+ * After unregistering unregister and down device events are synthesized
+ * for all devices on the device list to the removed notifier to remove
+ * the need for special case cleanup code.
*/
int unregister_netdevice_notifier(struct notifier_block *nb)
{
+ struct net_device *dev;
+ struct net *net;
int err;
rtnl_lock();
err = raw_notifier_chain_unregister(&netdev_chain, nb);
+ if (err)
+ goto unlock;
+
+ for_each_net(net) {
+ for_each_netdev(net, dev) {
+ if (dev->flags & IFF_UP) {
+ nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
+ nb->notifier_call(nb, NETDEV_DOWN, dev);
+ }
+ nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
+ nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev);
+ }
+ }
+unlock:
rtnl_unlock();
return err;
}
@@ -1596,10 +1615,15 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb)
kfree_skb(skb);
return NET_RX_DROP;
}
- skb_set_dev(skb, dev);
+ skb->skb_iif = 0;
+ skb->dev = dev;
+ skb_dst_drop(skb);
skb->tstamp.tv64 = 0;
skb->pkt_type = PACKET_HOST;
skb->protocol = eth_type_trans(skb, dev);
+ skb->mark = 0;
+ secpath_reset(skb);
+ nf_reset(skb);
return netif_rx(skb);
}
EXPORT_SYMBOL_GPL(dev_forward_skb);
@@ -1651,10 +1675,9 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
if (skb_network_header(skb2) < skb2->data ||
skb2->network_header > skb2->tail) {
- if (net_ratelimit())
- pr_crit("protocol %04x is buggy, dev %s\n",
- ntohs(skb2->protocol),
- dev->name);
+ net_crit_ratelimited("protocol %04x is buggy, dev %s\n",
+ ntohs(skb2->protocol),
+ dev->name);
skb_reset_network_header(skb2);
}
@@ -1668,7 +1691,8 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
rcu_read_unlock();
}
-/* netif_setup_tc - Handle tc mappings on real_num_tx_queues change
+/**
+ * netif_setup_tc - Handle tc mappings on real_num_tx_queues change
* @dev: Network device
* @txq: number of queues available
*
@@ -1770,6 +1794,18 @@ int netif_set_real_num_rx_queues(struct net_device *dev, unsigned int rxq)
EXPORT_SYMBOL(netif_set_real_num_rx_queues);
#endif
+/**
+ * netif_get_num_default_rss_queues - default number of RSS queues
+ *
+ * This routine should set an upper limit on the number of RSS queues
+ * used by default by multiqueue devices.
+ */
+int netif_get_num_default_rss_queues(void)
+{
+ return min_t(int, DEFAULT_MAX_NUM_RSS_QUEUES, num_online_cpus());
+}
+EXPORT_SYMBOL(netif_get_num_default_rss_queues);
+
static inline void __netif_reschedule(struct Qdisc *q)
{
struct softnet_data *sd;
@@ -1848,36 +1884,6 @@ void netif_device_attach(struct net_device *dev)
}
EXPORT_SYMBOL(netif_device_attach);
-/**
- * skb_dev_set -- assign a new device to a buffer
- * @skb: buffer for the new device
- * @dev: network device
- *
- * If an skb is owned by a device already, we have to reset
- * all data private to the namespace a device belongs to
- * before assigning it a new device.
- */
-#ifdef CONFIG_NET_NS
-void skb_set_dev(struct sk_buff *skb, struct net_device *dev)
-{
- skb_dst_drop(skb);
- if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) {
- secpath_reset(skb);
- nf_reset(skb);
- skb_init_secmark(skb);
- skb->mark = 0;
- skb->priority = 0;
- skb->nf_trace = 0;
- skb->ipvs_property = 0;
-#ifdef CONFIG_NET_SCHED
- skb->tc_index = 0;
-#endif
- }
- skb->dev = dev;
-}
-EXPORT_SYMBOL(skb_set_dev);
-#endif /* CONFIG_NET_NS */
-
static void skb_warn_bad_offload(const struct sk_buff *skb)
{
static const netdev_features_t null_features = 0;
@@ -2096,25 +2102,6 @@ static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features)
return 0;
}
-/*
- * Try to orphan skb early, right before transmission by the device.
- * We cannot orphan skb if tx timestamp is requested or the sk-reference
- * is needed on driver level for other reasons, e.g. see net/can/raw.c
- */
-static inline void skb_orphan_try(struct sk_buff *skb)
-{
- struct sock *sk = skb->sk;
-
- if (sk && !skb_shinfo(skb)->tx_flags) {
- /* skb_tx_hash() wont be able to get sk.
- * We copy sk_hash into skb->rxhash
- */
- if (!skb->rxhash)
- skb->rxhash = sk->sk_hash;
- skb_orphan(skb);
- }
-}
-
static bool can_checksum_protocol(netdev_features_t features, __be16 protocol)
{
return ((features & NETIF_F_GEN_CSUM) ||
@@ -2200,8 +2187,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev,
if (!list_empty(&ptype_all))
dev_queue_xmit_nit(skb, dev);
- skb_orphan_try(skb);
-
features = netif_skb_features(skb);
if (vlan_tx_tag_present(skb) &&
@@ -2311,7 +2296,7 @@ u16 __skb_tx_hash(const struct net_device *dev, const struct sk_buff *skb,
if (skb->sk && skb->sk->sk_hash)
hash = skb->sk->sk_hash;
else
- hash = (__force u16) skb->protocol ^ skb->rxhash;
+ hash = (__force u16) skb->protocol;
hash = jhash_1word(hash, hashrnd);
return (u16) (((u64) hash * qcount) >> 32) + qoffset;
@@ -2321,11 +2306,9 @@ EXPORT_SYMBOL(__skb_tx_hash);
static inline u16 dev_cap_txqueue(struct net_device *dev, u16 queue_index)
{
if (unlikely(queue_index >= dev->real_num_tx_queues)) {
- if (net_ratelimit()) {
- pr_warn("%s selects TX queue %d, but real number of TX queues is %d\n",
- dev->name, queue_index,
- dev->real_num_tx_queues);
- }
+ net_warn_ratelimited("%s selects TX queue %d, but real number of TX queues is %d\n",
+ dev->name, queue_index,
+ dev->real_num_tx_queues);
return 0;
}
return queue_index;
@@ -2474,8 +2457,12 @@ static void skb_update_prio(struct sk_buff *skb)
{
struct netprio_map *map = rcu_dereference_bh(skb->dev->priomap);
- if ((!skb->priority) && (skb->sk) && map)
- skb->priority = map->priomap[skb->sk->sk_cgrp_prioidx];
+ if (!skb->priority && skb->sk && map) {
+ unsigned int prioidx = skb->sk->sk_cgrp_prioidx;
+
+ if (prioidx < map->priomap_len)
+ skb->priority = map->priomap[prioidx];
+ }
}
#else
#define skb_update_prio(skb)
@@ -2485,6 +2472,23 @@ static DEFINE_PER_CPU(int, xmit_recursion);
#define RECURSION_LIMIT 10
/**
+ * dev_loopback_xmit - loop back @skb
+ * @skb: buffer to transmit
+ */
+int dev_loopback_xmit(struct sk_buff *skb)
+{
+ skb_reset_mac_header(skb);
+ __skb_pull(skb, skb_network_offset(skb));
+ skb->pkt_type = PACKET_LOOPBACK;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
+ WARN_ON(!skb_dst(skb));
+ skb_dst_force(skb);
+ netif_rx_ni(skb);
+ return 0;
+}
+EXPORT_SYMBOL(dev_loopback_xmit);
+
+/**
* dev_queue_xmit - transmit a buffer
* @skb: buffer to transmit
*
@@ -2567,17 +2571,15 @@ int dev_queue_xmit(struct sk_buff *skb)
}
}
HARD_TX_UNLOCK(dev, txq);
- if (net_ratelimit())
- pr_crit("Virtual device %s asks to queue packet!\n",
- dev->name);
+ net_crit_ratelimited("Virtual device %s asks to queue packet!\n",
+ dev->name);
} else {
/* Recursion is detected! It is possible,
* unfortunately
*/
recursion_alert:
- if (net_ratelimit())
- pr_crit("Dead loop on virtual device %s, fix it urgently!\n",
- dev->name);
+ net_crit_ratelimited("Dead loop on virtual device %s, fix it urgently!\n",
+ dev->name);
}
}
@@ -3058,9 +3060,8 @@ static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq)
struct Qdisc *q;
if (unlikely(MAX_RED_LOOP < ttl++)) {
- if (net_ratelimit())
- pr_warn("Redir loop detected Dropping packet (%d->%d)\n",
- skb->skb_iif, dev->ifindex);
+ net_warn_ratelimited("Redir loop detected Dropping packet (%d->%d)\n",
+ skb->skb_iif, dev->ifindex);
return TC_ACT_SHOT;
}
@@ -3520,10 +3521,16 @@ gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb)
break;
case GRO_DROP:
- case GRO_MERGED_FREE:
kfree_skb(skb);
break;
+ case GRO_MERGED_FREE:
+ if (NAPI_GRO_CB(skb)->free == NAPI_GRO_FREE_STOLEN_HEAD)
+ kmem_cache_free(skbuff_head_cache, skb);
+ else
+ __kfree_skb(skb);
+ break;
+
case GRO_HELD:
case GRO_MERGED:
break;
@@ -3608,7 +3615,7 @@ gro_result_t napi_frags_finish(struct napi_struct *napi, struct sk_buff *skb,
}
EXPORT_SYMBOL(napi_frags_finish);
-struct sk_buff *napi_frags_skb(struct napi_struct *napi)
+static struct sk_buff *napi_frags_skb(struct napi_struct *napi)
{
struct sk_buff *skb = napi->skb;
struct ethhdr *eth;
@@ -3643,7 +3650,6 @@ struct sk_buff *napi_frags_skb(struct napi_struct *napi)
out:
return skb;
}
-EXPORT_SYMBOL(napi_frags_skb);
gro_result_t napi_gro_frags(struct napi_struct *napi)
{
@@ -4027,54 +4033,41 @@ static int dev_ifconf(struct net *net, char __user *arg)
#ifdef CONFIG_PROC_FS
-#define BUCKET_SPACE (32 - NETDEV_HASHBITS)
-
-struct dev_iter_state {
- struct seq_net_private p;
- unsigned int pos; /* bucket << BUCKET_SPACE + offset */
-};
+#define BUCKET_SPACE (32 - NETDEV_HASHBITS - 1)
#define get_bucket(x) ((x) >> BUCKET_SPACE)
#define get_offset(x) ((x) & ((1 << BUCKET_SPACE) - 1))
#define set_bucket_offset(b, o) ((b) << BUCKET_SPACE | (o))
-static inline struct net_device *dev_from_same_bucket(struct seq_file *seq)
+static inline struct net_device *dev_from_same_bucket(struct seq_file *seq, loff_t *pos)
{
- struct dev_iter_state *state = seq->private;
struct net *net = seq_file_net(seq);
struct net_device *dev;
struct hlist_node *p;
struct hlist_head *h;
- unsigned int count, bucket, offset;
+ unsigned int count = 0, offset = get_offset(*pos);
- bucket = get_bucket(state->pos);
- offset = get_offset(state->pos);
- h = &net->dev_name_head[bucket];
- count = 0;
+ h = &net->dev_name_head[get_bucket(*pos)];
hlist_for_each_entry_rcu(dev, p, h, name_hlist) {
- if (count++ == offset) {
- state->pos = set_bucket_offset(bucket, count);
+ if (++count == offset)
return dev;
- }
}
return NULL;
}
-static inline struct net_device *dev_from_new_bucket(struct seq_file *seq)
+static inline struct net_device *dev_from_bucket(struct seq_file *seq, loff_t *pos)
{
- struct dev_iter_state *state = seq->private;
struct net_device *dev;
unsigned int bucket;
- bucket = get_bucket(state->pos);
do {
- dev = dev_from_same_bucket(seq);
+ dev = dev_from_same_bucket(seq, pos);
if (dev)
return dev;
- bucket++;
- state->pos = set_bucket_offset(bucket, 0);
+ bucket = get_bucket(*pos) + 1;
+ *pos = set_bucket_offset(bucket, 1);
} while (bucket < NETDEV_HASHENTRIES);
return NULL;
@@ -4087,33 +4080,20 @@ static inline struct net_device *dev_from_new_bucket(struct seq_file *seq)
void *dev_seq_start(struct seq_file *seq, loff_t *pos)
__acquires(RCU)
{
- struct dev_iter_state *state = seq->private;
-
rcu_read_lock();
if (!*pos)
return SEQ_START_TOKEN;
- /* check for end of the hash */
- if (state->pos == 0 && *pos > 1)
+ if (get_bucket(*pos) >= NETDEV_HASHENTRIES)
return NULL;
- return dev_from_new_bucket(seq);
+ return dev_from_bucket(seq, pos);
}
void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
- struct net_device *dev;
-
++*pos;
-
- if (v == SEQ_START_TOKEN)
- return dev_from_new_bucket(seq);
-
- dev = dev_from_same_bucket(seq);
- if (dev)
- return dev;
-
- return dev_from_new_bucket(seq);
+ return dev_from_bucket(seq, pos);
}
void dev_seq_stop(struct seq_file *seq, void *v)
@@ -4212,13 +4192,7 @@ static const struct seq_operations dev_seq_ops = {
static int dev_seq_open(struct inode *inode, struct file *file)
{
return seq_open_net(inode, file, &dev_seq_ops,
- sizeof(struct dev_iter_state));
-}
-
-int dev_seq_open_ops(struct inode *inode, struct file *file,
- const struct seq_operations *ops)
-{
- return seq_open_net(inode, file, ops, sizeof(struct dev_iter_state));
+ sizeof(struct seq_net_private));
}
static const struct file_operations dev_seq_fops = {
@@ -4629,9 +4603,9 @@ void dev_set_rx_mode(struct net_device *dev)
*
* Get the combination of flag bits exported through APIs to userspace.
*/
-unsigned dev_get_flags(const struct net_device *dev)
+unsigned int dev_get_flags(const struct net_device *dev)
{
- unsigned flags;
+ unsigned int flags;
flags = (dev->flags & ~(IFF_PROMISC |
IFF_ALLMULTI |
@@ -5702,7 +5676,7 @@ int netdev_refcnt_read(const struct net_device *dev)
}
EXPORT_SYMBOL(netdev_refcnt_read);
-/*
+/**
* netdev_wait_allrefs - wait until all references are gone.
*
* This is called when unregistering network devices.
@@ -6339,7 +6313,8 @@ static struct hlist_head *netdev_create_hash(void)
/* Initialize per network namespace state */
static int __net_init netdev_init(struct net *net)
{
- INIT_LIST_HEAD(&net->dev_base_head);
+ if (net != &init_net)
+ INIT_LIST_HEAD(&net->dev_base_head);
net->dev_name_head = netdev_create_hash();
if (net->dev_name_head == NULL)