diff options
Diffstat (limited to 'net/core/dev.c')
-rw-r--r-- | net/core/dev.c | 304 |
1 files changed, 169 insertions, 135 deletions
diff --git a/net/core/dev.c b/net/core/dev.c index 3bad1afc89fa..ab39fe17cb58 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -115,6 +115,7 @@ #include <net/iw_handler.h> #include <asm/current.h> #include <linux/audit.h> +#include <linux/dmaengine.h> /* * The list of packet types we will receive (as opposed to discard) @@ -127,7 +128,7 @@ * sure which should go first, but I bet it won't make much * difference if we are running VLANs. The good news is that * this protocol won't be in the list unless compiled in, so - * the average user (w/out VLANs) will not be adversly affected. + * the average user (w/out VLANs) will not be adversely affected. * --BLG * * 0800 IP @@ -148,8 +149,14 @@ static DEFINE_SPINLOCK(ptype_lock); static struct list_head ptype_base[16]; /* 16 way hashed list */ static struct list_head ptype_all; /* Taps */ +#ifdef CONFIG_NET_DMA +static struct dma_client *net_dma_client; +static unsigned int net_dma_count; +static spinlock_t net_dma_event_lock; +#endif + /* - * The @dev_base list is protected by @dev_base_lock and the rtln + * The @dev_base list is protected by @dev_base_lock and the rtnl * semaphore. * * Pure readers hold dev_base_lock for reading. @@ -193,7 +200,7 @@ static inline struct hlist_head *dev_index_hash(int ifindex) * Our notifier list */ -static BLOCKING_NOTIFIER_HEAD(netdev_chain); +static RAW_NOTIFIER_HEAD(netdev_chain); /* * Device drivers call our routines to queue packets here. We empty the @@ -641,10 +648,12 @@ int dev_valid_name(const char *name) * @name: name format string * * Passed a format string - eg "lt%d" it will try and find a suitable - * id. Not efficient for many devices, not called a lot. The caller - * must hold the dev_base or rtnl lock while allocating the name and - * adding the device in order to avoid duplicates. Returns the number - * of the unit assigned or a negative errno code. + * id. It scans list of devices to build up a free map, then chooses + * the first empty slot. The caller must hold the dev_base or rtnl lock + * while allocating the name and adding the device in order to avoid + * duplicates. + * Limited to bits_per_byte * page size devices (ie 32K on most platforms). + * Returns the number of the unit assigned or a negative errno code. */ int dev_alloc_name(struct net_device *dev, const char *name) @@ -736,7 +745,7 @@ int dev_change_name(struct net_device *dev, char *newname) if (!err) { hlist_del(&dev->name_hlist); hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name)); - blocking_notifier_call_chain(&netdev_chain, + raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev); } @@ -744,14 +753,14 @@ int dev_change_name(struct net_device *dev, char *newname) } /** - * netdev_features_change - device changes fatures + * netdev_features_change - device changes features * @dev: device to cause notification * * Called to indicate a device has changed features. */ void netdev_features_change(struct net_device *dev) { - blocking_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev); + raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev); } EXPORT_SYMBOL(netdev_features_change); @@ -766,7 +775,7 @@ EXPORT_SYMBOL(netdev_features_change); void netdev_state_change(struct net_device *dev) { if (dev->flags & IFF_UP) { - blocking_notifier_call_chain(&netdev_chain, + raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); rtmsg_ifinfo(RTM_NEWLINK, dev, 0); } @@ -864,7 +873,7 @@ int dev_open(struct net_device *dev) /* * ... and announce new interface. */ - blocking_notifier_call_chain(&netdev_chain, NETDEV_UP, dev); + raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev); } return ret; } @@ -887,7 +896,7 @@ int dev_close(struct net_device *dev) * Tell people we are going down, so that they can * prepare to death, when device is still operating. */ - blocking_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); + raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev); dev_deactivate(dev); @@ -924,7 +933,7 @@ int dev_close(struct net_device *dev) /* * Tell people we are down */ - blocking_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); + raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev); return 0; } @@ -955,7 +964,7 @@ int register_netdevice_notifier(struct notifier_block *nb) int err; rtnl_lock(); - err = blocking_notifier_chain_register(&netdev_chain, nb); + err = raw_notifier_chain_register(&netdev_chain, nb); if (!err) { for (dev = dev_base; dev; dev = dev->next) { nb->notifier_call(nb, NETDEV_REGISTER, dev); @@ -983,7 +992,7 @@ int unregister_netdevice_notifier(struct notifier_block *nb) int err; rtnl_lock(); - err = blocking_notifier_chain_unregister(&netdev_chain, nb); + err = raw_notifier_chain_unregister(&netdev_chain, nb); rtnl_unlock(); return err; } @@ -994,12 +1003,12 @@ int unregister_netdevice_notifier(struct notifier_block *nb) * @v: pointer passed unmodified to notifier function * * Call all network notifier blocks. Parameters and return value - * are as for blocking_notifier_call_chain(). + * are as for raw_notifier_call_chain(). */ int call_netdevice_notifiers(unsigned long val, void *v) { - return blocking_notifier_call_chain(&netdev_chain, val, v); + return raw_notifier_call_chain(&netdev_chain, val, v); } /* When > 0 there are consumers of rx skb time stamps */ @@ -1213,75 +1222,15 @@ static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb) #define illegal_highdma(dev, skb) (0) #endif -/* Keep head the same: replace data */ -int __skb_linearize(struct sk_buff *skb, gfp_t gfp_mask) -{ - unsigned int size; - u8 *data; - long offset; - struct skb_shared_info *ninfo; - int headerlen = skb->data - skb->head; - int expand = (skb->tail + skb->data_len) - skb->end; - - if (skb_shared(skb)) - BUG(); - - if (expand <= 0) - expand = 0; - - size = skb->end - skb->head + expand; - size = SKB_DATA_ALIGN(size); - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (!data) - return -ENOMEM; - - /* Copy entire thing */ - if (skb_copy_bits(skb, -headerlen, data, headerlen + skb->len)) - BUG(); - - /* Set up shinfo */ - ninfo = (struct skb_shared_info*)(data + size); - atomic_set(&ninfo->dataref, 1); - ninfo->tso_size = skb_shinfo(skb)->tso_size; - ninfo->tso_segs = skb_shinfo(skb)->tso_segs; - ninfo->nr_frags = 0; - ninfo->frag_list = NULL; - - /* Offset between the two in bytes */ - offset = data - skb->head; - - /* Free old data. */ - skb_release_data(skb); - - skb->head = data; - skb->end = data + size; - - /* Set up new pointers */ - skb->h.raw += offset; - skb->nh.raw += offset; - skb->mac.raw += offset; - skb->tail += offset; - skb->data += offset; - - /* We are no longer a clone, even if we were. */ - skb->cloned = 0; - - skb->tail += skb->data_len; - skb->data_len = 0; - return 0; -} - #define HARD_TX_LOCK(dev, cpu) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ - spin_lock(&dev->xmit_lock); \ - dev->xmit_lock_owner = cpu; \ + netif_tx_lock(dev); \ } \ } #define HARD_TX_UNLOCK(dev) { \ if ((dev->features & NETIF_F_LLTX) == 0) { \ - dev->xmit_lock_owner = -1; \ - spin_unlock(&dev->xmit_lock); \ + netif_tx_unlock(dev); \ } \ } @@ -1319,7 +1268,7 @@ int dev_queue_xmit(struct sk_buff *skb) if (skb_shinfo(skb)->frag_list && !(dev->features & NETIF_F_FRAGLIST) && - __skb_linearize(skb, GFP_ATOMIC)) + __skb_linearize(skb)) goto out_kfree_skb; /* Fragmented skb is linearized if device does not support SG, @@ -1328,14 +1277,14 @@ int dev_queue_xmit(struct sk_buff *skb) */ if (skb_shinfo(skb)->nr_frags && (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) && - __skb_linearize(skb, GFP_ATOMIC)) + __skb_linearize(skb)) goto out_kfree_skb; /* If packet is not checksummed and device does not support * checksumming for this protocol, complete checksumming here. */ if (skb->ip_summed == CHECKSUM_HW && - (!(dev->features & (NETIF_F_HW_CSUM | NETIF_F_NO_CSUM)) && + (!(dev->features & NETIF_F_GEN_CSUM) && (!(dev->features & NETIF_F_IP_CSUM) || skb->protocol != htons(ETH_P_IP)))) if (skb_checksum_help(skb, 0)) @@ -1380,8 +1329,8 @@ int dev_queue_xmit(struct sk_buff *skb) /* The device has no queue. Common case for software devices: loopback, all the sorts of tunnels... - Really, it is unlikely that xmit_lock protection is necessary here. - (f.e. loopback and IP tunnels are clean ignoring statistics + Really, it is unlikely that netif_tx_lock protection is necessary + here. (f.e. loopback and IP tunnels are clean ignoring statistics counters.) However, it is possible, that they rely on protection made by us here. @@ -1844,6 +1793,19 @@ static void net_rx_action(struct softirq_action *h) } } out: +#ifdef CONFIG_NET_DMA + /* + * There may not be any more sk_buffs coming right now, so push + * any pending DMA copies to hardware + */ + if (net_dma_client) { + struct dma_chan *chan; + rcu_read_lock(); + list_for_each_entry_rcu(chan, &net_dma_client->channels, client_node) + dma_async_memcpy_issue_pending(chan); + rcu_read_unlock(); + } +#endif local_irq_enable(); return; @@ -2196,7 +2158,7 @@ int netdev_set_master(struct net_device *slave, struct net_device *master) * @dev: device * @inc: modifier * - * Add or remove promsicuity from a device. While the count in the device + * Add or remove promiscuity from a device. While the count in the device * remains above zero the interface remains promiscuous. Once it hits zero * the device reverts back to normal filtering operation. A negative inc * value is used to drop promiscuity on the device. @@ -2308,7 +2270,7 @@ int dev_change_flags(struct net_device *dev, unsigned flags) if (dev->flags & IFF_UP && ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI | IFF_VOLATILE))) - blocking_notifier_call_chain(&netdev_chain, + raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGE, dev); if ((flags ^ dev->gflags) & IFF_PROMISC) { @@ -2353,7 +2315,7 @@ int dev_set_mtu(struct net_device *dev, int new_mtu) else dev->mtu = new_mtu; if (!err && dev->flags & IFF_UP) - blocking_notifier_call_chain(&netdev_chain, + raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGEMTU, dev); return err; } @@ -2370,7 +2332,7 @@ int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa) return -ENODEV; err = dev->set_mac_address(dev, sa); if (!err) - blocking_notifier_call_chain(&netdev_chain, + raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); return err; } @@ -2427,7 +2389,7 @@ static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd) return -EINVAL; memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data, min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len)); - blocking_notifier_call_chain(&netdev_chain, + raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGEADDR, dev); return 0; @@ -2777,11 +2739,13 @@ int register_netdevice(struct net_device *dev) BUG_ON(dev_boot_phase); ASSERT_RTNL(); + might_sleep(); + /* When net_device's are persistent, this will be fatal. */ BUG_ON(dev->reg_state != NETREG_UNINITIALIZED); spin_lock_init(&dev->queue_lock); - spin_lock_init(&dev->xmit_lock); + spin_lock_init(&dev->_xmit_lock); dev->xmit_lock_owner = -1; #ifdef CONFIG_NET_CLS_ACT spin_lock_init(&dev->ingress_lock); @@ -2825,9 +2789,7 @@ int register_netdevice(struct net_device *dev) /* Fix illegal SG+CSUM combinations. */ if ((dev->features & NETIF_F_SG) && - !(dev->features & (NETIF_F_IP_CSUM | - NETIF_F_NO_CSUM | - NETIF_F_HW_CSUM))) { + !(dev->features & NETIF_F_ALL_CSUM)) { printk("%s: Dropping NETIF_F_SG since no checksum feature.\n", dev->name); dev->features &= ~NETIF_F_SG; @@ -2863,6 +2825,11 @@ int register_netdevice(struct net_device *dev) if (!dev->rebuild_header) dev->rebuild_header = default_rebuild_header; + ret = netdev_register_sysfs(dev); + if (ret) + goto out_err; + dev->reg_state = NETREG_REGISTERED; + /* * Default initial state at registry is that the * device is present. @@ -2878,14 +2845,11 @@ int register_netdevice(struct net_device *dev) hlist_add_head(&dev->name_hlist, head); hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex)); dev_hold(dev); - dev->reg_state = NETREG_REGISTERING; write_unlock_bh(&dev_base_lock); /* Notify protocols, that a new device appeared. */ - blocking_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); + raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev); - /* Finish registration after unlock */ - net_set_todo(dev); ret = 0; out: @@ -2961,7 +2925,7 @@ static void netdev_wait_allrefs(struct net_device *dev) rtnl_lock(); /* Rebroadcast unregister notification */ - blocking_notifier_call_chain(&netdev_chain, + raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); if (test_bit(__LINK_STATE_LINKWATCH_PENDING, @@ -3008,7 +2972,7 @@ static void netdev_wait_allrefs(struct net_device *dev) * * We are invoked by rtnl_unlock() after it drops the semaphore. * This allows us to deal with problems: - * 1) We can create/delete sysfs objects which invoke hotplug + * 1) We can delete sysfs objects which invoke hotplug * without deadlocking with linkwatch via keventd. * 2) Since we run with the RTNL semaphore not held, we can sleep * safely in order to wait for the netdev refcnt to drop to zero. @@ -3017,8 +2981,6 @@ static DEFINE_MUTEX(net_todo_run_mutex); void netdev_run_todo(void) { struct list_head list = LIST_HEAD_INIT(list); - int err; - /* Need to guard against multiple cpu's getting out of order. */ mutex_lock(&net_todo_run_mutex); @@ -3041,40 +3003,29 @@ void netdev_run_todo(void) = list_entry(list.next, struct net_device, todo_list); list_del(&dev->todo_list); - switch(dev->reg_state) { - case NETREG_REGISTERING: - dev->reg_state = NETREG_REGISTERED; - err = netdev_register_sysfs(dev); - if (err) - printk(KERN_ERR "%s: failed sysfs registration (%d)\n", - dev->name, err); - break; - - case NETREG_UNREGISTERING: - netdev_unregister_sysfs(dev); - dev->reg_state = NETREG_UNREGISTERED; - - netdev_wait_allrefs(dev); + if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { + printk(KERN_ERR "network todo '%s' but state %d\n", + dev->name, dev->reg_state); + dump_stack(); + continue; + } - /* paranoia */ - BUG_ON(atomic_read(&dev->refcnt)); - BUG_TRAP(!dev->ip_ptr); - BUG_TRAP(!dev->ip6_ptr); - BUG_TRAP(!dev->dn_ptr); + netdev_unregister_sysfs(dev); + dev->reg_state = NETREG_UNREGISTERED; + netdev_wait_allrefs(dev); - /* It must be the very last action, - * after this 'dev' may point to freed up memory. - */ - if (dev->destructor) - dev->destructor(dev); - break; + /* paranoia */ + BUG_ON(atomic_read(&dev->refcnt)); + BUG_TRAP(!dev->ip_ptr); + BUG_TRAP(!dev->ip6_ptr); + BUG_TRAP(!dev->dn_ptr); - default: - printk(KERN_ERR "network todo '%s' but state %d\n", - dev->name, dev->reg_state); - break; - } + /* It must be the very last action, + * after this 'dev' may point to freed up memory. + */ + if (dev->destructor) + dev->destructor(dev); } out: @@ -3131,7 +3082,7 @@ EXPORT_SYMBOL(alloc_netdev); void free_netdev(struct net_device *dev) { #ifdef CONFIG_SYSFS - /* Compatiablity with error handling in drivers */ + /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { kfree((char *)dev - dev->padded); return; @@ -3216,7 +3167,7 @@ int unregister_netdevice(struct net_device *dev) /* Notify protocols, that we are about to destroy this device. They should clean all the things. */ - blocking_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); + raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev); /* * Flush the multicast chain @@ -3307,6 +3258,88 @@ static int dev_cpu_callback(struct notifier_block *nfb, } #endif /* CONFIG_HOTPLUG_CPU */ +#ifdef CONFIG_NET_DMA +/** + * net_dma_rebalance - + * This is called when the number of channels allocated to the net_dma_client + * changes. The net_dma_client tries to have one DMA channel per CPU. + */ +static void net_dma_rebalance(void) +{ + unsigned int cpu, i, n; + struct dma_chan *chan; + + lock_cpu_hotplug(); + + if (net_dma_count == 0) { + for_each_online_cpu(cpu) + rcu_assign_pointer(per_cpu(softnet_data.net_dma, cpu), NULL); + unlock_cpu_hotplug(); + return; + } + + i = 0; + cpu = first_cpu(cpu_online_map); + + rcu_read_lock(); + list_for_each_entry(chan, &net_dma_client->channels, client_node) { + n = ((num_online_cpus() / net_dma_count) + + (i < (num_online_cpus() % net_dma_count) ? 1 : 0)); + + while(n) { + per_cpu(softnet_data.net_dma, cpu) = chan; + cpu = next_cpu(cpu, cpu_online_map); + n--; + } + i++; + } + rcu_read_unlock(); + + unlock_cpu_hotplug(); +} + +/** + * netdev_dma_event - event callback for the net_dma_client + * @client: should always be net_dma_client + * @chan: + * @event: + */ +static void netdev_dma_event(struct dma_client *client, struct dma_chan *chan, + enum dma_event event) +{ + spin_lock(&net_dma_event_lock); + switch (event) { + case DMA_RESOURCE_ADDED: + net_dma_count++; + net_dma_rebalance(); + break; + case DMA_RESOURCE_REMOVED: + net_dma_count--; + net_dma_rebalance(); + break; + default: + break; + } + spin_unlock(&net_dma_event_lock); +} + +/** + * netdev_dma_regiser - register the networking subsystem as a DMA client + */ +static int __init netdev_dma_register(void) +{ + spin_lock_init(&net_dma_event_lock); + net_dma_client = dma_async_client_register(netdev_dma_event); + if (net_dma_client == NULL) + return -ENOMEM; + + dma_async_client_chan_request(net_dma_client, num_online_cpus()); + return 0; +} + +#else +static int __init netdev_dma_register(void) { return -ENODEV; } +#endif /* CONFIG_NET_DMA */ /* * Initialize the DEV module. At boot time this walks the device list and @@ -3360,6 +3393,8 @@ static int __init net_dev_init(void) atomic_set(&queue->backlog_dev.refcnt, 1); } + netdev_dma_register(); + dev_boot_phase = 0; open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL); @@ -3378,7 +3413,6 @@ subsys_initcall(net_dev_init); EXPORT_SYMBOL(__dev_get_by_index); EXPORT_SYMBOL(__dev_get_by_name); EXPORT_SYMBOL(__dev_remove_pack); -EXPORT_SYMBOL(__skb_linearize); EXPORT_SYMBOL(dev_valid_name); EXPORT_SYMBOL(dev_add_pack); EXPORT_SYMBOL(dev_alloc_name); |