summaryrefslogtreecommitdiff
path: root/net/core
diff options
context:
space:
mode:
Diffstat (limited to 'net/core')
-rw-r--r--net/core/dev.c10
-rw-r--r--net/core/flow.c132
-rw-r--r--net/core/flow_dissector.c14
-rw-r--r--net/core/neighbour.c17
-rw-r--r--net/core/net-sysfs.c2
-rw-r--r--net/core/pktgen.c32
-rw-r--r--net/core/skbuff.c269
-rw-r--r--net/core/sock.c5
8 files changed, 244 insertions, 237 deletions
diff --git a/net/core/dev.c b/net/core/dev.c
index b1b0c8d4d7df..587f9fb85d73 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -3495,11 +3495,11 @@ EXPORT_SYMBOL_GPL(netdev_rx_handler_unregister);
static bool skb_pfmemalloc_protocol(struct sk_buff *skb)
{
switch (skb->protocol) {
- case __constant_htons(ETH_P_ARP):
- case __constant_htons(ETH_P_IP):
- case __constant_htons(ETH_P_IPV6):
- case __constant_htons(ETH_P_8021Q):
- case __constant_htons(ETH_P_8021AD):
+ case htons(ETH_P_ARP):
+ case htons(ETH_P_IP):
+ case htons(ETH_P_IPV6):
+ case htons(ETH_P_8021Q):
+ case htons(ETH_P_8021AD):
return true;
default:
return false;
diff --git a/net/core/flow.c b/net/core/flow.c
index dfa602ceb8cd..31cfb365e0c6 100644
--- a/net/core/flow.c
+++ b/net/core/flow.c
@@ -24,6 +24,7 @@
#include <net/flow.h>
#include <linux/atomic.h>
#include <linux/security.h>
+#include <net/net_namespace.h>
struct flow_cache_entry {
union {
@@ -38,37 +39,14 @@ struct flow_cache_entry {
struct flow_cache_object *object;
};
-struct flow_cache_percpu {
- struct hlist_head *hash_table;
- int hash_count;
- u32 hash_rnd;
- int hash_rnd_recalc;
- struct tasklet_struct flush_tasklet;
-};
-
struct flow_flush_info {
struct flow_cache *cache;
atomic_t cpuleft;
struct completion completion;
};
-struct flow_cache {
- u32 hash_shift;
- struct flow_cache_percpu __percpu *percpu;
- struct notifier_block hotcpu_notifier;
- int low_watermark;
- int high_watermark;
- struct timer_list rnd_timer;
-};
-
-atomic_t flow_cache_genid = ATOMIC_INIT(0);
-EXPORT_SYMBOL(flow_cache_genid);
-static struct flow_cache flow_cache_global;
static struct kmem_cache *flow_cachep __read_mostly;
-static DEFINE_SPINLOCK(flow_cache_gc_lock);
-static LIST_HEAD(flow_cache_gc_list);
-
#define flow_cache_hash_size(cache) (1 << (cache)->hash_shift)
#define FLOW_HASH_RND_PERIOD (10 * 60 * HZ)
@@ -84,16 +62,18 @@ static void flow_cache_new_hashrnd(unsigned long arg)
add_timer(&fc->rnd_timer);
}
-static int flow_entry_valid(struct flow_cache_entry *fle)
+static int flow_entry_valid(struct flow_cache_entry *fle,
+ struct netns_xfrm *xfrm)
{
- if (atomic_read(&flow_cache_genid) != fle->genid)
+ if (atomic_read(&xfrm->flow_cache_genid) != fle->genid)
return 0;
if (fle->object && !fle->object->ops->check(fle->object))
return 0;
return 1;
}
-static void flow_entry_kill(struct flow_cache_entry *fle)
+static void flow_entry_kill(struct flow_cache_entry *fle,
+ struct netns_xfrm *xfrm)
{
if (fle->object)
fle->object->ops->delete(fle->object);
@@ -104,26 +84,28 @@ static void flow_cache_gc_task(struct work_struct *work)
{
struct list_head gc_list;
struct flow_cache_entry *fce, *n;
+ struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
+ flow_cache_gc_work);
INIT_LIST_HEAD(&gc_list);
- spin_lock_bh(&flow_cache_gc_lock);
- list_splice_tail_init(&flow_cache_gc_list, &gc_list);
- spin_unlock_bh(&flow_cache_gc_lock);
+ spin_lock_bh(&xfrm->flow_cache_gc_lock);
+ list_splice_tail_init(&xfrm->flow_cache_gc_list, &gc_list);
+ spin_unlock_bh(&xfrm->flow_cache_gc_lock);
list_for_each_entry_safe(fce, n, &gc_list, u.gc_list)
- flow_entry_kill(fce);
+ flow_entry_kill(fce, xfrm);
}
-static DECLARE_WORK(flow_cache_gc_work, flow_cache_gc_task);
static void flow_cache_queue_garbage(struct flow_cache_percpu *fcp,
- int deleted, struct list_head *gc_list)
+ int deleted, struct list_head *gc_list,
+ struct netns_xfrm *xfrm)
{
if (deleted) {
fcp->hash_count -= deleted;
- spin_lock_bh(&flow_cache_gc_lock);
- list_splice_tail(gc_list, &flow_cache_gc_list);
- spin_unlock_bh(&flow_cache_gc_lock);
- schedule_work(&flow_cache_gc_work);
+ spin_lock_bh(&xfrm->flow_cache_gc_lock);
+ list_splice_tail(gc_list, &xfrm->flow_cache_gc_list);
+ spin_unlock_bh(&xfrm->flow_cache_gc_lock);
+ schedule_work(&xfrm->flow_cache_gc_work);
}
}
@@ -135,6 +117,8 @@ static void __flow_cache_shrink(struct flow_cache *fc,
struct hlist_node *tmp;
LIST_HEAD(gc_list);
int i, deleted = 0;
+ struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
+ flow_cache_global);
for (i = 0; i < flow_cache_hash_size(fc); i++) {
int saved = 0;
@@ -142,7 +126,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
hlist_for_each_entry_safe(fle, tmp,
&fcp->hash_table[i], u.hlist) {
if (saved < shrink_to &&
- flow_entry_valid(fle)) {
+ flow_entry_valid(fle, xfrm)) {
saved++;
} else {
deleted++;
@@ -152,7 +136,7 @@ static void __flow_cache_shrink(struct flow_cache *fc,
}
}
- flow_cache_queue_garbage(fcp, deleted, &gc_list);
+ flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
}
static void flow_cache_shrink(struct flow_cache *fc,
@@ -208,7 +192,7 @@ struct flow_cache_object *
flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
flow_resolve_t resolver, void *ctx)
{
- struct flow_cache *fc = &flow_cache_global;
+ struct flow_cache *fc = &net->xfrm.flow_cache_global;
struct flow_cache_percpu *fcp;
struct flow_cache_entry *fle, *tfle;
struct flow_cache_object *flo;
@@ -258,7 +242,7 @@ flow_cache_lookup(struct net *net, const struct flowi *key, u16 family, u8 dir,
hlist_add_head(&fle->u.hlist, &fcp->hash_table[hash]);
fcp->hash_count++;
}
- } else if (likely(fle->genid == atomic_read(&flow_cache_genid))) {
+ } else if (likely(fle->genid == atomic_read(&net->xfrm.flow_cache_genid))) {
flo = fle->object;
if (!flo)
goto ret_object;
@@ -279,7 +263,7 @@ nocache:
}
flo = resolver(net, key, family, dir, flo, ctx);
if (fle) {
- fle->genid = atomic_read(&flow_cache_genid);
+ fle->genid = atomic_read(&net->xfrm.flow_cache_genid);
if (!IS_ERR(flo))
fle->object = flo;
else
@@ -303,12 +287,14 @@ static void flow_cache_flush_tasklet(unsigned long data)
struct hlist_node *tmp;
LIST_HEAD(gc_list);
int i, deleted = 0;
+ struct netns_xfrm *xfrm = container_of(fc, struct netns_xfrm,
+ flow_cache_global);
fcp = this_cpu_ptr(fc->percpu);
for (i = 0; i < flow_cache_hash_size(fc); i++) {
hlist_for_each_entry_safe(fle, tmp,
&fcp->hash_table[i], u.hlist) {
- if (flow_entry_valid(fle))
+ if (flow_entry_valid(fle, xfrm))
continue;
deleted++;
@@ -317,7 +303,7 @@ static void flow_cache_flush_tasklet(unsigned long data)
}
}
- flow_cache_queue_garbage(fcp, deleted, &gc_list);
+ flow_cache_queue_garbage(fcp, deleted, &gc_list, xfrm);
if (atomic_dec_and_test(&info->cpuleft))
complete(&info->completion);
@@ -351,10 +337,9 @@ static void flow_cache_flush_per_cpu(void *data)
tasklet_schedule(tasklet);
}
-void flow_cache_flush(void)
+void flow_cache_flush(struct net *net)
{
struct flow_flush_info info;
- static DEFINE_MUTEX(flow_flush_sem);
cpumask_var_t mask;
int i, self;
@@ -365,8 +350,8 @@ void flow_cache_flush(void)
/* Don't want cpus going down or up during this. */
get_online_cpus();
- mutex_lock(&flow_flush_sem);
- info.cache = &flow_cache_global;
+ mutex_lock(&net->xfrm.flow_flush_sem);
+ info.cache = &net->xfrm.flow_cache_global;
for_each_online_cpu(i)
if (!flow_cache_percpu_empty(info.cache, i))
cpumask_set_cpu(i, mask);
@@ -386,21 +371,23 @@ void flow_cache_flush(void)
wait_for_completion(&info.completion);
done:
- mutex_unlock(&flow_flush_sem);
+ mutex_unlock(&net->xfrm.flow_flush_sem);
put_online_cpus();
free_cpumask_var(mask);
}
static void flow_cache_flush_task(struct work_struct *work)
{
- flow_cache_flush();
-}
+ struct netns_xfrm *xfrm = container_of(work, struct netns_xfrm,
+ flow_cache_gc_work);
+ struct net *net = container_of(xfrm, struct net, xfrm);
-static DECLARE_WORK(flow_cache_flush_work, flow_cache_flush_task);
+ flow_cache_flush(net);
+}
-void flow_cache_flush_deferred(void)
+void flow_cache_flush_deferred(struct net *net)
{
- schedule_work(&flow_cache_flush_work);
+ schedule_work(&net->xfrm.flow_cache_flush_work);
}
static int flow_cache_cpu_prepare(struct flow_cache *fc, int cpu)
@@ -425,7 +412,8 @@ static int flow_cache_cpu(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
- struct flow_cache *fc = container_of(nfb, struct flow_cache, hotcpu_notifier);
+ struct flow_cache *fc = container_of(nfb, struct flow_cache,
+ hotcpu_notifier);
int res, cpu = (unsigned long) hcpu;
struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, cpu);
@@ -444,9 +432,20 @@ static int flow_cache_cpu(struct notifier_block *nfb,
return NOTIFY_OK;
}
-static int __init flow_cache_init(struct flow_cache *fc)
+int flow_cache_init(struct net *net)
{
int i;
+ struct flow_cache *fc = &net->xfrm.flow_cache_global;
+
+ if (!flow_cachep)
+ flow_cachep = kmem_cache_create("flow_cache",
+ sizeof(struct flow_cache_entry),
+ 0, SLAB_PANIC, NULL);
+ spin_lock_init(&net->xfrm.flow_cache_gc_lock);
+ INIT_LIST_HEAD(&net->xfrm.flow_cache_gc_list);
+ INIT_WORK(&net->xfrm.flow_cache_gc_work, flow_cache_gc_task);
+ INIT_WORK(&net->xfrm.flow_cache_flush_work, flow_cache_flush_task);
+ mutex_init(&net->xfrm.flow_flush_sem);
fc->hash_shift = 10;
fc->low_watermark = 2 * flow_cache_hash_size(fc);
@@ -484,14 +483,23 @@ err:
return -ENOMEM;
}
+EXPORT_SYMBOL(flow_cache_init);
-static int __init flow_cache_init_global(void)
+void flow_cache_fini(struct net *net)
{
- flow_cachep = kmem_cache_create("flow_cache",
- sizeof(struct flow_cache_entry),
- 0, SLAB_PANIC, NULL);
+ int i;
+ struct flow_cache *fc = &net->xfrm.flow_cache_global;
- return flow_cache_init(&flow_cache_global);
-}
+ del_timer_sync(&fc->rnd_timer);
+ unregister_hotcpu_notifier(&fc->hotcpu_notifier);
-module_init(flow_cache_init_global);
+ for_each_possible_cpu(i) {
+ struct flow_cache_percpu *fcp = per_cpu_ptr(fc->percpu, i);
+ kfree(fcp->hash_table);
+ fcp->hash_table = NULL;
+ }
+
+ free_percpu(fc->percpu);
+ fc->percpu = NULL;
+}
+EXPORT_SYMBOL(flow_cache_fini);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index e29e810663d7..80201bf69d59 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -61,7 +61,7 @@ bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow)
again:
switch (proto) {
- case __constant_htons(ETH_P_IP): {
+ case htons(ETH_P_IP): {
const struct iphdr *iph;
struct iphdr _iph;
ip:
@@ -77,7 +77,7 @@ ip:
iph_to_flow_copy_addrs(flow, iph);
break;
}
- case __constant_htons(ETH_P_IPV6): {
+ case htons(ETH_P_IPV6): {
const struct ipv6hdr *iph;
struct ipv6hdr _iph;
ipv6:
@@ -91,8 +91,8 @@ ipv6:
nhoff += sizeof(struct ipv6hdr);
break;
}
- case __constant_htons(ETH_P_8021AD):
- case __constant_htons(ETH_P_8021Q): {
+ case htons(ETH_P_8021AD):
+ case htons(ETH_P_8021Q): {
const struct vlan_hdr *vlan;
struct vlan_hdr _vlan;
@@ -104,7 +104,7 @@ ipv6:
nhoff += sizeof(*vlan);
goto again;
}
- case __constant_htons(ETH_P_PPP_SES): {
+ case htons(ETH_P_PPP_SES): {
struct {
struct pppoe_hdr hdr;
__be16 proto;
@@ -115,9 +115,9 @@ ipv6:
proto = hdr->proto;
nhoff += PPPOE_SES_HLEN;
switch (proto) {
- case __constant_htons(PPP_IP):
+ case htons(PPP_IP):
goto ip;
- case __constant_htons(PPP_IPV6):
+ case htons(PPP_IPV6):
goto ipv6;
default:
return false;
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index b9e9e0d38672..8f8a96ef9f3f 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -766,9 +766,6 @@ static void neigh_periodic_work(struct work_struct *work)
nht = rcu_dereference_protected(tbl->nht,
lockdep_is_held(&tbl->lock));
- if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
- goto out;
-
/*
* periodically recompute ReachableTime from random function
*/
@@ -781,6 +778,9 @@ static void neigh_periodic_work(struct work_struct *work)
neigh_rand_reach_time(NEIGH_VAR(p, BASE_REACHABLE_TIME));
}
+ if (atomic_read(&tbl->entries) < tbl->gc_thresh1)
+ goto out;
+
for (i = 0 ; i < (1 << nht->hash_shift); i++) {
np = &nht->hash_buckets[i];
@@ -836,10 +836,10 @@ out:
static __inline__ int neigh_max_probes(struct neighbour *n)
{
struct neigh_parms *p = n->parms;
- return (n->nud_state & NUD_PROBE) ?
- NEIGH_VAR(p, UCAST_PROBES) :
- NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES) +
- NEIGH_VAR(p, MCAST_PROBES);
+ int max_probes = NEIGH_VAR(p, UCAST_PROBES) + NEIGH_VAR(p, APP_PROBES);
+ if (!(n->nud_state & NUD_PROBE))
+ max_probes += NEIGH_VAR(p, MCAST_PROBES);
+ return max_probes;
}
static void neigh_invalidate(struct neighbour *neigh)
@@ -945,6 +945,7 @@ static void neigh_timer_handler(unsigned long arg)
neigh->nud_state = NUD_FAILED;
notify = 1;
neigh_invalidate(neigh);
+ goto out;
}
if (neigh->nud_state & NUD_IN_TIMER) {
@@ -3046,7 +3047,7 @@ int neigh_sysctl_register(struct net_device *dev, struct neigh_parms *p,
if (!t)
goto err;
- for (i = 0; i < ARRAY_SIZE(t->neigh_vars); i++) {
+ for (i = 0; i < NEIGH_VAR_GC_INTERVAL; i++) {
t->neigh_vars[i].data += (long) p;
t->neigh_vars[i].extra1 = dev;
t->neigh_vars[i].extra2 = p;
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 73aa594674ef..daed9a64c6f6 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -104,6 +104,7 @@ static ssize_t netdev_store(struct device *dev, struct device_attribute *attr,
}
NETDEVICE_SHOW_RO(dev_id, fmt_hex);
+NETDEVICE_SHOW_RO(dev_port, fmt_dec);
NETDEVICE_SHOW_RO(addr_assign_type, fmt_dec);
NETDEVICE_SHOW_RO(addr_len, fmt_dec);
NETDEVICE_SHOW_RO(iflink, fmt_dec);
@@ -373,6 +374,7 @@ static struct attribute *net_class_attrs[] = {
&dev_attr_netdev_group.attr,
&dev_attr_type.attr,
&dev_attr_dev_id.attr,
+ &dev_attr_dev_port.attr,
&dev_attr_iflink.attr,
&dev_attr_ifindex.attr,
&dev_attr_addr_assign_type.attr,
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index fdac61cac1bd..d0dac57291af 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -476,23 +476,22 @@ static int pgctrl_show(struct seq_file *seq, void *v)
static ssize_t pgctrl_write(struct file *file, const char __user *buf,
size_t count, loff_t *ppos)
{
- int err = 0;
char data[128];
struct pktgen_net *pn = net_generic(current->nsproxy->net_ns, pg_net_id);
- if (!capable(CAP_NET_ADMIN)) {
- err = -EPERM;
- goto out;
- }
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (count == 0)
+ return -EINVAL;
if (count > sizeof(data))
count = sizeof(data);
- if (copy_from_user(data, buf, count)) {
- err = -EFAULT;
- goto out;
- }
- data[count - 1] = 0; /* Make string */
+ if (copy_from_user(data, buf, count))
+ return -EFAULT;
+
+ data[count - 1] = 0; /* Strip trailing '\n' and terminate string */
if (!strcmp(data, "stop"))
pktgen_stop_all_threads_ifs(pn);
@@ -506,10 +505,7 @@ static ssize_t pgctrl_write(struct file *file, const char __user *buf,
else
pr_warning("Unknown command: %s\n", data);
- err = count;
-
-out:
- return err;
+ return count;
}
static int pgctrl_open(struct inode *inode, struct file *file)
@@ -1251,7 +1247,13 @@ static ssize_t pktgen_if_write(struct file *file,
"Flag -:%s:- unknown\nAvailable flags, (prepend ! to un-set flag):\n%s",
f,
"IPSRC_RND, IPDST_RND, UDPSRC_RND, UDPDST_RND, "
- "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, IPSEC, NODE_ALLOC\n");
+ "MACSRC_RND, MACDST_RND, TXSIZE_RND, IPV6, "
+ "MPLS_RND, VID_RND, SVID_RND, FLOW_SEQ, "
+ "QUEUE_MAP_RND, QUEUE_MAP_CPU, UDPCSUM, "
+#ifdef CONFIG_XFRM
+ "IPSEC, "
+#endif
+ "NODE_ALLOC\n");
return count;
}
sprintf(pg_result, "OK: flags=0x%x", pkt_dev->flags);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 5976ef0846bd..3f14c638c2b1 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -707,9 +707,6 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->mark = old->mark;
new->skb_iif = old->skb_iif;
__nf_copy(new, old);
-#if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
- new->nf_trace = old->nf_trace;
-#endif
#ifdef CONFIG_NET_SCHED
new->tc_index = old->tc_index;
#ifdef CONFIG_NET_CLS_ACT
@@ -2841,81 +2838,84 @@ EXPORT_SYMBOL_GPL(skb_pull_rcsum);
/**
* skb_segment - Perform protocol segmentation on skb.
- * @skb: buffer to segment
+ * @head_skb: buffer to segment
* @features: features for the output path (see dev->features)
*
* This function performs segmentation on the given skb. It returns
* a pointer to the first in a list of new skbs for the segments.
* In case of error it returns ERR_PTR(err).
*/
-struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
+struct sk_buff *skb_segment(struct sk_buff *head_skb,
+ netdev_features_t features)
{
struct sk_buff *segs = NULL;
struct sk_buff *tail = NULL;
- struct sk_buff *fskb = skb_shinfo(skb)->frag_list;
- skb_frag_t *skb_frag = skb_shinfo(skb)->frags;
- unsigned int mss = skb_shinfo(skb)->gso_size;
- unsigned int doffset = skb->data - skb_mac_header(skb);
+ struct sk_buff *list_skb = skb_shinfo(head_skb)->frag_list;
+ skb_frag_t *frag = skb_shinfo(head_skb)->frags;
+ unsigned int mss = skb_shinfo(head_skb)->gso_size;
+ unsigned int doffset = head_skb->data - skb_mac_header(head_skb);
+ struct sk_buff *frag_skb = head_skb;
unsigned int offset = doffset;
- unsigned int tnl_hlen = skb_tnl_header_len(skb);
+ unsigned int tnl_hlen = skb_tnl_header_len(head_skb);
unsigned int headroom;
unsigned int len;
__be16 proto;
bool csum;
int sg = !!(features & NETIF_F_SG);
- int nfrags = skb_shinfo(skb)->nr_frags;
+ int nfrags = skb_shinfo(head_skb)->nr_frags;
int err = -ENOMEM;
int i = 0;
int pos;
- proto = skb_network_protocol(skb);
+ proto = skb_network_protocol(head_skb);
if (unlikely(!proto))
return ERR_PTR(-EINVAL);
csum = !!can_checksum_protocol(features, proto);
- __skb_push(skb, doffset);
- headroom = skb_headroom(skb);
- pos = skb_headlen(skb);
+ __skb_push(head_skb, doffset);
+ headroom = skb_headroom(head_skb);
+ pos = skb_headlen(head_skb);
do {
struct sk_buff *nskb;
- skb_frag_t *frag;
+ skb_frag_t *nskb_frag;
int hsize;
int size;
- len = skb->len - offset;
+ len = head_skb->len - offset;
if (len > mss)
len = mss;
- hsize = skb_headlen(skb) - offset;
+ hsize = skb_headlen(head_skb) - offset;
if (hsize < 0)
hsize = 0;
if (hsize > len || !sg)
hsize = len;
- if (!hsize && i >= nfrags && skb_headlen(fskb) &&
- (skb_headlen(fskb) == len || sg)) {
- BUG_ON(skb_headlen(fskb) > len);
+ if (!hsize && i >= nfrags && skb_headlen(list_skb) &&
+ (skb_headlen(list_skb) == len || sg)) {
+ BUG_ON(skb_headlen(list_skb) > len);
i = 0;
- nfrags = skb_shinfo(fskb)->nr_frags;
- skb_frag = skb_shinfo(fskb)->frags;
- pos += skb_headlen(fskb);
+ nfrags = skb_shinfo(list_skb)->nr_frags;
+ frag = skb_shinfo(list_skb)->frags;
+ frag_skb = list_skb;
+ pos += skb_headlen(list_skb);
while (pos < offset + len) {
BUG_ON(i >= nfrags);
- size = skb_frag_size(skb_frag);
+ size = skb_frag_size(frag);
if (pos + size > offset + len)
break;
i++;
pos += size;
- skb_frag++;
+ frag++;
}
- nskb = skb_clone(fskb, GFP_ATOMIC);
- fskb = fskb->next;
+ nskb = skb_clone(list_skb, GFP_ATOMIC);
+ list_skb = list_skb->next;
if (unlikely(!nskb))
goto err;
@@ -2936,7 +2936,7 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
__skb_push(nskb, doffset);
} else {
nskb = __alloc_skb(hsize + doffset + headroom,
- GFP_ATOMIC, skb_alloc_rx_flag(skb),
+ GFP_ATOMIC, skb_alloc_rx_flag(head_skb),
NUMA_NO_NODE);
if (unlikely(!nskb))
@@ -2952,12 +2952,12 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
segs = nskb;
tail = nskb;
- __copy_skb_header(nskb, skb);
- nskb->mac_len = skb->mac_len;
+ __copy_skb_header(nskb, head_skb);
+ nskb->mac_len = head_skb->mac_len;
skb_headers_offset_update(nskb, skb_headroom(nskb) - headroom);
- skb_copy_from_linear_data_offset(skb, -tnl_hlen,
+ skb_copy_from_linear_data_offset(head_skb, -tnl_hlen,
nskb->data - tnl_hlen,
doffset + tnl_hlen);
@@ -2966,30 +2966,32 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
if (!sg) {
nskb->ip_summed = CHECKSUM_NONE;
- nskb->csum = skb_copy_and_csum_bits(skb, offset,
+ nskb->csum = skb_copy_and_csum_bits(head_skb, offset,
skb_put(nskb, len),
len, 0);
continue;
}
- frag = skb_shinfo(nskb)->frags;
+ nskb_frag = skb_shinfo(nskb)->frags;
- skb_copy_from_linear_data_offset(skb, offset,
+ skb_copy_from_linear_data_offset(head_skb, offset,
skb_put(nskb, hsize), hsize);
- skb_shinfo(nskb)->tx_flags = skb_shinfo(skb)->tx_flags & SKBTX_SHARED_FRAG;
+ skb_shinfo(nskb)->tx_flags = skb_shinfo(head_skb)->tx_flags &
+ SKBTX_SHARED_FRAG;
while (pos < offset + len) {
if (i >= nfrags) {
- BUG_ON(skb_headlen(fskb));
+ BUG_ON(skb_headlen(list_skb));
i = 0;
- nfrags = skb_shinfo(fskb)->nr_frags;
- skb_frag = skb_shinfo(fskb)->frags;
+ nfrags = skb_shinfo(list_skb)->nr_frags;
+ frag = skb_shinfo(list_skb)->frags;
+ frag_skb = list_skb;
BUG_ON(!nfrags);
- fskb = fskb->next;
+ list_skb = list_skb->next;
}
if (unlikely(skb_shinfo(nskb)->nr_frags >=
@@ -3000,27 +3002,30 @@ struct sk_buff *skb_segment(struct sk_buff *skb, netdev_features_t features)
goto err;
}
- *frag = *skb_frag;
- __skb_frag_ref(frag);
- size = skb_frag_size(frag);
+ if (unlikely(skb_orphan_frags(frag_skb, GFP_ATOMIC)))
+ goto err;
+
+ *nskb_frag = *frag;
+ __skb_frag_ref(nskb_frag);
+ size = skb_frag_size(nskb_frag);
if (pos < offset) {
- frag->page_offset += offset - pos;
- skb_frag_size_sub(frag, offset - pos);
+ nskb_frag->page_offset += offset - pos;
+ skb_frag_size_sub(nskb_frag, offset - pos);
}
skb_shinfo(nskb)->nr_frags++;
if (pos + size <= offset + len) {
i++;
- skb_frag++;
+ frag++;
pos += size;
} else {
- skb_frag_size_sub(frag, pos + size - (offset + len));
+ skb_frag_size_sub(nskb_frag, pos + size - (offset + len));
goto skip_fraglist;
}
- frag++;
+ nskb_frag++;
}
skip_fraglist:
@@ -3034,7 +3039,7 @@ perform_csum_check:
nskb->len - doffset, 0);
nskb->ip_summed = CHECKSUM_NONE;
}
- } while ((offset += len) < skb->len);
+ } while ((offset += len) < head_skb->len);
return segs;
@@ -3281,6 +3286,32 @@ __skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
return elt;
}
+/* As compared with skb_to_sgvec, skb_to_sgvec_nomark only map skb to given
+ * sglist without mark the sg which contain last skb data as the end.
+ * So the caller can mannipulate sg list as will when padding new data after
+ * the first call without calling sg_unmark_end to expend sg list.
+ *
+ * Scenario to use skb_to_sgvec_nomark:
+ * 1. sg_init_table
+ * 2. skb_to_sgvec_nomark(payload1)
+ * 3. skb_to_sgvec_nomark(payload2)
+ *
+ * This is equivalent to:
+ * 1. sg_init_table
+ * 2. skb_to_sgvec(payload1)
+ * 3. sg_unmark_end
+ * 4. skb_to_sgvec(payload2)
+ *
+ * When mapping mutilple payload conditionally, skb_to_sgvec_nomark
+ * is more preferable.
+ */
+int skb_to_sgvec_nomark(struct sk_buff *skb, struct scatterlist *sg,
+ int offset, int len)
+{
+ return __skb_to_sgvec(skb, sg, offset, len);
+}
+EXPORT_SYMBOL_GPL(skb_to_sgvec_nomark);
+
int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, int len)
{
int nsg = __skb_to_sgvec(skb, sg, offset, len);
@@ -3543,15 +3574,47 @@ static int skb_maybe_pull_tail(struct sk_buff *skb, unsigned int len,
return 0;
}
+#define MAX_TCP_HDR_LEN (15 * 4)
+
+static __sum16 *skb_checksum_setup_ip(struct sk_buff *skb,
+ typeof(IPPROTO_IP) proto,
+ unsigned int off)
+{
+ switch (proto) {
+ int err;
+
+ case IPPROTO_TCP:
+ err = skb_maybe_pull_tail(skb, off + sizeof(struct tcphdr),
+ off + MAX_TCP_HDR_LEN);
+ if (!err && !skb_partial_csum_set(skb, off,
+ offsetof(struct tcphdr,
+ check)))
+ err = -EPROTO;
+ return err ? ERR_PTR(err) : &tcp_hdr(skb)->check;
+
+ case IPPROTO_UDP:
+ err = skb_maybe_pull_tail(skb, off + sizeof(struct udphdr),
+ off + sizeof(struct udphdr));
+ if (!err && !skb_partial_csum_set(skb, off,
+ offsetof(struct udphdr,
+ check)))
+ err = -EPROTO;
+ return err ? ERR_PTR(err) : &udp_hdr(skb)->check;
+ }
+
+ return ERR_PTR(-EPROTO);
+}
+
/* This value should be large enough to cover a tagged ethernet header plus
* maximally sized IP and TCP or UDP headers.
*/
#define MAX_IP_HDR_LEN 128
-static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate)
+static int skb_checksum_setup_ipv4(struct sk_buff *skb, bool recalculate)
{
unsigned int off;
bool fragment;
+ __sum16 *csum;
int err;
fragment = false;
@@ -3572,51 +3635,15 @@ static int skb_checksum_setup_ip(struct sk_buff *skb, bool recalculate)
if (fragment)
goto out;
- switch (ip_hdr(skb)->protocol) {
- case IPPROTO_TCP:
- err = skb_maybe_pull_tail(skb,
- off + sizeof(struct tcphdr),
- MAX_IP_HDR_LEN);
- if (err < 0)
- goto out;
-
- if (!skb_partial_csum_set(skb, off,
- offsetof(struct tcphdr, check))) {
- err = -EPROTO;
- goto out;
- }
-
- if (recalculate)
- tcp_hdr(skb)->check =
- ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr,
- skb->len - off,
- IPPROTO_TCP, 0);
- break;
- case IPPROTO_UDP:
- err = skb_maybe_pull_tail(skb,
- off + sizeof(struct udphdr),
- MAX_IP_HDR_LEN);
- if (err < 0)
- goto out;
-
- if (!skb_partial_csum_set(skb, off,
- offsetof(struct udphdr, check))) {
- err = -EPROTO;
- goto out;
- }
-
- if (recalculate)
- udp_hdr(skb)->check =
- ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
- ip_hdr(skb)->daddr,
- skb->len - off,
- IPPROTO_UDP, 0);
- break;
- default:
- goto out;
- }
+ csum = skb_checksum_setup_ip(skb, ip_hdr(skb)->protocol, off);
+ if (IS_ERR(csum))
+ return PTR_ERR(csum);
+ if (recalculate)
+ *csum = ~csum_tcpudp_magic(ip_hdr(skb)->saddr,
+ ip_hdr(skb)->daddr,
+ skb->len - off,
+ ip_hdr(skb)->protocol, 0);
err = 0;
out:
@@ -3639,6 +3666,7 @@ static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
unsigned int len;
bool fragment;
bool done;
+ __sum16 *csum;
fragment = false;
done = false;
@@ -3716,51 +3744,14 @@ static int skb_checksum_setup_ipv6(struct sk_buff *skb, bool recalculate)
if (!done || fragment)
goto out;
- switch (nexthdr) {
- case IPPROTO_TCP:
- err = skb_maybe_pull_tail(skb,
- off + sizeof(struct tcphdr),
- MAX_IPV6_HDR_LEN);
- if (err < 0)
- goto out;
-
- if (!skb_partial_csum_set(skb, off,
- offsetof(struct tcphdr, check))) {
- err = -EPROTO;
- goto out;
- }
-
- if (recalculate)
- tcp_hdr(skb)->check =
- ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr,
- skb->len - off,
- IPPROTO_TCP, 0);
- break;
- case IPPROTO_UDP:
- err = skb_maybe_pull_tail(skb,
- off + sizeof(struct udphdr),
- MAX_IPV6_HDR_LEN);
- if (err < 0)
- goto out;
-
- if (!skb_partial_csum_set(skb, off,
- offsetof(struct udphdr, check))) {
- err = -EPROTO;
- goto out;
- }
-
- if (recalculate)
- udp_hdr(skb)->check =
- ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
- &ipv6_hdr(skb)->daddr,
- skb->len - off,
- IPPROTO_UDP, 0);
- break;
- default:
- goto out;
- }
+ csum = skb_checksum_setup_ip(skb, nexthdr, off);
+ if (IS_ERR(csum))
+ return PTR_ERR(csum);
+ if (recalculate)
+ *csum = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr,
+ &ipv6_hdr(skb)->daddr,
+ skb->len - off, nexthdr, 0);
err = 0;
out:
@@ -3778,7 +3769,7 @@ int skb_checksum_setup(struct sk_buff *skb, bool recalculate)
switch (skb->protocol) {
case htons(ETH_P_IP):
- err = skb_checksum_setup_ip(skb, recalculate);
+ err = skb_checksum_setup_ipv4(skb, recalculate);
break;
case htons(ETH_P_IPV6):
diff --git a/net/core/sock.c b/net/core/sock.c
index 5b6a9431b017..c0fc6bdad1e3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -2357,10 +2357,13 @@ void release_sock(struct sock *sk)
if (sk->sk_backlog.tail)
__release_sock(sk);
+ /* Warning : release_cb() might need to release sk ownership,
+ * ie call sock_release_ownership(sk) before us.
+ */
if (sk->sk_prot->release_cb)
sk->sk_prot->release_cb(sk);
- sk->sk_lock.owned = 0;
+ sock_release_ownership(sk);
if (waitqueue_active(&sk->sk_lock.wq))
wake_up(&sk->sk_lock.wq);
spin_unlock_bh(&sk->sk_lock.slock);