summaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/batman-adv/translation-table.c60
-rw-r--r--net/batman-adv/types.h2
-rw-r--r--net/bridge/br_device.c3
-rw-r--r--net/bridge/br_switchdev.c2
-rw-r--r--net/ceph/crush/mapper.c2
-rw-r--r--net/ceph/messenger.c6
-rw-r--r--net/ceph/osd_client.c14
-rw-r--r--net/ceph/osdmap.c60
-rw-r--r--net/core/datagram.c14
-rw-r--r--net/core/dev.c6
-rw-r--r--net/core/dev_ioctl.c2
-rw-r--r--net/core/filter.c10
-rw-r--r--net/core/netpoll.c2
-rw-r--r--net/core/skbuff.c13
-rw-r--r--net/dccp/feat.c7
-rw-r--r--net/dccp/ipv4.c1
-rw-r--r--net/dccp/ipv6.c1
-rw-r--r--net/dccp/proto.c19
-rw-r--r--net/dsa/dsa2.c15
-rw-r--r--net/dsa/tag_ksz.c17
-rw-r--r--net/dsa/tag_trailer.c2
-rw-r--r--net/hsr/hsr_device.c3
-rw-r--r--net/ipv4/af_inet.c7
-rw-r--r--net/ipv4/cipso_ipv4.c12
-rw-r--r--net/ipv4/esp4.c20
-rw-r--r--net/ipv4/esp4_offload.c2
-rw-r--r--net/ipv4/fib_semantics.c14
-rw-r--r--net/ipv4/fou.c1
-rw-r--r--net/ipv4/igmp.c16
-rw-r--r--net/ipv4/ip_output.c8
-rw-r--r--net/ipv4/netfilter/arp_tables.c10
-rw-r--r--net/ipv4/netfilter/ip_tables.c9
-rw-r--r--net/ipv4/netfilter/ipt_CLUSTERIP.c4
-rw-r--r--net/ipv4/route.c16
-rw-r--r--net/ipv4/tcp.c2
-rw-r--r--net/ipv4/tcp_cong.c19
-rw-r--r--net/ipv4/tcp_input.c37
-rw-r--r--net/ipv4/tcp_ipv4.c4
-rw-r--r--net/ipv4/tcp_output.c32
-rw-r--r--net/ipv4/tcp_timer.c3
-rw-r--r--net/ipv4/tcp_ulp.c14
-rw-r--r--net/ipv4/udp.c51
-rw-r--r--net/ipv4/udp_offload.c2
-rw-r--r--net/ipv6/addrconf.c2
-rw-r--r--net/ipv6/esp6.c16
-rw-r--r--net/ipv6/esp6_offload.c2
-rw-r--r--net/ipv6/exthdrs.c1
-rw-r--r--net/ipv6/ip6_fib.c63
-rw-r--r--net/ipv6/ip6_output.c11
-rw-r--r--net/ipv6/ipv6_sockglue.c1
-rw-r--r--net/ipv6/output_core.c6
-rw-r--r--net/ipv6/route.c50
-rw-r--r--net/ipv6/tcp_ipv6.c4
-rw-r--r--net/ipv6/udp.c53
-rw-r--r--net/ipv6/udp_offload.c2
-rw-r--r--net/irda/af_irda.c2
-rw-r--r--net/kcm/kcmsock.c4
-rw-r--r--net/key/af_key.c48
-rw-r--r--net/l2tp/l2tp_core.c72
-rw-r--r--net/l2tp/l2tp_core.h13
-rw-r--r--net/l2tp/l2tp_netlink.c66
-rw-r--r--net/mac80211/agg-rx.c22
-rw-r--r--net/netfilter/nf_conntrack_core.c52
-rw-r--r--net/netfilter/nf_nat_core.c2
-rw-r--r--net/netfilter/nft_compat.c4
-rw-r--r--net/netfilter/nft_limit.c25
-rw-r--r--net/openvswitch/actions.c1
-rw-r--r--net/openvswitch/conntrack.c7
-rw-r--r--net/openvswitch/datapath.c7
-rw-r--r--net/openvswitch/datapath.h2
-rw-r--r--net/packet/af_packet.c27
-rw-r--r--net/rds/ib_recv.c5
-rw-r--r--net/rxrpc/call_accept.c1
-rw-r--r--net/sched/act_ipt.c24
-rw-r--r--net/sched/cls_api.c18
-rw-r--r--net/sched/sch_api.c9
-rw-r--r--net/sched/sch_atm.c4
-rw-r--r--net/sched/sch_cbq.c14
-rw-r--r--net/sched/sch_fq_codel.c4
-rw-r--r--net/sched/sch_generic.c2
-rw-r--r--net/sched/sch_hfsc.c12
-rw-r--r--net/sched/sch_hhf.c3
-rw-r--r--net/sched/sch_htb.c9
-rw-r--r--net/sched/sch_multiq.c7
-rw-r--r--net/sched/sch_netem.c4
-rw-r--r--net/sched/sch_sfq.c11
-rw-r--r--net/sched/sch_tbf.c5
-rw-r--r--net/sctp/ipv6.c2
-rw-r--r--net/sctp/sctp_diag.c7
-rw-r--r--net/sctp/socket.c3
-rw-r--r--net/socket.c5
-rw-r--r--net/sunrpc/svcsock.c22
-rw-r--r--net/tipc/bearer.c28
-rw-r--r--net/tipc/bearer.h2
-rw-r--r--net/tipc/msg.c8
-rw-r--r--net/tipc/netlink_compat.c6
-rw-r--r--net/tipc/node.c8
-rw-r--r--net/tipc/socket.c6
-rw-r--r--net/tipc/subscr.c21
-rw-r--r--net/unix/af_unix.c5
-rw-r--r--net/xfrm/xfrm_policy.c7
-rw-r--r--net/xfrm/xfrm_state.c8
-rw-r--r--net/xfrm/xfrm_user.c6
103 files changed, 840 insertions, 545 deletions
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index e1133bc634b5..8a3ce79b1307 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -1549,9 +1549,41 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry,
return found;
}
+/**
+ * batadv_tt_global_sync_flags - update TT sync flags
+ * @tt_global: the TT global entry to update sync flags in
+ *
+ * Updates the sync flag bits in the tt_global flag attribute with a logical
+ * OR of all sync flags from any of its TT orig entries.
+ */
+static void
+batadv_tt_global_sync_flags(struct batadv_tt_global_entry *tt_global)
+{
+ struct batadv_tt_orig_list_entry *orig_entry;
+ const struct hlist_head *head;
+ u16 flags = BATADV_NO_FLAGS;
+
+ rcu_read_lock();
+ head = &tt_global->orig_list;
+ hlist_for_each_entry_rcu(orig_entry, head, list)
+ flags |= orig_entry->flags;
+ rcu_read_unlock();
+
+ flags |= tt_global->common.flags & (~BATADV_TT_SYNC_MASK);
+ tt_global->common.flags = flags;
+}
+
+/**
+ * batadv_tt_global_orig_entry_add - add or update a TT orig entry
+ * @tt_global: the TT global entry to add an orig entry in
+ * @orig_node: the originator to add an orig entry for
+ * @ttvn: translation table version number of this changeset
+ * @flags: TT sync flags
+ */
static void
batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
- struct batadv_orig_node *orig_node, int ttvn)
+ struct batadv_orig_node *orig_node, int ttvn,
+ u8 flags)
{
struct batadv_tt_orig_list_entry *orig_entry;
@@ -1561,7 +1593,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
* was added during a "temporary client detection"
*/
orig_entry->ttvn = ttvn;
- goto out;
+ orig_entry->flags = flags;
+ goto sync_flags;
}
orig_entry = kmem_cache_zalloc(batadv_tt_orig_cache, GFP_ATOMIC);
@@ -1573,6 +1606,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
batadv_tt_global_size_inc(orig_node, tt_global->common.vid);
orig_entry->orig_node = orig_node;
orig_entry->ttvn = ttvn;
+ orig_entry->flags = flags;
kref_init(&orig_entry->refcount);
spin_lock_bh(&tt_global->list_lock);
@@ -1582,6 +1616,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global,
spin_unlock_bh(&tt_global->list_lock);
atomic_inc(&tt_global->orig_list_count);
+sync_flags:
+ batadv_tt_global_sync_flags(tt_global);
out:
if (orig_entry)
batadv_tt_orig_list_entry_put(orig_entry);
@@ -1703,10 +1739,10 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
}
/* the change can carry possible "attribute" flags like the
- * TT_CLIENT_WIFI, therefore they have to be copied in the
+ * TT_CLIENT_TEMP, therefore they have to be copied in the
* client entry
*/
- common->flags |= flags;
+ common->flags |= flags & (~BATADV_TT_SYNC_MASK);
/* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only
* one originator left in the list and we previously received a
@@ -1723,7 +1759,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv,
}
add_orig_entry:
/* add the new orig_entry (if needed) or update it */
- batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn);
+ batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn,
+ flags & BATADV_TT_SYNC_MASK);
batadv_dbg(BATADV_DBG_TT, bat_priv,
"Creating new global tt entry: %pM (vid: %d, via %pM)\n",
@@ -1946,6 +1983,7 @@ batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
struct batadv_tt_orig_list_entry *orig,
bool best)
{
+ u16 flags = (common->flags & (~BATADV_TT_SYNC_MASK)) | orig->flags;
void *hdr;
struct batadv_orig_node_vlan *vlan;
u8 last_ttvn;
@@ -1975,7 +2013,7 @@ batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq,
nla_put_u8(msg, BATADV_ATTR_TT_LAST_TTVN, last_ttvn) ||
nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) ||
nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) ||
- nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags))
+ nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, flags))
goto nla_put_failure;
if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST))
@@ -2589,6 +2627,7 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
unsigned short vid)
{
struct batadv_hashtable *hash = bat_priv->tt.global_hash;
+ struct batadv_tt_orig_list_entry *tt_orig;
struct batadv_tt_common_entry *tt_common;
struct batadv_tt_global_entry *tt_global;
struct hlist_head *head;
@@ -2627,8 +2666,9 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
/* find out if this global entry is announced by this
* originator
*/
- if (!batadv_tt_global_entry_has_orig(tt_global,
- orig_node))
+ tt_orig = batadv_tt_global_orig_entry_find(tt_global,
+ orig_node);
+ if (!tt_orig)
continue;
/* use network order to read the VID: this ensures that
@@ -2640,10 +2680,12 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv,
/* compute the CRC on flags that have to be kept in sync
* among nodes
*/
- flags = tt_common->flags & BATADV_TT_SYNC_MASK;
+ flags = tt_orig->flags;
crc_tmp = crc32c(crc_tmp, &flags, sizeof(flags));
crc ^= crc32c(crc_tmp, tt_common->addr, ETH_ALEN);
+
+ batadv_tt_orig_list_entry_put(tt_orig);
}
rcu_read_unlock();
}
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index ea43a6449247..a62795868794 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1260,6 +1260,7 @@ struct batadv_tt_global_entry {
* struct batadv_tt_orig_list_entry - orig node announcing a non-mesh client
* @orig_node: pointer to orig node announcing this non-mesh client
* @ttvn: translation table version number which added the non-mesh client
+ * @flags: per orig entry TT sync flags
* @list: list node for batadv_tt_global_entry::orig_list
* @refcount: number of contexts the object is used
* @rcu: struct used for freeing in an RCU-safe manner
@@ -1267,6 +1268,7 @@ struct batadv_tt_global_entry {
struct batadv_tt_orig_list_entry {
struct batadv_orig_node *orig_node;
u8 ttvn;
+ u8 flags;
struct hlist_node list;
struct kref refcount;
struct rcu_head rcu;
diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 861ae2a165f4..5a7be3bddfa9 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -53,6 +53,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev)
brstats->tx_bytes += skb->len;
u64_stats_update_end(&brstats->syncp);
+#ifdef CONFIG_NET_SWITCHDEV
+ skb->offload_fwd_mark = 0;
+#endif
BR_INPUT_SKB_CB(skb)->brdev = dev;
skb_reset_mac_header(skb);
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 181a44d0f1da..f6b1c7de059d 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -115,7 +115,7 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac,
void
br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
{
- if (!fdb->added_by_user)
+ if (!fdb->added_by_user || !fdb->dst)
return;
switch (type) {
diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c
index 746b145bfd11..417df675c71b 100644
--- a/net/ceph/crush/mapper.c
+++ b/net/ceph/crush/mapper.c
@@ -306,7 +306,7 @@ static __u32 *get_choose_arg_weights(const struct crush_bucket_straw2 *bucket,
const struct crush_choose_arg *arg,
int position)
{
- if (!arg || !arg->weight_set || arg->weight_set_size == 0)
+ if (!arg || !arg->weight_set)
return bucket->item_weights;
if (position >= arg->weight_set_size)
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index b7cc615d42ef..a67298c7e0cd 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -1287,10 +1287,10 @@ static void prepare_write_message(struct ceph_connection *con)
if (m->needs_out_seq) {
m->hdr.seq = cpu_to_le64(++con->out_seq);
m->needs_out_seq = false;
- }
- if (con->ops->reencode_message)
- con->ops->reencode_message(m);
+ if (con->ops->reencode_message)
+ con->ops->reencode_message(m);
+ }
dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n",
m, con->out_seq, le16_to_cpu(m->hdr.type),
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 901bb8221366..dcfbdd74dfd1 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -1337,6 +1337,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
bool legacy_change;
bool split = false;
bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE);
+ bool recovery_deletes = ceph_osdmap_flag(osdc,
+ CEPH_OSDMAP_RECOVERY_DELETES);
enum calc_target_result ct_res;
int ret;
@@ -1399,6 +1401,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
pi->pg_num,
t->sort_bitwise,
sort_bitwise,
+ t->recovery_deletes,
+ recovery_deletes,
&last_pgid))
force_resend = true;
@@ -1421,6 +1425,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc,
t->pg_num = pi->pg_num;
t->pg_num_mask = pi->pg_num_mask;
t->sort_bitwise = sort_bitwise;
+ t->recovery_deletes = recovery_deletes;
t->osd = acting.primary;
}
@@ -1918,10 +1923,12 @@ static void encode_request_partial(struct ceph_osd_request *req,
}
ceph_encode_32(&p, req->r_attempts); /* retry_attempt */
- BUG_ON(p != end - 8); /* space for features */
+ BUG_ON(p > end - 8); /* space for features */
msg->hdr.version = cpu_to_le16(8); /* MOSDOp v8 */
/* front_len is finalized in encode_request_finish() */
+ msg->front.iov_len = p - msg->front.iov_base;
+ msg->hdr.front_len = cpu_to_le32(msg->front.iov_len);
msg->hdr.data_len = cpu_to_le32(data_len);
/*
* The header "data_off" is a hint to the receiver allowing it
@@ -1937,11 +1944,12 @@ static void encode_request_partial(struct ceph_osd_request *req,
static void encode_request_finish(struct ceph_msg *msg)
{
void *p = msg->front.iov_base;
+ void *const partial_end = p + msg->front.iov_len;
void *const end = p + msg->front_alloc_len;
if (CEPH_HAVE_FEATURE(msg->con->peer_features, RESEND_ON_SPLIT)) {
/* luminous OSD -- encode features and be done */
- p = end - 8;
+ p = partial_end;
ceph_encode_64(&p, msg->con->peer_features);
} else {
struct {
@@ -1984,7 +1992,7 @@ static void encode_request_finish(struct ceph_msg *msg)
oid_len = p - oid;
tail = p;
- tail_len = (end - p) - 8;
+ tail_len = partial_end - p;
p = msg->front.iov_base;
ceph_encode_copy(&p, &head.client_inc, sizeof(head.client_inc));
diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c
index 64ae9f89773a..f358d0bfa76b 100644
--- a/net/ceph/osdmap.c
+++ b/net/ceph/osdmap.c
@@ -295,6 +295,10 @@ static int decode_choose_args(void **p, void *end, struct crush_map *c)
ret = decode_choose_arg(p, end, arg);
if (ret)
goto fail;
+
+ if (arg->ids_size &&
+ arg->ids_size != c->buckets[bucket_index]->size)
+ goto e_inval;
}
insert_choose_arg_map(&c->choose_args, arg_map);
@@ -2078,6 +2082,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
u32 new_pg_num,
bool old_sort_bitwise,
bool new_sort_bitwise,
+ bool old_recovery_deletes,
+ bool new_recovery_deletes,
const struct ceph_pg *pgid)
{
return !osds_equal(old_acting, new_acting) ||
@@ -2085,7 +2091,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting,
old_size != new_size ||
old_min_size != new_min_size ||
ceph_pg_is_split(pgid, old_pg_num, new_pg_num) ||
- old_sort_bitwise != new_sort_bitwise;
+ old_sort_bitwise != new_sort_bitwise ||
+ old_recovery_deletes != new_recovery_deletes;
}
static int calc_pg_rank(int osd, const struct ceph_osds *acting)
@@ -2301,10 +2308,17 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi,
}
}
+/*
+ * Magic value used for a "default" fallback choose_args, used if the
+ * crush_choose_arg_map passed to do_crush() does not exist. If this
+ * also doesn't exist, fall back to canonical weights.
+ */
+#define CEPH_DEFAULT_CHOOSE_ARGS -1
+
static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
int *result, int result_max,
const __u32 *weight, int weight_max,
- u64 choose_args_index)
+ s64 choose_args_index)
{
struct crush_choose_arg_map *arg_map;
int r;
@@ -2313,6 +2327,9 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x,
arg_map = lookup_choose_arg_map(&map->crush->choose_args,
choose_args_index);
+ if (!arg_map)
+ arg_map = lookup_choose_arg_map(&map->crush->choose_args,
+ CEPH_DEFAULT_CHOOSE_ARGS);
mutex_lock(&map->crush_workspace_mutex);
r = crush_do_rule(map->crush, ruleno, x, result, result_max,
@@ -2423,40 +2440,23 @@ static void apply_upmap(struct ceph_osdmap *osdmap,
for (i = 0; i < pg->pg_upmap.len; i++)
raw->osds[i] = pg->pg_upmap.osds[i];
raw->size = pg->pg_upmap.len;
- return;
+ /* check and apply pg_upmap_items, if any */
}
pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid);
if (pg) {
- /*
- * Note: this approach does not allow a bidirectional swap,
- * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1].
- */
- for (i = 0; i < pg->pg_upmap_items.len; i++) {
- int from = pg->pg_upmap_items.from_to[i][0];
- int to = pg->pg_upmap_items.from_to[i][1];
- int pos = -1;
- bool exists = false;
-
- /* make sure replacement doesn't already appear */
- for (j = 0; j < raw->size; j++) {
- int osd = raw->osds[j];
-
- if (osd == to) {
- exists = true;
+ for (i = 0; i < raw->size; i++) {
+ for (j = 0; j < pg->pg_upmap_items.len; j++) {
+ int from = pg->pg_upmap_items.from_to[j][0];
+ int to = pg->pg_upmap_items.from_to[j][1];
+
+ if (from == raw->osds[i]) {
+ if (!(to != CRUSH_ITEM_NONE &&
+ to < osdmap->max_osd &&
+ osdmap->osd_weight[to] == 0))
+ raw->osds[i] = to;
break;
}
- /* ignore mapping if target is marked out */
- if (osd == from && pos < 0 &&
- !(to != CRUSH_ITEM_NONE &&
- to < osdmap->max_osd &&
- osdmap->osd_weight[to] == 0)) {
- pos = j;
- }
- }
- if (!exists && pos >= 0) {
- raw->osds[pos] = to;
- return;
}
}
}
diff --git a/net/core/datagram.c b/net/core/datagram.c
index ee5647bd91b3..8c2f4489ff8f 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -169,14 +169,20 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk,
int *peeked, int *off, int *err,
struct sk_buff **last)
{
+ bool peek_at_off = false;
struct sk_buff *skb;
- int _off = *off;
+ int _off = 0;
+
+ if (unlikely(flags & MSG_PEEK && *off >= 0)) {
+ peek_at_off = true;
+ _off = *off;
+ }
*last = queue->prev;
skb_queue_walk(queue, skb) {
if (flags & MSG_PEEK) {
- if (_off >= skb->len && (skb->len || _off ||
- skb->peeked)) {
+ if (peek_at_off && _off >= skb->len &&
+ (_off || skb->peeked)) {
_off -= skb->len;
continue;
}
@@ -356,7 +362,7 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue,
if (flags & MSG_PEEK) {
err = -ENOENT;
spin_lock_bh(&sk_queue->lock);
- if (skb == skb_peek(sk_queue)) {
+ if (skb->next) {
__skb_unlink(skb, sk_queue);
refcount_dec(&skb->users);
if (destructor)
diff --git a/net/core/dev.c b/net/core/dev.c
index 8515f8fe0460..86b4b0a79e7a 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -2739,7 +2739,7 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path)
{
if (tx_path)
return skb->ip_summed != CHECKSUM_PARTIAL &&
- skb->ip_summed != CHECKSUM_NONE;
+ skb->ip_summed != CHECKSUM_UNNECESSARY;
return skb->ip_summed == CHECKSUM_NONE;
}
@@ -5289,6 +5289,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock)
* Ideally, a new ndo_busy_poll_stop() could avoid another round.
*/
rc = napi->poll(napi, BUSY_POLL_BUDGET);
+ trace_napi_poll(napi, rc, BUSY_POLL_BUDGET);
netpoll_poll_unlock(have_poll_lock);
if (rc == BUSY_POLL_BUDGET)
__napi_schedule(napi);
@@ -5667,12 +5668,13 @@ EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu);
* Find out if a device is linked to an upper device and return true in case
* it is. The caller must hold the RTNL lock.
*/
-static bool netdev_has_any_upper_dev(struct net_device *dev)
+bool netdev_has_any_upper_dev(struct net_device *dev)
{
ASSERT_RTNL();
return !list_empty(&dev->adj_list.upper);
}
+EXPORT_SYMBOL(netdev_has_any_upper_dev);
/**
* netdev_master_upper_dev_get - Get master upper device
diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c
index 06b147d7d9e2..709a4e6fb447 100644
--- a/net/core/dev_ioctl.c
+++ b/net/core/dev_ioctl.c
@@ -263,6 +263,8 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd)
return dev_set_mtu(dev, ifr->ifr_mtu);
case SIOCSIFHWADDR:
+ if (dev->addr_len > sizeof(struct sockaddr))
+ return -EINVAL;
return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
case SIOCSIFHWBROADCAST:
diff --git a/net/core/filter.c b/net/core/filter.c
index f44fc22fd45a..169974998c76 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2836,15 +2836,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
sk->sk_prot->setsockopt == tcp_setsockopt) {
if (optname == TCP_CONGESTION) {
char name[TCP_CA_NAME_MAX];
+ bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN;
strncpy(name, optval, min_t(long, optlen,
TCP_CA_NAME_MAX-1));
name[TCP_CA_NAME_MAX-1] = 0;
- ret = tcp_set_congestion_control(sk, name, false);
- if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
- /* replacing an existing ca */
- tcp_reinit_congestion_control(sk,
- inet_csk(sk)->icsk_ca_ops);
+ ret = tcp_set_congestion_control(sk, name, false, reinit);
} else {
struct tcp_sock *tp = tcp_sk(sk);
@@ -2872,7 +2869,6 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
ret = -EINVAL;
}
}
- ret = -EINVAL;
#endif
} else {
ret = -EINVAL;
@@ -3505,6 +3501,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
bpf_target_off(struct sk_buff, tc_index, 2,
target_size));
#else
+ *target_size = 2;
if (type == BPF_WRITE)
*insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg);
else
@@ -3520,6 +3517,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type,
*insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1);
*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#else
+ *target_size = 4;
*insn++ = BPF_MOV64_IMM(si->dst_reg, 0);
#endif
break;
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 8357f164c660..912731bed7b7 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -666,7 +666,7 @@ int netpoll_setup(struct netpoll *np)
int err;
rtnl_lock();
- if (np->dev_name) {
+ if (np->dev_name[0]) {
struct net *net = current->nsproxy->net_ns;
ndev = __dev_get_by_name(net, np->dev_name);
}
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index f990eb8b30a9..e07556606284 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -1363,18 +1363,20 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb,
EXPORT_SYMBOL(skb_copy_expand);
/**
- * skb_pad - zero pad the tail of an skb
+ * __skb_pad - zero pad the tail of an skb
* @skb: buffer to pad
* @pad: space to pad
+ * @free_on_error: free buffer on error
*
* Ensure that a buffer is followed by a padding area that is zero
* filled. Used by network drivers which may DMA or transfer data
* beyond the buffer end onto the wire.
*
- * May return error in out of memory cases. The skb is freed on error.
+ * May return error in out of memory cases. The skb is freed on error
+ * if @free_on_error is true.
*/
-int skb_pad(struct sk_buff *skb, int pad)
+int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error)
{
int err;
int ntail;
@@ -1403,10 +1405,11 @@ int skb_pad(struct sk_buff *skb, int pad)
return 0;
free_skb:
- kfree_skb(skb);
+ if (free_on_error)
+ kfree_skb(skb);
return err;
}
-EXPORT_SYMBOL(skb_pad);
+EXPORT_SYMBOL(__skb_pad);
/**
* pskb_put - add data to the tail of a potentially fragmented buffer
diff --git a/net/dccp/feat.c b/net/dccp/feat.c
index 1704948e6a12..f227f002c73d 100644
--- a/net/dccp/feat.c
+++ b/net/dccp/feat.c
@@ -1471,9 +1471,12 @@ int dccp_feat_init(struct sock *sk)
* singleton values (which always leads to failure).
* These settings can still (later) be overridden via sockopts.
*/
- if (ccid_get_builtin_ccids(&tx.val, &tx.len) ||
- ccid_get_builtin_ccids(&rx.val, &rx.len))
+ if (ccid_get_builtin_ccids(&tx.val, &tx.len))
return -ENOBUFS;
+ if (ccid_get_builtin_ccids(&rx.val, &rx.len)) {
+ kfree(tx.val);
+ return -ENOBUFS;
+ }
if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) ||
!dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len))
diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c
index f85d901f4e3f..1b202f16531f 100644
--- a/net/dccp/ipv4.c
+++ b/net/dccp/ipv4.c
@@ -631,6 +631,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
goto drop_and_free;
inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+ reqsk_put(req);
return 0;
drop_and_free:
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index c376af5bfdfb..1b58eac8aad3 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -380,6 +380,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
goto drop_and_free;
inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT);
+ reqsk_put(req);
return 0;
drop_and_free:
diff --git a/net/dccp/proto.c b/net/dccp/proto.c
index 9fe25bf63296..b68168fcc06a 100644
--- a/net/dccp/proto.c
+++ b/net/dccp/proto.c
@@ -24,6 +24,7 @@
#include <net/checksum.h>
#include <net/inet_sock.h>
+#include <net/inet_common.h>
#include <net/sock.h>
#include <net/xfrm.h>
@@ -170,6 +171,15 @@ const char *dccp_packet_name(const int type)
EXPORT_SYMBOL_GPL(dccp_packet_name);
+static void dccp_sk_destruct(struct sock *sk)
+{
+ struct dccp_sock *dp = dccp_sk(sk);
+
+ ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
+ dp->dccps_hc_tx_ccid = NULL;
+ inet_sock_destruct(sk);
+}
+
int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
{
struct dccp_sock *dp = dccp_sk(sk);
@@ -179,6 +189,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized)
icsk->icsk_syn_retries = sysctl_dccp_request_retries;
sk->sk_state = DCCP_CLOSED;
sk->sk_write_space = dccp_write_space;
+ sk->sk_destruct = dccp_sk_destruct;
icsk->icsk_sync_mss = dccp_sync_mss;
dp->dccps_mss_cache = 536;
dp->dccps_rate_last = jiffies;
@@ -201,10 +212,7 @@ void dccp_destroy_sock(struct sock *sk)
{
struct dccp_sock *dp = dccp_sk(sk);
- /*
- * DCCP doesn't use sk_write_queue, just sk_send_head
- * for retransmissions
- */
+ __skb_queue_purge(&sk->sk_write_queue);
if (sk->sk_send_head != NULL) {
kfree_skb(sk->sk_send_head);
sk->sk_send_head = NULL;
@@ -222,8 +230,7 @@ void dccp_destroy_sock(struct sock *sk)
dp->dccps_hc_rx_ackvec = NULL;
}
ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk);
- ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk);
- dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL;
+ dp->dccps_hc_rx_ccid = NULL;
/* clean up feature negotiation state */
dccp_feat_list_purge(&dp->dccps_featneg);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index 56e46090526b..20bc9c56fca0 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -509,21 +509,22 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index,
dst->cpu_dp->netdev = ethernet_dev;
}
+ /* Initialize cpu_port_mask now for drv->setup()
+ * to have access to a correct value, just like what
+ * net/dsa/dsa.c::dsa_switch_setup_one does.
+ */
+ ds->cpu_port_mask |= BIT(index);
+
tag_protocol = ds->ops->get_tag_protocol(ds);
dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol);
if (IS_ERR(dst->tag_ops)) {
dev_warn(ds->dev, "No tagger for this switch\n");
+ ds->cpu_port_mask &= ~BIT(index);
return PTR_ERR(dst->tag_ops);
}
dst->rcv = dst->tag_ops->rcv;
- /* Initialize cpu_port_mask now for drv->setup()
- * to have access to a correct value, just like what
- * net/dsa/dsa.c::dsa_switch_setup_one does.
- */
- ds->cpu_port_mask |= BIT(index);
-
return 0;
}
@@ -576,7 +577,7 @@ static int dsa_dst_parse(struct dsa_switch_tree *dst)
return err;
}
- if (!dst->cpu_dp->netdev) {
+ if (!dst->cpu_dp) {
pr_warn("Tree has no master device\n");
return -EINVAL;
}
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index fab41de8e983..fcd90f79458e 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -42,6 +42,10 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len;
if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) {
+ /* Let dsa_slave_xmit() free skb */
+ if (__skb_put_padto(skb, skb->len + padlen, false))
+ return NULL;
+
nskb = skb;
} else {
nskb = alloc_skb(NET_IP_ALIGN + skb->len +
@@ -56,12 +60,15 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev)
skb_set_transport_header(nskb,
skb_transport_header(skb) - skb->head);
skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len));
- kfree_skb(skb);
- }
- /* skb is freed when it fails */
- if (skb_put_padto(nskb, nskb->len + padlen))
- return NULL;
+ /* Let skb_put_padto() free nskb, and let dsa_slave_xmit() free
+ * skb
+ */
+ if (skb_put_padto(nskb, nskb->len + padlen))
+ return NULL;
+
+ consume_skb(skb);
+ }
tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN);
tag[0] = 0;
diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c
index b09e56214005..9c7b1d74a5c6 100644
--- a/net/dsa/tag_trailer.c
+++ b/net/dsa/tag_trailer.c
@@ -40,7 +40,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev)
skb_set_network_header(nskb, skb_network_header(skb) - skb->head);
skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head);
skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len));
- kfree_skb(skb);
+ consume_skb(skb);
if (padlen) {
skb_put_zero(nskb, padlen);
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 4e7bdb213cd0..172d8309f89e 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -314,7 +314,8 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
ether_addr_copy(hsr_sp->MacAddressA, master->dev->dev_addr);
- skb_put_padto(skb, ETH_ZLEN + HSR_HLEN);
+ if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN))
+ return;
hsr_forward_skb(skb, master);
return;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 76c2077c3f5b..2e548eca3489 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1731,6 +1731,13 @@ static __net_init int inet_init_net(struct net *net)
net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
#endif
+ /* Some igmp sysctl, whose values are always used */
+ net->ipv4.sysctl_igmp_max_memberships = 20;
+ net->ipv4.sysctl_igmp_max_msf = 10;
+ /* IGMP reports for link-local multicast groups are enabled by default */
+ net->ipv4.sysctl_igmp_llm_reports = 1;
+ net->ipv4.sysctl_igmp_qrv = 2;
+
return 0;
}
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index c4c6e1969ed0..2ae8f54cb321 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -1523,9 +1523,17 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb)
int taglen;
for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) {
- if (optptr[0] == IPOPT_CIPSO)
+ switch (optptr[0]) {
+ case IPOPT_CIPSO:
return optptr;
- taglen = optptr[1];
+ case IPOPT_END:
+ return NULL;
+ case IPOPT_NOOP:
+ taglen = 1;
+ break;
+ default:
+ taglen = optptr[1];
+ }
optlen -= taglen;
optptr += taglen;
}
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 0cbee0a666ff..df68963dc90a 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -258,7 +258,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
esp_output_udp_encap(x, skb, esp);
if (!skb_cloned(skb)) {
- if (tailen <= skb_availroom(skb)) {
+ if (tailen <= skb_tailroom(skb)) {
nfrags = 1;
trailer = skb;
tail = skb_tail_pointer(trailer);
@@ -292,8 +292,6 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
kunmap_atomic(vaddr);
- spin_unlock_bh(&x->lock);
-
nfrags = skb_shinfo(skb)->nr_frags;
__skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
@@ -301,6 +299,9 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
skb_shinfo(skb)->nr_frags = ++nfrags;
pfrag->offset = pfrag->offset + allocsize;
+
+ spin_unlock_bh(&x->lock);
+
nfrags++;
skb->len += tailen;
@@ -381,7 +382,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
(unsigned char *)esph - skb->data,
assoclen + ivlen + esp->clen + alen);
if (unlikely(err < 0))
- goto error;
+ goto error_free;
if (!esp->inplace) {
int allocsize;
@@ -392,7 +393,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
spin_lock_bh(&x->lock);
if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
spin_unlock_bh(&x->lock);
- goto error;
+ goto error_free;
}
skb_shinfo(skb)->nr_frags = 1;
@@ -409,7 +410,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
(unsigned char *)esph - skb->data,
assoclen + ivlen + esp->clen + alen);
if (unlikely(err < 0))
- goto error;
+ goto error_free;
}
if ((x->props.flags & XFRM_STATE_ESN))
@@ -442,8 +443,9 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *
if (sg != dsg)
esp_ssg_unref(x, tmp);
- kfree(tmp);
+error_free:
+ kfree(tmp);
error:
return err;
}
@@ -695,8 +697,10 @@ skip_cow:
sg_init_table(sg, nfrags);
err = skb_to_sgvec(skb, sg, 0, skb->len);
- if (unlikely(err < 0))
+ if (unlikely(err < 0)) {
+ kfree(tmp);
goto out;
+ }
skb->ip_summed = CHECKSUM_NONE;
diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c
index e0666016a764..50112324fa5c 100644
--- a/net/ipv4/esp4_offload.c
+++ b/net/ipv4/esp4_offload.c
@@ -257,7 +257,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_
esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
err = esp_output_tail(x, skb, &esp);
- if (err < 0)
+ if (err)
return err;
secpath_reset(skb);
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 222100103808..ec3a9ce281a6 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1083,15 +1083,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg,
fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL);
if (!fi)
goto failure;
- fib_info_cnt++;
if (cfg->fc_mx) {
fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL);
- if (!fi->fib_metrics)
- goto failure;
+ if (unlikely(!fi->fib_metrics)) {
+ kfree(fi);
+ return ERR_PTR(err);
+ }
atomic_set(&fi->fib_metrics->refcnt, 1);
- } else
+ } else {
fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics;
-
+ }
+ fib_info_cnt++;
fi->fib_net = net;
fi->fib_protocol = cfg->fc_protocol;
fi->fib_scope = cfg->fc_scope;
@@ -1452,7 +1454,7 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh,
return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type,
&info.info);
case FIB_EVENT_NH_DEL:
- if ((IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
+ if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) &&
fib_nh->nh_flags & RTNH_F_LINKDOWN) ||
(fib_nh->nh_flags & RTNH_F_DEAD))
return call_fib_notifiers(dev_net(fib_nh->nh_dev),
diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c
index 8e0257d01200..1540db65241a 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou.c
@@ -450,6 +450,7 @@ out_unlock:
out:
NAPI_GRO_CB(skb)->flush |= flush;
skb_gro_remcsum_cleanup(skb, &grc);
+ skb->remcsum_offload = 0;
return pp;
}
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 28f14afd0dd3..caf2f1101d02 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -1007,10 +1007,18 @@ int igmp_rcv(struct sk_buff *skb)
{
/* This basically follows the spec line by line -- see RFC1112 */
struct igmphdr *ih;
- struct in_device *in_dev = __in_dev_get_rcu(skb->dev);
+ struct net_device *dev = skb->dev;
+ struct in_device *in_dev;
int len = skb->len;
bool dropped = true;
+ if (netif_is_l3_master(dev)) {
+ dev = dev_get_by_index_rcu(dev_net(dev), IPCB(skb)->iif);
+ if (!dev)
+ goto drop;
+ }
+
+ in_dev = __in_dev_get_rcu(dev);
if (!in_dev)
goto drop;
@@ -2974,12 +2982,6 @@ static int __net_init igmp_net_init(struct net *net)
goto out_sock;
}
- /* Sysctl initialization */
- net->ipv4.sysctl_igmp_max_memberships = 20;
- net->ipv4.sysctl_igmp_max_msf = 10;
- /* IGMP reports for link-local multicast groups are enabled by default */
- net->ipv4.sysctl_igmp_llm_reports = 1;
- net->ipv4.sysctl_igmp_qrv = 2;
return 0;
out_sock:
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 50c74cd890bc..e153c40c2436 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -965,11 +965,12 @@ static int __ip_append_data(struct sock *sk,
csummode = CHECKSUM_PARTIAL;
cork->length += length;
- if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) ||
- (skb && skb_is_gso(skb))) &&
+ if ((skb && skb_is_gso(skb)) ||
+ (((length + (skb ? skb->len : fragheaderlen)) > mtu) &&
+ (skb_queue_len(queue) <= 1) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
- (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) {
+ (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) {
err = ip_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, transhdrlen,
maxfraglen, flags);
@@ -1288,6 +1289,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page,
return -EINVAL;
if ((size + skb->len > mtu) &&
+ (skb_queue_len(&sk->sk_write_queue) == 1) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO)) {
if (skb->ip_summed != CHECKSUM_PARTIAL)
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index 0bc3c3d73e61..9e9d9afd18f7 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -268,14 +268,14 @@ unsigned int arpt_do_table(struct sk_buff *skb,
acpar.targinfo = t->data;
verdict = t->u.kernel.target->target(skb, &acpar);
- /* Target might have changed stuff. */
- arp = arp_hdr(skb);
-
- if (verdict == XT_CONTINUE)
+ if (verdict == XT_CONTINUE) {
+ /* Target might have changed stuff. */
+ arp = arp_hdr(skb);
e = arpt_next_entry(e);
- else
+ } else {
/* Verdict */
break;
+ }
} while (!acpar.hotdrop);
xt_write_recseq_end(addend);
local_bh_enable();
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2a55a40211cb..622ed2887cd5 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -352,13 +352,14 @@ ipt_do_table(struct sk_buff *skb,
acpar.targinfo = t->data;
verdict = t->u.kernel.target->target(skb, &acpar);
- /* Target might have changed stuff. */
- ip = ip_hdr(skb);
- if (verdict == XT_CONTINUE)
+ if (verdict == XT_CONTINUE) {
+ /* Target might have changed stuff. */
+ ip = ip_hdr(skb);
e = ipt_next_entry(e);
- else
+ } else {
/* Verdict */
break;
+ }
} while (!acpar.hotdrop);
xt_write_recseq_end(addend);
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
index 7d72decb80f9..efaa04dcc80e 100644
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c
@@ -117,7 +117,8 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c)
* functions are also incrementing the refcount on their own,
* so it's safe to remove the entry even if it's in use. */
#ifdef CONFIG_PROC_FS
- proc_remove(c->pde);
+ if (cn->procdir)
+ proc_remove(c->pde);
#endif
return;
}
@@ -815,6 +816,7 @@ static void clusterip_net_exit(struct net *net)
#ifdef CONFIG_PROC_FS
struct clusterip_net *cn = net_generic(net, clusterip_net_id);
proc_remove(cn->procdir);
+ cn->procdir = NULL;
#endif
nf_unregister_net_hook(net, &cip_arp_ops);
}
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 0383e66f59bc..2331de20ca50 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -1267,7 +1267,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst)
if (mtu)
return mtu;
- mtu = dst->dev->mtu;
+ mtu = READ_ONCE(dst->dev->mtu);
if (unlikely(dst_metric_locked(dst, RTAX_MTU))) {
if (rt->rt_uses_gateway && mtu > 576)
@@ -2750,26 +2750,34 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh,
err = 0;
if (IS_ERR(rt))
err = PTR_ERR(rt);
+ else
+ skb_dst_set(skb, &rt->dst);
}
if (err)
goto errout_free;
- skb_dst_set(skb, &rt->dst);
if (rtm->rtm_flags & RTM_F_NOTIFY)
rt->rt_flags |= RTCF_NOTIFY;
if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE)
table_id = rt->rt_table_id;
- if (rtm->rtm_flags & RTM_F_FIB_MATCH)
+ if (rtm->rtm_flags & RTM_F_FIB_MATCH) {
+ if (!res.fi) {
+ err = fib_props[res.type].error;
+ if (!err)
+ err = -EHOSTUNREACH;
+ goto errout_free;
+ }
err = fib_dump_info(skb, NETLINK_CB(in_skb).portid,
nlh->nlmsg_seq, RTM_NEWROUTE, table_id,
rt->rt_type, res.prefix, res.prefixlen,
fl4.flowi4_tos, res.fi, 0);
- else
+ } else {
err = rt_fill_info(net, dst, src, table_id, &fl4, skb,
NETLINK_CB(in_skb).portid, nlh->nlmsg_seq);
+ }
if (err < 0)
goto errout_free;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 71ce33decd97..a3e91b552edc 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2481,7 +2481,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
name[val] = 0;
lock_sock(sk);
- err = tcp_set_congestion_control(sk, name, true);
+ err = tcp_set_congestion_control(sk, name, true, true);
release_sock(sk);
return err;
}
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index fde983f6376b..421ea1b918da 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk)
INET_ECN_dontxmit(sk);
}
-void tcp_reinit_congestion_control(struct sock *sk,
- const struct tcp_congestion_ops *ca)
+static void tcp_reinit_congestion_control(struct sock *sk,
+ const struct tcp_congestion_ops *ca)
{
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -338,7 +338,7 @@ out:
* tcp_reinit_congestion_control (if the current congestion control was
* already initialized.
*/
-int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
+int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_congestion_ops *ca;
@@ -360,9 +360,18 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
if (!ca) {
err = -ENOENT;
} else if (!load) {
- icsk->icsk_ca_ops = ca;
- if (!try_module_get(ca->owner))
+ const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops;
+
+ if (try_module_get(ca->owner)) {
+ if (reinit) {
+ tcp_reinit_congestion_control(sk, ca);
+ } else {
+ icsk->icsk_ca_ops = ca;
+ module_put(old_ca->owner);
+ }
+ } else {
err = -EBUSY;
+ }
} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
err = -EPERM;
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 2920e0cb09f8..bab7f0493098 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -107,6 +107,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2;
#define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */
#define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */
#define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */
+#define FLAG_SET_XMIT_TIMER 0x1000 /* Set TLP or RTO timer */
#define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */
#define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */
#define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */
@@ -2520,8 +2521,8 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk)
return;
/* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */
- if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR ||
- (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) {
+ if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH &&
+ (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) {
tp->snd_cwnd = tp->snd_ssthresh;
tp->snd_cwnd_stamp = tcp_jiffies32;
}
@@ -3004,21 +3005,24 @@ void tcp_rearm_rto(struct sock *sk)
/* Offset the time elapsed after installing regular RTO */
if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
- struct sk_buff *skb = tcp_write_queue_head(sk);
- u64 rto_time_stamp = skb->skb_mstamp +
- jiffies_to_usecs(rto);
- s64 delta_us = rto_time_stamp - tp->tcp_mstamp;
+ s64 delta_us = tcp_rto_delta_us(sk);
/* delta_us may not be positive if the socket is locked
* when the retrans timer fires and is rescheduled.
*/
- if (delta_us > 0)
- rto = usecs_to_jiffies(delta_us);
+ rto = usecs_to_jiffies(max_t(int, delta_us, 1));
}
inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
TCP_RTO_MAX);
}
}
+/* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */
+static void tcp_set_xmit_timer(struct sock *sk)
+{
+ if (!tcp_schedule_loss_probe(sk))
+ tcp_rearm_rto(sk);
+}
+
/* If we get here, the whole TSO packet has not been acked. */
static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb)
{
@@ -3180,7 +3184,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
ca_rtt_us, sack->rate);
if (flag & FLAG_ACKED) {
- tcp_rearm_rto(sk);
+ flag |= FLAG_SET_XMIT_TIMER; /* set TLP or RTO timer */
if (unlikely(icsk->icsk_mtup.probe_size &&
!after(tp->mtu_probe.probe_seq_end, tp->snd_una))) {
tcp_mtup_probe_success(sk);
@@ -3208,7 +3212,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets,
* after when the head was last (re)transmitted. Otherwise the
* timeout may continue to extend in loss recovery.
*/
- tcp_rearm_rto(sk);
+ flag |= FLAG_SET_XMIT_TIMER; /* set TLP or RTO timer */
}
if (icsk->icsk_ca_ops->pkts_acked) {
@@ -3580,9 +3584,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
if (after(ack, tp->snd_nxt))
goto invalid_ack;
- if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)
- tcp_rearm_rto(sk);
-
if (after(ack, prior_snd_una)) {
flag |= FLAG_SND_UNA_ADVANCED;
icsk->icsk_retransmits = 0;
@@ -3647,18 +3648,20 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag)
flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked,
&sack_state);
+ if (tp->tlp_high_seq)
+ tcp_process_tlp_ack(sk, ack, flag);
+ /* If needed, reset TLP/RTO timer; RACK may later override this. */
+ if (flag & FLAG_SET_XMIT_TIMER)
+ tcp_set_xmit_timer(sk);
+
if (tcp_ack_is_dubious(sk, flag)) {
is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP));
tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit);
}
- if (tp->tlp_high_seq)
- tcp_process_tlp_ack(sk, ack, flag);
if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP))
sk_dst_confirm(sk);
- if (icsk->icsk_pending == ICSK_TIME_RETRANS)
- tcp_schedule_loss_probe(sk);
delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */
lost = tp->lost - lost; /* freshly marked lost */
tcp_rate_gen(sk, delivered, lost, sack_state.rate);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index a20e7f03d5f7..e9252c7df809 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1722,6 +1722,8 @@ process:
*/
sock_hold(sk);
refcounted = true;
+ if (tcp_filter(sk, skb))
+ goto discard_and_relse;
nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
@@ -1729,8 +1731,6 @@ process:
}
if (nsk == sk) {
reqsk_put(req);
- } else if (tcp_filter(sk, skb)) {
- goto discard_and_relse;
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v4_send_reset(nsk, skb);
goto discard_and_relse;
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 4e985dea1dd2..b7661a68d498 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2202,9 +2202,10 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb,
static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new)
{
const u32 now = tcp_jiffies32;
+ enum tcp_chrono old = tp->chrono_type;
- if (tp->chrono_type > TCP_CHRONO_UNSPEC)
- tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start;
+ if (old > TCP_CHRONO_UNSPEC)
+ tp->chrono_stat[old - 1] += now - tp->chrono_start;
tp->chrono_start = now;
tp->chrono_type = new;
}
@@ -2376,24 +2377,15 @@ bool tcp_schedule_loss_probe(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct tcp_sock *tp = tcp_sk(sk);
- u32 timeout, tlp_time_stamp, rto_time_stamp;
u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3);
+ u32 timeout, rto_delta_us;
- /* No consecutive loss probes. */
- if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) {
- tcp_rearm_rto(sk);
- return false;
- }
/* Don't do any loss probe on a Fast Open connection before 3WHS
* finishes.
*/
if (tp->fastopen_rsk)
return false;
- /* TLP is only scheduled when next timer event is RTO. */
- if (icsk->icsk_pending != ICSK_TIME_RETRANS)
- return false;
-
/* Schedule a loss probe in 2*RTT for SACK capable connections
* in Open state, that are either limited by cwnd or application.
*/
@@ -2416,14 +2408,10 @@ bool tcp_schedule_loss_probe(struct sock *sk)
(rtt + (rtt >> 1) + TCP_DELACK_MAX));
timeout = max_t(u32, timeout, msecs_to_jiffies(10));
- /* If RTO is shorter, just schedule TLP in its place. */
- tlp_time_stamp = tcp_jiffies32 + timeout;
- rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout;
- if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) {
- s32 delta = rto_time_stamp - tcp_jiffies32;
- if (delta > 0)
- timeout = delta;
- }
+ /* If the RTO formula yields an earlier time, then use that time. */
+ rto_delta_us = tcp_rto_delta_us(sk); /* How far in future is RTO? */
+ if (rto_delta_us > 0)
+ timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
TCP_RTO_MAX);
@@ -3448,6 +3436,10 @@ int tcp_connect(struct sock *sk)
int err;
tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB);
+
+ if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk))
+ return -EHOSTUNREACH; /* Routing failure or similar. */
+
tcp_connect_init(sk);
if (unlikely(tp->repair)) {
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c0feeeef962a..e906014890b6 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -652,7 +652,8 @@ static void tcp_keepalive_timer (unsigned long data)
goto death;
}
- if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE)
+ if (!sock_flag(sk, SOCK_KEEPOPEN) ||
+ ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT)))
goto out;
elapsed = keepalive_time_when(tp);
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 2417f55374c5..6bb9e14c710a 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -122,14 +122,14 @@ int tcp_set_ulp(struct sock *sk, const char *name)
ulp_ops = __tcp_ulp_find_autoload(name);
if (!ulp_ops)
- err = -ENOENT;
- else
- err = ulp_ops->init(sk);
+ return -ENOENT;
- if (err)
- goto out;
+ err = ulp_ops->init(sk);
+ if (err) {
+ module_put(ulp_ops->owner);
+ return err;
+ }
icsk->icsk_ulp_ops = ulp_ops;
- out:
- return err;
+ return 0;
}
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index b057653ceca9..38e795e0c4bf 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -802,7 +802,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4)
if (is_udplite) /* UDP-Lite */
csum = udplite_csum(skb);
- else if (sk->sk_no_check_tx) { /* UDP csum disabled */
+ else if (sk->sk_no_check_tx && !skb_is_gso(skb)) { /* UDP csum off */
skb->ip_summed = CHECKSUM_NONE;
goto send;
@@ -1163,34 +1163,32 @@ out:
return ret;
}
-#if BITS_PER_LONG == 64
+#define UDP_SKB_IS_STATELESS 0x80000000
+
static void udp_set_dev_scratch(struct sk_buff *skb)
{
- struct udp_dev_scratch *scratch;
+ struct udp_dev_scratch *scratch = udp_skb_scratch(skb);
BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long));
- scratch = (struct udp_dev_scratch *)&skb->dev_scratch;
- scratch->truesize = skb->truesize;
+ scratch->_tsize_state = skb->truesize;
+#if BITS_PER_LONG == 64
scratch->len = skb->len;
scratch->csum_unnecessary = !!skb_csum_unnecessary(skb);
scratch->is_linear = !skb_is_nonlinear(skb);
+#endif
+ if (likely(!skb->_skb_refdst && !skb_sec_path(skb)))
+ scratch->_tsize_state |= UDP_SKB_IS_STATELESS;
}
static int udp_skb_truesize(struct sk_buff *skb)
{
- return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize;
-}
-#else
-static void udp_set_dev_scratch(struct sk_buff *skb)
-{
- skb->dev_scratch = skb->truesize;
+ return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS;
}
-static int udp_skb_truesize(struct sk_buff *skb)
+static bool udp_skb_has_head_state(struct sk_buff *skb)
{
- return skb->dev_scratch;
+ return !(udp_skb_scratch(skb)->_tsize_state & UDP_SKB_IS_STATELESS);
}
-#endif
/* fully reclaim rmem/fwd memory allocated for skb */
static void udp_rmem_release(struct sock *sk, int size, int partial,
@@ -1388,10 +1386,10 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len)
unlock_sock_fast(sk, slow);
}
- /* we cleared the head states previously only if the skb lacks any IP
- * options, see __udp_queue_rcv_skb().
+ /* In the more common cases we cleared the head states previously,
+ * see __udp_queue_rcv_skb().
*/
- if (unlikely(IPCB(skb)->opt.optlen > 0))
+ if (unlikely(udp_skb_has_head_state(skb)))
skb_release_head_state(skb);
consume_stateless_skb(skb);
}
@@ -1576,7 +1574,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock,
return ip_recv_error(sk, msg, len, addr_len);
try_again:
- peeking = off = sk_peek_offset(sk, flags);
+ peeking = flags & MSG_PEEK;
+ off = sk_peek_offset(sk, flags);
skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
if (!skb)
return err;
@@ -1784,11 +1783,11 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
sk_mark_napi_id_once(sk, skb);
}
- /* At recvmsg() time we need skb->dst to process IP options-related
- * cmsg, elsewhere can we clear all pending head states while they are
- * hot in the cache
+ /* At recvmsg() time we may access skb->dst or skb->sp depending on
+ * the IP options and the cmsg flags, elsewhere can we clear all
+ * pending head states while they are hot in the cache
*/
- if (likely(IPCB(skb)->opt.optlen == 0))
+ if (likely(IPCB(skb)->opt.optlen == 0 && !skb_sec_path(skb)))
skb_release_head_state(skb);
rc = __udp_enqueue_schedule_skb(sk, skb);
@@ -1811,8 +1810,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
static struct static_key udp_encap_needed __read_mostly;
void udp_encap_enable(void)
{
- if (!static_key_enabled(&udp_encap_needed))
- static_key_slow_inc(&udp_encap_needed);
+ static_key_enable(&udp_encap_needed);
}
EXPORT_SYMBOL(udp_encap_enable);
@@ -1930,15 +1928,18 @@ drop:
/* For TCP sockets, sk_rx_dst is protected by socket lock
* For UDP, we use xchg() to guard against concurrent changes.
*/
-static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
{
struct dst_entry *old;
if (dst_hold_safe(dst)) {
old = xchg(&sk->sk_rx_dst, dst);
dst_release(old);
+ return old != dst;
}
+ return false;
}
+EXPORT_SYMBOL(udp_sk_rx_dst_set);
/*
* Multicasts and broadcasts go to each listener.
diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
index 781250151d40..0932c85b42af 100644
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -235,7 +235,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb,
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
- skb->ip_summed = CHECKSUM_NONE;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
/* If there is no outer header we can fake a checksum offload
* due to the fact that we have already done the checksum in
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 3c46e9513a31..936e9ab4dda5 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -5556,7 +5556,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
* our DAD process, so we don't need
* to do it again
*/
- if (!(ifp->rt->rt6i_node))
+ if (!rcu_access_pointer(ifp->rt->rt6i_node))
ip6_ins_rt(ifp->rt);
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 9ed35473dcb5..ab64f367d11c 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -226,7 +226,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
int tailen = esp->tailen;
if (!skb_cloned(skb)) {
- if (tailen <= skb_availroom(skb)) {
+ if (tailen <= skb_tailroom(skb)) {
nfrags = 1;
trailer = skb;
tail = skb_tail_pointer(trailer);
@@ -260,8 +260,6 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
kunmap_atomic(vaddr);
- spin_unlock_bh(&x->lock);
-
nfrags = skb_shinfo(skb)->nr_frags;
__skb_fill_page_desc(skb, nfrags, page, pfrag->offset,
@@ -269,6 +267,9 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
skb_shinfo(skb)->nr_frags = ++nfrags;
pfrag->offset = pfrag->offset + allocsize;
+
+ spin_unlock_bh(&x->lock);
+
nfrags++;
skb->len += tailen;
@@ -345,7 +346,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
(unsigned char *)esph - skb->data,
assoclen + ivlen + esp->clen + alen);
if (unlikely(err < 0))
- goto error;
+ goto error_free;
if (!esp->inplace) {
int allocsize;
@@ -356,7 +357,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
spin_lock_bh(&x->lock);
if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) {
spin_unlock_bh(&x->lock);
- goto error;
+ goto error_free;
}
skb_shinfo(skb)->nr_frags = 1;
@@ -373,7 +374,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
(unsigned char *)esph - skb->data,
assoclen + ivlen + esp->clen + alen);
if (unlikely(err < 0))
- goto error;
+ goto error_free;
}
if ((x->props.flags & XFRM_STATE_ESN))
@@ -406,8 +407,9 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
if (sg != dsg)
esp_ssg_unref(x, tmp);
- kfree(tmp);
+error_free:
+ kfree(tmp);
error:
return err;
}
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index f02f131f6435..1cf437f75b0b 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -286,7 +286,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features
esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32));
err = esp6_output_tail(x, skb, &esp);
- if (err < 0)
+ if (err)
return err;
secpath_reset(skb);
diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c
index 4996d734f1d2..3cec529c6113 100644
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -756,6 +756,7 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff)
if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr)))
goto drop;
+ IP6CB(skb)->flags |= IP6SKB_JUMBOGRAM;
return true;
drop:
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index ebb299cf72b7..e1c85bb4eac0 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -148,11 +148,23 @@ static struct fib6_node *node_alloc(void)
return fn;
}
-static void node_free(struct fib6_node *fn)
+static void node_free_immediate(struct fib6_node *fn)
{
kmem_cache_free(fib6_node_kmem, fn);
}
+static void node_free_rcu(struct rcu_head *head)
+{
+ struct fib6_node *fn = container_of(head, struct fib6_node, rcu);
+
+ kmem_cache_free(fib6_node_kmem, fn);
+}
+
+static void node_free(struct fib6_node *fn)
+{
+ call_rcu(&fn->rcu, node_free_rcu);
+}
+
static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt)
{
int cpu;
@@ -601,9 +613,9 @@ insert_above:
if (!in || !ln) {
if (in)
- node_free(in);
+ node_free_immediate(in);
if (ln)
- node_free(ln);
+ node_free_immediate(ln);
return ERR_PTR(-ENOMEM);
}
@@ -877,7 +889,7 @@ add:
rt->dst.rt6_next = iter;
*ins = rt;
- rt->rt6i_node = fn;
+ rcu_assign_pointer(rt->rt6i_node, fn);
atomic_inc(&rt->rt6i_ref);
if (!info->skip_notify)
inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags);
@@ -903,7 +915,7 @@ add:
return err;
*ins = rt;
- rt->rt6i_node = fn;
+ rcu_assign_pointer(rt->rt6i_node, fn);
rt->dst.rt6_next = iter->dst.rt6_next;
atomic_inc(&rt->rt6i_ref);
if (!info->skip_notify)
@@ -914,6 +926,8 @@ add:
}
nsiblings = iter->rt6i_nsiblings;
fib6_purge_rt(iter, fn, info->nl_net);
+ if (fn->rr_ptr == iter)
+ fn->rr_ptr = NULL;
rt6_release(iter);
if (nsiblings) {
@@ -926,6 +940,8 @@ add:
if (rt6_qualify_for_ecmp(iter)) {
*ins = iter->dst.rt6_next;
fib6_purge_rt(iter, fn, info->nl_net);
+ if (fn->rr_ptr == iter)
+ fn->rr_ptr = NULL;
rt6_release(iter);
nsiblings--;
} else {
@@ -1014,7 +1030,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
/* Create subtree root node */
sfn = node_alloc();
if (!sfn)
- goto st_failure;
+ goto failure;
sfn->leaf = info->nl_net->ipv6.ip6_null_entry;
atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref);
@@ -1031,12 +1047,12 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (IS_ERR(sn)) {
/* If it is failed, discard just allocated
- root, and then (in st_failure) stale node
+ root, and then (in failure) stale node
in main tree.
*/
- node_free(sfn);
+ node_free_immediate(sfn);
err = PTR_ERR(sn);
- goto st_failure;
+ goto failure;
}
/* Now link new subtree to main tree */
@@ -1051,7 +1067,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt,
if (IS_ERR(sn)) {
err = PTR_ERR(sn);
- goto st_failure;
+ goto failure;
}
}
@@ -1092,18 +1108,17 @@ out:
atomic_inc(&pn->leaf->rt6i_ref);
}
#endif
- /* Always release dst as dst->__refcnt is guaranteed
- * to be taken before entering this function
- */
- dst_release_immediate(&rt->dst);
+ goto failure;
}
return err;
-#ifdef CONFIG_IPV6_SUBTREES
- /* Subtree creation failed, probably main tree node
- is orphan. If it is, shoot it.
+failure:
+ /* fn->leaf could be NULL if fn is an intermediate node and we
+ * failed to add the new route to it in both subtree creation
+ * failure and fib6_add_rt2node() failure case.
+ * In both cases, fib6_repair_tree() should be called to fix
+ * fn->leaf.
*/
-st_failure:
if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT)))
fib6_repair_tree(info->nl_net, fn);
/* Always release dst as dst->__refcnt is guaranteed
@@ -1111,7 +1126,6 @@ st_failure:
*/
dst_release_immediate(&rt->dst);
return err;
-#endif
}
/*
@@ -1466,8 +1480,9 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
int fib6_del(struct rt6_info *rt, struct nl_info *info)
{
+ struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node,
+ lockdep_is_held(&rt->rt6i_table->tb6_lock));
struct net *net = info->nl_net;
- struct fib6_node *fn = rt->rt6i_node;
struct rt6_info **rtp;
#if RT6_DEBUG >= 2
@@ -1656,7 +1671,9 @@ static int fib6_clean_node(struct fib6_walker *w)
if (res) {
#if RT6_DEBUG >= 2
pr_debug("%s: del failed: rt=%p@%p err=%d\n",
- __func__, rt, rt->rt6i_node, res);
+ __func__, rt,
+ rcu_access_pointer(rt->rt6i_node),
+ res);
#endif
continue;
}
@@ -1778,8 +1795,10 @@ static int fib6_age(struct rt6_info *rt, void *arg)
}
gc_args->more++;
} else if (rt->rt6i_flags & RTF_CACHE) {
+ if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout))
+ rt->dst.obsolete = DST_OBSOLETE_KILL;
if (atomic_read(&rt->dst.__refcnt) == 1 &&
- time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) {
+ rt->dst.obsolete == DST_OBSOLETE_KILL) {
RT6_TRACE("aging clone %p\n", rt);
return -1;
} else if (rt->rt6i_flags & RTF_GATEWAY) {
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 1422d6c08377..2dfe50d8d609 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -673,8 +673,6 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
*prevhdr = NEXTHDR_FRAGMENT;
tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
if (!tmp_hdr) {
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_FRAGFAILS);
err = -ENOMEM;
goto fail;
}
@@ -789,8 +787,6 @@ slow_path:
frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
hroom + troom, GFP_ATOMIC);
if (!frag) {
- IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
- IPSTATS_MIB_FRAGFAILS);
err = -ENOMEM;
goto fail;
}
@@ -1385,11 +1381,12 @@ emsgsize:
*/
cork->length += length;
- if ((((length + (skb ? skb->len : headersize)) > mtu) ||
- (skb && skb_is_gso(skb))) &&
+ if ((skb && skb_is_gso(skb)) ||
+ (((length + (skb ? skb->len : headersize)) > mtu) &&
+ (skb_queue_len(queue) <= 1) &&
(sk->sk_protocol == IPPROTO_UDP) &&
(rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) &&
- (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) {
+ (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) {
err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
hh_len, fragheaderlen, exthdrlen,
transhdrlen, mtu, flags, fl6);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 02d795fe3d7f..a5e466d4e093 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -242,7 +242,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
pktopt = xchg(&np->pktoptions, NULL);
kfree_skb(pktopt);
- sk->sk_destruct = inet_sock_destruct;
/*
* ... and add it to the refcnt debug socks count
* in the new family. -acme
diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c
index abb2c307fbe8..a338bbc33cf3 100644
--- a/net/ipv6/output_core.c
+++ b/net/ipv6/output_core.c
@@ -86,7 +86,6 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
while (offset <= packet_len) {
struct ipv6_opt_hdr *exthdr;
- unsigned int len;
switch (**nexthdr) {
@@ -112,10 +111,9 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
offset);
- len = ipv6_optlen(exthdr);
- if (len + offset >= IPV6_MAXPLEN)
+ offset += ipv6_optlen(exthdr);
+ if (offset > IPV6_MAXPLEN)
return -EINVAL;
- offset += len;
*nexthdr = &exthdr->nexthdr;
}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 4d30c96a819d..2d0e7798c793 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -417,14 +417,11 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev,
struct net_device *loopback_dev =
dev_net(dev)->loopback_dev;
- if (dev != loopback_dev) {
- if (idev && idev->dev == dev) {
- struct inet6_dev *loopback_idev =
- in6_dev_get(loopback_dev);
- if (loopback_idev) {
- rt->rt6i_idev = loopback_idev;
- in6_dev_put(idev);
- }
+ if (idev && idev->dev != loopback_dev) {
+ struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev);
+ if (loopback_idev) {
+ rt->rt6i_idev = loopback_idev;
+ in6_dev_put(idev);
}
}
}
@@ -443,7 +440,8 @@ static bool rt6_check_expired(const struct rt6_info *rt)
if (time_after(jiffies, rt->dst.expires))
return true;
} else if (rt->dst.from) {
- return rt6_check_expired((struct rt6_info *) rt->dst.from);
+ return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK ||
+ rt6_check_expired((struct rt6_info *)rt->dst.from);
}
return false;
}
@@ -1292,7 +1290,9 @@ static void rt6_dst_from_metrics_check(struct rt6_info *rt)
static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie)
{
- if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie))
+ u32 rt_cookie = 0;
+
+ if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie)
return NULL;
if (rt6_check_expired(rt))
@@ -1360,8 +1360,14 @@ static void ip6_link_failure(struct sk_buff *skb)
if (rt->rt6i_flags & RTF_CACHE) {
if (dst_hold_safe(&rt->dst))
ip6_del_rt(rt);
- } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) {
- rt->rt6i_node->fn_sernum = -1;
+ } else {
+ struct fib6_node *fn;
+
+ rcu_read_lock();
+ fn = rcu_dereference(rt->rt6i_node);
+ if (fn && (rt->rt6i_flags & RTF_DEFAULT))
+ fn->fn_sernum = -1;
+ rcu_read_unlock();
}
}
}
@@ -1378,7 +1384,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu)
static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt)
{
return !(rt->rt6i_flags & RTF_CACHE) &&
- (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node);
+ (rt->rt6i_flags & RTF_PCPU ||
+ rcu_access_pointer(rt->rt6i_node));
}
static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk,
@@ -2351,6 +2358,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu
if (on_link)
nrt->rt6i_flags &= ~RTF_GATEWAY;
+ nrt->rt6i_protocol = RTPROT_REDIRECT;
nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key;
if (ip6_ins_rt(nrt))
@@ -2461,6 +2469,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net,
.fc_dst_len = prefixlen,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO |
RTF_UP | RTF_PREF(pref),
+ .fc_protocol = RTPROT_RA,
.fc_nlinfo.portid = 0,
.fc_nlinfo.nlh = NULL,
.fc_nlinfo.nl_net = net,
@@ -2513,6 +2522,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr,
.fc_ifindex = dev->ifindex,
.fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT |
RTF_UP | RTF_EXPIRES | RTF_PREF(pref),
+ .fc_protocol = RTPROT_RA,
.fc_nlinfo.portid = 0,
.fc_nlinfo.nlh = NULL,
.fc_nlinfo.nl_net = dev_net(dev),
@@ -3424,14 +3434,6 @@ static int rt6_fill_node(struct net *net,
rtm->rtm_flags = 0;
rtm->rtm_scope = RT_SCOPE_UNIVERSE;
rtm->rtm_protocol = rt->rt6i_protocol;
- if (rt->rt6i_flags & RTF_DYNAMIC)
- rtm->rtm_protocol = RTPROT_REDIRECT;
- else if (rt->rt6i_flags & RTF_ADDRCONF) {
- if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO))
- rtm->rtm_protocol = RTPROT_RA;
- else
- rtm->rtm_protocol = RTPROT_KERNEL;
- }
if (rt->rt6i_flags & RTF_CACHE)
rtm->rtm_flags |= RTM_F_CLONED;
@@ -3729,10 +3731,10 @@ static int ip6_route_dev_notify(struct notifier_block *this,
/* NETDEV_UNREGISTER could be fired for multiple times by
* netdev_wait_allrefs(). Make sure we only call this once.
*/
- in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev);
+ in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev);
#ifdef CONFIG_IPV6_MULTIPLE_TABLES
- in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev);
- in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev);
+ in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev);
+ in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev);
#endif
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 2521690d62d6..206210125fd7 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1456,6 +1456,8 @@ process:
}
sock_hold(sk);
refcounted = true;
+ if (tcp_filter(sk, skb))
+ goto discard_and_relse;
nsk = tcp_check_req(sk, skb, req, false);
if (!nsk) {
reqsk_put(req);
@@ -1464,8 +1466,6 @@ process:
if (nsk == sk) {
reqsk_put(req);
tcp_v6_restore_cb(skb);
- } else if (tcp_filter(sk, skb)) {
- goto discard_and_relse;
} else if (tcp_child_process(sk, nsk, skb)) {
tcp_v6_send_reset(nsk, skb);
goto discard_and_relse;
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
index 4a3e65626e8b..56030d45823a 100644
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -291,11 +291,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb,
struct udp_table *udptable)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
- struct sock *sk;
- sk = skb_steal_sock(skb);
- if (unlikely(sk))
- return sk;
return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport,
&iph->daddr, dport, inet6_iif(skb),
udptable, skb);
@@ -332,6 +328,15 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be
EXPORT_SYMBOL_GPL(udp6_lib_lookup);
#endif
+/* do not use the scratch area len for jumbogram: their length execeeds the
+ * scratch area space; note that the IP6CB flags is still in the first
+ * cacheline, so checking for jumbograms is cheap
+ */
+static int udp6_skb_len(struct sk_buff *skb)
+{
+ return unlikely(inet6_is_jumbogram(skb)) ? skb->len : udp_skb_len(skb);
+}
+
/*
* This should be easy, if there is something there we
* return it, otherwise we block.
@@ -357,12 +362,13 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
return ipv6_recv_rxpmtu(sk, msg, len, addr_len);
try_again:
- peeking = off = sk_peek_offset(sk, flags);
+ peeking = flags & MSG_PEEK;
+ off = sk_peek_offset(sk, flags);
skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err);
if (!skb)
return err;
- ulen = udp_skb_len(skb);
+ ulen = udp6_skb_len(skb);
copied = len;
if (copied > ulen - off)
copied = ulen - off;
@@ -569,8 +575,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb,
static struct static_key udpv6_encap_needed __read_mostly;
void udpv6_encap_enable(void)
{
- if (!static_key_enabled(&udpv6_encap_needed))
- static_key_slow_inc(&udpv6_encap_needed);
+ static_key_enable(&udpv6_encap_needed);
}
EXPORT_SYMBOL(udpv6_encap_enable);
@@ -762,6 +767,15 @@ start_lookup:
return 0;
}
+static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst)
+{
+ if (udp_sk_rx_dst_set(sk, dst)) {
+ const struct rt6_info *rt = (const struct rt6_info *)dst;
+
+ inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
+ }
+}
+
int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
int proto)
{
@@ -804,6 +818,24 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
if (udp6_csum_init(skb, uh, proto))
goto csum_error;
+ /* Check if the socket is already available, e.g. due to early demux */
+ sk = skb_steal_sock(skb);
+ if (sk) {
+ struct dst_entry *dst = skb_dst(skb);
+ int ret;
+
+ if (unlikely(sk->sk_rx_dst != dst))
+ udp6_sk_rx_dst_set(sk, dst);
+
+ ret = udpv6_queue_rcv_skb(sk, skb);
+ sock_put(sk);
+
+ /* a return value > 0 means to resubmit the input */
+ if (ret > 0)
+ return ret;
+ return 0;
+ }
+
/*
* Multicast receive code
*/
@@ -812,11 +844,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
saddr, daddr, udptable, proto);
/* Unicast */
-
- /*
- * check socket cache ... must talk to Alan about his plans
- * for sock caches... i'll skip this for now.
- */
sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable);
if (sk) {
int ret;
diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c
index a2267f80febb..e7d378c032cb 100644
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -72,7 +72,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb,
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
- skb->ip_summed = CHECKSUM_NONE;
+ skb->ip_summed = CHECKSUM_UNNECESSARY;
/* If there is no outer header we can fake a checksum offload
* due to the fact that we have already done the checksum in
diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c
index 2e6990f8b80b..23fa7c8b09a5 100644
--- a/net/irda/af_irda.c
+++ b/net/irda/af_irda.c
@@ -2213,7 +2213,7 @@ static int irda_getsockopt(struct socket *sock, int level, int optname,
{
struct sock *sk = sock->sk;
struct irda_sock *self = irda_sk(sk);
- struct irda_device_list list;
+ struct irda_device_list list = { 0 };
struct irda_device_info *discoveries;
struct irda_ias_set * ias_opt; /* IAS get/query params */
struct ias_object * ias_obj; /* Object in IAS */
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index da49191f7ad0..4abf6287d7e1 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -1383,6 +1383,10 @@ static int kcm_attach(struct socket *sock, struct socket *csock,
if (!csk)
return -EINVAL;
+ /* We must prevent loops or risk deadlock ! */
+ if (csk->sk_family == PF_KCM)
+ return -EOPNOTSUPP;
+
psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL);
if (!psock)
return -ENOMEM;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index ca9d3ae665e7..98f4d8211b9a 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -228,7 +228,7 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2,
#define BROADCAST_ONE 1
#define BROADCAST_REGISTERED 2
#define BROADCAST_PROMISC_ONLY 4
-static int pfkey_broadcast(struct sk_buff *skb,
+static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation,
int broadcast_flags, struct sock *one_sk,
struct net *net)
{
@@ -278,7 +278,7 @@ static int pfkey_broadcast(struct sk_buff *skb,
rcu_read_unlock();
if (one_sk != NULL)
- err = pfkey_broadcast_one(skb, &skb2, GFP_KERNEL, one_sk);
+ err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk);
kfree_skb(skb2);
kfree_skb(skb);
@@ -311,7 +311,7 @@ static int pfkey_do_dump(struct pfkey_sock *pfk)
hdr = (struct sadb_msg *) pfk->dump.skb->data;
hdr->sadb_msg_seq = 0;
hdr->sadb_msg_errno = rc;
- pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
+ pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
&pfk->sk, sock_net(&pfk->sk));
pfk->dump.skb = NULL;
}
@@ -355,7 +355,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk)
hdr->sadb_msg_len = (sizeof(struct sadb_msg) /
sizeof(uint64_t));
- pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk));
+ pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk));
return 0;
}
@@ -1389,7 +1389,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_
xfrm_state_put(x);
- pfkey_broadcast(resp_skb, BROADCAST_ONE, sk, net);
+ pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net);
return 0;
}
@@ -1476,7 +1476,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c)
hdr->sadb_msg_seq = c->seq;
hdr->sadb_msg_pid = c->portid;
- pfkey_broadcast(skb, BROADCAST_ALL, NULL, xs_net(x));
+ pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x));
return 0;
}
@@ -1589,7 +1589,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg
out_hdr->sadb_msg_reserved = 0;
out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
- pfkey_broadcast(out_skb, BROADCAST_ONE, sk, sock_net(sk));
+ pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk));
return 0;
}
@@ -1694,8 +1694,8 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad
return -ENOBUFS;
}
- pfkey_broadcast(supp_skb, BROADCAST_REGISTERED, sk, sock_net(sk));
-
+ pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk,
+ sock_net(sk));
return 0;
}
@@ -1712,7 +1712,8 @@ static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr)
hdr->sadb_msg_errno = (uint8_t) 0;
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
- return pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk));
+ return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk,
+ sock_net(sk));
}
static int key_notify_sa_flush(const struct km_event *c)
@@ -1733,7 +1734,7 @@ static int key_notify_sa_flush(const struct km_event *c)
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
hdr->sadb_msg_reserved = 0;
- pfkey_broadcast(skb, BROADCAST_ALL, NULL, c->net);
+ pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
return 0;
}
@@ -1790,7 +1791,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr)
out_hdr->sadb_msg_pid = pfk->dump.msg_portid;
if (pfk->dump.skb)
- pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
+ pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
&pfk->sk, sock_net(&pfk->sk));
pfk->dump.skb = out_skb;
@@ -1878,7 +1879,7 @@ static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb
new_hdr->sadb_msg_errno = 0;
}
- pfkey_broadcast(skb, BROADCAST_ALL, NULL, sock_net(sk));
+ pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk));
return 0;
}
@@ -2206,7 +2207,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev
out_hdr->sadb_msg_errno = 0;
out_hdr->sadb_msg_seq = c->seq;
out_hdr->sadb_msg_pid = c->portid;
- pfkey_broadcast(out_skb, BROADCAST_ALL, NULL, xp_net(xp));
+ pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp));
return 0;
}
@@ -2426,7 +2427,7 @@ static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struc
out_hdr->sadb_msg_errno = 0;
out_hdr->sadb_msg_seq = hdr->sadb_msg_seq;
out_hdr->sadb_msg_pid = hdr->sadb_msg_pid;
- pfkey_broadcast(out_skb, BROADCAST_ONE, sk, xp_net(xp));
+ pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp));
err = 0;
out:
@@ -2682,7 +2683,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr)
out_hdr->sadb_msg_pid = pfk->dump.msg_portid;
if (pfk->dump.skb)
- pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE,
+ pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE,
&pfk->sk, sock_net(&pfk->sk));
pfk->dump.skb = out_skb;
@@ -2739,7 +2740,7 @@ static int key_notify_policy_flush(const struct km_event *c)
hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC;
hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t));
hdr->sadb_msg_reserved = 0;
- pfkey_broadcast(skb_out, BROADCAST_ALL, NULL, c->net);
+ pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net);
return 0;
}
@@ -2803,7 +2804,7 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb
void *ext_hdrs[SADB_EXT_MAX];
int err;
- pfkey_broadcast(skb_clone(skb, GFP_KERNEL),
+ pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL,
BROADCAST_PROMISC_ONLY, NULL, sock_net(sk));
memset(ext_hdrs, 0, sizeof(ext_hdrs));
@@ -3024,7 +3025,8 @@ static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c)
out_hdr->sadb_msg_seq = 0;
out_hdr->sadb_msg_pid = 0;
- pfkey_broadcast(out_skb, BROADCAST_REGISTERED, NULL, xs_net(x));
+ pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL,
+ xs_net(x));
return 0;
}
@@ -3212,7 +3214,8 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct
xfrm_ctx->ctx_len);
}
- return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x));
+ return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL,
+ xs_net(x));
}
static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt,
@@ -3408,7 +3411,8 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr,
n_port->sadb_x_nat_t_port_port = sport;
n_port->sadb_x_nat_t_port_reserved = 0;
- return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x));
+ return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL,
+ xs_net(x));
}
#ifdef CONFIG_NET_KEY_MIGRATE
@@ -3599,7 +3603,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
}
/* broadcast migrate message to sockets */
- pfkey_broadcast(skb, BROADCAST_ALL, NULL, &init_net);
+ pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net);
return 0;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index b0c2d4ae781d..90165a6874bc 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -113,7 +113,6 @@ struct l2tp_net {
spinlock_t l2tp_session_hlist_lock;
};
-static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel);
static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk)
{
@@ -127,39 +126,6 @@ static inline struct l2tp_net *l2tp_pernet(const struct net *net)
return net_generic(net, l2tp_net_id);
}
-/* Tunnel reference counts. Incremented per session that is added to
- * the tunnel.
- */
-static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel)
-{
- refcount_inc(&tunnel->ref_count);
-}
-
-static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel)
-{
- if (refcount_dec_and_test(&tunnel->ref_count))
- l2tp_tunnel_free(tunnel);
-}
-#ifdef L2TP_REFCNT_DEBUG
-#define l2tp_tunnel_inc_refcount(_t) \
-do { \
- pr_debug("l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", \
- __func__, __LINE__, (_t)->name, \
- refcount_read(&_t->ref_count)); \
- l2tp_tunnel_inc_refcount_1(_t); \
-} while (0)
-#define l2tp_tunnel_dec_refcount(_t) \
-do { \
- pr_debug("l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", \
- __func__, __LINE__, (_t)->name, \
- refcount_read(&_t->ref_count)); \
- l2tp_tunnel_dec_refcount_1(_t); \
-} while (0)
-#else
-#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t)
-#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t)
-#endif
-
/* Session hash global list for L2TPv3.
* The session_id SHOULD be random according to RFC3931, but several
* L2TP implementations use incrementing session_ids. So we do a real
@@ -229,6 +195,27 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id)
return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)];
}
+/* Lookup a tunnel. A new reference is held on the returned tunnel. */
+struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
+{
+ const struct l2tp_net *pn = l2tp_pernet(net);
+ struct l2tp_tunnel *tunnel;
+
+ rcu_read_lock_bh();
+ list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
+ if (tunnel->tunnel_id == tunnel_id) {
+ l2tp_tunnel_inc_refcount(tunnel);
+ rcu_read_unlock_bh();
+
+ return tunnel;
+ }
+ }
+ rcu_read_unlock_bh();
+
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
+
/* Lookup a session. A new reference is held on the returned session.
* Optionally calls session->ref() too if do_ref is true.
*/
@@ -1348,17 +1335,6 @@ static void l2tp_udp_encap_destroy(struct sock *sk)
}
}
-/* Really kill the tunnel.
- * Come here only when all sessions have been cleared from the tunnel.
- */
-static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel)
-{
- BUG_ON(refcount_read(&tunnel->ref_count) != 0);
- BUG_ON(tunnel->sock != NULL);
- l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: free...\n", tunnel->name);
- kfree_rcu(tunnel, rcu);
-}
-
/* Workqueue tunnel deletion function */
static void l2tp_tunnel_del_work(struct work_struct *work)
{
@@ -1844,6 +1820,8 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
l2tp_session_set_header_len(session, tunnel->version);
+ refcount_set(&session->ref_count, 1);
+
err = l2tp_session_add_to_tunnel(tunnel, session);
if (err) {
kfree(session);
@@ -1851,10 +1829,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn
return ERR_PTR(err);
}
- /* Bump the reference count. The session context is deleted
- * only when this drops to zero.
- */
- refcount_set(&session->ref_count, 1);
l2tp_tunnel_inc_refcount(tunnel);
/* Ensure tunnel socket isn't deleted */
diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h
index cdb6e3327f74..9101297f27ad 100644
--- a/net/l2tp/l2tp_core.h
+++ b/net/l2tp/l2tp_core.h
@@ -231,6 +231,8 @@ out:
return tunnel;
}
+struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id);
+
struct l2tp_session *l2tp_session_get(const struct net *net,
struct l2tp_tunnel *tunnel,
u32 session_id, bool do_ref);
@@ -269,6 +271,17 @@ int l2tp_nl_register_ops(enum l2tp_pwtype pw_type,
void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type);
int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg);
+static inline void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel)
+{
+ refcount_inc(&tunnel->ref_count);
+}
+
+static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel)
+{
+ if (refcount_dec_and_test(&tunnel->ref_count))
+ kfree_rcu(tunnel, rcu);
+}
+
/* Session reference counts. Incremented when code obtains a reference
* to a session.
*/
diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c
index 12cfcd0ca807..57427d430f10 100644
--- a/net/l2tp/l2tp_netlink.c
+++ b/net/l2tp/l2tp_netlink.c
@@ -65,10 +65,12 @@ static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info,
(info->attrs[L2TP_ATTR_CONN_ID])) {
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel)
+ tunnel = l2tp_tunnel_get(net, tunnel_id);
+ if (tunnel) {
session = l2tp_session_get(net, tunnel, session_id,
do_ref);
+ l2tp_tunnel_dec_refcount(tunnel);
+ }
}
return session;
@@ -271,8 +273,8 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
}
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel == NULL) {
+ tunnel = l2tp_tunnel_get(net, tunnel_id);
+ if (!tunnel) {
ret = -ENODEV;
goto out;
}
@@ -282,6 +284,8 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info
(void) l2tp_tunnel_delete(tunnel);
+ l2tp_tunnel_dec_refcount(tunnel);
+
out:
return ret;
}
@@ -299,8 +303,8 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info
}
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel == NULL) {
+ tunnel = l2tp_tunnel_get(net, tunnel_id);
+ if (!tunnel) {
ret = -ENODEV;
goto out;
}
@@ -311,6 +315,8 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info
ret = l2tp_tunnel_notify(&l2tp_nl_family, info,
tunnel, L2TP_CMD_TUNNEL_MODIFY);
+ l2tp_tunnel_dec_refcount(tunnel);
+
out:
return ret;
}
@@ -438,34 +444,37 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info)
if (!info->attrs[L2TP_ATTR_CONN_ID]) {
ret = -EINVAL;
- goto out;
+ goto err;
}
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
- if (tunnel == NULL) {
- ret = -ENODEV;
- goto out;
- }
-
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg) {
ret = -ENOMEM;
- goto out;
+ goto err;
+ }
+
+ tunnel = l2tp_tunnel_get(net, tunnel_id);
+ if (!tunnel) {
+ ret = -ENODEV;
+ goto err_nlmsg;
}
ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq,
NLM_F_ACK, tunnel, L2TP_CMD_TUNNEL_GET);
if (ret < 0)
- goto err_out;
+ goto err_nlmsg_tunnel;
+
+ l2tp_tunnel_dec_refcount(tunnel);
return genlmsg_unicast(net, msg, info->snd_portid);
-err_out:
+err_nlmsg_tunnel:
+ l2tp_tunnel_dec_refcount(tunnel);
+err_nlmsg:
nlmsg_free(msg);
-
-out:
+err:
return ret;
}
@@ -509,8 +518,9 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
ret = -EINVAL;
goto out;
}
+
tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]);
- tunnel = l2tp_tunnel_find(net, tunnel_id);
+ tunnel = l2tp_tunnel_get(net, tunnel_id);
if (!tunnel) {
ret = -ENODEV;
goto out;
@@ -518,24 +528,24 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
if (!info->attrs[L2TP_ATTR_SESSION_ID]) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]);
if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
peer_session_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_SESSION_ID]);
if (!info->attrs[L2TP_ATTR_PW_TYPE]) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
cfg.pw_type = nla_get_u16(info->attrs[L2TP_ATTR_PW_TYPE]);
if (cfg.pw_type >= __L2TP_PWTYPE_MAX) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
if (tunnel->version > 2) {
@@ -557,7 +567,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]);
if (len > 8) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
cfg.cookie_len = len;
memcpy(&cfg.cookie[0], nla_data(info->attrs[L2TP_ATTR_COOKIE]), len);
@@ -566,7 +576,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
u16 len = nla_len(info->attrs[L2TP_ATTR_PEER_COOKIE]);
if (len > 8) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
cfg.peer_cookie_len = len;
memcpy(&cfg.peer_cookie[0], nla_data(info->attrs[L2TP_ATTR_PEER_COOKIE]), len);
@@ -609,7 +619,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) ||
(l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) {
ret = -EPROTONOSUPPORT;
- goto out;
+ goto out_tunnel;
}
/* Check that pseudowire-specific params are present */
@@ -619,7 +629,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
case L2TP_PWTYPE_ETH_VLAN:
if (!info->attrs[L2TP_ATTR_VLAN_ID]) {
ret = -EINVAL;
- goto out;
+ goto out_tunnel;
}
break;
case L2TP_PWTYPE_ETH:
@@ -647,6 +657,8 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf
}
}
+out_tunnel:
+ l2tp_tunnel_dec_refcount(tunnel);
out:
return ret;
}
diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 8708cbe8af5b..2b36eff5d97e 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -7,7 +7,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2007-2010, Intel Corporation
- * Copyright(c) 2015 Intel Deutschland GmbH
+ * Copyright(c) 2015-2017 Intel Deutschland GmbH
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License version 2 as
@@ -466,3 +466,23 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif,
rcu_read_unlock();
}
EXPORT_SYMBOL(ieee80211_manage_rx_ba_offl);
+
+void ieee80211_rx_ba_timer_expired(struct ieee80211_vif *vif,
+ const u8 *addr, unsigned int tid)
+{
+ struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_local *local = sdata->local;
+ struct sta_info *sta;
+
+ rcu_read_lock();
+ sta = sta_info_get_bss(sdata, addr);
+ if (!sta)
+ goto unlock;
+
+ set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired);
+ ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work);
+
+ unlock:
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL(ieee80211_rx_ba_timer_expired);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 9979f46c81dc..51390febd5e3 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -96,19 +96,26 @@ static struct conntrack_gc_work conntrack_gc_work;
void nf_conntrack_lock(spinlock_t *lock) __acquires(lock)
{
+ /* 1) Acquire the lock */
spin_lock(lock);
- while (unlikely(nf_conntrack_locks_all)) {
- spin_unlock(lock);
- /*
- * Order the 'nf_conntrack_locks_all' load vs. the
- * spin_unlock_wait() loads below, to ensure
- * that 'nf_conntrack_locks_all_lock' is indeed held:
- */
- smp_rmb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
- spin_unlock_wait(&nf_conntrack_locks_all_lock);
- spin_lock(lock);
- }
+ /* 2) read nf_conntrack_locks_all, with ACQUIRE semantics
+ * It pairs with the smp_store_release() in nf_conntrack_all_unlock()
+ */
+ if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false))
+ return;
+
+ /* fast path failed, unlock */
+ spin_unlock(lock);
+
+ /* Slow path 1) get global lock */
+ spin_lock(&nf_conntrack_locks_all_lock);
+
+ /* Slow path 2) get the lock we want */
+ spin_lock(lock);
+
+ /* Slow path 3) release the global lock */
+ spin_unlock(&nf_conntrack_locks_all_lock);
}
EXPORT_SYMBOL_GPL(nf_conntrack_lock);
@@ -149,28 +156,27 @@ static void nf_conntrack_all_lock(void)
int i;
spin_lock(&nf_conntrack_locks_all_lock);
- nf_conntrack_locks_all = true;
- /*
- * Order the above store of 'nf_conntrack_locks_all' against
- * the spin_unlock_wait() loads below, such that if
- * nf_conntrack_lock() observes 'nf_conntrack_locks_all'
- * we must observe nf_conntrack_locks[] held:
- */
- smp_mb(); /* spin_lock(&nf_conntrack_locks_all_lock) */
+ nf_conntrack_locks_all = true;
for (i = 0; i < CONNTRACK_LOCKS; i++) {
- spin_unlock_wait(&nf_conntrack_locks[i]);
+ spin_lock(&nf_conntrack_locks[i]);
+
+ /* This spin_unlock provides the "release" to ensure that
+ * nf_conntrack_locks_all==true is visible to everyone that
+ * acquired spin_lock(&nf_conntrack_locks[]).
+ */
+ spin_unlock(&nf_conntrack_locks[i]);
}
}
static void nf_conntrack_all_unlock(void)
{
- /*
- * All prior stores must be complete before we clear
+ /* All prior stores must be complete before we clear
* 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock()
* might observe the false value but not the entire
- * critical section:
+ * critical section.
+ * It pairs with the smp_load_acquire() in nf_conntrack_lock()
*/
smp_store_release(&nf_conntrack_locks_all, false);
spin_unlock(&nf_conntrack_locks_all_lock);
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index eb541786ccb7..b1d3740ae36a 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -441,7 +441,7 @@ nf_nat_setup_info(struct nf_conn *ct,
else
ct->status |= IPS_DST_NAT;
- if (nfct_help(ct))
+ if (nfct_help(ct) && !nfct_seqadj(ct))
if (!nfct_seqadj_ext_add(ct))
return NF_DROP;
}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index f5a7cb68694e..b89f4f65b2a0 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -305,7 +305,7 @@ static int nft_target_validate(const struct nft_ctx *ctx,
const struct nf_hook_ops *ops = &basechain->ops[0];
hook_mask = 1 << ops->hooknum;
- if (!(hook_mask & target->hooks))
+ if (target->hooks && !(hook_mask & target->hooks))
return -EINVAL;
ret = nft_compat_chain_validate_dependency(target->table,
@@ -484,7 +484,7 @@ static int nft_match_validate(const struct nft_ctx *ctx,
const struct nf_hook_ops *ops = &basechain->ops[0];
hook_mask = 1 << ops->hooknum;
- if (!(hook_mask & match->hooks))
+ if (match->hooks && !(hook_mask & match->hooks))
return -EINVAL;
ret = nft_compat_chain_validate_dependency(match->table,
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 18dd57a52651..14538b1d4d11 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -65,19 +65,23 @@ static int nft_limit_init(struct nft_limit *limit,
limit->nsecs = unit * NSEC_PER_SEC;
if (limit->rate == 0 || limit->nsecs < unit)
return -EOVERFLOW;
- limit->tokens = limit->tokens_max = limit->nsecs;
-
- if (tb[NFTA_LIMIT_BURST]) {
- u64 rate;
+ if (tb[NFTA_LIMIT_BURST])
limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST]));
+ else
+ limit->burst = 0;
+
+ if (limit->rate + limit->burst < limit->rate)
+ return -EOVERFLOW;
- rate = limit->rate + limit->burst;
- if (rate < limit->rate)
- return -EOVERFLOW;
+ /* The token bucket size limits the number of tokens can be
+ * accumulated. tokens_max specifies the bucket size.
+ * tokens_max = unit * (rate + burst) / rate.
+ */
+ limit->tokens = div_u64(limit->nsecs * (limit->rate + limit->burst),
+ limit->rate);
+ limit->tokens_max = limit->tokens;
- limit->rate = rate;
- }
if (tb[NFTA_LIMIT_FLAGS]) {
u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS]));
@@ -95,9 +99,8 @@ static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit,
{
u32 flags = limit->invert ? NFT_LIMIT_F_INV : 0;
u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC);
- u64 rate = limit->rate - limit->burst;
- if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate),
+ if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(limit->rate),
NFTA_LIMIT_PAD) ||
nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs),
NFTA_LIMIT_PAD) ||
diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c
index e4610676299b..a54a556fcdb5 100644
--- a/net/openvswitch/actions.c
+++ b/net/openvswitch/actions.c
@@ -1337,6 +1337,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb,
goto out;
}
+ OVS_CB(skb)->acts_origlen = acts->orig_len;
err = do_execute_actions(dp, skb, key,
acts->actions, acts->actions_len);
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index e3c4c6c3fef7..03859e386b47 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -1310,8 +1310,8 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
nla_for_each_nested(a, attr, rem) {
int type = nla_type(a);
- int maxlen = ovs_ct_attr_lens[type].maxlen;
- int minlen = ovs_ct_attr_lens[type].minlen;
+ int maxlen;
+ int minlen;
if (type > OVS_CT_ATTR_MAX) {
OVS_NLERR(log,
@@ -1319,6 +1319,9 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
type, OVS_CT_ATTR_MAX);
return -EINVAL;
}
+
+ maxlen = ovs_ct_attr_lens[type].maxlen;
+ minlen = ovs_ct_attr_lens[type].minlen;
if (nla_len(a) < minlen || nla_len(a) > maxlen) {
OVS_NLERR(log,
"Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)",
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 45fe8c8a884d..6b44fe405282 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -381,7 +381,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb,
}
static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
- unsigned int hdrlen)
+ unsigned int hdrlen, int actions_attrlen)
{
size_t size = NLMSG_ALIGN(sizeof(struct ovs_header))
+ nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */
@@ -398,7 +398,7 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info,
/* OVS_PACKET_ATTR_ACTIONS */
if (upcall_info->actions_len)
- size += nla_total_size(upcall_info->actions_len);
+ size += nla_total_size(actions_attrlen);
/* OVS_PACKET_ATTR_MRU */
if (upcall_info->mru)
@@ -465,7 +465,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb,
else
hlen = skb->len;
- len = upcall_msg_size(upcall_info, hlen - cutlen);
+ len = upcall_msg_size(upcall_info, hlen - cutlen,
+ OVS_CB(skb)->acts_origlen);
user_skb = genlmsg_new(len, GFP_ATOMIC);
if (!user_skb) {
err = -ENOMEM;
diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index 5d8dcd88815f..480600649d0b 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -99,11 +99,13 @@ struct datapath {
* when a packet is received by OVS.
* @mru: The maximum received fragement size; 0 if the packet is not
* fragmented.
+ * @acts_origlen: The netlink size of the flow actions applied to this skb.
* @cutlen: The number of bytes from the packet end to be removed.
*/
struct ovs_skb_cb {
struct vport *input_vport;
u16 mru;
+ u16 acts_origlen;
u32 cutlen;
};
#define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb)
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 008bb34ee324..1c61af9af67d 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -2191,6 +2191,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
struct timespec ts;
__u32 ts_status;
bool is_drop_n_account = false;
+ bool do_vnet = false;
/* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT.
* We may add members to them until current aligned size without forcing
@@ -2241,8 +2242,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
netoff = TPACKET_ALIGN(po->tp_hdrlen +
(maclen < 16 ? 16 : maclen)) +
po->tp_reserve;
- if (po->has_vnet_hdr)
+ if (po->has_vnet_hdr) {
netoff += sizeof(struct virtio_net_hdr);
+ do_vnet = true;
+ }
macoff = netoff - maclen;
}
if (po->tp_version <= TPACKET_V2) {
@@ -2259,8 +2262,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
skb_set_owner_r(copy_skb, sk);
}
snaplen = po->rx_ring.frame_size - macoff;
- if ((int)snaplen < 0)
+ if ((int)snaplen < 0) {
snaplen = 0;
+ do_vnet = false;
+ }
}
} else if (unlikely(macoff + snaplen >
GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) {
@@ -2273,6 +2278,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
if (unlikely((int)snaplen < 0)) {
snaplen = 0;
macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len;
+ do_vnet = false;
}
}
spin_lock(&sk->sk_receive_queue.lock);
@@ -2298,7 +2304,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
}
spin_unlock(&sk->sk_receive_queue.lock);
- if (po->has_vnet_hdr) {
+ if (do_vnet) {
if (virtio_net_hdr_from_skb(skb, h.raw + macoff -
sizeof(struct virtio_net_hdr),
vio_le(), true)) {
@@ -3700,14 +3706,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv
if (optlen != sizeof(val))
return -EINVAL;
- if (po->rx_ring.pg_vec || po->tx_ring.pg_vec)
- return -EBUSY;
if (copy_from_user(&val, optval, sizeof(val)))
return -EFAULT;
if (val > INT_MAX)
return -EINVAL;
- po->tp_reserve = val;
- return 0;
+ lock_sock(sk);
+ if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) {
+ ret = -EBUSY;
+ } else {
+ po->tp_reserve = val;
+ ret = 0;
+ }
+ release_sock(sk);
+ return ret;
}
case PACKET_LOSS:
{
@@ -4329,7 +4340,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u,
register_prot_hook(sk);
}
spin_unlock(&po->bind_lock);
- if (closing && (po->tp_version > TPACKET_V2)) {
+ if (pg_vec && (po->tp_version > TPACKET_V2)) {
/* Because we don't support block-based V3 on tx-ring */
if (!tx_ring)
prb_shutdown_retire_blk_timer(po, rb_queue);
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index e10624aa6959..9722bf839d9d 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -1015,8 +1015,10 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic,
if (rds_ib_ring_empty(&ic->i_recv_ring))
rds_ib_stats_inc(s_ib_rx_ring_empty);
- if (rds_ib_ring_low(&ic->i_recv_ring))
+ if (rds_ib_ring_low(&ic->i_recv_ring)) {
rds_ib_recv_refill(conn, 0, GFP_NOWAIT);
+ rds_ib_stats_inc(s_ib_rx_refill_from_cq);
+ }
}
int rds_ib_recv_path(struct rds_conn_path *cp)
@@ -1029,6 +1031,7 @@ int rds_ib_recv_path(struct rds_conn_path *cp)
if (rds_conn_up(conn)) {
rds_ib_attempt_ack(ic);
rds_ib_recv_refill(conn, 0, GFP_KERNEL);
+ rds_ib_stats_inc(s_ib_rx_refill_from_thread);
}
return ret;
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index dd30d74824b0..ec3383f97d4c 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -223,6 +223,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
tail = b->call_backlog_tail;
while (CIRC_CNT(head, tail, size) > 0) {
struct rxrpc_call *call = b->call_backlog[tail];
+ call->socket = rx;
if (rx->discard_new_call) {
_debug("discard %lx", call->user_call_ID);
rx->discard_new_call(call, call->user_call_ID);
diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c
index 36f0ced9e60c..541707802a23 100644
--- a/net/sched/act_ipt.c
+++ b/net/sched/act_ipt.c
@@ -36,11 +36,12 @@ static struct tc_action_ops act_ipt_ops;
static unsigned int xt_net_id;
static struct tc_action_ops act_xt_ops;
-static int ipt_init_target(struct xt_entry_target *t, char *table,
- unsigned int hook)
+static int ipt_init_target(struct net *net, struct xt_entry_target *t,
+ char *table, unsigned int hook)
{
struct xt_tgchk_param par;
struct xt_target *target;
+ struct ipt_entry e = {};
int ret = 0;
target = xt_request_find_target(AF_INET, t->u.user.name,
@@ -49,8 +50,10 @@ static int ipt_init_target(struct xt_entry_target *t, char *table,
return PTR_ERR(target);
t->u.kernel.target = target;
+ memset(&par, 0, sizeof(par));
+ par.net = net;
par.table = table;
- par.entryinfo = NULL;
+ par.entryinfo = &e;
par.target = target;
par.targinfo = t->data;
par.hook_mask = hook;
@@ -91,10 +94,11 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = {
[TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) },
};
-static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla,
+static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
const struct tc_action_ops *ops, int ovr, int bind)
{
+ struct tc_action_net *tn = net_generic(net, id);
struct nlattr *tb[TCA_IPT_MAX + 1];
struct tcf_ipt *ipt;
struct xt_entry_target *td, *t;
@@ -159,7 +163,7 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla,
if (unlikely(!t))
goto err2;
- err = ipt_init_target(t, tname, hook);
+ err = ipt_init_target(net, t, tname, hook);
if (err < 0)
goto err3;
@@ -193,18 +197,16 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
int bind)
{
- struct tc_action_net *tn = net_generic(net, ipt_net_id);
-
- return __tcf_ipt_init(tn, nla, est, a, &act_ipt_ops, ovr, bind);
+ return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr,
+ bind);
}
static int tcf_xt_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a, int ovr,
int bind)
{
- struct tc_action_net *tn = net_generic(net, xt_net_id);
-
- return __tcf_ipt_init(tn, nla, est, a, &act_xt_ops, ovr, bind);
+ return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr,
+ bind);
}
static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a,
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 39da0c5801c9..6c5ea84d2682 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -205,7 +205,7 @@ static void tcf_chain_flush(struct tcf_chain *chain)
{
struct tcf_proto *tp;
- if (*chain->p_filter_chain)
+ if (chain->p_filter_chain)
RCU_INIT_POINTER(*chain->p_filter_chain, NULL);
while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) {
RCU_INIT_POINTER(chain->filter_chain, tp->next);
@@ -215,9 +215,15 @@ static void tcf_chain_flush(struct tcf_chain *chain)
static void tcf_chain_destroy(struct tcf_chain *chain)
{
- list_del(&chain->list);
- tcf_chain_flush(chain);
- kfree(chain);
+ /* May be already removed from the list by the previous call. */
+ if (!list_empty(&chain->list))
+ list_del_init(&chain->list);
+
+ /* There might still be a reference held when we got here from
+ * tcf_block_put. Wait for the user to drop reference before free.
+ */
+ if (!chain->refcnt)
+ kfree(chain);
}
struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index,
@@ -288,8 +294,10 @@ void tcf_block_put(struct tcf_block *block)
if (!block)
return;
- list_for_each_entry_safe(chain, tmp, &block->chain_list, list)
+ list_for_each_entry_safe(chain, tmp, &block->chain_list, list) {
+ tcf_chain_flush(chain);
tcf_chain_destroy(chain);
+ }
kfree(block);
}
EXPORT_SYMBOL(tcf_block_put);
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index bd24a550e0f9..4fb5a3222d0d 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -286,9 +286,6 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle)
void qdisc_hash_add(struct Qdisc *q, bool invisible)
{
if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) {
- struct Qdisc *root = qdisc_dev(q)->qdisc;
-
- WARN_ON_ONCE(root == &noop_qdisc);
ASSERT_RTNL();
hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle);
if (invisible)
@@ -839,7 +836,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent,
old = dev_graft_qdisc(dev_queue, new);
if (new && i > 0)
- refcount_inc(&new->refcnt);
+ qdisc_refcount_inc(new);
if (!ingress)
qdisc_destroy(old);
@@ -850,7 +847,7 @@ skip:
notify_and_destroy(net, skb, n, classid,
dev->qdisc, new);
if (new && !new->ops->attach)
- refcount_inc(&new->refcnt);
+ qdisc_refcount_inc(new);
dev->qdisc = new ? : &noop_qdisc;
if (new && new->ops->attach)
@@ -1259,7 +1256,7 @@ replay:
if (q == p ||
(p && check_loop(q, p, 0)))
return -ELOOP;
- refcount_inc(&q->refcnt);
+ qdisc_refcount_inc(q);
goto graft;
} else {
if (!q)
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
index 572fe2584e48..c403c87aff7a 100644
--- a/net/sched/sch_atm.c
+++ b/net/sched/sch_atm.c
@@ -572,8 +572,10 @@ static void atm_tc_destroy(struct Qdisc *sch)
struct atm_flow_data *flow, *tmp;
pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
- list_for_each_entry(flow, &p->flows, list)
+ list_for_each_entry(flow, &p->flows, list) {
tcf_block_put(flow->block);
+ flow->block = NULL;
+ }
list_for_each_entry_safe(flow, tmp, &p->flows, list) {
if (flow->ref > 1)
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
index 481036f6b54e..156c8a33c677 100644
--- a/net/sched/sch_cbq.c
+++ b/net/sched/sch_cbq.c
@@ -1139,6 +1139,13 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
struct tc_ratespec *r;
int err;
+ qdisc_watchdog_init(&q->watchdog, sch);
+ hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
+ q->delay_timer.function = cbq_undelay;
+
+ if (!opt)
+ return -EINVAL;
+
err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL);
if (err < 0)
return err;
@@ -1177,9 +1184,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt)
q->link.avpkt = q->link.allot/2;
q->link.minidle = -0x7FFFFFFF;
- qdisc_watchdog_init(&q->watchdog, sch);
- hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED);
- q->delay_timer.function = cbq_undelay;
q->toplevel = TC_CBQ_MAXLEVEL;
q->now = psched_get_time();
@@ -1431,8 +1435,10 @@ static void cbq_destroy(struct Qdisc *sch)
* be bound to classes which have been destroyed already. --TGR '04
*/
for (h = 0; h < q->clhash.hashsize; h++) {
- hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode)
+ hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
tcf_block_put(cl->block);
+ cl->block = NULL;
+ }
}
for (h = 0; h < q->clhash.hashsize; h++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c
index 337f2d6d81e4..2c0c05f2cc34 100644
--- a/net/sched/sch_fq_codel.c
+++ b/net/sched/sch_fq_codel.c
@@ -491,10 +491,8 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt)
if (!q->flows)
return -ENOMEM;
q->backlogs = kvzalloc(q->flows_cnt * sizeof(u32), GFP_KERNEL);
- if (!q->backlogs) {
- kvfree(q->flows);
+ if (!q->backlogs)
return -ENOMEM;
- }
for (i = 0; i < q->flows_cnt; i++) {
struct fq_codel_flow *flow = q->flows + i;
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 57ba406f1437..4ba6da5fb254 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -785,7 +785,7 @@ static void attach_default_qdiscs(struct net_device *dev)
dev->priv_flags & IFF_NO_QUEUE) {
netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
dev->qdisc = txq->qdisc_sleeping;
- refcount_inc(&dev->qdisc->refcnt);
+ qdisc_refcount_inc(dev->qdisc);
} else {
qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
if (qdisc) {
diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c
index b52f74610dc7..11ab8dace901 100644
--- a/net/sched/sch_hfsc.c
+++ b/net/sched/sch_hfsc.c
@@ -1418,6 +1418,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
struct tc_hfsc_qopt *qopt;
int err;
+ qdisc_watchdog_init(&q->watchdog, sch);
+
if (opt == NULL || nla_len(opt) < sizeof(*qopt))
return -EINVAL;
qopt = nla_data(opt);
@@ -1428,6 +1430,10 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
return err;
q->eligible = RB_ROOT;
+ err = tcf_block_get(&q->root.block, &q->root.filter_list);
+ if (err)
+ return err;
+
q->root.cl_common.classid = sch->handle;
q->root.refcnt = 1;
q->root.sched = q;
@@ -1444,8 +1450,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt)
qdisc_class_hash_insert(&q->clhash, &q->root.cl_common);
qdisc_class_hash_grow(sch, &q->clhash);
- qdisc_watchdog_init(&q->watchdog, sch);
-
return 0;
}
@@ -1522,8 +1526,10 @@ hfsc_destroy_qdisc(struct Qdisc *sch)
unsigned int i;
for (i = 0; i < q->clhash.hashsize; i++) {
- hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode)
+ hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) {
tcf_block_put(cl->block);
+ cl->block = NULL;
+ }
}
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c
index 51d3ba682af9..73a53c08091b 100644
--- a/net/sched/sch_hhf.c
+++ b/net/sched/sch_hhf.c
@@ -477,6 +477,9 @@ static void hhf_destroy(struct Qdisc *sch)
kvfree(q->hhf_valid_bits[i]);
}
+ if (!q->hh_flows)
+ return;
+
for (i = 0; i < HH_FLOWS_CNT; i++) {
struct hh_flow_state *flow, *next;
struct list_head *head = &q->hh_flows[i];
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index 203286ab4427..5bf5177b2bd3 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -1017,6 +1017,9 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
int err;
int i;
+ qdisc_watchdog_init(&q->watchdog, sch);
+ INIT_WORK(&q->work, htb_work_func);
+
if (!opt)
return -EINVAL;
@@ -1041,8 +1044,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt)
for (i = 0; i < TC_HTB_NUMPRIO; i++)
INIT_LIST_HEAD(q->drops + i);
- qdisc_watchdog_init(&q->watchdog, sch);
- INIT_WORK(&q->work, htb_work_func);
qdisc_skb_head_init(&q->direct_queue);
if (tb[TCA_HTB_DIRECT_QLEN])
@@ -1258,8 +1259,10 @@ static void htb_destroy(struct Qdisc *sch)
tcf_block_put(q->block);
for (i = 0; i < q->clhash.hashsize; i++) {
- hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode)
+ hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) {
tcf_block_put(cl->block);
+ cl->block = NULL;
+ }
}
for (i = 0; i < q->clhash.hashsize; i++) {
hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i],
diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c
index f143b7bbaa0d..9c454f5d6c38 100644
--- a/net/sched/sch_multiq.c
+++ b/net/sched/sch_multiq.c
@@ -257,12 +257,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt)
for (i = 0; i < q->max_bands; i++)
q->queues[i] = &noop_qdisc;
- err = multiq_tune(sch, opt);
-
- if (err)
- kfree(q->queues);
-
- return err;
+ return multiq_tune(sch, opt);
}
static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb)
diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c
index 1b3dd6190e93..14d1724e0dc4 100644
--- a/net/sched/sch_netem.c
+++ b/net/sched/sch_netem.c
@@ -933,11 +933,11 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt)
struct netem_sched_data *q = qdisc_priv(sch);
int ret;
+ qdisc_watchdog_init(&q->watchdog, sch);
+
if (!opt)
return -EINVAL;
- qdisc_watchdog_init(&q->watchdog, sch);
-
q->loss_model = CLG_RANDOM;
ret = netem_change(sch, opt);
if (ret)
diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c
index f80ea2cc5f1f..fc69fc5956e9 100644
--- a/net/sched/sch_sfq.c
+++ b/net/sched/sch_sfq.c
@@ -437,6 +437,7 @@ congestion_drop:
qdisc_drop(head, sch, to_free);
slot_queue_add(slot, skb);
+ qdisc_tree_reduce_backlog(sch, 0, delta);
return NET_XMIT_CN;
}
@@ -468,8 +469,10 @@ enqueue:
/* Return Congestion Notification only if we dropped a packet
* from this flow.
*/
- if (qlen != slot->qlen)
+ if (qlen != slot->qlen) {
+ qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb));
return NET_XMIT_CN;
+ }
/* As we dropped a packet, better let upper stack know this */
qdisc_tree_reduce_backlog(sch, 1, dropped);
@@ -713,13 +716,13 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt)
int i;
int err;
+ setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
+ (unsigned long)sch);
+
err = tcf_block_get(&q->block, &q->filter_list);
if (err)
return err;
- setup_deferrable_timer(&q->perturb_timer, sfq_perturbation,
- (unsigned long)sch);
-
for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) {
q->dep[i].next = i + SFQ_MAX_FLOWS;
q->dep[i].prev = i + SFQ_MAX_FLOWS;
diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c
index b2e4b6ad241a..493270f0d5b0 100644
--- a/net/sched/sch_tbf.c
+++ b/net/sched/sch_tbf.c
@@ -425,12 +425,13 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt)
{
struct tbf_sched_data *q = qdisc_priv(sch);
+ qdisc_watchdog_init(&q->watchdog, sch);
+ q->qdisc = &noop_qdisc;
+
if (opt == NULL)
return -EINVAL;
q->t_c = ktime_get_ns();
- qdisc_watchdog_init(&q->watchdog, sch);
- q->qdisc = &noop_qdisc;
return tbf_change(sch, opt);
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 2a186b201ad2..a4b6ffb61495 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -512,7 +512,9 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr,
{
addr->sa.sa_family = AF_INET6;
addr->v6.sin6_port = port;
+ addr->v6.sin6_flowinfo = 0;
addr->v6.sin6_addr = *saddr;
+ addr->v6.sin6_scope_id = 0;
}
/* Compare addresses exactly.
diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c
index 9a647214a91e..e99518e79b52 100644
--- a/net/sctp/sctp_diag.c
+++ b/net/sctp/sctp_diag.c
@@ -70,7 +70,8 @@ static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb,
info = nla_data(attr);
list_for_each_entry_rcu(laddr, address_list, list) {
- memcpy(info, &laddr->a, addrlen);
+ memcpy(info, &laddr->a, sizeof(laddr->a));
+ memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a));
info += addrlen;
}
@@ -93,7 +94,9 @@ static int inet_diag_msg_sctpaddrs_fill(struct sk_buff *skb,
info = nla_data(attr);
list_for_each_entry(from, &asoc->peer.transport_addr_list,
transports) {
- memcpy(info, &from->ipaddr, addrlen);
+ memcpy(info, &from->ipaddr, sizeof(from->ipaddr));
+ memset(info + sizeof(from->ipaddr), 0,
+ addrlen - sizeof(from->ipaddr));
info += addrlen;
}
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 1db478e34520..8d760863bc41 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -4538,8 +4538,7 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc,
info->sctpi_ictrlchunks = asoc->stats.ictrlchunks;
prim = asoc->peer.primary_path;
- memcpy(&info->sctpi_p_address, &prim->ipaddr,
- sizeof(struct sockaddr_storage));
+ memcpy(&info->sctpi_p_address, &prim->ipaddr, sizeof(prim->ipaddr));
info->sctpi_p_state = prim->state;
info->sctpi_p_cwnd = prim->cwnd;
info->sctpi_p_srtt = prim->srtt;
diff --git a/net/socket.c b/net/socket.c
index bf2122691fba..ad22df1ffbd1 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -1916,7 +1916,7 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
if (copy_from_user(&msg, umsg, sizeof(*umsg)))
return -EFAULT;
- kmsg->msg_control = msg.msg_control;
+ kmsg->msg_control = (void __force *)msg.msg_control;
kmsg->msg_controllen = msg.msg_controllen;
kmsg->msg_flags = msg.msg_flags;
@@ -1935,7 +1935,8 @@ static int copy_msghdr_from_user(struct msghdr *kmsg,
if (msg.msg_name && kmsg->msg_namelen) {
if (!save_addr) {
- err = move_addr_to_kernel(msg.msg_name, kmsg->msg_namelen,
+ err = move_addr_to_kernel(msg.msg_name,
+ kmsg->msg_namelen,
kmsg->msg_name);
if (err < 0)
return err;
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 2b720fa35c4f..e18500151236 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -421,6 +421,9 @@ static void svc_data_ready(struct sock *sk)
dprintk("svc: socket %p(inet %p), busy=%d\n",
svsk, sk,
test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
+
+ /* Refer to svc_setup_socket() for details. */
+ rmb();
svsk->sk_odata(sk);
if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags))
svc_xprt_enqueue(&svsk->sk_xprt);
@@ -437,6 +440,9 @@ static void svc_write_space(struct sock *sk)
if (svsk) {
dprintk("svc: socket %p(inet %p), write_space busy=%d\n",
svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags));
+
+ /* Refer to svc_setup_socket() for details. */
+ rmb();
svsk->sk_owspace(sk);
svc_xprt_enqueue(&svsk->sk_xprt);
}
@@ -760,8 +766,12 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
dprintk("svc: socket %p TCP (listen) state change %d\n",
sk, sk->sk_state);
- if (svsk)
+ if (svsk) {
+ /* Refer to svc_setup_socket() for details. */
+ rmb();
svsk->sk_odata(sk);
+ }
+
/*
* This callback may called twice when a new connection
* is established as a child socket inherits everything
@@ -794,6 +804,8 @@ static void svc_tcp_state_change(struct sock *sk)
if (!svsk)
printk("svc: socket %p: no user data\n", sk);
else {
+ /* Refer to svc_setup_socket() for details. */
+ rmb();
svsk->sk_ostate(sk);
if (sk->sk_state != TCP_ESTABLISHED) {
set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags);
@@ -1381,12 +1393,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv,
return ERR_PTR(err);
}
- inet->sk_user_data = svsk;
svsk->sk_sock = sock;
svsk->sk_sk = inet;
svsk->sk_ostate = inet->sk_state_change;
svsk->sk_odata = inet->sk_data_ready;
svsk->sk_owspace = inet->sk_write_space;
+ /*
+ * This barrier is necessary in order to prevent race condition
+ * with svc_data_ready(), svc_listen_data_ready() and others
+ * when calling callbacks above.
+ */
+ wmb();
+ inet->sk_user_data = svsk;
/* Initialize the socket */
if (sock->type == SOCK_DGRAM)
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index d174ee3254ee..89cd061c4468 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -65,6 +65,8 @@ static struct tipc_bearer *bearer_get(struct net *net, int bearer_id)
}
static void bearer_disable(struct net *net, struct tipc_bearer *b);
+static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
+ struct packet_type *pt, struct net_device *orig_dev);
/**
* tipc_media_find - locates specified media object by name
@@ -428,6 +430,10 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b,
/* Associate TIPC bearer with L2 bearer */
rcu_assign_pointer(b->media_ptr, dev);
+ b->pt.dev = dev;
+ b->pt.type = htons(ETH_P_TIPC);
+ b->pt.func = tipc_l2_rcv_msg;
+ dev_add_pack(&b->pt);
memset(&b->bcast_addr, 0, sizeof(b->bcast_addr));
memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len);
b->bcast_addr.media_id = b->media->type_id;
@@ -447,6 +453,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b)
struct net_device *dev;
dev = (struct net_device *)rtnl_dereference(b->media_ptr);
+ dev_remove_pack(&b->pt);
RCU_INIT_POINTER(dev->tipc_ptr, NULL);
synchronize_net();
dev_put(dev);
@@ -594,11 +601,12 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev,
struct tipc_bearer *b;
rcu_read_lock();
- b = rcu_dereference_rtnl(dev->tipc_ptr);
+ b = rcu_dereference_rtnl(dev->tipc_ptr) ?:
+ rcu_dereference_rtnl(orig_dev->tipc_ptr);
if (likely(b && test_bit(0, &b->up) &&
- (skb->pkt_type <= PACKET_BROADCAST))) {
+ (skb->pkt_type <= PACKET_MULTICAST))) {
skb->next = NULL;
- tipc_rcv(dev_net(dev), skb, b);
+ tipc_rcv(dev_net(b->pt.dev), skb, b);
rcu_read_unlock();
return NET_RX_SUCCESS;
}
@@ -659,11 +667,6 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt,
return NOTIFY_OK;
}
-static struct packet_type tipc_packet_type __read_mostly = {
- .type = htons(ETH_P_TIPC),
- .func = tipc_l2_rcv_msg,
-};
-
static struct notifier_block notifier = {
.notifier_call = tipc_l2_device_event,
.priority = 0,
@@ -671,19 +674,12 @@ static struct notifier_block notifier = {
int tipc_bearer_setup(void)
{
- int err;
-
- err = register_netdevice_notifier(&notifier);
- if (err)
- return err;
- dev_add_pack(&tipc_packet_type);
- return 0;
+ return register_netdevice_notifier(&notifier);
}
void tipc_bearer_cleanup(void)
{
unregister_netdevice_notifier(&notifier);
- dev_remove_pack(&tipc_packet_type);
}
void tipc_bearer_stop(struct net *net)
diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h
index 635c9086e19a..e07a55a80c18 100644
--- a/net/tipc/bearer.h
+++ b/net/tipc/bearer.h
@@ -131,6 +131,7 @@ struct tipc_media {
* @name: bearer name (format = media:interface)
* @media: ptr to media structure associated with bearer
* @bcast_addr: media address used in broadcasting
+ * @pt: packet type for bearer
* @rcu: rcu struct for tipc_bearer
* @priority: default link priority for bearer
* @window: default window size for bearer
@@ -151,6 +152,7 @@ struct tipc_bearer {
char name[TIPC_MAX_BEARER_NAME];
struct tipc_media *media;
struct tipc_media_addr bcast_addr;
+ struct packet_type pt;
struct rcu_head rcu;
u32 priority;
u32 window;
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index ab3087687a32..6ef379f004ac 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -479,13 +479,14 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg,
bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err)
{
struct sk_buff *_skb = *skb;
- struct tipc_msg *hdr = buf_msg(_skb);
+ struct tipc_msg *hdr;
struct tipc_msg ohdr;
- int dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE);
+ int dlen;
if (skb_linearize(_skb))
goto exit;
hdr = buf_msg(_skb);
+ dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE);
if (msg_dest_droppable(hdr))
goto exit;
if (msg_errcode(hdr))
@@ -511,8 +512,11 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err)
pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC))
goto exit;
+ /* reassign after skb header modifications */
+ hdr = buf_msg(_skb);
/* Now reverse the concerned fields */
msg_set_errcode(hdr, err);
+ msg_set_non_seq(hdr, 0);
msg_set_origport(hdr, msg_destport(&ohdr));
msg_set_destport(hdr, msg_origport(&ohdr));
msg_set_destnode(hdr, msg_prevnode(&ohdr));
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index 9bfe886ab330..750949dfc1d7 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -258,13 +258,15 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
arg = nlmsg_new(0, GFP_KERNEL);
if (!arg) {
kfree_skb(msg->rep);
+ msg->rep = NULL;
return -ENOMEM;
}
err = __tipc_nl_compat_dumpit(cmd, msg, arg);
- if (err)
+ if (err) {
kfree_skb(msg->rep);
-
+ msg->rep = NULL;
+ }
kfree_skb(arg);
return err;
diff --git a/net/tipc/node.c b/net/tipc/node.c
index aeef8011ac7d..7dd22330a6b4 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1126,8 +1126,8 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr,
strncpy(linkname, tipc_link_name(link), len);
err = 0;
}
-exit:
tipc_node_read_unlock(node);
+exit:
tipc_node_put(node);
return err;
}
@@ -1455,10 +1455,8 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb,
/* Initiate synch mode if applicable */
if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) {
syncpt = iseqno + exp_pkts - 1;
- if (!tipc_link_is_up(l)) {
- tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT);
+ if (!tipc_link_is_up(l))
__tipc_node_link_up(n, bearer_id, xmitq);
- }
if (n->state == SELF_UP_PEER_UP) {
n->sync_point = syncpt;
tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT);
@@ -1559,6 +1557,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
/* Check/update node state before receiving */
if (unlikely(skb)) {
+ if (unlikely(skb_linearize(skb)))
+ goto discard;
tipc_node_write_lock(n);
if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) {
if (le->link) {
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 101e3597338f..d50edd6e0019 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -2255,8 +2255,8 @@ void tipc_sk_reinit(struct net *net)
do {
tsk = ERR_PTR(rhashtable_walk_start(&iter));
- if (tsk)
- continue;
+ if (IS_ERR(tsk))
+ goto walk_stop;
while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) {
spin_lock_bh(&tsk->sk.sk_lock.slock);
@@ -2265,7 +2265,7 @@ void tipc_sk_reinit(struct net *net)
msg_set_orignode(msg, tn->own_addr);
spin_unlock_bh(&tsk->sk.sk_lock.slock);
}
-
+walk_stop:
rhashtable_walk_stop(&iter);
} while (tsk == ERR_PTR(-EAGAIN));
}
diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c
index 0bf91cd3733c..be3d9e3183dc 100644
--- a/net/tipc/subscr.c
+++ b/net/tipc/subscr.c
@@ -52,7 +52,6 @@ struct tipc_subscriber {
struct list_head subscrp_list;
};
-static void tipc_subscrp_delete(struct tipc_subscription *sub);
static void tipc_subscrb_put(struct tipc_subscriber *subscriber);
/**
@@ -197,15 +196,19 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber,
{
struct list_head *subscription_list = &subscriber->subscrp_list;
struct tipc_subscription *sub, *temp;
+ u32 timeout;
spin_lock_bh(&subscriber->lock);
list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) {
if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr)))
continue;
- tipc_nametbl_unsubscribe(sub);
- list_del(&sub->subscrp_list);
- tipc_subscrp_delete(sub);
+ timeout = htohl(sub->evt.s.timeout, sub->swap);
+ if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) {
+ tipc_nametbl_unsubscribe(sub);
+ list_del(&sub->subscrp_list);
+ tipc_subscrp_put(sub);
+ }
if (s)
break;
@@ -236,18 +239,12 @@ static void tipc_subscrb_delete(struct tipc_subscriber *subscriber)
tipc_subscrb_put(subscriber);
}
-static void tipc_subscrp_delete(struct tipc_subscription *sub)
-{
- u32 timeout = htohl(sub->evt.s.timeout, sub->swap);
-
- if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer))
- tipc_subscrp_put(sub);
-}
-
static void tipc_subscrp_cancel(struct tipc_subscr *s,
struct tipc_subscriber *subscriber)
{
+ tipc_subscrb_get(subscriber);
tipc_subscrb_subscrp_delete(subscriber, s);
+ tipc_subscrb_put(subscriber);
}
static struct tipc_subscription *tipc_subscrp_create(struct net *net,
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 7b52a380d710..be8982b4f8c0 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -2304,10 +2304,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state,
*/
mutex_lock(&u->iolock);
- if (flags & MSG_PEEK)
- skip = sk_peek_offset(sk, flags);
- else
- skip = 0;
+ skip = max(sk_peek_offset(sk, flags), 0);
do {
int chunk;
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index ff61d8557929..69b16ee327d9 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -2226,7 +2226,6 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
goto no_transform;
}
- dst_hold(&xdst->u.dst);
route = xdst->route;
}
}
@@ -3308,9 +3307,15 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
struct xfrm_state *x_new[XFRM_MAX_DEPTH];
struct xfrm_migrate *mp;
+ /* Stage 0 - sanity checks */
if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
goto out;
+ if (dir >= XFRM_POLICY_MAX) {
+ err = -EINVAL;
+ goto out;
+ }
+
/* Stage 1 - find policy */
if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
err = -ENOENT;
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 6c0956d10db6..a792effdb0b5 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -1620,6 +1620,7 @@ int
xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
unsigned short family, struct net *net)
{
+ int i;
int err = 0;
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
if (!afinfo)
@@ -1628,6 +1629,9 @@ xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n,
spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/
if (afinfo->tmpl_sort)
err = afinfo->tmpl_sort(dst, src, n);
+ else
+ for (i = 0; i < n; i++)
+ dst[i] = src[i];
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
rcu_read_unlock();
return err;
@@ -1638,6 +1642,7 @@ int
xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
unsigned short family)
{
+ int i;
int err = 0;
struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family);
struct net *net = xs_net(*src);
@@ -1648,6 +1653,9 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n,
spin_lock_bh(&net->xfrm.xfrm_state_lock);
if (afinfo->state_sort)
err = afinfo->state_sort(dst, src, n);
+ else
+ for (i = 0; i < n; i++)
+ dst[i] = src[i];
spin_unlock_bh(&net->xfrm.xfrm_state_lock);
rcu_read_unlock();
return err;
diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c
index 2be4c6af008a..9391ced05259 100644
--- a/net/xfrm/xfrm_user.c
+++ b/net/xfrm/xfrm_user.c
@@ -796,7 +796,7 @@ static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb
return -EMSGSIZE;
xuo = nla_data(attr);
-
+ memset(xuo, 0, sizeof(*xuo));
xuo->ifindex = xso->dev->ifindex;
xuo->flags = xso->flags;
@@ -1869,6 +1869,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct
return -EMSGSIZE;
id = nlmsg_data(nlh);
+ memset(&id->sa_id, 0, sizeof(id->sa_id));
memcpy(&id->sa_id.daddr, &x->id.daddr, sizeof(x->id.daddr));
id->sa_id.spi = x->id.spi;
id->sa_id.family = x->props.family;
@@ -2578,6 +2579,8 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct
ue = nlmsg_data(nlh);
copy_to_user_state(x, &ue->state);
ue->hard = (c->data.hard != 0) ? 1 : 0;
+ /* clear the padding bytes */
+ memset(&ue->hard + 1, 0, sizeof(*ue) - offsetofend(typeof(*ue), hard));
err = xfrm_mark_put(skb, &x->mark);
if (err)
@@ -2715,6 +2718,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c)
struct nlattr *attr;
id = nlmsg_data(nlh);
+ memset(id, 0, sizeof(*id));
memcpy(&id->daddr, &x->id.daddr, sizeof(id->daddr));
id->spi = x->id.spi;
id->family = x->props.family;