diff options
Diffstat (limited to 'net')
103 files changed, 840 insertions, 545 deletions
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index e1133bc634b5..8a3ce79b1307 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -1549,9 +1549,41 @@ batadv_tt_global_entry_has_orig(const struct batadv_tt_global_entry *entry, return found; } +/** + * batadv_tt_global_sync_flags - update TT sync flags + * @tt_global: the TT global entry to update sync flags in + * + * Updates the sync flag bits in the tt_global flag attribute with a logical + * OR of all sync flags from any of its TT orig entries. + */ +static void +batadv_tt_global_sync_flags(struct batadv_tt_global_entry *tt_global) +{ + struct batadv_tt_orig_list_entry *orig_entry; + const struct hlist_head *head; + u16 flags = BATADV_NO_FLAGS; + + rcu_read_lock(); + head = &tt_global->orig_list; + hlist_for_each_entry_rcu(orig_entry, head, list) + flags |= orig_entry->flags; + rcu_read_unlock(); + + flags |= tt_global->common.flags & (~BATADV_TT_SYNC_MASK); + tt_global->common.flags = flags; +} + +/** + * batadv_tt_global_orig_entry_add - add or update a TT orig entry + * @tt_global: the TT global entry to add an orig entry in + * @orig_node: the originator to add an orig entry for + * @ttvn: translation table version number of this changeset + * @flags: TT sync flags + */ static void batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, - struct batadv_orig_node *orig_node, int ttvn) + struct batadv_orig_node *orig_node, int ttvn, + u8 flags) { struct batadv_tt_orig_list_entry *orig_entry; @@ -1561,7 +1593,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, * was added during a "temporary client detection" */ orig_entry->ttvn = ttvn; - goto out; + orig_entry->flags = flags; + goto sync_flags; } orig_entry = kmem_cache_zalloc(batadv_tt_orig_cache, GFP_ATOMIC); @@ -1573,6 +1606,7 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, batadv_tt_global_size_inc(orig_node, tt_global->common.vid); orig_entry->orig_node = orig_node; orig_entry->ttvn = ttvn; + orig_entry->flags = flags; kref_init(&orig_entry->refcount); spin_lock_bh(&tt_global->list_lock); @@ -1582,6 +1616,8 @@ batadv_tt_global_orig_entry_add(struct batadv_tt_global_entry *tt_global, spin_unlock_bh(&tt_global->list_lock); atomic_inc(&tt_global->orig_list_count); +sync_flags: + batadv_tt_global_sync_flags(tt_global); out: if (orig_entry) batadv_tt_orig_list_entry_put(orig_entry); @@ -1703,10 +1739,10 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, } /* the change can carry possible "attribute" flags like the - * TT_CLIENT_WIFI, therefore they have to be copied in the + * TT_CLIENT_TEMP, therefore they have to be copied in the * client entry */ - common->flags |= flags; + common->flags |= flags & (~BATADV_TT_SYNC_MASK); /* If there is the BATADV_TT_CLIENT_ROAM flag set, there is only * one originator left in the list and we previously received a @@ -1723,7 +1759,8 @@ static bool batadv_tt_global_add(struct batadv_priv *bat_priv, } add_orig_entry: /* add the new orig_entry (if needed) or update it */ - batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn); + batadv_tt_global_orig_entry_add(tt_global_entry, orig_node, ttvn, + flags & BATADV_TT_SYNC_MASK); batadv_dbg(BATADV_DBG_TT, bat_priv, "Creating new global tt entry: %pM (vid: %d, via %pM)\n", @@ -1946,6 +1983,7 @@ batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, struct batadv_tt_orig_list_entry *orig, bool best) { + u16 flags = (common->flags & (~BATADV_TT_SYNC_MASK)) | orig->flags; void *hdr; struct batadv_orig_node_vlan *vlan; u8 last_ttvn; @@ -1975,7 +2013,7 @@ batadv_tt_global_dump_subentry(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u8(msg, BATADV_ATTR_TT_LAST_TTVN, last_ttvn) || nla_put_u32(msg, BATADV_ATTR_TT_CRC32, crc) || nla_put_u16(msg, BATADV_ATTR_TT_VID, common->vid) || - nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, common->flags)) + nla_put_u32(msg, BATADV_ATTR_TT_FLAGS, flags)) goto nla_put_failure; if (best && nla_put_flag(msg, BATADV_ATTR_FLAG_BEST)) @@ -2589,6 +2627,7 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv, unsigned short vid) { struct batadv_hashtable *hash = bat_priv->tt.global_hash; + struct batadv_tt_orig_list_entry *tt_orig; struct batadv_tt_common_entry *tt_common; struct batadv_tt_global_entry *tt_global; struct hlist_head *head; @@ -2627,8 +2666,9 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv, /* find out if this global entry is announced by this * originator */ - if (!batadv_tt_global_entry_has_orig(tt_global, - orig_node)) + tt_orig = batadv_tt_global_orig_entry_find(tt_global, + orig_node); + if (!tt_orig) continue; /* use network order to read the VID: this ensures that @@ -2640,10 +2680,12 @@ static u32 batadv_tt_global_crc(struct batadv_priv *bat_priv, /* compute the CRC on flags that have to be kept in sync * among nodes */ - flags = tt_common->flags & BATADV_TT_SYNC_MASK; + flags = tt_orig->flags; crc_tmp = crc32c(crc_tmp, &flags, sizeof(flags)); crc ^= crc32c(crc_tmp, tt_common->addr, ETH_ALEN); + + batadv_tt_orig_list_entry_put(tt_orig); } rcu_read_unlock(); } diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index ea43a6449247..a62795868794 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1260,6 +1260,7 @@ struct batadv_tt_global_entry { * struct batadv_tt_orig_list_entry - orig node announcing a non-mesh client * @orig_node: pointer to orig node announcing this non-mesh client * @ttvn: translation table version number which added the non-mesh client + * @flags: per orig entry TT sync flags * @list: list node for batadv_tt_global_entry::orig_list * @refcount: number of contexts the object is used * @rcu: struct used for freeing in an RCU-safe manner @@ -1267,6 +1268,7 @@ struct batadv_tt_global_entry { struct batadv_tt_orig_list_entry { struct batadv_orig_node *orig_node; u8 ttvn; + u8 flags; struct hlist_node list; struct kref refcount; struct rcu_head rcu; diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 861ae2a165f4..5a7be3bddfa9 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -53,6 +53,9 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev) brstats->tx_bytes += skb->len; u64_stats_update_end(&brstats->syncp); +#ifdef CONFIG_NET_SWITCHDEV + skb->offload_fwd_mark = 0; +#endif BR_INPUT_SKB_CB(skb)->brdev = dev; skb_reset_mac_header(skb); diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c index 181a44d0f1da..f6b1c7de059d 100644 --- a/net/bridge/br_switchdev.c +++ b/net/bridge/br_switchdev.c @@ -115,7 +115,7 @@ br_switchdev_fdb_call_notifiers(bool adding, const unsigned char *mac, void br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type) { - if (!fdb->added_by_user) + if (!fdb->added_by_user || !fdb->dst) return; switch (type) { diff --git a/net/ceph/crush/mapper.c b/net/ceph/crush/mapper.c index 746b145bfd11..417df675c71b 100644 --- a/net/ceph/crush/mapper.c +++ b/net/ceph/crush/mapper.c @@ -306,7 +306,7 @@ static __u32 *get_choose_arg_weights(const struct crush_bucket_straw2 *bucket, const struct crush_choose_arg *arg, int position) { - if (!arg || !arg->weight_set || arg->weight_set_size == 0) + if (!arg || !arg->weight_set) return bucket->item_weights; if (position >= arg->weight_set_size) diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c index b7cc615d42ef..a67298c7e0cd 100644 --- a/net/ceph/messenger.c +++ b/net/ceph/messenger.c @@ -1287,10 +1287,10 @@ static void prepare_write_message(struct ceph_connection *con) if (m->needs_out_seq) { m->hdr.seq = cpu_to_le64(++con->out_seq); m->needs_out_seq = false; - } - if (con->ops->reencode_message) - con->ops->reencode_message(m); + if (con->ops->reencode_message) + con->ops->reencode_message(m); + } dout("prepare_write_message %p seq %lld type %d len %d+%d+%zd\n", m, con->out_seq, le16_to_cpu(m->hdr.type), diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c index 901bb8221366..dcfbdd74dfd1 100644 --- a/net/ceph/osd_client.c +++ b/net/ceph/osd_client.c @@ -1337,6 +1337,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, bool legacy_change; bool split = false; bool sort_bitwise = ceph_osdmap_flag(osdc, CEPH_OSDMAP_SORTBITWISE); + bool recovery_deletes = ceph_osdmap_flag(osdc, + CEPH_OSDMAP_RECOVERY_DELETES); enum calc_target_result ct_res; int ret; @@ -1399,6 +1401,8 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, pi->pg_num, t->sort_bitwise, sort_bitwise, + t->recovery_deletes, + recovery_deletes, &last_pgid)) force_resend = true; @@ -1421,6 +1425,7 @@ static enum calc_target_result calc_target(struct ceph_osd_client *osdc, t->pg_num = pi->pg_num; t->pg_num_mask = pi->pg_num_mask; t->sort_bitwise = sort_bitwise; + t->recovery_deletes = recovery_deletes; t->osd = acting.primary; } @@ -1918,10 +1923,12 @@ static void encode_request_partial(struct ceph_osd_request *req, } ceph_encode_32(&p, req->r_attempts); /* retry_attempt */ - BUG_ON(p != end - 8); /* space for features */ + BUG_ON(p > end - 8); /* space for features */ msg->hdr.version = cpu_to_le16(8); /* MOSDOp v8 */ /* front_len is finalized in encode_request_finish() */ + msg->front.iov_len = p - msg->front.iov_base; + msg->hdr.front_len = cpu_to_le32(msg->front.iov_len); msg->hdr.data_len = cpu_to_le32(data_len); /* * The header "data_off" is a hint to the receiver allowing it @@ -1937,11 +1944,12 @@ static void encode_request_partial(struct ceph_osd_request *req, static void encode_request_finish(struct ceph_msg *msg) { void *p = msg->front.iov_base; + void *const partial_end = p + msg->front.iov_len; void *const end = p + msg->front_alloc_len; if (CEPH_HAVE_FEATURE(msg->con->peer_features, RESEND_ON_SPLIT)) { /* luminous OSD -- encode features and be done */ - p = end - 8; + p = partial_end; ceph_encode_64(&p, msg->con->peer_features); } else { struct { @@ -1984,7 +1992,7 @@ static void encode_request_finish(struct ceph_msg *msg) oid_len = p - oid; tail = p; - tail_len = (end - p) - 8; + tail_len = partial_end - p; p = msg->front.iov_base; ceph_encode_copy(&p, &head.client_inc, sizeof(head.client_inc)); diff --git a/net/ceph/osdmap.c b/net/ceph/osdmap.c index 64ae9f89773a..f358d0bfa76b 100644 --- a/net/ceph/osdmap.c +++ b/net/ceph/osdmap.c @@ -295,6 +295,10 @@ static int decode_choose_args(void **p, void *end, struct crush_map *c) ret = decode_choose_arg(p, end, arg); if (ret) goto fail; + + if (arg->ids_size && + arg->ids_size != c->buckets[bucket_index]->size) + goto e_inval; } insert_choose_arg_map(&c->choose_args, arg_map); @@ -2078,6 +2082,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting, u32 new_pg_num, bool old_sort_bitwise, bool new_sort_bitwise, + bool old_recovery_deletes, + bool new_recovery_deletes, const struct ceph_pg *pgid) { return !osds_equal(old_acting, new_acting) || @@ -2085,7 +2091,8 @@ bool ceph_is_new_interval(const struct ceph_osds *old_acting, old_size != new_size || old_min_size != new_min_size || ceph_pg_is_split(pgid, old_pg_num, new_pg_num) || - old_sort_bitwise != new_sort_bitwise; + old_sort_bitwise != new_sort_bitwise || + old_recovery_deletes != new_recovery_deletes; } static int calc_pg_rank(int osd, const struct ceph_osds *acting) @@ -2301,10 +2308,17 @@ static u32 raw_pg_to_pps(struct ceph_pg_pool_info *pi, } } +/* + * Magic value used for a "default" fallback choose_args, used if the + * crush_choose_arg_map passed to do_crush() does not exist. If this + * also doesn't exist, fall back to canonical weights. + */ +#define CEPH_DEFAULT_CHOOSE_ARGS -1 + static int do_crush(struct ceph_osdmap *map, int ruleno, int x, int *result, int result_max, const __u32 *weight, int weight_max, - u64 choose_args_index) + s64 choose_args_index) { struct crush_choose_arg_map *arg_map; int r; @@ -2313,6 +2327,9 @@ static int do_crush(struct ceph_osdmap *map, int ruleno, int x, arg_map = lookup_choose_arg_map(&map->crush->choose_args, choose_args_index); + if (!arg_map) + arg_map = lookup_choose_arg_map(&map->crush->choose_args, + CEPH_DEFAULT_CHOOSE_ARGS); mutex_lock(&map->crush_workspace_mutex); r = crush_do_rule(map->crush, ruleno, x, result, result_max, @@ -2423,40 +2440,23 @@ static void apply_upmap(struct ceph_osdmap *osdmap, for (i = 0; i < pg->pg_upmap.len; i++) raw->osds[i] = pg->pg_upmap.osds[i]; raw->size = pg->pg_upmap.len; - return; + /* check and apply pg_upmap_items, if any */ } pg = lookup_pg_mapping(&osdmap->pg_upmap_items, pgid); if (pg) { - /* - * Note: this approach does not allow a bidirectional swap, - * e.g., [[1,2],[2,1]] applied to [0,1,2] -> [0,2,1]. - */ - for (i = 0; i < pg->pg_upmap_items.len; i++) { - int from = pg->pg_upmap_items.from_to[i][0]; - int to = pg->pg_upmap_items.from_to[i][1]; - int pos = -1; - bool exists = false; - - /* make sure replacement doesn't already appear */ - for (j = 0; j < raw->size; j++) { - int osd = raw->osds[j]; - - if (osd == to) { - exists = true; + for (i = 0; i < raw->size; i++) { + for (j = 0; j < pg->pg_upmap_items.len; j++) { + int from = pg->pg_upmap_items.from_to[j][0]; + int to = pg->pg_upmap_items.from_to[j][1]; + + if (from == raw->osds[i]) { + if (!(to != CRUSH_ITEM_NONE && + to < osdmap->max_osd && + osdmap->osd_weight[to] == 0)) + raw->osds[i] = to; break; } - /* ignore mapping if target is marked out */ - if (osd == from && pos < 0 && - !(to != CRUSH_ITEM_NONE && - to < osdmap->max_osd && - osdmap->osd_weight[to] == 0)) { - pos = j; - } - } - if (!exists && pos >= 0) { - raw->osds[pos] = to; - return; } } } diff --git a/net/core/datagram.c b/net/core/datagram.c index ee5647bd91b3..8c2f4489ff8f 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -169,14 +169,20 @@ struct sk_buff *__skb_try_recv_from_queue(struct sock *sk, int *peeked, int *off, int *err, struct sk_buff **last) { + bool peek_at_off = false; struct sk_buff *skb; - int _off = *off; + int _off = 0; + + if (unlikely(flags & MSG_PEEK && *off >= 0)) { + peek_at_off = true; + _off = *off; + } *last = queue->prev; skb_queue_walk(queue, skb) { if (flags & MSG_PEEK) { - if (_off >= skb->len && (skb->len || _off || - skb->peeked)) { + if (peek_at_off && _off >= skb->len && + (_off || skb->peeked)) { _off -= skb->len; continue; } @@ -356,7 +362,7 @@ int __sk_queue_drop_skb(struct sock *sk, struct sk_buff_head *sk_queue, if (flags & MSG_PEEK) { err = -ENOENT; spin_lock_bh(&sk_queue->lock); - if (skb == skb_peek(sk_queue)) { + if (skb->next) { __skb_unlink(skb, sk_queue); refcount_dec(&skb->users); if (destructor) diff --git a/net/core/dev.c b/net/core/dev.c index 8515f8fe0460..86b4b0a79e7a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2739,7 +2739,7 @@ static inline bool skb_needs_check(struct sk_buff *skb, bool tx_path) { if (tx_path) return skb->ip_summed != CHECKSUM_PARTIAL && - skb->ip_summed != CHECKSUM_NONE; + skb->ip_summed != CHECKSUM_UNNECESSARY; return skb->ip_summed == CHECKSUM_NONE; } @@ -5289,6 +5289,7 @@ static void busy_poll_stop(struct napi_struct *napi, void *have_poll_lock) * Ideally, a new ndo_busy_poll_stop() could avoid another round. */ rc = napi->poll(napi, BUSY_POLL_BUDGET); + trace_napi_poll(napi, rc, BUSY_POLL_BUDGET); netpoll_poll_unlock(have_poll_lock); if (rc == BUSY_POLL_BUDGET) __napi_schedule(napi); @@ -5667,12 +5668,13 @@ EXPORT_SYMBOL(netdev_has_upper_dev_all_rcu); * Find out if a device is linked to an upper device and return true in case * it is. The caller must hold the RTNL lock. */ -static bool netdev_has_any_upper_dev(struct net_device *dev) +bool netdev_has_any_upper_dev(struct net_device *dev) { ASSERT_RTNL(); return !list_empty(&dev->adj_list.upper); } +EXPORT_SYMBOL(netdev_has_any_upper_dev); /** * netdev_master_upper_dev_get - Get master upper device diff --git a/net/core/dev_ioctl.c b/net/core/dev_ioctl.c index 06b147d7d9e2..709a4e6fb447 100644 --- a/net/core/dev_ioctl.c +++ b/net/core/dev_ioctl.c @@ -263,6 +263,8 @@ static int dev_ifsioc(struct net *net, struct ifreq *ifr, unsigned int cmd) return dev_set_mtu(dev, ifr->ifr_mtu); case SIOCSIFHWADDR: + if (dev->addr_len > sizeof(struct sockaddr)) + return -EINVAL; return dev_set_mac_address(dev, &ifr->ifr_hwaddr); case SIOCSIFHWBROADCAST: diff --git a/net/core/filter.c b/net/core/filter.c index f44fc22fd45a..169974998c76 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2836,15 +2836,12 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, sk->sk_prot->setsockopt == tcp_setsockopt) { if (optname == TCP_CONGESTION) { char name[TCP_CA_NAME_MAX]; + bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN; strncpy(name, optval, min_t(long, optlen, TCP_CA_NAME_MAX-1)); name[TCP_CA_NAME_MAX-1] = 0; - ret = tcp_set_congestion_control(sk, name, false); - if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN) - /* replacing an existing ca */ - tcp_reinit_congestion_control(sk, - inet_csk(sk)->icsk_ca_ops); + ret = tcp_set_congestion_control(sk, name, false, reinit); } else { struct tcp_sock *tp = tcp_sk(sk); @@ -2872,7 +2869,6 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, ret = -EINVAL; } } - ret = -EINVAL; #endif } else { ret = -EINVAL; @@ -3505,6 +3501,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, bpf_target_off(struct sk_buff, tc_index, 2, target_size)); #else + *target_size = 2; if (type == BPF_WRITE) *insn++ = BPF_MOV64_REG(si->dst_reg, si->dst_reg); else @@ -3520,6 +3517,7 @@ static u32 bpf_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_JMP_IMM(BPF_JGE, si->dst_reg, MIN_NAPI_ID, 1); *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #else + *target_size = 4; *insn++ = BPF_MOV64_IMM(si->dst_reg, 0); #endif break; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 8357f164c660..912731bed7b7 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -666,7 +666,7 @@ int netpoll_setup(struct netpoll *np) int err; rtnl_lock(); - if (np->dev_name) { + if (np->dev_name[0]) { struct net *net = current->nsproxy->net_ns; ndev = __dev_get_by_name(net, np->dev_name); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index f990eb8b30a9..e07556606284 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1363,18 +1363,20 @@ struct sk_buff *skb_copy_expand(const struct sk_buff *skb, EXPORT_SYMBOL(skb_copy_expand); /** - * skb_pad - zero pad the tail of an skb + * __skb_pad - zero pad the tail of an skb * @skb: buffer to pad * @pad: space to pad + * @free_on_error: free buffer on error * * Ensure that a buffer is followed by a padding area that is zero * filled. Used by network drivers which may DMA or transfer data * beyond the buffer end onto the wire. * - * May return error in out of memory cases. The skb is freed on error. + * May return error in out of memory cases. The skb is freed on error + * if @free_on_error is true. */ -int skb_pad(struct sk_buff *skb, int pad) +int __skb_pad(struct sk_buff *skb, int pad, bool free_on_error) { int err; int ntail; @@ -1403,10 +1405,11 @@ int skb_pad(struct sk_buff *skb, int pad) return 0; free_skb: - kfree_skb(skb); + if (free_on_error) + kfree_skb(skb); return err; } -EXPORT_SYMBOL(skb_pad); +EXPORT_SYMBOL(__skb_pad); /** * pskb_put - add data to the tail of a potentially fragmented buffer diff --git a/net/dccp/feat.c b/net/dccp/feat.c index 1704948e6a12..f227f002c73d 100644 --- a/net/dccp/feat.c +++ b/net/dccp/feat.c @@ -1471,9 +1471,12 @@ int dccp_feat_init(struct sock *sk) * singleton values (which always leads to failure). * These settings can still (later) be overridden via sockopts. */ - if (ccid_get_builtin_ccids(&tx.val, &tx.len) || - ccid_get_builtin_ccids(&rx.val, &rx.len)) + if (ccid_get_builtin_ccids(&tx.val, &tx.len)) return -ENOBUFS; + if (ccid_get_builtin_ccids(&rx.val, &rx.len)) { + kfree(tx.val); + return -ENOBUFS; + } if (!dccp_feat_prefer(sysctl_dccp_tx_ccid, tx.val, tx.len) || !dccp_feat_prefer(sysctl_dccp_rx_ccid, rx.val, rx.len)) diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index f85d901f4e3f..1b202f16531f 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -631,6 +631,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + reqsk_put(req); return 0; drop_and_free: diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index c376af5bfdfb..1b58eac8aad3 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -380,6 +380,7 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) goto drop_and_free; inet_csk_reqsk_queue_hash_add(sk, req, DCCP_TIMEOUT_INIT); + reqsk_put(req); return 0; drop_and_free: diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 9fe25bf63296..b68168fcc06a 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -24,6 +24,7 @@ #include <net/checksum.h> #include <net/inet_sock.h> +#include <net/inet_common.h> #include <net/sock.h> #include <net/xfrm.h> @@ -170,6 +171,15 @@ const char *dccp_packet_name(const int type) EXPORT_SYMBOL_GPL(dccp_packet_name); +static void dccp_sk_destruct(struct sock *sk) +{ + struct dccp_sock *dp = dccp_sk(sk); + + ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); + dp->dccps_hc_tx_ccid = NULL; + inet_sock_destruct(sk); +} + int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) { struct dccp_sock *dp = dccp_sk(sk); @@ -179,6 +189,7 @@ int dccp_init_sock(struct sock *sk, const __u8 ctl_sock_initialized) icsk->icsk_syn_retries = sysctl_dccp_request_retries; sk->sk_state = DCCP_CLOSED; sk->sk_write_space = dccp_write_space; + sk->sk_destruct = dccp_sk_destruct; icsk->icsk_sync_mss = dccp_sync_mss; dp->dccps_mss_cache = 536; dp->dccps_rate_last = jiffies; @@ -201,10 +212,7 @@ void dccp_destroy_sock(struct sock *sk) { struct dccp_sock *dp = dccp_sk(sk); - /* - * DCCP doesn't use sk_write_queue, just sk_send_head - * for retransmissions - */ + __skb_queue_purge(&sk->sk_write_queue); if (sk->sk_send_head != NULL) { kfree_skb(sk->sk_send_head); sk->sk_send_head = NULL; @@ -222,8 +230,7 @@ void dccp_destroy_sock(struct sock *sk) dp->dccps_hc_rx_ackvec = NULL; } ccid_hc_rx_delete(dp->dccps_hc_rx_ccid, sk); - ccid_hc_tx_delete(dp->dccps_hc_tx_ccid, sk); - dp->dccps_hc_rx_ccid = dp->dccps_hc_tx_ccid = NULL; + dp->dccps_hc_rx_ccid = NULL; /* clean up feature negotiation state */ dccp_feat_list_purge(&dp->dccps_featneg); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 56e46090526b..20bc9c56fca0 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -509,21 +509,22 @@ static int dsa_cpu_parse(struct dsa_port *port, u32 index, dst->cpu_dp->netdev = ethernet_dev; } + /* Initialize cpu_port_mask now for drv->setup() + * to have access to a correct value, just like what + * net/dsa/dsa.c::dsa_switch_setup_one does. + */ + ds->cpu_port_mask |= BIT(index); + tag_protocol = ds->ops->get_tag_protocol(ds); dst->tag_ops = dsa_resolve_tag_protocol(tag_protocol); if (IS_ERR(dst->tag_ops)) { dev_warn(ds->dev, "No tagger for this switch\n"); + ds->cpu_port_mask &= ~BIT(index); return PTR_ERR(dst->tag_ops); } dst->rcv = dst->tag_ops->rcv; - /* Initialize cpu_port_mask now for drv->setup() - * to have access to a correct value, just like what - * net/dsa/dsa.c::dsa_switch_setup_one does. - */ - ds->cpu_port_mask |= BIT(index); - return 0; } @@ -576,7 +577,7 @@ static int dsa_dst_parse(struct dsa_switch_tree *dst) return err; } - if (!dst->cpu_dp->netdev) { + if (!dst->cpu_dp) { pr_warn("Tree has no master device\n"); return -EINVAL; } diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c index fab41de8e983..fcd90f79458e 100644 --- a/net/dsa/tag_ksz.c +++ b/net/dsa/tag_ksz.c @@ -42,6 +42,10 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) padlen = (skb->len >= ETH_ZLEN) ? 0 : ETH_ZLEN - skb->len; if (skb_tailroom(skb) >= padlen + KSZ_INGRESS_TAG_LEN) { + /* Let dsa_slave_xmit() free skb */ + if (__skb_put_padto(skb, skb->len + padlen, false)) + return NULL; + nskb = skb; } else { nskb = alloc_skb(NET_IP_ALIGN + skb->len + @@ -56,12 +60,15 @@ static struct sk_buff *ksz_xmit(struct sk_buff *skb, struct net_device *dev) skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head); skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len)); - kfree_skb(skb); - } - /* skb is freed when it fails */ - if (skb_put_padto(nskb, nskb->len + padlen)) - return NULL; + /* Let skb_put_padto() free nskb, and let dsa_slave_xmit() free + * skb + */ + if (skb_put_padto(nskb, nskb->len + padlen)) + return NULL; + + consume_skb(skb); + } tag = skb_put(nskb, KSZ_INGRESS_TAG_LEN); tag[0] = 0; diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index b09e56214005..9c7b1d74a5c6 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -40,7 +40,7 @@ static struct sk_buff *trailer_xmit(struct sk_buff *skb, struct net_device *dev) skb_set_network_header(nskb, skb_network_header(skb) - skb->head); skb_set_transport_header(nskb, skb_transport_header(skb) - skb->head); skb_copy_and_csum_dev(skb, skb_put(nskb, skb->len)); - kfree_skb(skb); + consume_skb(skb); if (padlen) { skb_put_zero(nskb, padlen); diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c index 4e7bdb213cd0..172d8309f89e 100644 --- a/net/hsr/hsr_device.c +++ b/net/hsr/hsr_device.c @@ -314,7 +314,8 @@ static void send_hsr_supervision_frame(struct hsr_port *master, hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload)); ether_addr_copy(hsr_sp->MacAddressA, master->dev->dev_addr); - skb_put_padto(skb, ETH_ZLEN + HSR_HLEN); + if (skb_put_padto(skb, ETH_ZLEN + HSR_HLEN)) + return; hsr_forward_skb(skb, master); return; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 76c2077c3f5b..2e548eca3489 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1731,6 +1731,13 @@ static __net_init int inet_init_net(struct net *net) net->ipv4.sysctl_ip_prot_sock = PROT_SOCK; #endif + /* Some igmp sysctl, whose values are always used */ + net->ipv4.sysctl_igmp_max_memberships = 20; + net->ipv4.sysctl_igmp_max_msf = 10; + /* IGMP reports for link-local multicast groups are enabled by default */ + net->ipv4.sysctl_igmp_llm_reports = 1; + net->ipv4.sysctl_igmp_qrv = 2; + return 0; } diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index c4c6e1969ed0..2ae8f54cb321 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1523,9 +1523,17 @@ unsigned char *cipso_v4_optptr(const struct sk_buff *skb) int taglen; for (optlen = iph->ihl*4 - sizeof(struct iphdr); optlen > 0; ) { - if (optptr[0] == IPOPT_CIPSO) + switch (optptr[0]) { + case IPOPT_CIPSO: return optptr; - taglen = optptr[1]; + case IPOPT_END: + return NULL; + case IPOPT_NOOP: + taglen = 1; + break; + default: + taglen = optptr[1]; + } optlen -= taglen; optptr += taglen; } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 0cbee0a666ff..df68963dc90a 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -258,7 +258,7 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * esp_output_udp_encap(x, skb, esp); if (!skb_cloned(skb)) { - if (tailen <= skb_availroom(skb)) { + if (tailen <= skb_tailroom(skb)) { nfrags = 1; trailer = skb; tail = skb_tail_pointer(trailer); @@ -292,8 +292,6 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * kunmap_atomic(vaddr); - spin_unlock_bh(&x->lock); - nfrags = skb_shinfo(skb)->nr_frags; __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, @@ -301,6 +299,9 @@ int esp_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * skb_shinfo(skb)->nr_frags = ++nfrags; pfrag->offset = pfrag->offset + allocsize; + + spin_unlock_bh(&x->lock); + nfrags++; skb->len += tailen; @@ -381,7 +382,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * (unsigned char *)esph - skb->data, assoclen + ivlen + esp->clen + alen); if (unlikely(err < 0)) - goto error; + goto error_free; if (!esp->inplace) { int allocsize; @@ -392,7 +393,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * spin_lock_bh(&x->lock); if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { spin_unlock_bh(&x->lock); - goto error; + goto error_free; } skb_shinfo(skb)->nr_frags = 1; @@ -409,7 +410,7 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * (unsigned char *)esph - skb->data, assoclen + ivlen + esp->clen + alen); if (unlikely(err < 0)) - goto error; + goto error_free; } if ((x->props.flags & XFRM_STATE_ESN)) @@ -442,8 +443,9 @@ int esp_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info * if (sg != dsg) esp_ssg_unref(x, tmp); - kfree(tmp); +error_free: + kfree(tmp); error: return err; } @@ -695,8 +697,10 @@ skip_cow: sg_init_table(sg, nfrags); err = skb_to_sgvec(skb, sg, 0, skb->len); - if (unlikely(err < 0)) + if (unlikely(err < 0)) { + kfree(tmp); goto out; + } skb->ip_summed = CHECKSUM_NONE; diff --git a/net/ipv4/esp4_offload.c b/net/ipv4/esp4_offload.c index e0666016a764..50112324fa5c 100644 --- a/net/ipv4/esp4_offload.c +++ b/net/ipv4/esp4_offload.c @@ -257,7 +257,7 @@ static int esp_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features_ esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); err = esp_output_tail(x, skb, &esp); - if (err < 0) + if (err) return err; secpath_reset(skb); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 222100103808..ec3a9ce281a6 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1083,15 +1083,17 @@ struct fib_info *fib_create_info(struct fib_config *cfg, fi = kzalloc(sizeof(*fi)+nhs*sizeof(struct fib_nh), GFP_KERNEL); if (!fi) goto failure; - fib_info_cnt++; if (cfg->fc_mx) { fi->fib_metrics = kzalloc(sizeof(*fi->fib_metrics), GFP_KERNEL); - if (!fi->fib_metrics) - goto failure; + if (unlikely(!fi->fib_metrics)) { + kfree(fi); + return ERR_PTR(err); + } atomic_set(&fi->fib_metrics->refcnt, 1); - } else + } else { fi->fib_metrics = (struct dst_metrics *)&dst_default_metrics; - + } + fib_info_cnt++; fi->fib_net = net; fi->fib_protocol = cfg->fc_protocol; fi->fib_scope = cfg->fc_scope; @@ -1452,7 +1454,7 @@ static int call_fib_nh_notifiers(struct fib_nh *fib_nh, return call_fib_notifiers(dev_net(fib_nh->nh_dev), event_type, &info.info); case FIB_EVENT_NH_DEL: - if ((IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && + if ((in_dev && IN_DEV_IGNORE_ROUTES_WITH_LINKDOWN(in_dev) && fib_nh->nh_flags & RTNH_F_LINKDOWN) || (fib_nh->nh_flags & RTNH_F_DEAD)) return call_fib_notifiers(dev_net(fib_nh->nh_dev), diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 8e0257d01200..1540db65241a 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -450,6 +450,7 @@ out_unlock: out: NAPI_GRO_CB(skb)->flush |= flush; skb_gro_remcsum_cleanup(skb, &grc); + skb->remcsum_offload = 0; return pp; } diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 28f14afd0dd3..caf2f1101d02 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1007,10 +1007,18 @@ int igmp_rcv(struct sk_buff *skb) { /* This basically follows the spec line by line -- see RFC1112 */ struct igmphdr *ih; - struct in_device *in_dev = __in_dev_get_rcu(skb->dev); + struct net_device *dev = skb->dev; + struct in_device *in_dev; int len = skb->len; bool dropped = true; + if (netif_is_l3_master(dev)) { + dev = dev_get_by_index_rcu(dev_net(dev), IPCB(skb)->iif); + if (!dev) + goto drop; + } + + in_dev = __in_dev_get_rcu(dev); if (!in_dev) goto drop; @@ -2974,12 +2982,6 @@ static int __net_init igmp_net_init(struct net *net) goto out_sock; } - /* Sysctl initialization */ - net->ipv4.sysctl_igmp_max_memberships = 20; - net->ipv4.sysctl_igmp_max_msf = 10; - /* IGMP reports for link-local multicast groups are enabled by default */ - net->ipv4.sysctl_igmp_llm_reports = 1; - net->ipv4.sysctl_igmp_qrv = 2; return 0; out_sock: diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 50c74cd890bc..e153c40c2436 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -965,11 +965,12 @@ static int __ip_append_data(struct sock *sk, csummode = CHECKSUM_PARTIAL; cork->length += length; - if ((((length + (skb ? skb->len : fragheaderlen)) > mtu) || - (skb && skb_is_gso(skb))) && + if ((skb && skb_is_gso(skb)) || + (((length + (skb ? skb->len : fragheaderlen)) > mtu) && + (skb_queue_len(queue) <= 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && - (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx) { + (sk->sk_type == SOCK_DGRAM) && !sk->sk_no_check_tx)) { err = ip_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, transhdrlen, maxfraglen, flags); @@ -1288,6 +1289,7 @@ ssize_t ip_append_page(struct sock *sk, struct flowi4 *fl4, struct page *page, return -EINVAL; if ((size + skb->len > mtu) && + (skb_queue_len(&sk->sk_write_queue) == 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO)) { if (skb->ip_summed != CHECKSUM_PARTIAL) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 0bc3c3d73e61..9e9d9afd18f7 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -268,14 +268,14 @@ unsigned int arpt_do_table(struct sk_buff *skb, acpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, &acpar); - /* Target might have changed stuff. */ - arp = arp_hdr(skb); - - if (verdict == XT_CONTINUE) + if (verdict == XT_CONTINUE) { + /* Target might have changed stuff. */ + arp = arp_hdr(skb); e = arpt_next_entry(e); - else + } else { /* Verdict */ break; + } } while (!acpar.hotdrop); xt_write_recseq_end(addend); local_bh_enable(); diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 2a55a40211cb..622ed2887cd5 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -352,13 +352,14 @@ ipt_do_table(struct sk_buff *skb, acpar.targinfo = t->data; verdict = t->u.kernel.target->target(skb, &acpar); - /* Target might have changed stuff. */ - ip = ip_hdr(skb); - if (verdict == XT_CONTINUE) + if (verdict == XT_CONTINUE) { + /* Target might have changed stuff. */ + ip = ip_hdr(skb); e = ipt_next_entry(e); - else + } else { /* Verdict */ break; + } } while (!acpar.hotdrop); xt_write_recseq_end(addend); diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 7d72decb80f9..efaa04dcc80e 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -117,7 +117,8 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c) * functions are also incrementing the refcount on their own, * so it's safe to remove the entry even if it's in use. */ #ifdef CONFIG_PROC_FS - proc_remove(c->pde); + if (cn->procdir) + proc_remove(c->pde); #endif return; } @@ -815,6 +816,7 @@ static void clusterip_net_exit(struct net *net) #ifdef CONFIG_PROC_FS struct clusterip_net *cn = net_generic(net, clusterip_net_id); proc_remove(cn->procdir); + cn->procdir = NULL; #endif nf_unregister_net_hook(net, &cip_arp_ops); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 0383e66f59bc..2331de20ca50 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1267,7 +1267,7 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst) if (mtu) return mtu; - mtu = dst->dev->mtu; + mtu = READ_ONCE(dst->dev->mtu); if (unlikely(dst_metric_locked(dst, RTAX_MTU))) { if (rt->rt_uses_gateway && mtu > 576) @@ -2750,26 +2750,34 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, err = 0; if (IS_ERR(rt)) err = PTR_ERR(rt); + else + skb_dst_set(skb, &rt->dst); } if (err) goto errout_free; - skb_dst_set(skb, &rt->dst); if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; if (rtm->rtm_flags & RTM_F_LOOKUP_TABLE) table_id = rt->rt_table_id; - if (rtm->rtm_flags & RTM_F_FIB_MATCH) + if (rtm->rtm_flags & RTM_F_FIB_MATCH) { + if (!res.fi) { + err = fib_props[res.type].error; + if (!err) + err = -EHOSTUNREACH; + goto errout_free; + } err = fib_dump_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, table_id, rt->rt_type, res.prefix, res.prefixlen, fl4.flowi4_tos, res.fi, 0); - else + } else { err = rt_fill_info(net, dst, src, table_id, &fl4, skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq); + } if (err < 0) goto errout_free; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 71ce33decd97..a3e91b552edc 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2481,7 +2481,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level, name[val] = 0; lock_sock(sk); - err = tcp_set_congestion_control(sk, name, true); + err = tcp_set_congestion_control(sk, name, true, true); release_sock(sk); return err; } diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index fde983f6376b..421ea1b918da 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk) INET_ECN_dontxmit(sk); } -void tcp_reinit_congestion_control(struct sock *sk, - const struct tcp_congestion_ops *ca) +static void tcp_reinit_congestion_control(struct sock *sk, + const struct tcp_congestion_ops *ca) { struct inet_connection_sock *icsk = inet_csk(sk); @@ -338,7 +338,7 @@ out: * tcp_reinit_congestion_control (if the current congestion control was * already initialized. */ -int tcp_set_congestion_control(struct sock *sk, const char *name, bool load) +int tcp_set_congestion_control(struct sock *sk, const char *name, bool load, bool reinit) { struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_congestion_ops *ca; @@ -360,9 +360,18 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load) if (!ca) { err = -ENOENT; } else if (!load) { - icsk->icsk_ca_ops = ca; - if (!try_module_get(ca->owner)) + const struct tcp_congestion_ops *old_ca = icsk->icsk_ca_ops; + + if (try_module_get(ca->owner)) { + if (reinit) { + tcp_reinit_congestion_control(sk, ca); + } else { + icsk->icsk_ca_ops = ca; + module_put(old_ca->owner); + } + } else { err = -EBUSY; + } } else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) { err = -EPERM; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2920e0cb09f8..bab7f0493098 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -107,6 +107,7 @@ int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_ORIG_SACK_ACKED 0x200 /* Never retransmitted data are (s)acked */ #define FLAG_SND_UNA_ADVANCED 0x400 /* Snd_una was changed (!= FLAG_DATA_ACKED) */ #define FLAG_DSACKING_ACK 0x800 /* SACK blocks contained D-SACK info */ +#define FLAG_SET_XMIT_TIMER 0x1000 /* Set TLP or RTO timer */ #define FLAG_SACK_RENEGING 0x2000 /* snd_una advanced to a sacked seq */ #define FLAG_UPDATE_TS_RECENT 0x4000 /* tcp_replace_ts_recent() */ #define FLAG_NO_CHALLENGE_ACK 0x8000 /* do not call tcp_send_challenge_ack() */ @@ -2520,8 +2521,8 @@ static inline void tcp_end_cwnd_reduction(struct sock *sk) return; /* Reset cwnd to ssthresh in CWR or Recovery (unless it's undone) */ - if (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || - (tp->undo_marker && tp->snd_ssthresh < TCP_INFINITE_SSTHRESH)) { + if (tp->snd_ssthresh < TCP_INFINITE_SSTHRESH && + (inet_csk(sk)->icsk_ca_state == TCP_CA_CWR || tp->undo_marker)) { tp->snd_cwnd = tp->snd_ssthresh; tp->snd_cwnd_stamp = tcp_jiffies32; } @@ -3004,21 +3005,24 @@ void tcp_rearm_rto(struct sock *sk) /* Offset the time elapsed after installing regular RTO */ if (icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT || icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { - struct sk_buff *skb = tcp_write_queue_head(sk); - u64 rto_time_stamp = skb->skb_mstamp + - jiffies_to_usecs(rto); - s64 delta_us = rto_time_stamp - tp->tcp_mstamp; + s64 delta_us = tcp_rto_delta_us(sk); /* delta_us may not be positive if the socket is locked * when the retrans timer fires and is rescheduled. */ - if (delta_us > 0) - rto = usecs_to_jiffies(delta_us); + rto = usecs_to_jiffies(max_t(int, delta_us, 1)); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto, TCP_RTO_MAX); } } +/* Try to schedule a loss probe; if that doesn't work, then schedule an RTO. */ +static void tcp_set_xmit_timer(struct sock *sk) +{ + if (!tcp_schedule_loss_probe(sk)) + tcp_rearm_rto(sk); +} + /* If we get here, the whole TSO packet has not been acked. */ static u32 tcp_tso_acked(struct sock *sk, struct sk_buff *skb) { @@ -3180,7 +3184,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, ca_rtt_us, sack->rate); if (flag & FLAG_ACKED) { - tcp_rearm_rto(sk); + flag |= FLAG_SET_XMIT_TIMER; /* set TLP or RTO timer */ if (unlikely(icsk->icsk_mtup.probe_size && !after(tp->mtu_probe.probe_seq_end, tp->snd_una))) { tcp_mtup_probe_success(sk); @@ -3208,7 +3212,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, int prior_fackets, * after when the head was last (re)transmitted. Otherwise the * timeout may continue to extend in loss recovery. */ - tcp_rearm_rto(sk); + flag |= FLAG_SET_XMIT_TIMER; /* set TLP or RTO timer */ } if (icsk->icsk_ca_ops->pkts_acked) { @@ -3580,9 +3584,6 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (after(ack, tp->snd_nxt)) goto invalid_ack; - if (icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) - tcp_rearm_rto(sk); - if (after(ack, prior_snd_una)) { flag |= FLAG_SND_UNA_ADVANCED; icsk->icsk_retransmits = 0; @@ -3647,18 +3648,20 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) flag |= tcp_clean_rtx_queue(sk, prior_fackets, prior_snd_una, &acked, &sack_state); + if (tp->tlp_high_seq) + tcp_process_tlp_ack(sk, ack, flag); + /* If needed, reset TLP/RTO timer; RACK may later override this. */ + if (flag & FLAG_SET_XMIT_TIMER) + tcp_set_xmit_timer(sk); + if (tcp_ack_is_dubious(sk, flag)) { is_dupack = !(flag & (FLAG_SND_UNA_ADVANCED | FLAG_NOT_DUP)); tcp_fastretrans_alert(sk, acked, is_dupack, &flag, &rexmit); } - if (tp->tlp_high_seq) - tcp_process_tlp_ack(sk, ack, flag); if ((flag & FLAG_FORWARD_PROGRESS) || !(flag & FLAG_NOT_DUP)) sk_dst_confirm(sk); - if (icsk->icsk_pending == ICSK_TIME_RETRANS) - tcp_schedule_loss_probe(sk); delivered = tp->delivered - delivered; /* freshly ACKed or SACKed */ lost = tp->lost - lost; /* freshly marked lost */ tcp_rate_gen(sk, delivered, lost, sack_state.rate); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a20e7f03d5f7..e9252c7df809 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1722,6 +1722,8 @@ process: */ sock_hold(sk); refcounted = true; + if (tcp_filter(sk, skb)) + goto discard_and_relse; nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); @@ -1729,8 +1731,6 @@ process: } if (nsk == sk) { reqsk_put(req); - } else if (tcp_filter(sk, skb)) { - goto discard_and_relse; } else if (tcp_child_process(sk, nsk, skb)) { tcp_v4_send_reset(nsk, skb); goto discard_and_relse; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4e985dea1dd2..b7661a68d498 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2202,9 +2202,10 @@ static bool tcp_small_queue_check(struct sock *sk, const struct sk_buff *skb, static void tcp_chrono_set(struct tcp_sock *tp, const enum tcp_chrono new) { const u32 now = tcp_jiffies32; + enum tcp_chrono old = tp->chrono_type; - if (tp->chrono_type > TCP_CHRONO_UNSPEC) - tp->chrono_stat[tp->chrono_type - 1] += now - tp->chrono_start; + if (old > TCP_CHRONO_UNSPEC) + tp->chrono_stat[old - 1] += now - tp->chrono_start; tp->chrono_start = now; tp->chrono_type = new; } @@ -2376,24 +2377,15 @@ bool tcp_schedule_loss_probe(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); struct tcp_sock *tp = tcp_sk(sk); - u32 timeout, tlp_time_stamp, rto_time_stamp; u32 rtt = usecs_to_jiffies(tp->srtt_us >> 3); + u32 timeout, rto_delta_us; - /* No consecutive loss probes. */ - if (WARN_ON(icsk->icsk_pending == ICSK_TIME_LOSS_PROBE)) { - tcp_rearm_rto(sk); - return false; - } /* Don't do any loss probe on a Fast Open connection before 3WHS * finishes. */ if (tp->fastopen_rsk) return false; - /* TLP is only scheduled when next timer event is RTO. */ - if (icsk->icsk_pending != ICSK_TIME_RETRANS) - return false; - /* Schedule a loss probe in 2*RTT for SACK capable connections * in Open state, that are either limited by cwnd or application. */ @@ -2416,14 +2408,10 @@ bool tcp_schedule_loss_probe(struct sock *sk) (rtt + (rtt >> 1) + TCP_DELACK_MAX)); timeout = max_t(u32, timeout, msecs_to_jiffies(10)); - /* If RTO is shorter, just schedule TLP in its place. */ - tlp_time_stamp = tcp_jiffies32 + timeout; - rto_time_stamp = (u32)inet_csk(sk)->icsk_timeout; - if ((s32)(tlp_time_stamp - rto_time_stamp) > 0) { - s32 delta = rto_time_stamp - tcp_jiffies32; - if (delta > 0) - timeout = delta; - } + /* If the RTO formula yields an earlier time, then use that time. */ + rto_delta_us = tcp_rto_delta_us(sk); /* How far in future is RTO? */ + if (rto_delta_us > 0) + timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us)); inet_csk_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, TCP_RTO_MAX); @@ -3448,6 +3436,10 @@ int tcp_connect(struct sock *sk) int err; tcp_call_bpf(sk, BPF_SOCK_OPS_TCP_CONNECT_CB); + + if (inet_csk(sk)->icsk_af_ops->rebuild_header(sk)) + return -EHOSTUNREACH; /* Routing failure or similar. */ + tcp_connect_init(sk); if (unlikely(tp->repair)) { diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index c0feeeef962a..e906014890b6 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -652,7 +652,8 @@ static void tcp_keepalive_timer (unsigned long data) goto death; } - if (!sock_flag(sk, SOCK_KEEPOPEN) || sk->sk_state == TCP_CLOSE) + if (!sock_flag(sk, SOCK_KEEPOPEN) || + ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_SYN_SENT))) goto out; elapsed = keepalive_time_when(tp); diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 2417f55374c5..6bb9e14c710a 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -122,14 +122,14 @@ int tcp_set_ulp(struct sock *sk, const char *name) ulp_ops = __tcp_ulp_find_autoload(name); if (!ulp_ops) - err = -ENOENT; - else - err = ulp_ops->init(sk); + return -ENOENT; - if (err) - goto out; + err = ulp_ops->init(sk); + if (err) { + module_put(ulp_ops->owner); + return err; + } icsk->icsk_ulp_ops = ulp_ops; - out: - return err; + return 0; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b057653ceca9..38e795e0c4bf 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -802,7 +802,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4) if (is_udplite) /* UDP-Lite */ csum = udplite_csum(skb); - else if (sk->sk_no_check_tx) { /* UDP csum disabled */ + else if (sk->sk_no_check_tx && !skb_is_gso(skb)) { /* UDP csum off */ skb->ip_summed = CHECKSUM_NONE; goto send; @@ -1163,34 +1163,32 @@ out: return ret; } -#if BITS_PER_LONG == 64 +#define UDP_SKB_IS_STATELESS 0x80000000 + static void udp_set_dev_scratch(struct sk_buff *skb) { - struct udp_dev_scratch *scratch; + struct udp_dev_scratch *scratch = udp_skb_scratch(skb); BUILD_BUG_ON(sizeof(struct udp_dev_scratch) > sizeof(long)); - scratch = (struct udp_dev_scratch *)&skb->dev_scratch; - scratch->truesize = skb->truesize; + scratch->_tsize_state = skb->truesize; +#if BITS_PER_LONG == 64 scratch->len = skb->len; scratch->csum_unnecessary = !!skb_csum_unnecessary(skb); scratch->is_linear = !skb_is_nonlinear(skb); +#endif + if (likely(!skb->_skb_refdst && !skb_sec_path(skb))) + scratch->_tsize_state |= UDP_SKB_IS_STATELESS; } static int udp_skb_truesize(struct sk_buff *skb) { - return ((struct udp_dev_scratch *)&skb->dev_scratch)->truesize; -} -#else -static void udp_set_dev_scratch(struct sk_buff *skb) -{ - skb->dev_scratch = skb->truesize; + return udp_skb_scratch(skb)->_tsize_state & ~UDP_SKB_IS_STATELESS; } -static int udp_skb_truesize(struct sk_buff *skb) +static bool udp_skb_has_head_state(struct sk_buff *skb) { - return skb->dev_scratch; + return !(udp_skb_scratch(skb)->_tsize_state & UDP_SKB_IS_STATELESS); } -#endif /* fully reclaim rmem/fwd memory allocated for skb */ static void udp_rmem_release(struct sock *sk, int size, int partial, @@ -1388,10 +1386,10 @@ void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) unlock_sock_fast(sk, slow); } - /* we cleared the head states previously only if the skb lacks any IP - * options, see __udp_queue_rcv_skb(). + /* In the more common cases we cleared the head states previously, + * see __udp_queue_rcv_skb(). */ - if (unlikely(IPCB(skb)->opt.optlen > 0)) + if (unlikely(udp_skb_has_head_state(skb))) skb_release_head_state(skb); consume_stateless_skb(skb); } @@ -1576,7 +1574,8 @@ int udp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, return ip_recv_error(sk, msg, len, addr_len); try_again: - peeking = off = sk_peek_offset(sk, flags); + peeking = flags & MSG_PEEK; + off = sk_peek_offset(sk, flags); skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; @@ -1784,11 +1783,11 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) sk_mark_napi_id_once(sk, skb); } - /* At recvmsg() time we need skb->dst to process IP options-related - * cmsg, elsewhere can we clear all pending head states while they are - * hot in the cache + /* At recvmsg() time we may access skb->dst or skb->sp depending on + * the IP options and the cmsg flags, elsewhere can we clear all + * pending head states while they are hot in the cache */ - if (likely(IPCB(skb)->opt.optlen == 0)) + if (likely(IPCB(skb)->opt.optlen == 0 && !skb_sec_path(skb))) skb_release_head_state(skb); rc = __udp_enqueue_schedule_skb(sk, skb); @@ -1811,8 +1810,7 @@ static int __udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) static struct static_key udp_encap_needed __read_mostly; void udp_encap_enable(void) { - if (!static_key_enabled(&udp_encap_needed)) - static_key_slow_inc(&udp_encap_needed); + static_key_enable(&udp_encap_needed); } EXPORT_SYMBOL(udp_encap_enable); @@ -1930,15 +1928,18 @@ drop: /* For TCP sockets, sk_rx_dst is protected by socket lock * For UDP, we use xchg() to guard against concurrent changes. */ -static void udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) +bool udp_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) { struct dst_entry *old; if (dst_hold_safe(dst)) { old = xchg(&sk->sk_rx_dst, dst); dst_release(old); + return old != dst; } + return false; } +EXPORT_SYMBOL(udp_sk_rx_dst_set); /* * Multicasts and broadcasts go to each listener. diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 781250151d40..0932c85b42af 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -235,7 +235,7 @@ static struct sk_buff *udp4_ufo_fragment(struct sk_buff *skb, if (uh->check == 0) uh->check = CSUM_MANGLED_0; - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = CHECKSUM_UNNECESSARY; /* If there is no outer header we can fake a checksum offload * due to the fact that we have already done the checksum in diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3c46e9513a31..936e9ab4dda5 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -5556,7 +5556,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) * our DAD process, so we don't need * to do it again */ - if (!(ifp->rt->rt6i_node)) + if (!rcu_access_pointer(ifp->rt->rt6i_node)) ip6_ins_rt(ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 9ed35473dcb5..ab64f367d11c 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -226,7 +226,7 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info int tailen = esp->tailen; if (!skb_cloned(skb)) { - if (tailen <= skb_availroom(skb)) { + if (tailen <= skb_tailroom(skb)) { nfrags = 1; trailer = skb; tail = skb_tail_pointer(trailer); @@ -260,8 +260,6 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info kunmap_atomic(vaddr); - spin_unlock_bh(&x->lock); - nfrags = skb_shinfo(skb)->nr_frags; __skb_fill_page_desc(skb, nfrags, page, pfrag->offset, @@ -269,6 +267,9 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info skb_shinfo(skb)->nr_frags = ++nfrags; pfrag->offset = pfrag->offset + allocsize; + + spin_unlock_bh(&x->lock); + nfrags++; skb->len += tailen; @@ -345,7 +346,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info (unsigned char *)esph - skb->data, assoclen + ivlen + esp->clen + alen); if (unlikely(err < 0)) - goto error; + goto error_free; if (!esp->inplace) { int allocsize; @@ -356,7 +357,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info spin_lock_bh(&x->lock); if (unlikely(!skb_page_frag_refill(allocsize, pfrag, GFP_ATOMIC))) { spin_unlock_bh(&x->lock); - goto error; + goto error_free; } skb_shinfo(skb)->nr_frags = 1; @@ -373,7 +374,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info (unsigned char *)esph - skb->data, assoclen + ivlen + esp->clen + alen); if (unlikely(err < 0)) - goto error; + goto error_free; } if ((x->props.flags & XFRM_STATE_ESN)) @@ -406,8 +407,9 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info if (sg != dsg) esp_ssg_unref(x, tmp); - kfree(tmp); +error_free: + kfree(tmp); error: return err; } diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c index f02f131f6435..1cf437f75b0b 100644 --- a/net/ipv6/esp6_offload.c +++ b/net/ipv6/esp6_offload.c @@ -286,7 +286,7 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb, netdev_features esp.seqno = cpu_to_be64(xo->seq.low + ((u64)xo->seq.hi << 32)); err = esp6_output_tail(x, skb, &esp); - if (err < 0) + if (err) return err; secpath_reset(skb); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 4996d734f1d2..3cec529c6113 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -756,6 +756,7 @@ static bool ipv6_hop_jumbo(struct sk_buff *skb, int optoff) if (pskb_trim_rcsum(skb, pkt_len + sizeof(struct ipv6hdr))) goto drop; + IP6CB(skb)->flags |= IP6SKB_JUMBOGRAM; return true; drop: diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ebb299cf72b7..e1c85bb4eac0 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -148,11 +148,23 @@ static struct fib6_node *node_alloc(void) return fn; } -static void node_free(struct fib6_node *fn) +static void node_free_immediate(struct fib6_node *fn) { kmem_cache_free(fib6_node_kmem, fn); } +static void node_free_rcu(struct rcu_head *head) +{ + struct fib6_node *fn = container_of(head, struct fib6_node, rcu); + + kmem_cache_free(fib6_node_kmem, fn); +} + +static void node_free(struct fib6_node *fn) +{ + call_rcu(&fn->rcu, node_free_rcu); +} + static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) { int cpu; @@ -601,9 +613,9 @@ insert_above: if (!in || !ln) { if (in) - node_free(in); + node_free_immediate(in); if (ln) - node_free(ln); + node_free_immediate(ln); return ERR_PTR(-ENOMEM); } @@ -877,7 +889,7 @@ add: rt->dst.rt6_next = iter; *ins = rt; - rt->rt6i_node = fn; + rcu_assign_pointer(rt->rt6i_node, fn); atomic_inc(&rt->rt6i_ref); if (!info->skip_notify) inet6_rt_notify(RTM_NEWROUTE, rt, info, nlflags); @@ -903,7 +915,7 @@ add: return err; *ins = rt; - rt->rt6i_node = fn; + rcu_assign_pointer(rt->rt6i_node, fn); rt->dst.rt6_next = iter->dst.rt6_next; atomic_inc(&rt->rt6i_ref); if (!info->skip_notify) @@ -914,6 +926,8 @@ add: } nsiblings = iter->rt6i_nsiblings; fib6_purge_rt(iter, fn, info->nl_net); + if (fn->rr_ptr == iter) + fn->rr_ptr = NULL; rt6_release(iter); if (nsiblings) { @@ -926,6 +940,8 @@ add: if (rt6_qualify_for_ecmp(iter)) { *ins = iter->dst.rt6_next; fib6_purge_rt(iter, fn, info->nl_net); + if (fn->rr_ptr == iter) + fn->rr_ptr = NULL; rt6_release(iter); nsiblings--; } else { @@ -1014,7 +1030,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, /* Create subtree root node */ sfn = node_alloc(); if (!sfn) - goto st_failure; + goto failure; sfn->leaf = info->nl_net->ipv6.ip6_null_entry; atomic_inc(&info->nl_net->ipv6.ip6_null_entry->rt6i_ref); @@ -1031,12 +1047,12 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (IS_ERR(sn)) { /* If it is failed, discard just allocated - root, and then (in st_failure) stale node + root, and then (in failure) stale node in main tree. */ - node_free(sfn); + node_free_immediate(sfn); err = PTR_ERR(sn); - goto st_failure; + goto failure; } /* Now link new subtree to main tree */ @@ -1051,7 +1067,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, if (IS_ERR(sn)) { err = PTR_ERR(sn); - goto st_failure; + goto failure; } } @@ -1092,18 +1108,17 @@ out: atomic_inc(&pn->leaf->rt6i_ref); } #endif - /* Always release dst as dst->__refcnt is guaranteed - * to be taken before entering this function - */ - dst_release_immediate(&rt->dst); + goto failure; } return err; -#ifdef CONFIG_IPV6_SUBTREES - /* Subtree creation failed, probably main tree node - is orphan. If it is, shoot it. +failure: + /* fn->leaf could be NULL if fn is an intermediate node and we + * failed to add the new route to it in both subtree creation + * failure and fib6_add_rt2node() failure case. + * In both cases, fib6_repair_tree() should be called to fix + * fn->leaf. */ -st_failure: if (fn && !(fn->fn_flags & (RTN_RTINFO|RTN_ROOT))) fib6_repair_tree(info->nl_net, fn); /* Always release dst as dst->__refcnt is guaranteed @@ -1111,7 +1126,6 @@ st_failure: */ dst_release_immediate(&rt->dst); return err; -#endif } /* @@ -1466,8 +1480,9 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, int fib6_del(struct rt6_info *rt, struct nl_info *info) { + struct fib6_node *fn = rcu_dereference_protected(rt->rt6i_node, + lockdep_is_held(&rt->rt6i_table->tb6_lock)); struct net *net = info->nl_net; - struct fib6_node *fn = rt->rt6i_node; struct rt6_info **rtp; #if RT6_DEBUG >= 2 @@ -1656,7 +1671,9 @@ static int fib6_clean_node(struct fib6_walker *w) if (res) { #if RT6_DEBUG >= 2 pr_debug("%s: del failed: rt=%p@%p err=%d\n", - __func__, rt, rt->rt6i_node, res); + __func__, rt, + rcu_access_pointer(rt->rt6i_node), + res); #endif continue; } @@ -1778,8 +1795,10 @@ static int fib6_age(struct rt6_info *rt, void *arg) } gc_args->more++; } else if (rt->rt6i_flags & RTF_CACHE) { + if (time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) + rt->dst.obsolete = DST_OBSOLETE_KILL; if (atomic_read(&rt->dst.__refcnt) == 1 && - time_after_eq(now, rt->dst.lastuse + gc_args->timeout)) { + rt->dst.obsolete == DST_OBSOLETE_KILL) { RT6_TRACE("aging clone %p\n", rt); return -1; } else if (rt->rt6i_flags & RTF_GATEWAY) { diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1422d6c08377..2dfe50d8d609 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -673,8 +673,6 @@ int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, *prevhdr = NEXTHDR_FRAGMENT; tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC); if (!tmp_hdr) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; } @@ -789,8 +787,6 @@ slow_path: frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) + hroom + troom, GFP_ATOMIC); if (!frag) { - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_FRAGFAILS); err = -ENOMEM; goto fail; } @@ -1385,11 +1381,12 @@ emsgsize: */ cork->length += length; - if ((((length + (skb ? skb->len : headersize)) > mtu) || - (skb && skb_is_gso(skb))) && + if ((skb && skb_is_gso(skb)) || + (((length + (skb ? skb->len : headersize)) > mtu) && + (skb_queue_len(queue) <= 1) && (sk->sk_protocol == IPPROTO_UDP) && (rt->dst.dev->features & NETIF_F_UFO) && !dst_xfrm(&rt->dst) && - (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk)) { + (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, exthdrlen, transhdrlen, mtu, flags, fl6); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 02d795fe3d7f..a5e466d4e093 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -242,7 +242,6 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, pktopt = xchg(&np->pktoptions, NULL); kfree_skb(pktopt); - sk->sk_destruct = inet_sock_destruct; /* * ... and add it to the refcnt debug socks count * in the new family. -acme diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index abb2c307fbe8..a338bbc33cf3 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -86,7 +86,6 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) while (offset <= packet_len) { struct ipv6_opt_hdr *exthdr; - unsigned int len; switch (**nexthdr) { @@ -112,10 +111,9 @@ int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr) exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) + offset); - len = ipv6_optlen(exthdr); - if (len + offset >= IPV6_MAXPLEN) + offset += ipv6_optlen(exthdr); + if (offset > IPV6_MAXPLEN) return -EINVAL; - offset += len; *nexthdr = &exthdr->nexthdr; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4d30c96a819d..2d0e7798c793 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -417,14 +417,11 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, struct net_device *loopback_dev = dev_net(dev)->loopback_dev; - if (dev != loopback_dev) { - if (idev && idev->dev == dev) { - struct inet6_dev *loopback_idev = - in6_dev_get(loopback_dev); - if (loopback_idev) { - rt->rt6i_idev = loopback_idev; - in6_dev_put(idev); - } + if (idev && idev->dev != loopback_dev) { + struct inet6_dev *loopback_idev = in6_dev_get(loopback_dev); + if (loopback_idev) { + rt->rt6i_idev = loopback_idev; + in6_dev_put(idev); } } } @@ -443,7 +440,8 @@ static bool rt6_check_expired(const struct rt6_info *rt) if (time_after(jiffies, rt->dst.expires)) return true; } else if (rt->dst.from) { - return rt6_check_expired((struct rt6_info *) rt->dst.from); + return rt->dst.obsolete != DST_OBSOLETE_FORCE_CHK || + rt6_check_expired((struct rt6_info *)rt->dst.from); } return false; } @@ -1292,7 +1290,9 @@ static void rt6_dst_from_metrics_check(struct rt6_info *rt) static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) { - if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) + u32 rt_cookie = 0; + + if (!rt6_get_cookie_safe(rt, &rt_cookie) || rt_cookie != cookie) return NULL; if (rt6_check_expired(rt)) @@ -1360,8 +1360,14 @@ static void ip6_link_failure(struct sk_buff *skb) if (rt->rt6i_flags & RTF_CACHE) { if (dst_hold_safe(&rt->dst)) ip6_del_rt(rt); - } else if (rt->rt6i_node && (rt->rt6i_flags & RTF_DEFAULT)) { - rt->rt6i_node->fn_sernum = -1; + } else { + struct fib6_node *fn; + + rcu_read_lock(); + fn = rcu_dereference(rt->rt6i_node); + if (fn && (rt->rt6i_flags & RTF_DEFAULT)) + fn->fn_sernum = -1; + rcu_read_unlock(); } } } @@ -1378,7 +1384,8 @@ static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) { return !(rt->rt6i_flags & RTF_CACHE) && - (rt->rt6i_flags & RTF_PCPU || rt->rt6i_node); + (rt->rt6i_flags & RTF_PCPU || + rcu_access_pointer(rt->rt6i_node)); } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, @@ -2351,6 +2358,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu if (on_link) nrt->rt6i_flags &= ~RTF_GATEWAY; + nrt->rt6i_protocol = RTPROT_REDIRECT; nrt->rt6i_gateway = *(struct in6_addr *)neigh->primary_key; if (ip6_ins_rt(nrt)) @@ -2461,6 +2469,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), + .fc_protocol = RTPROT_RA, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = net, @@ -2513,6 +2522,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), + .fc_protocol = RTPROT_RA, .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = dev_net(dev), @@ -3424,14 +3434,6 @@ static int rt6_fill_node(struct net *net, rtm->rtm_flags = 0; rtm->rtm_scope = RT_SCOPE_UNIVERSE; rtm->rtm_protocol = rt->rt6i_protocol; - if (rt->rt6i_flags & RTF_DYNAMIC) - rtm->rtm_protocol = RTPROT_REDIRECT; - else if (rt->rt6i_flags & RTF_ADDRCONF) { - if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ROUTEINFO)) - rtm->rtm_protocol = RTPROT_RA; - else - rtm->rtm_protocol = RTPROT_KERNEL; - } if (rt->rt6i_flags & RTF_CACHE) rtm->rtm_flags |= RTM_F_CLONED; @@ -3729,10 +3731,10 @@ static int ip6_route_dev_notify(struct notifier_block *this, /* NETDEV_UNREGISTER could be fired for multiple times by * netdev_wait_allrefs(). Make sure we only call this once. */ - in6_dev_put(net->ipv6.ip6_null_entry->rt6i_idev); + in6_dev_put_clear(&net->ipv6.ip6_null_entry->rt6i_idev); #ifdef CONFIG_IPV6_MULTIPLE_TABLES - in6_dev_put(net->ipv6.ip6_prohibit_entry->rt6i_idev); - in6_dev_put(net->ipv6.ip6_blk_hole_entry->rt6i_idev); + in6_dev_put_clear(&net->ipv6.ip6_prohibit_entry->rt6i_idev); + in6_dev_put_clear(&net->ipv6.ip6_blk_hole_entry->rt6i_idev); #endif } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2521690d62d6..206210125fd7 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1456,6 +1456,8 @@ process: } sock_hold(sk); refcounted = true; + if (tcp_filter(sk, skb)) + goto discard_and_relse; nsk = tcp_check_req(sk, skb, req, false); if (!nsk) { reqsk_put(req); @@ -1464,8 +1466,6 @@ process: if (nsk == sk) { reqsk_put(req); tcp_v6_restore_cb(skb); - } else if (tcp_filter(sk, skb)) { - goto discard_and_relse; } else if (tcp_child_process(sk, nsk, skb)) { tcp_v6_send_reset(nsk, skb); goto discard_and_relse; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 4a3e65626e8b..56030d45823a 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -291,11 +291,7 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, struct udp_table *udptable) { const struct ipv6hdr *iph = ipv6_hdr(skb); - struct sock *sk; - sk = skb_steal_sock(skb); - if (unlikely(sk)) - return sk; return __udp6_lib_lookup(dev_net(skb->dev), &iph->saddr, sport, &iph->daddr, dport, inet6_iif(skb), udptable, skb); @@ -332,6 +328,15 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be EXPORT_SYMBOL_GPL(udp6_lib_lookup); #endif +/* do not use the scratch area len for jumbogram: their length execeeds the + * scratch area space; note that the IP6CB flags is still in the first + * cacheline, so checking for jumbograms is cheap + */ +static int udp6_skb_len(struct sk_buff *skb) +{ + return unlikely(inet6_is_jumbogram(skb)) ? skb->len : udp_skb_len(skb); +} + /* * This should be easy, if there is something there we * return it, otherwise we block. @@ -357,12 +362,13 @@ int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, return ipv6_recv_rxpmtu(sk, msg, len, addr_len); try_again: - peeking = off = sk_peek_offset(sk, flags); + peeking = flags & MSG_PEEK; + off = sk_peek_offset(sk, flags); skb = __skb_recv_udp(sk, flags, noblock, &peeked, &off, &err); if (!skb) return err; - ulen = udp_skb_len(skb); + ulen = udp6_skb_len(skb); copied = len; if (copied > ulen - off) copied = ulen - off; @@ -569,8 +575,7 @@ static __inline__ void udpv6_err(struct sk_buff *skb, static struct static_key udpv6_encap_needed __read_mostly; void udpv6_encap_enable(void) { - if (!static_key_enabled(&udpv6_encap_needed)) - static_key_slow_inc(&udpv6_encap_needed); + static_key_enable(&udpv6_encap_needed); } EXPORT_SYMBOL(udpv6_encap_enable); @@ -762,6 +767,15 @@ start_lookup: return 0; } +static void udp6_sk_rx_dst_set(struct sock *sk, struct dst_entry *dst) +{ + if (udp_sk_rx_dst_set(sk, dst)) { + const struct rt6_info *rt = (const struct rt6_info *)dst; + + inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); + } +} + int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, int proto) { @@ -804,6 +818,24 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (udp6_csum_init(skb, uh, proto)) goto csum_error; + /* Check if the socket is already available, e.g. due to early demux */ + sk = skb_steal_sock(skb); + if (sk) { + struct dst_entry *dst = skb_dst(skb); + int ret; + + if (unlikely(sk->sk_rx_dst != dst)) + udp6_sk_rx_dst_set(sk, dst); + + ret = udpv6_queue_rcv_skb(sk, skb); + sock_put(sk); + + /* a return value > 0 means to resubmit the input */ + if (ret > 0) + return ret; + return 0; + } + /* * Multicast receive code */ @@ -812,11 +844,6 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, saddr, daddr, udptable, proto); /* Unicast */ - - /* - * check socket cache ... must talk to Alan about his plans - * for sock caches... i'll skip this for now. - */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); if (sk) { int ret; diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index a2267f80febb..e7d378c032cb 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -72,7 +72,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, if (uh->check == 0) uh->check = CSUM_MANGLED_0; - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = CHECKSUM_UNNECESSARY; /* If there is no outer header we can fake a checksum offload * due to the fact that we have already done the checksum in diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 2e6990f8b80b..23fa7c8b09a5 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -2213,7 +2213,7 @@ static int irda_getsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; struct irda_sock *self = irda_sk(sk); - struct irda_device_list list; + struct irda_device_list list = { 0 }; struct irda_device_info *discoveries; struct irda_ias_set * ias_opt; /* IAS get/query params */ struct ias_object * ias_obj; /* Object in IAS */ diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index da49191f7ad0..4abf6287d7e1 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -1383,6 +1383,10 @@ static int kcm_attach(struct socket *sock, struct socket *csock, if (!csk) return -EINVAL; + /* We must prevent loops or risk deadlock ! */ + if (csk->sk_family == PF_KCM) + return -EOPNOTSUPP; + psock = kmem_cache_zalloc(kcm_psockp, GFP_KERNEL); if (!psock) return -ENOMEM; diff --git a/net/key/af_key.c b/net/key/af_key.c index ca9d3ae665e7..98f4d8211b9a 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -228,7 +228,7 @@ static int pfkey_broadcast_one(struct sk_buff *skb, struct sk_buff **skb2, #define BROADCAST_ONE 1 #define BROADCAST_REGISTERED 2 #define BROADCAST_PROMISC_ONLY 4 -static int pfkey_broadcast(struct sk_buff *skb, +static int pfkey_broadcast(struct sk_buff *skb, gfp_t allocation, int broadcast_flags, struct sock *one_sk, struct net *net) { @@ -278,7 +278,7 @@ static int pfkey_broadcast(struct sk_buff *skb, rcu_read_unlock(); if (one_sk != NULL) - err = pfkey_broadcast_one(skb, &skb2, GFP_KERNEL, one_sk); + err = pfkey_broadcast_one(skb, &skb2, allocation, one_sk); kfree_skb(skb2); kfree_skb(skb); @@ -311,7 +311,7 @@ static int pfkey_do_dump(struct pfkey_sock *pfk) hdr = (struct sadb_msg *) pfk->dump.skb->data; hdr->sadb_msg_seq = 0; hdr->sadb_msg_errno = rc; - pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = NULL; } @@ -355,7 +355,7 @@ static int pfkey_error(const struct sadb_msg *orig, int err, struct sock *sk) hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); - pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk)); + pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ONE, sk, sock_net(sk)); return 0; } @@ -1389,7 +1389,7 @@ static int pfkey_getspi(struct sock *sk, struct sk_buff *skb, const struct sadb_ xfrm_state_put(x); - pfkey_broadcast(resp_skb, BROADCAST_ONE, sk, net); + pfkey_broadcast(resp_skb, GFP_KERNEL, BROADCAST_ONE, sk, net); return 0; } @@ -1476,7 +1476,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c) hdr->sadb_msg_seq = c->seq; hdr->sadb_msg_pid = c->portid; - pfkey_broadcast(skb, BROADCAST_ALL, NULL, xs_net(x)); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); return 0; } @@ -1589,7 +1589,7 @@ static int pfkey_get(struct sock *sk, struct sk_buff *skb, const struct sadb_msg out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; - pfkey_broadcast(out_skb, BROADCAST_ONE, sk, sock_net(sk)); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, sock_net(sk)); return 0; } @@ -1694,8 +1694,8 @@ static int pfkey_register(struct sock *sk, struct sk_buff *skb, const struct sad return -ENOBUFS; } - pfkey_broadcast(supp_skb, BROADCAST_REGISTERED, sk, sock_net(sk)); - + pfkey_broadcast(supp_skb, GFP_KERNEL, BROADCAST_REGISTERED, sk, + sock_net(sk)); return 0; } @@ -1712,7 +1712,8 @@ static int unicast_flush_resp(struct sock *sk, const struct sadb_msg *ihdr) hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); - return pfkey_broadcast(skb, BROADCAST_ONE, sk, sock_net(sk)); + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ONE, sk, + sock_net(sk)); } static int key_notify_sa_flush(const struct km_event *c) @@ -1733,7 +1734,7 @@ static int key_notify_sa_flush(const struct km_event *c) hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; - pfkey_broadcast(skb, BROADCAST_ALL, NULL, c->net); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } @@ -1790,7 +1791,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) - pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; @@ -1878,7 +1879,7 @@ static int pfkey_promisc(struct sock *sk, struct sk_buff *skb, const struct sadb new_hdr->sadb_msg_errno = 0; } - pfkey_broadcast(skb, BROADCAST_ALL, NULL, sock_net(sk)); + pfkey_broadcast(skb, GFP_KERNEL, BROADCAST_ALL, NULL, sock_net(sk)); return 0; } @@ -2206,7 +2207,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = c->seq; out_hdr->sadb_msg_pid = c->portid; - pfkey_broadcast(out_skb, BROADCAST_ALL, NULL, xp_net(xp)); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); return 0; } @@ -2426,7 +2427,7 @@ static int key_pol_get_resp(struct sock *sk, struct xfrm_policy *xp, const struc out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = hdr->sadb_msg_seq; out_hdr->sadb_msg_pid = hdr->sadb_msg_pid; - pfkey_broadcast(out_skb, BROADCAST_ONE, sk, xp_net(xp)); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ONE, sk, xp_net(xp)); err = 0; out: @@ -2682,7 +2683,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) - pfkey_broadcast(pfk->dump.skb, BROADCAST_ONE, + pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, &pfk->sk, sock_net(&pfk->sk)); pfk->dump.skb = out_skb; @@ -2739,7 +2740,7 @@ static int key_notify_policy_flush(const struct km_event *c) hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); hdr->sadb_msg_reserved = 0; - pfkey_broadcast(skb_out, BROADCAST_ALL, NULL, c->net); + pfkey_broadcast(skb_out, GFP_ATOMIC, BROADCAST_ALL, NULL, c->net); return 0; } @@ -2803,7 +2804,7 @@ static int pfkey_process(struct sock *sk, struct sk_buff *skb, const struct sadb void *ext_hdrs[SADB_EXT_MAX]; int err; - pfkey_broadcast(skb_clone(skb, GFP_KERNEL), + pfkey_broadcast(skb_clone(skb, GFP_KERNEL), GFP_KERNEL, BROADCAST_PROMISC_ONLY, NULL, sock_net(sk)); memset(ext_hdrs, 0, sizeof(ext_hdrs)); @@ -3024,7 +3025,8 @@ static int key_notify_sa_expire(struct xfrm_state *x, const struct km_event *c) out_hdr->sadb_msg_seq = 0; out_hdr->sadb_msg_pid = 0; - pfkey_broadcast(out_skb, BROADCAST_REGISTERED, NULL, xs_net(x)); + pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, + xs_net(x)); return 0; } @@ -3212,7 +3214,8 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_ctx->ctx_len); } - return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x)); + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, + xs_net(x)); } static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, @@ -3408,7 +3411,8 @@ static int pfkey_send_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, n_port->sadb_x_nat_t_port_port = sport; n_port->sadb_x_nat_t_port_reserved = 0; - return pfkey_broadcast(skb, BROADCAST_REGISTERED, NULL, xs_net(x)); + return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL, + xs_net(x)); } #ifdef CONFIG_NET_KEY_MIGRATE @@ -3599,7 +3603,7 @@ static int pfkey_send_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, } /* broadcast migrate message to sockets */ - pfkey_broadcast(skb, BROADCAST_ALL, NULL, &init_net); + pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, &init_net); return 0; diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index b0c2d4ae781d..90165a6874bc 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -113,7 +113,6 @@ struct l2tp_net { spinlock_t l2tp_session_hlist_lock; }; -static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel); static inline struct l2tp_tunnel *l2tp_tunnel(struct sock *sk) { @@ -127,39 +126,6 @@ static inline struct l2tp_net *l2tp_pernet(const struct net *net) return net_generic(net, l2tp_net_id); } -/* Tunnel reference counts. Incremented per session that is added to - * the tunnel. - */ -static inline void l2tp_tunnel_inc_refcount_1(struct l2tp_tunnel *tunnel) -{ - refcount_inc(&tunnel->ref_count); -} - -static inline void l2tp_tunnel_dec_refcount_1(struct l2tp_tunnel *tunnel) -{ - if (refcount_dec_and_test(&tunnel->ref_count)) - l2tp_tunnel_free(tunnel); -} -#ifdef L2TP_REFCNT_DEBUG -#define l2tp_tunnel_inc_refcount(_t) \ -do { \ - pr_debug("l2tp_tunnel_inc_refcount: %s:%d %s: cnt=%d\n", \ - __func__, __LINE__, (_t)->name, \ - refcount_read(&_t->ref_count)); \ - l2tp_tunnel_inc_refcount_1(_t); \ -} while (0) -#define l2tp_tunnel_dec_refcount(_t) \ -do { \ - pr_debug("l2tp_tunnel_dec_refcount: %s:%d %s: cnt=%d\n", \ - __func__, __LINE__, (_t)->name, \ - refcount_read(&_t->ref_count)); \ - l2tp_tunnel_dec_refcount_1(_t); \ -} while (0) -#else -#define l2tp_tunnel_inc_refcount(t) l2tp_tunnel_inc_refcount_1(t) -#define l2tp_tunnel_dec_refcount(t) l2tp_tunnel_dec_refcount_1(t) -#endif - /* Session hash global list for L2TPv3. * The session_id SHOULD be random according to RFC3931, but several * L2TP implementations use incrementing session_ids. So we do a real @@ -229,6 +195,27 @@ l2tp_session_id_hash(struct l2tp_tunnel *tunnel, u32 session_id) return &tunnel->session_hlist[hash_32(session_id, L2TP_HASH_BITS)]; } +/* Lookup a tunnel. A new reference is held on the returned tunnel. */ +struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id) +{ + const struct l2tp_net *pn = l2tp_pernet(net); + struct l2tp_tunnel *tunnel; + + rcu_read_lock_bh(); + list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) { + if (tunnel->tunnel_id == tunnel_id) { + l2tp_tunnel_inc_refcount(tunnel); + rcu_read_unlock_bh(); + + return tunnel; + } + } + rcu_read_unlock_bh(); + + return NULL; +} +EXPORT_SYMBOL_GPL(l2tp_tunnel_get); + /* Lookup a session. A new reference is held on the returned session. * Optionally calls session->ref() too if do_ref is true. */ @@ -1348,17 +1335,6 @@ static void l2tp_udp_encap_destroy(struct sock *sk) } } -/* Really kill the tunnel. - * Come here only when all sessions have been cleared from the tunnel. - */ -static void l2tp_tunnel_free(struct l2tp_tunnel *tunnel) -{ - BUG_ON(refcount_read(&tunnel->ref_count) != 0); - BUG_ON(tunnel->sock != NULL); - l2tp_info(tunnel, L2TP_MSG_CONTROL, "%s: free...\n", tunnel->name); - kfree_rcu(tunnel, rcu); -} - /* Workqueue tunnel deletion function */ static void l2tp_tunnel_del_work(struct work_struct *work) { @@ -1844,6 +1820,8 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn l2tp_session_set_header_len(session, tunnel->version); + refcount_set(&session->ref_count, 1); + err = l2tp_session_add_to_tunnel(tunnel, session); if (err) { kfree(session); @@ -1851,10 +1829,6 @@ struct l2tp_session *l2tp_session_create(int priv_size, struct l2tp_tunnel *tunn return ERR_PTR(err); } - /* Bump the reference count. The session context is deleted - * only when this drops to zero. - */ - refcount_set(&session->ref_count, 1); l2tp_tunnel_inc_refcount(tunnel); /* Ensure tunnel socket isn't deleted */ diff --git a/net/l2tp/l2tp_core.h b/net/l2tp/l2tp_core.h index cdb6e3327f74..9101297f27ad 100644 --- a/net/l2tp/l2tp_core.h +++ b/net/l2tp/l2tp_core.h @@ -231,6 +231,8 @@ out: return tunnel; } +struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id); + struct l2tp_session *l2tp_session_get(const struct net *net, struct l2tp_tunnel *tunnel, u32 session_id, bool do_ref); @@ -269,6 +271,17 @@ int l2tp_nl_register_ops(enum l2tp_pwtype pw_type, void l2tp_nl_unregister_ops(enum l2tp_pwtype pw_type); int l2tp_ioctl(struct sock *sk, int cmd, unsigned long arg); +static inline void l2tp_tunnel_inc_refcount(struct l2tp_tunnel *tunnel) +{ + refcount_inc(&tunnel->ref_count); +} + +static inline void l2tp_tunnel_dec_refcount(struct l2tp_tunnel *tunnel) +{ + if (refcount_dec_and_test(&tunnel->ref_count)) + kfree_rcu(tunnel, rcu); +} + /* Session reference counts. Incremented when code obtains a reference * to a session. */ diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 12cfcd0ca807..57427d430f10 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -65,10 +65,12 @@ static struct l2tp_session *l2tp_nl_session_get(struct genl_info *info, (info->attrs[L2TP_ATTR_CONN_ID])) { tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel) + tunnel = l2tp_tunnel_get(net, tunnel_id); + if (tunnel) { session = l2tp_session_get(net, tunnel, session_id, do_ref); + l2tp_tunnel_dec_refcount(tunnel); + } } return session; @@ -271,8 +273,8 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info } tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel == NULL) { + tunnel = l2tp_tunnel_get(net, tunnel_id); + if (!tunnel) { ret = -ENODEV; goto out; } @@ -282,6 +284,8 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info (void) l2tp_tunnel_delete(tunnel); + l2tp_tunnel_dec_refcount(tunnel); + out: return ret; } @@ -299,8 +303,8 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info } tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel == NULL) { + tunnel = l2tp_tunnel_get(net, tunnel_id); + if (!tunnel) { ret = -ENODEV; goto out; } @@ -311,6 +315,8 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info ret = l2tp_tunnel_notify(&l2tp_nl_family, info, tunnel, L2TP_CMD_TUNNEL_MODIFY); + l2tp_tunnel_dec_refcount(tunnel); + out: return ret; } @@ -438,34 +444,37 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info) if (!info->attrs[L2TP_ATTR_CONN_ID]) { ret = -EINVAL; - goto out; + goto err; } tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); - tunnel = l2tp_tunnel_find(net, tunnel_id); - if (tunnel == NULL) { - ret = -ENODEV; - goto out; - } - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) { ret = -ENOMEM; - goto out; + goto err; + } + + tunnel = l2tp_tunnel_get(net, tunnel_id); + if (!tunnel) { + ret = -ENODEV; + goto err_nlmsg; } ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, tunnel, L2TP_CMD_TUNNEL_GET); if (ret < 0) - goto err_out; + goto err_nlmsg_tunnel; + + l2tp_tunnel_dec_refcount(tunnel); return genlmsg_unicast(net, msg, info->snd_portid); -err_out: +err_nlmsg_tunnel: + l2tp_tunnel_dec_refcount(tunnel); +err_nlmsg: nlmsg_free(msg); - -out: +err: return ret; } @@ -509,8 +518,9 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf ret = -EINVAL; goto out; } + tunnel_id = nla_get_u32(info->attrs[L2TP_ATTR_CONN_ID]); - tunnel = l2tp_tunnel_find(net, tunnel_id); + tunnel = l2tp_tunnel_get(net, tunnel_id); if (!tunnel) { ret = -ENODEV; goto out; @@ -518,24 +528,24 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf if (!info->attrs[L2TP_ATTR_SESSION_ID]) { ret = -EINVAL; - goto out; + goto out_tunnel; } session_id = nla_get_u32(info->attrs[L2TP_ATTR_SESSION_ID]); if (!info->attrs[L2TP_ATTR_PEER_SESSION_ID]) { ret = -EINVAL; - goto out; + goto out_tunnel; } peer_session_id = nla_get_u32(info->attrs[L2TP_ATTR_PEER_SESSION_ID]); if (!info->attrs[L2TP_ATTR_PW_TYPE]) { ret = -EINVAL; - goto out; + goto out_tunnel; } cfg.pw_type = nla_get_u16(info->attrs[L2TP_ATTR_PW_TYPE]); if (cfg.pw_type >= __L2TP_PWTYPE_MAX) { ret = -EINVAL; - goto out; + goto out_tunnel; } if (tunnel->version > 2) { @@ -557,7 +567,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf u16 len = nla_len(info->attrs[L2TP_ATTR_COOKIE]); if (len > 8) { ret = -EINVAL; - goto out; + goto out_tunnel; } cfg.cookie_len = len; memcpy(&cfg.cookie[0], nla_data(info->attrs[L2TP_ATTR_COOKIE]), len); @@ -566,7 +576,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf u16 len = nla_len(info->attrs[L2TP_ATTR_PEER_COOKIE]); if (len > 8) { ret = -EINVAL; - goto out; + goto out_tunnel; } cfg.peer_cookie_len = len; memcpy(&cfg.peer_cookie[0], nla_data(info->attrs[L2TP_ATTR_PEER_COOKIE]), len); @@ -609,7 +619,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf if ((l2tp_nl_cmd_ops[cfg.pw_type] == NULL) || (l2tp_nl_cmd_ops[cfg.pw_type]->session_create == NULL)) { ret = -EPROTONOSUPPORT; - goto out; + goto out_tunnel; } /* Check that pseudowire-specific params are present */ @@ -619,7 +629,7 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf case L2TP_PWTYPE_ETH_VLAN: if (!info->attrs[L2TP_ATTR_VLAN_ID]) { ret = -EINVAL; - goto out; + goto out_tunnel; } break; case L2TP_PWTYPE_ETH: @@ -647,6 +657,8 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf } } +out_tunnel: + l2tp_tunnel_dec_refcount(tunnel); out: return ret; } diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index 8708cbe8af5b..2b36eff5d97e 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -7,7 +7,7 @@ * Copyright 2006-2007 Jiri Benc <jbenc@suse.cz> * Copyright 2007, Michael Wu <flamingice@sourmilk.net> * Copyright 2007-2010, Intel Corporation - * Copyright(c) 2015 Intel Deutschland GmbH + * Copyright(c) 2015-2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -466,3 +466,23 @@ void ieee80211_manage_rx_ba_offl(struct ieee80211_vif *vif, rcu_read_unlock(); } EXPORT_SYMBOL(ieee80211_manage_rx_ba_offl); + +void ieee80211_rx_ba_timer_expired(struct ieee80211_vif *vif, + const u8 *addr, unsigned int tid) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + struct sta_info *sta; + + rcu_read_lock(); + sta = sta_info_get_bss(sdata, addr); + if (!sta) + goto unlock; + + set_bit(tid, sta->ampdu_mlme.tid_rx_timer_expired); + ieee80211_queue_work(&local->hw, &sta->ampdu_mlme.work); + + unlock: + rcu_read_unlock(); +} +EXPORT_SYMBOL(ieee80211_rx_ba_timer_expired); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9979f46c81dc..51390febd5e3 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -96,19 +96,26 @@ static struct conntrack_gc_work conntrack_gc_work; void nf_conntrack_lock(spinlock_t *lock) __acquires(lock) { + /* 1) Acquire the lock */ spin_lock(lock); - while (unlikely(nf_conntrack_locks_all)) { - spin_unlock(lock); - /* - * Order the 'nf_conntrack_locks_all' load vs. the - * spin_unlock_wait() loads below, to ensure - * that 'nf_conntrack_locks_all_lock' is indeed held: - */ - smp_rmb(); /* spin_lock(&nf_conntrack_locks_all_lock) */ - spin_unlock_wait(&nf_conntrack_locks_all_lock); - spin_lock(lock); - } + /* 2) read nf_conntrack_locks_all, with ACQUIRE semantics + * It pairs with the smp_store_release() in nf_conntrack_all_unlock() + */ + if (likely(smp_load_acquire(&nf_conntrack_locks_all) == false)) + return; + + /* fast path failed, unlock */ + spin_unlock(lock); + + /* Slow path 1) get global lock */ + spin_lock(&nf_conntrack_locks_all_lock); + + /* Slow path 2) get the lock we want */ + spin_lock(lock); + + /* Slow path 3) release the global lock */ + spin_unlock(&nf_conntrack_locks_all_lock); } EXPORT_SYMBOL_GPL(nf_conntrack_lock); @@ -149,28 +156,27 @@ static void nf_conntrack_all_lock(void) int i; spin_lock(&nf_conntrack_locks_all_lock); - nf_conntrack_locks_all = true; - /* - * Order the above store of 'nf_conntrack_locks_all' against - * the spin_unlock_wait() loads below, such that if - * nf_conntrack_lock() observes 'nf_conntrack_locks_all' - * we must observe nf_conntrack_locks[] held: - */ - smp_mb(); /* spin_lock(&nf_conntrack_locks_all_lock) */ + nf_conntrack_locks_all = true; for (i = 0; i < CONNTRACK_LOCKS; i++) { - spin_unlock_wait(&nf_conntrack_locks[i]); + spin_lock(&nf_conntrack_locks[i]); + + /* This spin_unlock provides the "release" to ensure that + * nf_conntrack_locks_all==true is visible to everyone that + * acquired spin_lock(&nf_conntrack_locks[]). + */ + spin_unlock(&nf_conntrack_locks[i]); } } static void nf_conntrack_all_unlock(void) { - /* - * All prior stores must be complete before we clear + /* All prior stores must be complete before we clear * 'nf_conntrack_locks_all'. Otherwise nf_conntrack_lock() * might observe the false value but not the entire - * critical section: + * critical section. + * It pairs with the smp_load_acquire() in nf_conntrack_lock() */ smp_store_release(&nf_conntrack_locks_all, false); spin_unlock(&nf_conntrack_locks_all_lock); diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index eb541786ccb7..b1d3740ae36a 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -441,7 +441,7 @@ nf_nat_setup_info(struct nf_conn *ct, else ct->status |= IPS_DST_NAT; - if (nfct_help(ct)) + if (nfct_help(ct) && !nfct_seqadj(ct)) if (!nfct_seqadj_ext_add(ct)) return NF_DROP; } diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c index f5a7cb68694e..b89f4f65b2a0 100644 --- a/net/netfilter/nft_compat.c +++ b/net/netfilter/nft_compat.c @@ -305,7 +305,7 @@ static int nft_target_validate(const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; - if (!(hook_mask & target->hooks)) + if (target->hooks && !(hook_mask & target->hooks)) return -EINVAL; ret = nft_compat_chain_validate_dependency(target->table, @@ -484,7 +484,7 @@ static int nft_match_validate(const struct nft_ctx *ctx, const struct nf_hook_ops *ops = &basechain->ops[0]; hook_mask = 1 << ops->hooknum; - if (!(hook_mask & match->hooks)) + if (match->hooks && !(hook_mask & match->hooks)) return -EINVAL; ret = nft_compat_chain_validate_dependency(match->table, diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c index 18dd57a52651..14538b1d4d11 100644 --- a/net/netfilter/nft_limit.c +++ b/net/netfilter/nft_limit.c @@ -65,19 +65,23 @@ static int nft_limit_init(struct nft_limit *limit, limit->nsecs = unit * NSEC_PER_SEC; if (limit->rate == 0 || limit->nsecs < unit) return -EOVERFLOW; - limit->tokens = limit->tokens_max = limit->nsecs; - - if (tb[NFTA_LIMIT_BURST]) { - u64 rate; + if (tb[NFTA_LIMIT_BURST]) limit->burst = ntohl(nla_get_be32(tb[NFTA_LIMIT_BURST])); + else + limit->burst = 0; + + if (limit->rate + limit->burst < limit->rate) + return -EOVERFLOW; - rate = limit->rate + limit->burst; - if (rate < limit->rate) - return -EOVERFLOW; + /* The token bucket size limits the number of tokens can be + * accumulated. tokens_max specifies the bucket size. + * tokens_max = unit * (rate + burst) / rate. + */ + limit->tokens = div_u64(limit->nsecs * (limit->rate + limit->burst), + limit->rate); + limit->tokens_max = limit->tokens; - limit->rate = rate; - } if (tb[NFTA_LIMIT_FLAGS]) { u32 flags = ntohl(nla_get_be32(tb[NFTA_LIMIT_FLAGS])); @@ -95,9 +99,8 @@ static int nft_limit_dump(struct sk_buff *skb, const struct nft_limit *limit, { u32 flags = limit->invert ? NFT_LIMIT_F_INV : 0; u64 secs = div_u64(limit->nsecs, NSEC_PER_SEC); - u64 rate = limit->rate - limit->burst; - if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(rate), + if (nla_put_be64(skb, NFTA_LIMIT_RATE, cpu_to_be64(limit->rate), NFTA_LIMIT_PAD) || nla_put_be64(skb, NFTA_LIMIT_UNIT, cpu_to_be64(secs), NFTA_LIMIT_PAD) || diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index e4610676299b..a54a556fcdb5 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -1337,6 +1337,7 @@ int ovs_execute_actions(struct datapath *dp, struct sk_buff *skb, goto out; } + OVS_CB(skb)->acts_origlen = acts->orig_len; err = do_execute_actions(dp, skb, key, acts->actions, acts->actions_len); diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index e3c4c6c3fef7..03859e386b47 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1310,8 +1310,8 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, nla_for_each_nested(a, attr, rem) { int type = nla_type(a); - int maxlen = ovs_ct_attr_lens[type].maxlen; - int minlen = ovs_ct_attr_lens[type].minlen; + int maxlen; + int minlen; if (type > OVS_CT_ATTR_MAX) { OVS_NLERR(log, @@ -1319,6 +1319,9 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, type, OVS_CT_ATTR_MAX); return -EINVAL; } + + maxlen = ovs_ct_attr_lens[type].maxlen; + minlen = ovs_ct_attr_lens[type].minlen; if (nla_len(a) < minlen || nla_len(a) > maxlen) { OVS_NLERR(log, "Conntrack attr type has unexpected length (type=%d, length=%d, expected=%d)", diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 45fe8c8a884d..6b44fe405282 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -381,7 +381,7 @@ static int queue_gso_packets(struct datapath *dp, struct sk_buff *skb, } static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, - unsigned int hdrlen) + unsigned int hdrlen, int actions_attrlen) { size_t size = NLMSG_ALIGN(sizeof(struct ovs_header)) + nla_total_size(hdrlen) /* OVS_PACKET_ATTR_PACKET */ @@ -398,7 +398,7 @@ static size_t upcall_msg_size(const struct dp_upcall_info *upcall_info, /* OVS_PACKET_ATTR_ACTIONS */ if (upcall_info->actions_len) - size += nla_total_size(upcall_info->actions_len); + size += nla_total_size(actions_attrlen); /* OVS_PACKET_ATTR_MRU */ if (upcall_info->mru) @@ -465,7 +465,8 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, else hlen = skb->len; - len = upcall_msg_size(upcall_info, hlen - cutlen); + len = upcall_msg_size(upcall_info, hlen - cutlen, + OVS_CB(skb)->acts_origlen); user_skb = genlmsg_new(len, GFP_ATOMIC); if (!user_skb) { err = -ENOMEM; diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 5d8dcd88815f..480600649d0b 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -99,11 +99,13 @@ struct datapath { * when a packet is received by OVS. * @mru: The maximum received fragement size; 0 if the packet is not * fragmented. + * @acts_origlen: The netlink size of the flow actions applied to this skb. * @cutlen: The number of bytes from the packet end to be removed. */ struct ovs_skb_cb { struct vport *input_vport; u16 mru; + u16 acts_origlen; u32 cutlen; }; #define OVS_CB(skb) ((struct ovs_skb_cb *)(skb)->cb) diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 008bb34ee324..1c61af9af67d 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -2191,6 +2191,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct timespec ts; __u32 ts_status; bool is_drop_n_account = false; + bool do_vnet = false; /* struct tpacket{2,3}_hdr is aligned to a multiple of TPACKET_ALIGNMENT. * We may add members to them until current aligned size without forcing @@ -2241,8 +2242,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, netoff = TPACKET_ALIGN(po->tp_hdrlen + (maclen < 16 ? 16 : maclen)) + po->tp_reserve; - if (po->has_vnet_hdr) + if (po->has_vnet_hdr) { netoff += sizeof(struct virtio_net_hdr); + do_vnet = true; + } macoff = netoff - maclen; } if (po->tp_version <= TPACKET_V2) { @@ -2259,8 +2262,10 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, skb_set_owner_r(copy_skb, sk); } snaplen = po->rx_ring.frame_size - macoff; - if ((int)snaplen < 0) + if ((int)snaplen < 0) { snaplen = 0; + do_vnet = false; + } } } else if (unlikely(macoff + snaplen > GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len)) { @@ -2273,6 +2278,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, if (unlikely((int)snaplen < 0)) { snaplen = 0; macoff = GET_PBDQC_FROM_RB(&po->rx_ring)->max_frame_len; + do_vnet = false; } } spin_lock(&sk->sk_receive_queue.lock); @@ -2298,7 +2304,7 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, } spin_unlock(&sk->sk_receive_queue.lock); - if (po->has_vnet_hdr) { + if (do_vnet) { if (virtio_net_hdr_from_skb(skb, h.raw + macoff - sizeof(struct virtio_net_hdr), vio_le(), true)) { @@ -3700,14 +3706,19 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen != sizeof(val)) return -EINVAL; - if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) - return -EBUSY; if (copy_from_user(&val, optval, sizeof(val))) return -EFAULT; if (val > INT_MAX) return -EINVAL; - po->tp_reserve = val; - return 0; + lock_sock(sk); + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) { + ret = -EBUSY; + } else { + po->tp_reserve = val; + ret = 0; + } + release_sock(sk); + return ret; } case PACKET_LOSS: { @@ -4329,7 +4340,7 @@ static int packet_set_ring(struct sock *sk, union tpacket_req_u *req_u, register_prot_hook(sk); } spin_unlock(&po->bind_lock); - if (closing && (po->tp_version > TPACKET_V2)) { + if (pg_vec && (po->tp_version > TPACKET_V2)) { /* Because we don't support block-based V3 on tx-ring */ if (!tx_ring) prb_shutdown_retire_blk_timer(po, rb_queue); diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c index e10624aa6959..9722bf839d9d 100644 --- a/net/rds/ib_recv.c +++ b/net/rds/ib_recv.c @@ -1015,8 +1015,10 @@ void rds_ib_recv_cqe_handler(struct rds_ib_connection *ic, if (rds_ib_ring_empty(&ic->i_recv_ring)) rds_ib_stats_inc(s_ib_rx_ring_empty); - if (rds_ib_ring_low(&ic->i_recv_ring)) + if (rds_ib_ring_low(&ic->i_recv_ring)) { rds_ib_recv_refill(conn, 0, GFP_NOWAIT); + rds_ib_stats_inc(s_ib_rx_refill_from_cq); + } } int rds_ib_recv_path(struct rds_conn_path *cp) @@ -1029,6 +1031,7 @@ int rds_ib_recv_path(struct rds_conn_path *cp) if (rds_conn_up(conn)) { rds_ib_attempt_ack(ic); rds_ib_recv_refill(conn, 0, GFP_KERNEL); + rds_ib_stats_inc(s_ib_rx_refill_from_thread); } return ret; diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index dd30d74824b0..ec3383f97d4c 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -223,6 +223,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx) tail = b->call_backlog_tail; while (CIRC_CNT(head, tail, size) > 0) { struct rxrpc_call *call = b->call_backlog[tail]; + call->socket = rx; if (rx->discard_new_call) { _debug("discard %lx", call->user_call_ID); rx->discard_new_call(call, call->user_call_ID); diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 36f0ced9e60c..541707802a23 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -36,11 +36,12 @@ static struct tc_action_ops act_ipt_ops; static unsigned int xt_net_id; static struct tc_action_ops act_xt_ops; -static int ipt_init_target(struct xt_entry_target *t, char *table, - unsigned int hook) +static int ipt_init_target(struct net *net, struct xt_entry_target *t, + char *table, unsigned int hook) { struct xt_tgchk_param par; struct xt_target *target; + struct ipt_entry e = {}; int ret = 0; target = xt_request_find_target(AF_INET, t->u.user.name, @@ -49,8 +50,10 @@ static int ipt_init_target(struct xt_entry_target *t, char *table, return PTR_ERR(target); t->u.kernel.target = target; + memset(&par, 0, sizeof(par)); + par.net = net; par.table = table; - par.entryinfo = NULL; + par.entryinfo = &e; par.target = target; par.targinfo = t->data; par.hook_mask = hook; @@ -91,10 +94,11 @@ static const struct nla_policy ipt_policy[TCA_IPT_MAX + 1] = { [TCA_IPT_TARG] = { .len = sizeof(struct xt_entry_target) }, }; -static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, +static int __tcf_ipt_init(struct net *net, unsigned int id, struct nlattr *nla, struct nlattr *est, struct tc_action **a, const struct tc_action_ops *ops, int ovr, int bind) { + struct tc_action_net *tn = net_generic(net, id); struct nlattr *tb[TCA_IPT_MAX + 1]; struct tcf_ipt *ipt; struct xt_entry_target *td, *t; @@ -159,7 +163,7 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, if (unlikely(!t)) goto err2; - err = ipt_init_target(t, tname, hook); + err = ipt_init_target(net, t, tname, hook); if (err < 0) goto err3; @@ -193,18 +197,16 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind) { - struct tc_action_net *tn = net_generic(net, ipt_net_id); - - return __tcf_ipt_init(tn, nla, est, a, &act_ipt_ops, ovr, bind); + return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, ovr, + bind); } static int tcf_xt_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, int ovr, int bind) { - struct tc_action_net *tn = net_generic(net, xt_net_id); - - return __tcf_ipt_init(tn, nla, est, a, &act_xt_ops, ovr, bind); + return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, ovr, + bind); } static int tcf_ipt(struct sk_buff *skb, const struct tc_action *a, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 39da0c5801c9..6c5ea84d2682 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -205,7 +205,7 @@ static void tcf_chain_flush(struct tcf_chain *chain) { struct tcf_proto *tp; - if (*chain->p_filter_chain) + if (chain->p_filter_chain) RCU_INIT_POINTER(*chain->p_filter_chain, NULL); while ((tp = rtnl_dereference(chain->filter_chain)) != NULL) { RCU_INIT_POINTER(chain->filter_chain, tp->next); @@ -215,9 +215,15 @@ static void tcf_chain_flush(struct tcf_chain *chain) static void tcf_chain_destroy(struct tcf_chain *chain) { - list_del(&chain->list); - tcf_chain_flush(chain); - kfree(chain); + /* May be already removed from the list by the previous call. */ + if (!list_empty(&chain->list)) + list_del_init(&chain->list); + + /* There might still be a reference held when we got here from + * tcf_block_put. Wait for the user to drop reference before free. + */ + if (!chain->refcnt) + kfree(chain); } struct tcf_chain *tcf_chain_get(struct tcf_block *block, u32 chain_index, @@ -288,8 +294,10 @@ void tcf_block_put(struct tcf_block *block) if (!block) return; - list_for_each_entry_safe(chain, tmp, &block->chain_list, list) + list_for_each_entry_safe(chain, tmp, &block->chain_list, list) { + tcf_chain_flush(chain); tcf_chain_destroy(chain); + } kfree(block); } EXPORT_SYMBOL(tcf_block_put); diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bd24a550e0f9..4fb5a3222d0d 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -286,9 +286,6 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) void qdisc_hash_add(struct Qdisc *q, bool invisible) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) { - struct Qdisc *root = qdisc_dev(q)->qdisc; - - WARN_ON_ONCE(root == &noop_qdisc); ASSERT_RTNL(); hash_add_rcu(qdisc_dev(q)->qdisc_hash, &q->hash, q->handle); if (invisible) @@ -839,7 +836,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, old = dev_graft_qdisc(dev_queue, new); if (new && i > 0) - refcount_inc(&new->refcnt); + qdisc_refcount_inc(new); if (!ingress) qdisc_destroy(old); @@ -850,7 +847,7 @@ skip: notify_and_destroy(net, skb, n, classid, dev->qdisc, new); if (new && !new->ops->attach) - refcount_inc(&new->refcnt); + qdisc_refcount_inc(new); dev->qdisc = new ? : &noop_qdisc; if (new && new->ops->attach) @@ -1259,7 +1256,7 @@ replay: if (q == p || (p && check_loop(q, p, 0))) return -ELOOP; - refcount_inc(&q->refcnt); + qdisc_refcount_inc(q); goto graft; } else { if (!q) diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 572fe2584e48..c403c87aff7a 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -572,8 +572,10 @@ static void atm_tc_destroy(struct Qdisc *sch) struct atm_flow_data *flow, *tmp; pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p); - list_for_each_entry(flow, &p->flows, list) + list_for_each_entry(flow, &p->flows, list) { tcf_block_put(flow->block); + flow->block = NULL; + } list_for_each_entry_safe(flow, tmp, &p->flows, list) { if (flow->ref > 1) diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 481036f6b54e..156c8a33c677 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1139,6 +1139,13 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) struct tc_ratespec *r; int err; + qdisc_watchdog_init(&q->watchdog, sch); + hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); + q->delay_timer.function = cbq_undelay; + + if (!opt) + return -EINVAL; + err = nla_parse_nested(tb, TCA_CBQ_MAX, opt, cbq_policy, NULL); if (err < 0) return err; @@ -1177,9 +1184,6 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->link.avpkt = q->link.allot/2; q->link.minidle = -0x7FFFFFFF; - qdisc_watchdog_init(&q->watchdog, sch); - hrtimer_init(&q->delay_timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS_PINNED); - q->delay_timer.function = cbq_undelay; q->toplevel = TC_CBQ_MAXLEVEL; q->now = psched_get_time(); @@ -1431,8 +1435,10 @@ static void cbq_destroy(struct Qdisc *sch) * be bound to classes which have been destroyed already. --TGR '04 */ for (h = 0; h < q->clhash.hashsize; h++) { - hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { tcf_block_put(cl->block); + cl->block = NULL; + } } for (h = 0; h < q->clhash.hashsize; h++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h], diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 337f2d6d81e4..2c0c05f2cc34 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -491,10 +491,8 @@ static int fq_codel_init(struct Qdisc *sch, struct nlattr *opt) if (!q->flows) return -ENOMEM; q->backlogs = kvzalloc(q->flows_cnt * sizeof(u32), GFP_KERNEL); - if (!q->backlogs) { - kvfree(q->flows); + if (!q->backlogs) return -ENOMEM; - } for (i = 0; i < q->flows_cnt; i++) { struct fq_codel_flow *flow = q->flows + i; diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 57ba406f1437..4ba6da5fb254 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -785,7 +785,7 @@ static void attach_default_qdiscs(struct net_device *dev) dev->priv_flags & IFF_NO_QUEUE) { netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL); dev->qdisc = txq->qdisc_sleeping; - refcount_inc(&dev->qdisc->refcnt); + qdisc_refcount_inc(dev->qdisc); } else { qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT); if (qdisc) { diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index b52f74610dc7..11ab8dace901 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1418,6 +1418,8 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) struct tc_hfsc_qopt *qopt; int err; + qdisc_watchdog_init(&q->watchdog, sch); + if (opt == NULL || nla_len(opt) < sizeof(*qopt)) return -EINVAL; qopt = nla_data(opt); @@ -1428,6 +1430,10 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) return err; q->eligible = RB_ROOT; + err = tcf_block_get(&q->root.block, &q->root.filter_list); + if (err) + return err; + q->root.cl_common.classid = sch->handle; q->root.refcnt = 1; q->root.sched = q; @@ -1444,8 +1450,6 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) qdisc_class_hash_insert(&q->clhash, &q->root.cl_common); qdisc_class_hash_grow(sch, &q->clhash); - qdisc_watchdog_init(&q->watchdog, sch); - return 0; } @@ -1522,8 +1526,10 @@ hfsc_destroy_qdisc(struct Qdisc *sch) unsigned int i; for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) { tcf_block_put(cl->block); + cl->block = NULL; + } } for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], diff --git a/net/sched/sch_hhf.c b/net/sched/sch_hhf.c index 51d3ba682af9..73a53c08091b 100644 --- a/net/sched/sch_hhf.c +++ b/net/sched/sch_hhf.c @@ -477,6 +477,9 @@ static void hhf_destroy(struct Qdisc *sch) kvfree(q->hhf_valid_bits[i]); } + if (!q->hh_flows) + return; + for (i = 0; i < HH_FLOWS_CNT; i++) { struct hh_flow_state *flow, *next; struct list_head *head = &q->hh_flows[i]; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 203286ab4427..5bf5177b2bd3 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1017,6 +1017,9 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) int err; int i; + qdisc_watchdog_init(&q->watchdog, sch); + INIT_WORK(&q->work, htb_work_func); + if (!opt) return -EINVAL; @@ -1041,8 +1044,6 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt) for (i = 0; i < TC_HTB_NUMPRIO; i++) INIT_LIST_HEAD(q->drops + i); - qdisc_watchdog_init(&q->watchdog, sch); - INIT_WORK(&q->work, htb_work_func); qdisc_skb_head_init(&q->direct_queue); if (tb[TCA_HTB_DIRECT_QLEN]) @@ -1258,8 +1259,10 @@ static void htb_destroy(struct Qdisc *sch) tcf_block_put(q->block); for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) + hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { tcf_block_put(cl->block); + cl->block = NULL; + } } for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry_safe(cl, next, &q->clhash.hash[i], diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index f143b7bbaa0d..9c454f5d6c38 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -257,12 +257,7 @@ static int multiq_init(struct Qdisc *sch, struct nlattr *opt) for (i = 0; i < q->max_bands; i++) q->queues[i] = &noop_qdisc; - err = multiq_tune(sch, opt); - - if (err) - kfree(q->queues); - - return err; + return multiq_tune(sch, opt); } static int multiq_dump(struct Qdisc *sch, struct sk_buff *skb) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 1b3dd6190e93..14d1724e0dc4 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -933,11 +933,11 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) struct netem_sched_data *q = qdisc_priv(sch); int ret; + qdisc_watchdog_init(&q->watchdog, sch); + if (!opt) return -EINVAL; - qdisc_watchdog_init(&q->watchdog, sch); - q->loss_model = CLG_RANDOM; ret = netem_change(sch, opt); if (ret) diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index f80ea2cc5f1f..fc69fc5956e9 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -437,6 +437,7 @@ congestion_drop: qdisc_drop(head, sch, to_free); slot_queue_add(slot, skb); + qdisc_tree_reduce_backlog(sch, 0, delta); return NET_XMIT_CN; } @@ -468,8 +469,10 @@ enqueue: /* Return Congestion Notification only if we dropped a packet * from this flow. */ - if (qlen != slot->qlen) + if (qlen != slot->qlen) { + qdisc_tree_reduce_backlog(sch, 0, dropped - qdisc_pkt_len(skb)); return NET_XMIT_CN; + } /* As we dropped a packet, better let upper stack know this */ qdisc_tree_reduce_backlog(sch, 1, dropped); @@ -713,13 +716,13 @@ static int sfq_init(struct Qdisc *sch, struct nlattr *opt) int i; int err; + setup_deferrable_timer(&q->perturb_timer, sfq_perturbation, + (unsigned long)sch); + err = tcf_block_get(&q->block, &q->filter_list); if (err) return err; - setup_deferrable_timer(&q->perturb_timer, sfq_perturbation, - (unsigned long)sch); - for (i = 0; i < SFQ_MAX_DEPTH + 1; i++) { q->dep[i].next = i + SFQ_MAX_FLOWS; q->dep[i].prev = i + SFQ_MAX_FLOWS; diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index b2e4b6ad241a..493270f0d5b0 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -425,12 +425,13 @@ static int tbf_init(struct Qdisc *sch, struct nlattr *opt) { struct tbf_sched_data *q = qdisc_priv(sch); + qdisc_watchdog_init(&q->watchdog, sch); + q->qdisc = &noop_qdisc; + if (opt == NULL) return -EINVAL; q->t_c = ktime_get_ns(); - qdisc_watchdog_init(&q->watchdog, sch); - q->qdisc = &noop_qdisc; return tbf_change(sch, opt); } diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 2a186b201ad2..a4b6ffb61495 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -512,7 +512,9 @@ static void sctp_v6_to_addr(union sctp_addr *addr, struct in6_addr *saddr, { addr->sa.sa_family = AF_INET6; addr->v6.sin6_port = port; + addr->v6.sin6_flowinfo = 0; addr->v6.sin6_addr = *saddr; + addr->v6.sin6_scope_id = 0; } /* Compare addresses exactly. diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 9a647214a91e..e99518e79b52 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -70,7 +70,8 @@ static int inet_diag_msg_sctpladdrs_fill(struct sk_buff *skb, info = nla_data(attr); list_for_each_entry_rcu(laddr, address_list, list) { - memcpy(info, &laddr->a, addrlen); + memcpy(info, &laddr->a, sizeof(laddr->a)); + memset(info + sizeof(laddr->a), 0, addrlen - sizeof(laddr->a)); info += addrlen; } @@ -93,7 +94,9 @@ static int inet_diag_msg_sctpaddrs_fill(struct sk_buff *skb, info = nla_data(attr); list_for_each_entry(from, &asoc->peer.transport_addr_list, transports) { - memcpy(info, &from->ipaddr, addrlen); + memcpy(info, &from->ipaddr, sizeof(from->ipaddr)); + memset(info + sizeof(from->ipaddr), 0, + addrlen - sizeof(from->ipaddr)); info += addrlen; } diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 1db478e34520..8d760863bc41 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4538,8 +4538,7 @@ int sctp_get_sctp_info(struct sock *sk, struct sctp_association *asoc, info->sctpi_ictrlchunks = asoc->stats.ictrlchunks; prim = asoc->peer.primary_path; - memcpy(&info->sctpi_p_address, &prim->ipaddr, - sizeof(struct sockaddr_storage)); + memcpy(&info->sctpi_p_address, &prim->ipaddr, sizeof(prim->ipaddr)); info->sctpi_p_state = prim->state; info->sctpi_p_cwnd = prim->cwnd; info->sctpi_p_srtt = prim->srtt; diff --git a/net/socket.c b/net/socket.c index bf2122691fba..ad22df1ffbd1 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1916,7 +1916,7 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, if (copy_from_user(&msg, umsg, sizeof(*umsg))) return -EFAULT; - kmsg->msg_control = msg.msg_control; + kmsg->msg_control = (void __force *)msg.msg_control; kmsg->msg_controllen = msg.msg_controllen; kmsg->msg_flags = msg.msg_flags; @@ -1935,7 +1935,8 @@ static int copy_msghdr_from_user(struct msghdr *kmsg, if (msg.msg_name && kmsg->msg_namelen) { if (!save_addr) { - err = move_addr_to_kernel(msg.msg_name, kmsg->msg_namelen, + err = move_addr_to_kernel(msg.msg_name, + kmsg->msg_namelen, kmsg->msg_name); if (err < 0) return err; diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c index 2b720fa35c4f..e18500151236 100644 --- a/net/sunrpc/svcsock.c +++ b/net/sunrpc/svcsock.c @@ -421,6 +421,9 @@ static void svc_data_ready(struct sock *sk) dprintk("svc: socket %p(inet %p), busy=%d\n", svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); + + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_odata(sk); if (!test_and_set_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags)) svc_xprt_enqueue(&svsk->sk_xprt); @@ -437,6 +440,9 @@ static void svc_write_space(struct sock *sk) if (svsk) { dprintk("svc: socket %p(inet %p), write_space busy=%d\n", svsk, sk, test_bit(XPT_BUSY, &svsk->sk_xprt.xpt_flags)); + + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_owspace(sk); svc_xprt_enqueue(&svsk->sk_xprt); } @@ -760,8 +766,12 @@ static void svc_tcp_listen_data_ready(struct sock *sk) dprintk("svc: socket %p TCP (listen) state change %d\n", sk, sk->sk_state); - if (svsk) + if (svsk) { + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_odata(sk); + } + /* * This callback may called twice when a new connection * is established as a child socket inherits everything @@ -794,6 +804,8 @@ static void svc_tcp_state_change(struct sock *sk) if (!svsk) printk("svc: socket %p: no user data\n", sk); else { + /* Refer to svc_setup_socket() for details. */ + rmb(); svsk->sk_ostate(sk); if (sk->sk_state != TCP_ESTABLISHED) { set_bit(XPT_CLOSE, &svsk->sk_xprt.xpt_flags); @@ -1381,12 +1393,18 @@ static struct svc_sock *svc_setup_socket(struct svc_serv *serv, return ERR_PTR(err); } - inet->sk_user_data = svsk; svsk->sk_sock = sock; svsk->sk_sk = inet; svsk->sk_ostate = inet->sk_state_change; svsk->sk_odata = inet->sk_data_ready; svsk->sk_owspace = inet->sk_write_space; + /* + * This barrier is necessary in order to prevent race condition + * with svc_data_ready(), svc_listen_data_ready() and others + * when calling callbacks above. + */ + wmb(); + inet->sk_user_data = svsk; /* Initialize the socket */ if (sock->type == SOCK_DGRAM) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index d174ee3254ee..89cd061c4468 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -65,6 +65,8 @@ static struct tipc_bearer *bearer_get(struct net *net, int bearer_id) } static void bearer_disable(struct net *net, struct tipc_bearer *b); +static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); /** * tipc_media_find - locates specified media object by name @@ -428,6 +430,10 @@ int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b, /* Associate TIPC bearer with L2 bearer */ rcu_assign_pointer(b->media_ptr, dev); + b->pt.dev = dev; + b->pt.type = htons(ETH_P_TIPC); + b->pt.func = tipc_l2_rcv_msg; + dev_add_pack(&b->pt); memset(&b->bcast_addr, 0, sizeof(b->bcast_addr)); memcpy(b->bcast_addr.value, dev->broadcast, b->media->hwaddr_len); b->bcast_addr.media_id = b->media->type_id; @@ -447,6 +453,7 @@ void tipc_disable_l2_media(struct tipc_bearer *b) struct net_device *dev; dev = (struct net_device *)rtnl_dereference(b->media_ptr); + dev_remove_pack(&b->pt); RCU_INIT_POINTER(dev->tipc_ptr, NULL); synchronize_net(); dev_put(dev); @@ -594,11 +601,12 @@ static int tipc_l2_rcv_msg(struct sk_buff *skb, struct net_device *dev, struct tipc_bearer *b; rcu_read_lock(); - b = rcu_dereference_rtnl(dev->tipc_ptr); + b = rcu_dereference_rtnl(dev->tipc_ptr) ?: + rcu_dereference_rtnl(orig_dev->tipc_ptr); if (likely(b && test_bit(0, &b->up) && - (skb->pkt_type <= PACKET_BROADCAST))) { + (skb->pkt_type <= PACKET_MULTICAST))) { skb->next = NULL; - tipc_rcv(dev_net(dev), skb, b); + tipc_rcv(dev_net(b->pt.dev), skb, b); rcu_read_unlock(); return NET_RX_SUCCESS; } @@ -659,11 +667,6 @@ static int tipc_l2_device_event(struct notifier_block *nb, unsigned long evt, return NOTIFY_OK; } -static struct packet_type tipc_packet_type __read_mostly = { - .type = htons(ETH_P_TIPC), - .func = tipc_l2_rcv_msg, -}; - static struct notifier_block notifier = { .notifier_call = tipc_l2_device_event, .priority = 0, @@ -671,19 +674,12 @@ static struct notifier_block notifier = { int tipc_bearer_setup(void) { - int err; - - err = register_netdevice_notifier(¬ifier); - if (err) - return err; - dev_add_pack(&tipc_packet_type); - return 0; + return register_netdevice_notifier(¬ifier); } void tipc_bearer_cleanup(void) { unregister_netdevice_notifier(¬ifier); - dev_remove_pack(&tipc_packet_type); } void tipc_bearer_stop(struct net *net) diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 635c9086e19a..e07a55a80c18 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -131,6 +131,7 @@ struct tipc_media { * @name: bearer name (format = media:interface) * @media: ptr to media structure associated with bearer * @bcast_addr: media address used in broadcasting + * @pt: packet type for bearer * @rcu: rcu struct for tipc_bearer * @priority: default link priority for bearer * @window: default window size for bearer @@ -151,6 +152,7 @@ struct tipc_bearer { char name[TIPC_MAX_BEARER_NAME]; struct tipc_media *media; struct tipc_media_addr bcast_addr; + struct packet_type pt; struct rcu_head rcu; u32 priority; u32 window; diff --git a/net/tipc/msg.c b/net/tipc/msg.c index ab3087687a32..6ef379f004ac 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -479,13 +479,14 @@ bool tipc_msg_make_bundle(struct sk_buff **skb, struct tipc_msg *msg, bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) { struct sk_buff *_skb = *skb; - struct tipc_msg *hdr = buf_msg(_skb); + struct tipc_msg *hdr; struct tipc_msg ohdr; - int dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE); + int dlen; if (skb_linearize(_skb)) goto exit; hdr = buf_msg(_skb); + dlen = min_t(uint, msg_data_sz(hdr), MAX_FORWARD_SIZE); if (msg_dest_droppable(hdr)) goto exit; if (msg_errcode(hdr)) @@ -511,8 +512,11 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_ATOMIC)) goto exit; + /* reassign after skb header modifications */ + hdr = buf_msg(_skb); /* Now reverse the concerned fields */ msg_set_errcode(hdr, err); + msg_set_non_seq(hdr, 0); msg_set_origport(hdr, msg_destport(&ohdr)); msg_set_destport(hdr, msg_origport(&ohdr)); msg_set_destnode(hdr, msg_prevnode(&ohdr)); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 9bfe886ab330..750949dfc1d7 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -258,13 +258,15 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, arg = nlmsg_new(0, GFP_KERNEL); if (!arg) { kfree_skb(msg->rep); + msg->rep = NULL; return -ENOMEM; } err = __tipc_nl_compat_dumpit(cmd, msg, arg); - if (err) + if (err) { kfree_skb(msg->rep); - + msg->rep = NULL; + } kfree_skb(arg); return err; diff --git a/net/tipc/node.c b/net/tipc/node.c index aeef8011ac7d..7dd22330a6b4 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -1126,8 +1126,8 @@ int tipc_node_get_linkname(struct net *net, u32 bearer_id, u32 addr, strncpy(linkname, tipc_link_name(link), len); err = 0; } -exit: tipc_node_read_unlock(node); +exit: tipc_node_put(node); return err; } @@ -1455,10 +1455,8 @@ static bool tipc_node_check_state(struct tipc_node *n, struct sk_buff *skb, /* Initiate synch mode if applicable */ if ((usr == TUNNEL_PROTOCOL) && (mtyp == SYNCH_MSG) && (oseqno == 1)) { syncpt = iseqno + exp_pkts - 1; - if (!tipc_link_is_up(l)) { - tipc_link_fsm_evt(l, LINK_ESTABLISH_EVT); + if (!tipc_link_is_up(l)) __tipc_node_link_up(n, bearer_id, xmitq); - } if (n->state == SELF_UP_PEER_UP) { n->sync_point = syncpt; tipc_link_fsm_evt(l, LINK_SYNCH_BEGIN_EVT); @@ -1559,6 +1557,8 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b) /* Check/update node state before receiving */ if (unlikely(skb)) { + if (unlikely(skb_linearize(skb))) + goto discard; tipc_node_write_lock(n); if (tipc_node_check_state(n, skb, bearer_id, &xmitq)) { if (le->link) { diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 101e3597338f..d50edd6e0019 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -2255,8 +2255,8 @@ void tipc_sk_reinit(struct net *net) do { tsk = ERR_PTR(rhashtable_walk_start(&iter)); - if (tsk) - continue; + if (IS_ERR(tsk)) + goto walk_stop; while ((tsk = rhashtable_walk_next(&iter)) && !IS_ERR(tsk)) { spin_lock_bh(&tsk->sk.sk_lock.slock); @@ -2265,7 +2265,7 @@ void tipc_sk_reinit(struct net *net) msg_set_orignode(msg, tn->own_addr); spin_unlock_bh(&tsk->sk.sk_lock.slock); } - +walk_stop: rhashtable_walk_stop(&iter); } while (tsk == ERR_PTR(-EAGAIN)); } diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 0bf91cd3733c..be3d9e3183dc 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -52,7 +52,6 @@ struct tipc_subscriber { struct list_head subscrp_list; }; -static void tipc_subscrp_delete(struct tipc_subscription *sub); static void tipc_subscrb_put(struct tipc_subscriber *subscriber); /** @@ -197,15 +196,19 @@ static void tipc_subscrb_subscrp_delete(struct tipc_subscriber *subscriber, { struct list_head *subscription_list = &subscriber->subscrp_list; struct tipc_subscription *sub, *temp; + u32 timeout; spin_lock_bh(&subscriber->lock); list_for_each_entry_safe(sub, temp, subscription_list, subscrp_list) { if (s && memcmp(s, &sub->evt.s, sizeof(struct tipc_subscr))) continue; - tipc_nametbl_unsubscribe(sub); - list_del(&sub->subscrp_list); - tipc_subscrp_delete(sub); + timeout = htohl(sub->evt.s.timeout, sub->swap); + if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) { + tipc_nametbl_unsubscribe(sub); + list_del(&sub->subscrp_list); + tipc_subscrp_put(sub); + } if (s) break; @@ -236,18 +239,12 @@ static void tipc_subscrb_delete(struct tipc_subscriber *subscriber) tipc_subscrb_put(subscriber); } -static void tipc_subscrp_delete(struct tipc_subscription *sub) -{ - u32 timeout = htohl(sub->evt.s.timeout, sub->swap); - - if (timeout == TIPC_WAIT_FOREVER || del_timer(&sub->timer)) - tipc_subscrp_put(sub); -} - static void tipc_subscrp_cancel(struct tipc_subscr *s, struct tipc_subscriber *subscriber) { + tipc_subscrb_get(subscriber); tipc_subscrb_subscrp_delete(subscriber, s); + tipc_subscrb_put(subscriber); } static struct tipc_subscription *tipc_subscrp_create(struct net *net, diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7b52a380d710..be8982b4f8c0 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2304,10 +2304,7 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, */ mutex_lock(&u->iolock); - if (flags & MSG_PEEK) - skip = sk_peek_offset(sk, flags); - else - skip = 0; + skip = max(sk_peek_offset(sk, flags), 0); do { int chunk; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ff61d8557929..69b16ee327d9 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -2226,7 +2226,6 @@ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, goto no_transform; } - dst_hold(&xdst->u.dst); route = xdst->route; } } @@ -3308,9 +3307,15 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_state *x_new[XFRM_MAX_DEPTH]; struct xfrm_migrate *mp; + /* Stage 0 - sanity checks */ if ((err = xfrm_migrate_check(m, num_migrate)) < 0) goto out; + if (dir >= XFRM_POLICY_MAX) { + err = -EINVAL; + goto out; + } + /* Stage 1 - find policy */ if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) { err = -ENOENT; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 6c0956d10db6..a792effdb0b5 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1620,6 +1620,7 @@ int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, unsigned short family, struct net *net) { + int i; int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); if (!afinfo) @@ -1628,6 +1629,9 @@ xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, spin_lock_bh(&net->xfrm.xfrm_state_lock); /*FIXME*/ if (afinfo->tmpl_sort) err = afinfo->tmpl_sort(dst, src, n); + else + for (i = 0; i < n; i++) + dst[i] = src[i]; spin_unlock_bh(&net->xfrm.xfrm_state_lock); rcu_read_unlock(); return err; @@ -1638,6 +1642,7 @@ int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, unsigned short family) { + int i; int err = 0; struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); struct net *net = xs_net(*src); @@ -1648,6 +1653,9 @@ xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, spin_lock_bh(&net->xfrm.xfrm_state_lock); if (afinfo->state_sort) err = afinfo->state_sort(dst, src, n); + else + for (i = 0; i < n; i++) + dst[i] = src[i]; spin_unlock_bh(&net->xfrm.xfrm_state_lock); rcu_read_unlock(); return err; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2be4c6af008a..9391ced05259 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -796,7 +796,7 @@ static int copy_user_offload(struct xfrm_state_offload *xso, struct sk_buff *skb return -EMSGSIZE; xuo = nla_data(attr); - + memset(xuo, 0, sizeof(*xuo)); xuo->ifindex = xso->dev->ifindex; xuo->flags = xso->flags; @@ -1869,6 +1869,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct return -EMSGSIZE; id = nlmsg_data(nlh); + memset(&id->sa_id, 0, sizeof(id->sa_id)); memcpy(&id->sa_id.daddr, &x->id.daddr, sizeof(x->id.daddr)); id->sa_id.spi = x->id.spi; id->sa_id.family = x->props.family; @@ -2578,6 +2579,8 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct ue = nlmsg_data(nlh); copy_to_user_state(x, &ue->state); ue->hard = (c->data.hard != 0) ? 1 : 0; + /* clear the padding bytes */ + memset(&ue->hard + 1, 0, sizeof(*ue) - offsetofend(typeof(*ue), hard)); err = xfrm_mark_put(skb, &x->mark); if (err) @@ -2715,6 +2718,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) struct nlattr *attr; id = nlmsg_data(nlh); + memset(id, 0, sizeof(*id)); memcpy(&id->daddr, &x->id.daddr, sizeof(id->daddr)); id->spi = x->id.spi; id->family = x->props.family; |