diff options
Diffstat (limited to 'net')
85 files changed, 963 insertions, 460 deletions
diff --git a/net/atm/common.c b/net/atm/common.c index 0ce530af534d..8575f5d52087 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -177,18 +177,18 @@ static void vcc_destroy_socket(struct sock *sk) set_bit(ATM_VF_CLOSE, &vcc->flags); clear_bit(ATM_VF_READY, &vcc->flags); - if (vcc->dev) { - if (vcc->dev->ops->close) - vcc->dev->ops->close(vcc); - if (vcc->push) - vcc->push(vcc, NULL); /* atmarpd has no push */ - module_put(vcc->owner); - - while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { - atm_return(vcc, skb->truesize); - kfree_skb(skb); - } + if (vcc->dev && vcc->dev->ops->close) + vcc->dev->ops->close(vcc); + if (vcc->push) + vcc->push(vcc, NULL); /* atmarpd has no push */ + module_put(vcc->owner); + + while ((skb = skb_dequeue(&sk->sk_receive_queue)) != NULL) { + atm_return(vcc, skb->truesize); + kfree_skb(skb); + } + if (vcc->dev && vcc->dev->ops->owner) { module_put(vcc->dev->ops->owner); atm_dev_put(vcc->dev); } diff --git a/net/atm/lec.c b/net/atm/lec.c index 25fa3a7b72bd..ca37f5a71f5e 100644 --- a/net/atm/lec.c +++ b/net/atm/lec.c @@ -1264,6 +1264,12 @@ static void lec_arp_clear_vccs(struct lec_arp_table *entry) entry->vcc = NULL; } if (entry->recv_vcc) { + struct atm_vcc *vcc = entry->recv_vcc; + struct lec_vcc_priv *vpriv = LEC_VCC_PRIV(vcc); + + kfree(vpriv); + vcc->user_back = NULL; + entry->recv_vcc->push = entry->old_recv_push; vcc_release_async(entry->recv_vcc, -EPIPE); entry->recv_vcc = NULL; diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index ff57ea89c27e..fd91cd34f25e 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -635,8 +635,10 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname, break; case SO_BINDTODEVICE: - if (optlen > IFNAMSIZ) - optlen = IFNAMSIZ; + if (optlen > IFNAMSIZ - 1) + optlen = IFNAMSIZ - 1; + + memset(devname, 0, sizeof(devname)); if (copy_from_user(devname, optval, optlen)) { res = -EFAULT; diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c index 969466218999..80b87b1f4e3a 100644 --- a/net/batman-adv/bat_v_ogm.c +++ b/net/batman-adv/bat_v_ogm.c @@ -893,7 +893,7 @@ static void batadv_v_ogm_process(const struct sk_buff *skb, int ogm_offset, orig_node = batadv_v_ogm_orig_get(bat_priv, ogm_packet->orig); if (!orig_node) - return; + goto out; neigh_node = batadv_neigh_node_get_or_create(orig_node, if_incoming, ethhdr->h_source); diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c index 8f0717c3f7b5..b0469d15da0e 100644 --- a/net/batman-adv/network-coding.c +++ b/net/batman-adv/network-coding.c @@ -1009,15 +1009,8 @@ static struct batadv_nc_path *batadv_nc_get_path(struct batadv_priv *bat_priv, */ static u8 batadv_nc_random_weight_tq(u8 tq) { - u8 rand_val, rand_tq; - - get_random_bytes(&rand_val, sizeof(rand_val)); - /* randomize the estimated packet loss (max TQ - estimated TQ) */ - rand_tq = rand_val * (BATADV_TQ_MAX_VALUE - tq); - - /* normalize the randomized packet loss */ - rand_tq /= BATADV_TQ_MAX_VALUE; + u8 rand_tq = prandom_u32_max(BATADV_TQ_MAX_VALUE + 1 - tq); /* convert to (randomized) estimated tq again */ return BATADV_TQ_MAX_VALUE - rand_tq; diff --git a/net/batman-adv/sysfs.c b/net/batman-adv/sysfs.c index c45962d8527b..0f962dcd239e 100644 --- a/net/batman-adv/sysfs.c +++ b/net/batman-adv/sysfs.c @@ -1150,7 +1150,7 @@ static ssize_t batadv_store_throughput_override(struct kobject *kobj, ret = batadv_parse_throughput(net_dev, buff, "throughput_override", &tp_override); if (!ret) - return count; + goto out; old_tp_override = atomic_read(&hard_iface->bat_v.throughput_override); if (old_tp_override == tp_override) @@ -1190,6 +1190,7 @@ static ssize_t batadv_show_throughput_override(struct kobject *kobj, tp_override = atomic_read(&hard_iface->bat_v.throughput_override); + batadv_hardif_put(hard_iface); return sprintf(buff, "%u.%u MBit\n", tp_override / 10, tp_override % 10); } diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 43dab4066f91..a0f5dbee8f9c 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -612,6 +612,7 @@ int br_process_vlan_info(struct net_bridge *br, v - 1, rtm_cmd); v_change_start = 0; } + cond_resched(); } /* v_change_start is set only if the last/whole range changed */ if (v_change_start) diff --git a/net/core/dev.c b/net/core/dev.c index 522288177bbd..2d8aceee4284 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -4988,11 +4988,12 @@ static inline int nf_ingress(struct sk_buff *skb, struct packet_type **pt_prev, return 0; } -static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc, +static int __netif_receive_skb_core(struct sk_buff **pskb, bool pfmemalloc, struct packet_type **ppt_prev) { struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; + struct sk_buff *skb = *pskb; struct net_device *orig_dev; bool deliver_exact = false; int ret = NET_RX_DROP; @@ -5023,8 +5024,10 @@ another_round: ret2 = do_xdp_generic(rcu_dereference(skb->dev->xdp_prog), skb); preempt_enable(); - if (ret2 != XDP_PASS) - return NET_RX_DROP; + if (ret2 != XDP_PASS) { + ret = NET_RX_DROP; + goto out; + } skb_reset_mac_len(skb); } @@ -5174,6 +5177,13 @@ drop: } out: + /* The invariant here is that if *ppt_prev is not NULL + * then skb should also be non-NULL. + * + * Apparently *ppt_prev assignment above holds this invariant due to + * skb dereferencing near it. + */ + *pskb = skb; return ret; } @@ -5183,7 +5193,7 @@ static int __netif_receive_skb_one_core(struct sk_buff *skb, bool pfmemalloc) struct packet_type *pt_prev = NULL; int ret; - ret = __netif_receive_skb_core(skb, pfmemalloc, &pt_prev); + ret = __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev); if (pt_prev) ret = INDIRECT_CALL_INET(pt_prev->func, ipv6_rcv, ip_rcv, skb, skb->dev, pt_prev, orig_dev); @@ -5261,7 +5271,7 @@ static void __netif_receive_skb_list_core(struct list_head *head, bool pfmemallo struct packet_type *pt_prev = NULL; skb_list_del_init(skb); - __netif_receive_skb_core(skb, pfmemalloc, &pt_prev); + __netif_receive_skb_core(&skb, pfmemalloc, &pt_prev); if (!pt_prev) continue; if (pt_curr != pt_prev || od_curr != orig_dev) { @@ -8907,11 +8917,13 @@ static void netdev_sync_lower_features(struct net_device *upper, netdev_dbg(upper, "Disabling feature %pNF on lower dev %s.\n", &feature, lower->name); lower->wanted_features &= ~feature; - netdev_update_features(lower); + __netdev_update_features(lower); if (unlikely(lower->features & feature)) netdev_WARN(upper, "failed to disable %pNF on %s!\n", &feature, lower->name); + else + netdev_features_change(lower); } } } diff --git a/net/core/devlink.c b/net/core/devlink.c index 80f97722f31f..899edcee7dab 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -4283,6 +4283,11 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb, end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]); end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]); dump = false; + + if (start_offset == end_offset) { + err = 0; + goto nla_put_failure; + } } err = devlink_nl_region_read_snapshot_fill(skb, devlink, @@ -5363,6 +5368,7 @@ int devlink_health_report(struct devlink_health_reporter *reporter, { enum devlink_health_reporter_state prev_health_state; struct devlink *devlink = reporter->devlink; + unsigned long recover_ts_threshold; /* write a log message of the current error */ WARN_ON(!msg); @@ -5373,10 +5379,12 @@ int devlink_health_report(struct devlink_health_reporter *reporter, devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER); /* abort if the previous error wasn't recovered */ + recover_ts_threshold = reporter->last_recovery_ts + + msecs_to_jiffies(reporter->graceful_period); if (reporter->auto_recover && (prev_health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY || - jiffies - reporter->last_recovery_ts < - msecs_to_jiffies(reporter->graceful_period))) { + (reporter->last_recovery_ts && reporter->recovery_count && + time_is_after_jiffies(recover_ts_threshold)))) { trace_devlink_health_recover_aborted(devlink, reporter->ops->name, reporter->health_state, diff --git a/net/core/drop_monitor.c b/net/core/drop_monitor.c index 8e33cec9fc4e..2ee7bc4c9e03 100644 --- a/net/core/drop_monitor.c +++ b/net/core/drop_monitor.c @@ -213,6 +213,7 @@ static void sched_send_work(struct timer_list *t) static void trace_drop_common(struct sk_buff *skb, void *location) { struct net_dm_alert_msg *msg; + struct net_dm_drop_point *point; struct nlmsghdr *nlh; struct nlattr *nla; int i; @@ -231,11 +232,13 @@ static void trace_drop_common(struct sk_buff *skb, void *location) nlh = (struct nlmsghdr *)dskb->data; nla = genlmsg_data(nlmsg_data(nlh)); msg = nla_data(nla); + point = msg->points; for (i = 0; i < msg->entries; i++) { - if (!memcmp(&location, msg->points[i].pc, sizeof(void *))) { - msg->points[i].count++; + if (!memcmp(&location, &point->pc, sizeof(void *))) { + point->count++; goto out; } + point++; } if (msg->entries == dm_hit_limit) goto out; @@ -244,8 +247,8 @@ static void trace_drop_common(struct sk_buff *skb, void *location) */ __nla_reserve_nohdr(dskb, sizeof(struct net_dm_drop_point)); nla->nla_len += NLA_ALIGN(sizeof(struct net_dm_drop_point)); - memcpy(msg->points[msg->entries].pc, &location, sizeof(void *)); - msg->points[msg->entries].count = 1; + memcpy(point->pc, &location, sizeof(void *)); + point->count = 1; msg->entries++; if (!timer_pending(&data->send_timer)) { diff --git a/net/core/filter.c b/net/core/filter.c index 7d6ceaa54d21..5cc9276f1023 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2590,8 +2590,8 @@ BPF_CALL_4(bpf_msg_pop_data, struct sk_msg *, msg, u32, start, } pop = 0; } else if (pop >= sge->length - a) { - sge->length = a; pop -= (sge->length - a); + sge->length = a; } } diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 3eff84824c8b..5dceed467f64 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -160,12 +160,10 @@ out: return ret; } -int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) +static int flow_dissector_bpf_prog_detach(struct net *net) { struct bpf_prog *attached; - struct net *net; - net = current->nsproxy->net_ns; mutex_lock(&flow_dissector_mutex); attached = rcu_dereference_protected(net->flow_dissector_prog, lockdep_is_held(&flow_dissector_mutex)); @@ -179,6 +177,24 @@ int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) return 0; } +int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) +{ + return flow_dissector_bpf_prog_detach(current->nsproxy->net_ns); +} + +static void __net_exit flow_dissector_pernet_pre_exit(struct net *net) +{ + /* We're not racing with attach/detach because there are no + * references to netns left when pre_exit gets called. + */ + if (rcu_access_pointer(net->flow_dissector_prog)) + flow_dissector_bpf_prog_detach(net); +} + +static struct pernet_operations flow_dissector_pernet_ops __net_initdata = { + .pre_exit = flow_dissector_pernet_pre_exit, +}; + /** * __skb_flow_get_ports - extract the upper layer ports and return them * @skb: sk_buff to extract the ports from @@ -1836,7 +1852,7 @@ static int __init init_default_flow_dissectors(void) skb_flow_dissector_init(&flow_keys_basic_dissector, flow_keys_basic_dissector_keys, ARRAY_SIZE(flow_keys_basic_dissector_keys)); - return 0; -} + return register_pernet_subsys(&flow_dissector_pernet_ops); +} core_initcall(init_default_flow_dissectors); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 39d37d0ef575..116139233d57 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1956,6 +1956,9 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, NEIGH_UPDATE_F_OVERRIDE_ISROUTER); } + if (protocol) + neigh->protocol = protocol; + if (ndm->ndm_flags & NTF_EXT_LEARNED) flags |= NEIGH_UPDATE_F_EXT_LEARNED; @@ -1969,9 +1972,6 @@ static int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, err = __neigh_update(neigh, lladdr, ndm->ndm_state, flags, NETLINK_CB(skb).portid, extack); - if (protocol) - neigh->protocol = protocol; - neigh_release(neigh); out: diff --git a/net/core/netprio_cgroup.c b/net/core/netprio_cgroup.c index 8881dd943dd0..9bd4cab7d510 100644 --- a/net/core/netprio_cgroup.c +++ b/net/core/netprio_cgroup.c @@ -236,6 +236,8 @@ static void net_prio_attach(struct cgroup_taskset *tset) struct task_struct *p; struct cgroup_subsys_state *css; + cgroup_sk_alloc_disable(); + cgroup_taskset_for_each(p, css, tset) { void *v = (void *)(unsigned long)css->id; diff --git a/net/core/sock.c b/net/core/sock.c index 90509c37d291..b714162213ae 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2364,7 +2364,6 @@ static void sk_leave_memory_pressure(struct sock *sk) } } -/* On 32bit arches, an skb frag is limited to 2^15 */ #define SKB_FRAG_PAGE_ORDER get_order(32768) DEFINE_STATIC_KEY_FALSE(net_high_order_alloc_disable_key); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index 9a271a58a41d..d90665b465b8 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -459,7 +459,7 @@ static int dsa_tree_setup_switches(struct dsa_switch_tree *dst) list_for_each_entry(dp, &dst->ports, list) { err = dsa_port_setup(dp); if (err) - goto teardown; + continue; } return 0; diff --git a/net/dsa/master.c b/net/dsa/master.c index b5c535af63a3..a621367c6e8c 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -289,7 +289,8 @@ static void dsa_master_ndo_teardown(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; - dev->netdev_ops = cpu_dp->orig_ndo_ops; + if (cpu_dp->orig_ndo_ops) + dev->netdev_ops = cpu_dp->orig_ndo_ops; cpu_dp->orig_ndo_ops = NULL; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index d1068803cd11..62f4ee3da172 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -856,20 +856,18 @@ dsa_slave_add_cls_matchall_mirred(struct net_device *dev, struct dsa_port *to_dp; int err; - act = &cls->rule->action.entries[0]; - if (!ds->ops->port_mirror_add) return -EOPNOTSUPP; - if (!act->dev) - return -EINVAL; - if (!flow_action_basic_hw_stats_check(&cls->rule->action, cls->common.extack)) return -EOPNOTSUPP; act = &cls->rule->action.entries[0]; + if (!act->dev) + return -EINVAL; + if (!dsa_slave_dev_check(act->dev)) return -EOPNOTSUPP; diff --git a/net/dsa/tag_mtk.c b/net/dsa/tag_mtk.c index b5705cba8318..d6619edd53e5 100644 --- a/net/dsa/tag_mtk.c +++ b/net/dsa/tag_mtk.c @@ -15,6 +15,7 @@ #define MTK_HDR_XMIT_TAGGED_TPID_8100 1 #define MTK_HDR_RECV_SOURCE_PORT_MASK GENMASK(2, 0) #define MTK_HDR_XMIT_DP_BIT_MASK GENMASK(5, 0) +#define MTK_HDR_XMIT_SA_DIS BIT(6) static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, struct net_device *dev) @@ -22,6 +23,9 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, struct dsa_port *dp = dsa_slave_to_port(dev); u8 *mtk_tag; bool is_vlan_skb = true; + unsigned char *dest = eth_hdr(skb)->h_dest; + bool is_multicast_skb = is_multicast_ether_addr(dest) && + !is_broadcast_ether_addr(dest); /* Build the special tag after the MAC Source Address. If VLAN header * is present, it's required that VLAN header and special tag is @@ -47,6 +51,10 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb, MTK_HDR_XMIT_UNTAGGED; mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK; + /* Disable SA learning for multicast frames */ + if (unlikely(is_multicast_skb)) + mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS; + /* Tag control information is kept for 802.1Q */ if (!is_vlan_skb) { mtk_tag[2] = 0; @@ -61,6 +69,9 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, { int port; __be16 *phdr, hdr; + unsigned char *dest = eth_hdr(skb)->h_dest; + bool is_multicast_skb = is_multicast_ether_addr(dest) && + !is_broadcast_ether_addr(dest); if (unlikely(!pskb_may_pull(skb, MTK_HDR_LEN))) return NULL; @@ -86,6 +97,10 @@ static struct sk_buff *mtk_tag_rcv(struct sk_buff *skb, struct net_device *dev, if (!skb->dev) return NULL; + /* Only unicast or broadcast frames are offloaded */ + if (likely(!is_multicast_skb)) + skb->offload_fwd_mark = 1; + return skb; } diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index 0c772318c023..ed5357210193 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -342,7 +342,7 @@ static int ethnl_default_doit(struct sk_buff *skb, struct genl_info *info) ret = ops->reply_size(req_info, reply_data); if (ret < 0) goto err_cleanup; - reply_len = ret; + reply_len = ret + ethnl_reply_header_size(); ret = -ENOMEM; rskb = ethnl_reply_init(reply_len, req_info->dev, ops->reply_cmd, ops->hdr_attr, info, &reply_payload); @@ -588,7 +588,7 @@ static void ethnl_default_notify(struct net_device *dev, unsigned int cmd, ret = ops->reply_size(req_info, reply_data); if (ret < 0) goto err_cleanup; - reply_len = ret; + reply_len = ret + ethnl_reply_header_size(); ret = -ENOMEM; skb = genlmsg_new(reply_len, GFP_KERNEL); if (!skb) diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index 95eae5c68a52..0eed4e4909ab 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -324,7 +324,6 @@ static int strset_reply_size(const struct ethnl_req_info *req_base, int len = 0; int ret; - len += ethnl_reply_header_size(); for (i = 0; i < ETH_SS_COUNT; i++) { const struct strset_info *set_info = &data->sets[i]; diff --git a/net/hsr/hsr_slave.c b/net/hsr/hsr_slave.c index f4b9f7a3ce51..25b6ffba26cd 100644 --- a/net/hsr/hsr_slave.c +++ b/net/hsr/hsr_slave.c @@ -18,7 +18,7 @@ static rx_handler_result_t hsr_handle_frame(struct sk_buff **pskb) { struct sk_buff *skb = *pskb; struct hsr_port *port; - u16 protocol; + __be16 protocol; if (!skb_mac_header_was_set(skb)) { WARN_ONCE(1, "%s: skb invalid", __func__); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 0bd10a1f477f..a23094b050f8 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1258,7 +1258,8 @@ static int cipso_v4_parsetag_rbm(const struct cipso_v4_doi *doi_def, return ret_val; } - secattr->flags |= NETLBL_SECATTR_MLS_CAT; + if (secattr->attr.mls.cat) + secattr->flags |= NETLBL_SECATTR_MLS_CAT; } return 0; @@ -1439,7 +1440,8 @@ static int cipso_v4_parsetag_rng(const struct cipso_v4_doi *doi_def, return ret_val; } - secattr->flags |= NETLBL_SECATTR_MLS_CAT; + if (secattr->attr.mls.cat) + secattr->flags |= NETLBL_SECATTR_MLS_CAT; } return 0; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 213be9c050ad..1bf9da3a75f9 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -918,7 +918,6 @@ int ip_valid_fib_dump_req(struct net *net, const struct nlmsghdr *nlh, else filter->dump_exceptions = false; - filter->dump_all_families = (rtm->rtm_family == AF_UNSPEC); filter->flags = rtm->rtm_flags; filter->protocol = rtm->rtm_protocol; filter->rt_type = rtm->rtm_type; @@ -990,7 +989,7 @@ static int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (filter.table_id) { tb = fib_get_table(net, filter.table_id); if (!tb) { - if (filter.dump_all_families) + if (rtnl_msg_family(cb->nlh) != PF_INET) return skb->len; NL_SET_ERR_MSG(cb->extack, "ipv4: FIB table does not exist"); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 5f34eb951627..65c29f2bd89f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -24,17 +24,19 @@ #include <net/addrconf.h> #if IS_ENABLED(CONFIG_IPV6) -/* match_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses if IPv6 - * only, and any IPv4 addresses if not IPv6 only - * match_wildcard == false: addresses must be exactly the same, i.e. - * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, - * and 0.0.0.0 equals to 0.0.0.0 only +/* match_sk*_wildcard == true: IPV6_ADDR_ANY equals to any IPv6 addresses + * if IPv6 only, and any IPv4 addresses + * if not IPv6 only + * match_sk*_wildcard == false: addresses must be exactly the same, i.e. + * IPV6_ADDR_ANY only equals to IPV6_ADDR_ANY, + * and 0.0.0.0 equals to 0.0.0.0 only */ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, const struct in6_addr *sk2_rcv_saddr6, __be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, bool sk1_ipv6only, bool sk2_ipv6only, - bool match_wildcard) + bool match_sk1_wildcard, + bool match_sk2_wildcard) { int addr_type = ipv6_addr_type(sk1_rcv_saddr6); int addr_type2 = sk2_rcv_saddr6 ? ipv6_addr_type(sk2_rcv_saddr6) : IPV6_ADDR_MAPPED; @@ -44,8 +46,8 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, if (!sk2_ipv6only) { if (sk1_rcv_saddr == sk2_rcv_saddr) return true; - if (!sk1_rcv_saddr || !sk2_rcv_saddr) - return match_wildcard; + return (match_sk1_wildcard && !sk1_rcv_saddr) || + (match_sk2_wildcard && !sk2_rcv_saddr); } return false; } @@ -53,11 +55,11 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, if (addr_type == IPV6_ADDR_ANY && addr_type2 == IPV6_ADDR_ANY) return true; - if (addr_type2 == IPV6_ADDR_ANY && match_wildcard && + if (addr_type2 == IPV6_ADDR_ANY && match_sk2_wildcard && !(sk2_ipv6only && addr_type == IPV6_ADDR_MAPPED)) return true; - if (addr_type == IPV6_ADDR_ANY && match_wildcard && + if (addr_type == IPV6_ADDR_ANY && match_sk1_wildcard && !(sk1_ipv6only && addr_type2 == IPV6_ADDR_MAPPED)) return true; @@ -69,18 +71,19 @@ static bool ipv6_rcv_saddr_equal(const struct in6_addr *sk1_rcv_saddr6, } #endif -/* match_wildcard == true: 0.0.0.0 equals to any IPv4 addresses - * match_wildcard == false: addresses must be exactly the same, i.e. - * 0.0.0.0 only equals to 0.0.0.0 +/* match_sk*_wildcard == true: 0.0.0.0 equals to any IPv4 addresses + * match_sk*_wildcard == false: addresses must be exactly the same, i.e. + * 0.0.0.0 only equals to 0.0.0.0 */ static bool ipv4_rcv_saddr_equal(__be32 sk1_rcv_saddr, __be32 sk2_rcv_saddr, - bool sk2_ipv6only, bool match_wildcard) + bool sk2_ipv6only, bool match_sk1_wildcard, + bool match_sk2_wildcard) { if (!sk2_ipv6only) { if (sk1_rcv_saddr == sk2_rcv_saddr) return true; - if (!sk1_rcv_saddr || !sk2_rcv_saddr) - return match_wildcard; + return (match_sk1_wildcard && !sk1_rcv_saddr) || + (match_sk2_wildcard && !sk2_rcv_saddr); } return false; } @@ -96,10 +99,12 @@ bool inet_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2, sk2->sk_rcv_saddr, ipv6_only_sock(sk), ipv6_only_sock(sk2), + match_wildcard, match_wildcard); #endif return ipv4_rcv_saddr_equal(sk->sk_rcv_saddr, sk2->sk_rcv_saddr, - ipv6_only_sock(sk2), match_wildcard); + ipv6_only_sock(sk2), match_wildcard, + match_wildcard); } EXPORT_SYMBOL(inet_rcv_saddr_equal); @@ -285,10 +290,10 @@ static inline int sk_reuseport_match(struct inet_bind_bucket *tb, tb->fast_rcv_saddr, sk->sk_rcv_saddr, tb->fast_ipv6_only, - ipv6_only_sock(sk), true); + ipv6_only_sock(sk), true, false); #endif return ipv4_rcv_saddr_equal(tb->fast_rcv_saddr, sk->sk_rcv_saddr, - ipv6_only_sock(sk), true); + ipv6_only_sock(sk), true, false); } /* Obtain a reference to a local port for the given sock, diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 2f01cf6fa0de..678575adaf3b 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -698,7 +698,7 @@ out: rtnl_link_failed: #if IS_ENABLED(CONFIG_MPLS) - xfrm4_tunnel_deregister(&mplsip_handler, AF_INET); + xfrm4_tunnel_deregister(&mplsip_handler, AF_MPLS); xfrm_tunnel_mplsip_failed: #endif diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 9cf83cc85e4a..b2363b82b48d 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -109,8 +109,10 @@ static void mroute_clean_tables(struct mr_table *mrt, int flags); static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IP_MROUTE_MULTIPLE_TABLES -#define ipmr_for_each_table(mrt, net) \ - list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list) +#define ipmr_for_each_table(mrt, net) \ + list_for_each_entry_rcu(mrt, &net->ipv4.mr_tables, list, \ + lockdep_rtnl_is_held() || \ + list_empty(&net->ipv4.mr_tables)) static struct mr_table *ipmr_mr_table_iter(struct net *net, struct mr_table *mrt) @@ -2611,7 +2613,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) mrt = ipmr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) { - if (filter.dump_all_families) + if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IPMR) return skb->len; NL_SET_ERR_MSG(cb->extack, "ipv4: MR table does not exist"); diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c index fdfca534d094..715e14475220 100644 --- a/net/ipv4/nexthop.c +++ b/net/ipv4/nexthop.c @@ -276,6 +276,7 @@ out: return 0; nla_put_failure: + nlmsg_cancel(skb, nlh); return -EMSGSIZE; } @@ -433,7 +434,7 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[], if (!valid_group_nh(nh, len, extack)) return -EINVAL; } - for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) { + for (i = NHA_GROUP_TYPE + 1; i < __NHA_MAX; ++i) { if (!tb[i]) continue; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 788c69d9bfe0..b73f540fa19b 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -491,18 +491,16 @@ u32 ip_idents_reserve(u32 hash, int segs) atomic_t *p_id = ip_idents + hash % IP_IDENTS_SZ; u32 old = READ_ONCE(*p_tstamp); u32 now = (u32)jiffies; - u32 new, delta = 0; + u32 delta = 0; if (old != now && cmpxchg(p_tstamp, old, now) == old) delta = prandom_u32_max(now - old); - /* Do not use atomic_add_return() as it makes UBSAN unhappy */ - do { - old = (u32)atomic_read(p_id); - new = old + delta + segs; - } while (atomic_cmpxchg(p_id, old, new) != old); - - return new - segs; + /* If UBSAN reports an error there, please make sure your compiler + * supports -fno-strict-overflow before reporting it that was a bug + * in UBSAN, and it has been fixed in GCC-8. + */ + return atomic_add_return(segs + delta, p_id) - segs; } EXPORT_SYMBOL(ip_idents_reserve); @@ -915,7 +913,7 @@ void ip_rt_send_redirect(struct sk_buff *skb) /* Check for load limit; set rate_last to the latest sent * redirect. */ - if (peer->rate_tokens == 0 || + if (peer->n_redirects == 0 || time_after(jiffies, (peer->rate_last + (ip_rt_redirect_load << peer->n_redirects)))) { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 6d87de434377..dd401757eea1 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -476,9 +476,17 @@ static void tcp_tx_timestamp(struct sock *sk, u16 tsflags) static inline bool tcp_stream_is_readable(const struct tcp_sock *tp, int target, struct sock *sk) { - return (READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq) >= target) || - (sk->sk_prot->stream_memory_read ? - sk->sk_prot->stream_memory_read(sk) : false); + int avail = READ_ONCE(tp->rcv_nxt) - READ_ONCE(tp->copied_seq); + + if (avail > 0) { + if (avail >= target) + return true; + if (tcp_rmem_pressure(sk)) + return true; + } + if (sk->sk_prot->stream_memory_read) + return sk->sk_prot->stream_memory_read(sk); + return false; } /* @@ -1756,10 +1764,11 @@ static int tcp_zerocopy_receive(struct sock *sk, down_read(¤t->mm->mmap_sem); - ret = -EINVAL; vma = find_vma(current->mm, address); - if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops) - goto out; + if (!vma || vma->vm_start > address || vma->vm_ops != &tcp_vm_ops) { + up_read(¤t->mm->mmap_sem); + return -EINVAL; + } zc->length = min_t(unsigned long, zc->length, vma->vm_end - address); tp = tcp_sk(sk); @@ -2154,13 +2163,15 @@ skip_copy: tp->urg_data = 0; tcp_fast_path_check(sk); } - if (used + offset < skb->len) - continue; if (TCP_SKB_CB(skb)->has_rxtstamp) { tcp_update_recv_tstamps(skb, &tss); cmsg_flags |= 2; } + + if (used + offset < skb->len) + continue; + if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) goto found_fin_ok; if (!(flags & MSG_PEEK)) diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 5a05327f97c1..629aaa9a1eb9 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -125,7 +125,6 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock, if (!ret) { msg->sg.start = i; - msg->sg.size -= apply_bytes; sk_psock_queue_msg(psock, tmp); sk_psock_data_ready(sk, psock); } else { @@ -262,14 +261,17 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, struct sk_psock *psock; int copied, ret; + if (unlikely(flags & MSG_ERRQUEUE)) + return inet_recv_error(sk, msg, len, addr_len); + psock = sk_psock_get(sk); if (unlikely(!psock)) return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); - if (unlikely(flags & MSG_ERRQUEUE)) - return inet_recv_error(sk, msg, len, addr_len); if (!skb_queue_empty(&sk->sk_receive_queue) && - sk_psock_queue_empty(psock)) + sk_psock_queue_empty(psock)) { + sk_psock_put(sk, psock); return tcp_recvmsg(sk, msg, len, nonblock, flags, addr_len); + } lock_sock(sk); msg_bytes_ready: copied = __tcp_bpf_recvmsg(sk, psock, msg, len, flags); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index bf4ced9273e8..29c6fc8c7716 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3926,10 +3926,6 @@ void tcp_parse_options(const struct net *net, */ break; #endif - case TCPOPT_MPTCP: - mptcp_parse_option(skb, ptr, opsize, opt_rx); - break; - case TCPOPT_FASTOPEN: tcp_parse_fastopen_option( opsize - TCPOLEN_FASTOPEN_BASE, @@ -4761,7 +4757,8 @@ void tcp_data_ready(struct sock *sk) const struct tcp_sock *tp = tcp_sk(sk); int avail = tp->rcv_nxt - tp->copied_seq; - if (avail < sk->sk_rcvlowat && !sock_flag(sk, SOCK_DONE)) + if (avail < sk->sk_rcvlowat && !tcp_rmem_pressure(sk) && + !sock_flag(sk, SOCK_DONE)) return; sk->sk_data_ready(sk); @@ -5990,9 +5987,6 @@ static int tcp_rcv_synsent_state_process(struct sock *sk, struct sk_buff *skb, tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); tcp_initialize_rcv_mss(sk); - if (sk_is_mptcp(sk)) - mptcp_rcv_synsent(sk); - /* Remember, tcp_poll() does not lock socket! * Change state from SYN-SENT only after copied_seq * is initialized. */ diff --git a/net/ipv6/calipso.c b/net/ipv6/calipso.c index 221c81f85cbf..8d3f66c310db 100644 --- a/net/ipv6/calipso.c +++ b/net/ipv6/calipso.c @@ -1047,7 +1047,8 @@ static int calipso_opt_getattr(const unsigned char *calipso, goto getattr_return; } - secattr->flags |= NETLBL_SECATTR_MLS_CAT; + if (secattr->attr.mls.cat) + secattr->flags |= NETLBL_SECATTR_MLS_CAT; } secattr->type = NETLBL_NLTYPE_CALIPSO; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 46ed56719476..20314895509c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -664,7 +664,7 @@ static int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) if (arg.filter.table_id) { tb = fib6_get_table(net, arg.filter.table_id); if (!tb) { - if (arg.filter.dump_all_families) + if (rtnl_msg_family(cb->nlh) != PF_INET6) goto out; NL_SET_ERR_MSG_MOD(cb->extack, "FIB table does not exist"); diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 65a54d74acc1..1f4d20e97c07 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -98,7 +98,8 @@ static void ipmr_expire_process(struct timer_list *t); #ifdef CONFIG_IPV6_MROUTE_MULTIPLE_TABLES #define ip6mr_for_each_table(mrt, net) \ list_for_each_entry_rcu(mrt, &net->ipv6.mr6_tables, list, \ - lockdep_rtnl_is_held()) + lockdep_rtnl_is_held() || \ + list_empty(&net->ipv6.mr6_tables)) static struct mr_table *ip6mr_mr_table_iter(struct net *net, struct mr_table *mrt) @@ -2502,7 +2503,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) mrt = ip6mr_get_table(sock_net(skb->sk), filter.table_id); if (!mrt) { - if (filter.dump_all_families) + if (rtnl_msg_family(cb->nlh) != RTNL_FAMILY_IP6MR) return skb->len; NL_SET_ERR_MSG_MOD(cb->extack, "MR table does not exist"); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 310cbddaa533..ff847a324220 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1385,9 +1385,18 @@ static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res) } ip6_rt_copy_init(pcpu_rt, res); pcpu_rt->rt6i_flags |= RTF_PCPU; + + if (f6i->nh) + pcpu_rt->sernum = rt_genid_ipv6(dev_net(dev)); + return pcpu_rt; } +static bool rt6_is_valid(const struct rt6_info *rt6) +{ + return rt6->sernum == rt_genid_ipv6(dev_net(rt6->dst.dev)); +} + /* It should be called with rcu_read_lock() acquired */ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res) { @@ -1395,6 +1404,19 @@ static struct rt6_info *rt6_get_pcpu_route(const struct fib6_result *res) pcpu_rt = this_cpu_read(*res->nh->rt6i_pcpu); + if (pcpu_rt && pcpu_rt->sernum && !rt6_is_valid(pcpu_rt)) { + struct rt6_info *prev, **p; + + p = this_cpu_ptr(res->nh->rt6i_pcpu); + prev = xchg(p, NULL); + if (prev) { + dst_dev_put(&prev->dst); + dst_release(&prev->dst); + } + + pcpu_rt = NULL; + } + return pcpu_rt; } @@ -2593,6 +2615,9 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt = container_of(dst, struct rt6_info, dst); + if (rt->sernum) + return rt6_is_valid(rt) ? dst : NULL; + rcu_read_lock(); /* All IPV6 dsts are created with ->obsolete set to the value @@ -2697,8 +2722,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, const struct in6_addr *daddr, *saddr; struct rt6_info *rt6 = (struct rt6_info *)dst; - if (dst_metric_locked(dst, RTAX_MTU)) - return; + /* Note: do *NOT* check dst_metric_locked(dst, RTAX_MTU) + * IPv6 pmtu discovery isn't optional, so 'mtu lock' cannot disable it. + * [see also comment in rt6_mtu_change_route()] + */ if (iph) { daddr = &iph->daddr; diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 4c7e0a27fa9c..37b434293bda 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -27,8 +27,9 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len) { - int trailing; unsigned int tlv_offset; + int max_last_entry; + int trailing; if (srh->type != IPV6_SRCRT_TYPE_4) return false; @@ -36,7 +37,12 @@ bool seg6_validate_srh(struct ipv6_sr_hdr *srh, int len) if (((srh->hdrlen + 1) << 3) != len) return false; - if (srh->segments_left > srh->first_segment) + max_last_entry = (srh->hdrlen / 2) - 1; + + if (srh->first_segment > max_last_entry) + return false; + + if (srh->segments_left > srh->first_segment + 1) return false; tlv_offset = sizeof(*srh) + ((srh->first_segment + 1) << 4); diff --git a/net/mptcp/crypto.c b/net/mptcp/crypto.c index c151628bd416..0f5a414a9366 100644 --- a/net/mptcp/crypto.c +++ b/net/mptcp/crypto.c @@ -47,8 +47,6 @@ void mptcp_crypto_key_sha(u64 key, u32 *token, u64 *idsn) void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) { u8 input[SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE]; - __be32 mptcp_hashed_key[SHA256_DIGEST_WORDS]; - __be32 *hash_out = (__force __be32 *)hmac; struct sha256_state state; u8 key1be[8]; u8 key2be[8]; @@ -86,11 +84,7 @@ void mptcp_crypto_hmac_sha(u64 key1, u64 key2, u8 *msg, int len, void *hmac) sha256_init(&state); sha256_update(&state, input, SHA256_BLOCK_SIZE + SHA256_DIGEST_SIZE); - sha256_final(&state, (u8 *)mptcp_hashed_key); - - /* takes only first 160 bits */ - for (i = 0; i < 5; i++) - hash_out[i] = mptcp_hashed_key[i]; + sha256_final(&state, (u8 *)hmac); } #ifdef CONFIG_MPTCP_HMAC_TEST @@ -101,29 +95,29 @@ struct test_cast { }; /* we can't reuse RFC 4231 test vectors, as we have constraint on the - * input and key size, and we truncate the output. + * input and key size. */ static struct test_cast tests[] = { { .key = "0b0b0b0b0b0b0b0b", .msg = "48692054", - .result = "8385e24fb4235ac37556b6b886db106284a1da67", + .result = "8385e24fb4235ac37556b6b886db106284a1da671699f46db1f235ec622dcafa", }, { .key = "aaaaaaaaaaaaaaaa", .msg = "dddddddd", - .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492", + .result = "2c5e219164ff1dca1c4a92318d847bb6b9d44492984e1eb71aff9022f71046e9", }, { .key = "0102030405060708", .msg = "cdcdcdcd", - .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6", + .result = "e73b9ba9969969cefb04aa0d6df18ec2fcc075b6f23b4d8c4da736a5dbbc6e7d", }, }; static int __init test_mptcp_crypto(void) { - char hmac[20], hmac_hex[41]; + char hmac[32], hmac_hex[65]; u32 nonce1, nonce2; u64 key1, key2; u8 msg[8]; @@ -140,11 +134,11 @@ static int __init test_mptcp_crypto(void) put_unaligned_be32(nonce2, &msg[4]); mptcp_crypto_hmac_sha(key1, key2, msg, 8, hmac); - for (j = 0; j < 20; ++j) + for (j = 0; j < 32; ++j) sprintf(&hmac_hex[j << 1], "%02x", hmac[j] & 0xff); - hmac_hex[40] = 0; + hmac_hex[64] = 0; - if (memcmp(hmac_hex, tests[i].result, 40)) + if (memcmp(hmac_hex, tests[i].result, 64)) pr_err("test %d failed, got %s expected %s", i, hmac_hex, tests[i].result); else diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 4a7c467b99db..7793b6011fa7 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -7,6 +7,7 @@ #define pr_fmt(fmt) "MPTCP: " fmt #include <linux/kernel.h> +#include <crypto/sha.h> #include <net/tcp.h> #include <net/mptcp.h> #include "protocol.h" @@ -16,10 +17,10 @@ static bool mptcp_cap_flag_sha256(u8 flags) return (flags & MPTCP_CAP_FLAG_MASK) == MPTCP_CAP_HMAC_SHA256; } -void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr, - int opsize, struct tcp_options_received *opt_rx) +static void mptcp_parse_option(const struct sk_buff *skb, + const unsigned char *ptr, int opsize, + struct mptcp_options_received *mp_opt) { - struct mptcp_options_received *mp_opt = &opt_rx->mptcp; u8 subtype = *ptr >> 4; int expected_opsize; u8 version; @@ -283,12 +284,20 @@ void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr, } void mptcp_get_options(const struct sk_buff *skb, - struct tcp_options_received *opt_rx) + struct mptcp_options_received *mp_opt) { - const unsigned char *ptr; const struct tcphdr *th = tcp_hdr(skb); - int length = (th->doff * 4) - sizeof(struct tcphdr); + const unsigned char *ptr; + int length; + + /* initialize option status */ + mp_opt->mp_capable = 0; + mp_opt->mp_join = 0; + mp_opt->add_addr = 0; + mp_opt->rm_addr = 0; + mp_opt->dss = 0; + length = (th->doff * 4) - sizeof(struct tcphdr); ptr = (const unsigned char *)(th + 1); while (length > 0) { @@ -308,7 +317,7 @@ void mptcp_get_options(const struct sk_buff *skb, if (opsize > length) return; /* don't parse partial options */ if (opcode == TCPOPT_MPTCP) - mptcp_parse_option(skb, ptr, opsize, opt_rx); + mptcp_parse_option(skb, ptr, opsize, mp_opt); ptr += opsize - 2; length -= opsize; } @@ -344,28 +353,6 @@ bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb, return false; } -void mptcp_rcv_synsent(struct sock *sk) -{ - struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); - struct tcp_sock *tp = tcp_sk(sk); - - if (subflow->request_mptcp && tp->rx_opt.mptcp.mp_capable) { - subflow->mp_capable = 1; - subflow->can_ack = 1; - subflow->remote_key = tp->rx_opt.mptcp.sndr_key; - pr_debug("subflow=%p, remote_key=%llu", subflow, - subflow->remote_key); - } else if (subflow->request_join && tp->rx_opt.mptcp.mp_join) { - subflow->mp_join = 1; - subflow->thmac = tp->rx_opt.mptcp.thmac; - subflow->remote_nonce = tp->rx_opt.mptcp.nonce; - pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, - subflow->thmac, subflow->remote_nonce); - } else if (subflow->request_mptcp) { - tcp_sk(sk)->is_mptcp = 0; - } -} - /* MP_JOIN client subflow must wait for 4th ack before sending any data: * TCP can't schedule delack timer before the subflow is fully established. * MPTCP uses the delack timer to do 3rd ack retransmissions @@ -549,7 +536,7 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb, static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id, struct in_addr *addr) { - u8 hmac[MPTCP_ADDR_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[7]; msg[0] = addr_id; @@ -559,14 +546,14 @@ static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id, mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac); - return get_unaligned_be64(hmac); + return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]); } #if IS_ENABLED(CONFIG_MPTCP_IPV6) static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id, struct in6_addr *addr) { - u8 hmac[MPTCP_ADDR_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; u8 msg[19]; msg[0] = addr_id; @@ -576,7 +563,7 @@ static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id, mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac); - return get_unaligned_be64(hmac); + return get_unaligned_be64(&hmac[SHA256_DIGEST_SIZE - sizeof(u64)]); } #endif @@ -709,7 +696,7 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk, if (TCP_SKB_CB(skb)->seq != subflow->ssn_offset + 1) return subflow->mp_capable; - if (mp_opt->use_ack) { + if (mp_opt->dss && mp_opt->use_ack) { /* subflows are fully established as soon as we get any * additional ack. */ @@ -717,8 +704,6 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk, goto fully_established; } - WARN_ON_ONCE(subflow->can_ack); - /* If the first established packet does not contain MP_CAPABLE + data * then fallback to TCP */ @@ -728,6 +713,8 @@ static bool check_fully_established(struct mptcp_sock *msk, struct sock *sk, return false; } + if (unlikely(!READ_ONCE(msk->pm.server_side))) + pr_warn_once("bogus mpc option on established client sk"); subflow->fully_established = 1; subflow->remote_key = mp_opt->sndr_key; subflow->can_ack = 1; @@ -819,41 +806,41 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); struct mptcp_sock *msk = mptcp_sk(subflow->conn); - struct mptcp_options_received *mp_opt; + struct mptcp_options_received mp_opt; struct mptcp_ext *mpext; - mp_opt = &opt_rx->mptcp; - if (!check_fully_established(msk, sk, subflow, skb, mp_opt)) + mptcp_get_options(skb, &mp_opt); + if (!check_fully_established(msk, sk, subflow, skb, &mp_opt)) return; - if (mp_opt->add_addr && add_addr_hmac_valid(msk, mp_opt)) { + if (mp_opt.add_addr && add_addr_hmac_valid(msk, &mp_opt)) { struct mptcp_addr_info addr; - addr.port = htons(mp_opt->port); - addr.id = mp_opt->addr_id; - if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) { + addr.port = htons(mp_opt.port); + addr.id = mp_opt.addr_id; + if (mp_opt.family == MPTCP_ADDR_IPVERSION_4) { addr.family = AF_INET; - addr.addr = mp_opt->addr; + addr.addr = mp_opt.addr; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) - else if (mp_opt->family == MPTCP_ADDR_IPVERSION_6) { + else if (mp_opt.family == MPTCP_ADDR_IPVERSION_6) { addr.family = AF_INET6; - addr.addr6 = mp_opt->addr6; + addr.addr6 = mp_opt.addr6; } #endif - if (!mp_opt->echo) + if (!mp_opt.echo) mptcp_pm_add_addr_received(msk, &addr); - mp_opt->add_addr = 0; + mp_opt.add_addr = 0; } - if (!mp_opt->dss) + if (!mp_opt.dss) return; /* we can't wait for recvmsg() to update the ack_seq, otherwise * monodirectional flows will stuck */ - if (mp_opt->use_ack) - update_una(msk, mp_opt); + if (mp_opt.use_ack) + update_una(msk, &mp_opt); mpext = skb_ext_add(skb, SKB_EXT_MPTCP); if (!mpext) @@ -861,8 +848,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, memset(mpext, 0, sizeof(*mpext)); - if (mp_opt->use_map) { - if (mp_opt->mpc_map) { + if (mp_opt.use_map) { + if (mp_opt.mpc_map) { /* this is an MP_CAPABLE carrying MPTCP data * we know this map the first chunk of data */ @@ -872,13 +859,14 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb, mpext->subflow_seq = 1; mpext->dsn64 = 1; mpext->mpc_map = 1; + mpext->data_fin = 0; } else { - mpext->data_seq = mp_opt->data_seq; - mpext->subflow_seq = mp_opt->subflow_seq; - mpext->dsn64 = mp_opt->dsn64; - mpext->data_fin = mp_opt->data_fin; + mpext->data_seq = mp_opt.data_seq; + mpext->subflow_seq = mp_opt.subflow_seq; + mpext->dsn64 = mp_opt.dsn64; + mpext->data_fin = mp_opt.data_fin; } - mpext->data_len = mp_opt->data_len; + mpext->data_len = mp_opt.data_len; mpext->use_map = 1; } } diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b22a63ba2348..32ea8d35489a 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1316,11 +1316,12 @@ static void mptcp_copy_inaddrs(struct sock *msk, const struct sock *ssk) static int mptcp_disconnect(struct sock *sk, int flags) { - lock_sock(sk); - __mptcp_clear_xmit(sk); - release_sock(sk); - mptcp_cancel_work(sk); - return tcp_disconnect(sk, flags); + /* Should never be called. + * inet_stream_connect() calls ->disconnect, but that + * refers to the subflow socket, not the mptcp one. + */ + WARN_ON_ONCE(1); + return 0; } #if IS_ENABLED(CONFIG_MPTCP_IPV6) @@ -1333,7 +1334,7 @@ static struct ipv6_pinfo *mptcp_inet6_sk(const struct sock *sk) #endif struct sock *mptcp_sk_clone(const struct sock *sk, - const struct tcp_options_received *opt_rx, + const struct mptcp_options_received *mp_opt, struct request_sock *req) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); @@ -1372,9 +1373,9 @@ struct sock *mptcp_sk_clone(const struct sock *sk, msk->write_seq = subflow_req->idsn + 1; atomic64_set(&msk->snd_una, msk->write_seq); - if (opt_rx->mptcp.mp_capable) { + if (mp_opt->mp_capable) { msk->can_ack = true; - msk->remote_key = opt_rx->mptcp.sndr_key; + msk->remote_key = mp_opt->sndr_key; mptcp_crypto_key_sha(msk->remote_key, NULL, &ack_seq); ack_seq++; msk->ack_seq = ack_seq; @@ -1628,6 +1629,8 @@ bool mptcp_finish_join(struct sock *sk) ret = mptcp_pm_allow_new_subflow(msk); if (ret) { + subflow->map_seq = msk->ack_seq; + /* active connections are already on conn_list */ spin_lock_bh(&msk->join_list_lock); if (!WARN_ON_ONCE(!list_empty(&subflow->node))) diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index a2b3048037d0..d0803dfb8108 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -81,7 +81,6 @@ /* MPTCP ADD_ADDR flags */ #define MPTCP_ADDR_ECHO BIT(0) -#define MPTCP_ADDR_HMAC_LEN 20 #define MPTCP_ADDR_IPVERSION_4 4 #define MPTCP_ADDR_IPVERSION_6 6 @@ -91,6 +90,45 @@ #define MPTCP_WORK_RTX 2 #define MPTCP_WORK_EOF 3 +struct mptcp_options_received { + u64 sndr_key; + u64 rcvr_key; + u64 data_ack; + u64 data_seq; + u32 subflow_seq; + u16 data_len; + u16 mp_capable : 1, + mp_join : 1, + dss : 1, + add_addr : 1, + rm_addr : 1, + family : 4, + echo : 1, + backup : 1; + u32 token; + u32 nonce; + u64 thmac; + u8 hmac[20]; + u8 join_id; + u8 use_map:1, + dsn64:1, + data_fin:1, + use_ack:1, + ack64:1, + mpc_map:1, + __unused:2; + u8 addr_id; + u8 rm_id; + union { + struct in_addr addr; +#if IS_ENABLED(CONFIG_MPTCP_IPV6) + struct in6_addr addr6; +#endif + }; + u64 ahmac; + u16 port; +}; + static inline __be32 mptcp_option(u8 subopt, u8 len, u8 nib, u8 field) { return htonl((TCPOPT_MPTCP << 24) | (len << 16) | (subopt << 12) | @@ -331,10 +369,10 @@ int mptcp_proto_v6_init(void); #endif struct sock *mptcp_sk_clone(const struct sock *sk, - const struct tcp_options_received *opt_rx, + const struct mptcp_options_received *mp_opt, struct request_sock *req); void mptcp_get_options(const struct sk_buff *skb, - struct tcp_options_received *opt_rx); + struct mptcp_options_received *mp_opt); void mptcp_finish_connect(struct sock *sk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index fabd06f2ff45..8968b2c065e7 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -10,6 +10,7 @@ #include <linux/module.h> #include <linux/netdevice.h> #include <crypto/algapi.h> +#include <crypto/sha.h> #include <net/sock.h> #include <net/inet_common.h> #include <net/inet_hashtables.h> @@ -89,7 +90,7 @@ static bool subflow_token_join_request(struct request_sock *req, const struct sk_buff *skb) { struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - u8 hmac[MPTCPOPT_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; struct mptcp_sock *msk; int local_id; @@ -124,12 +125,11 @@ static void subflow_init_req(struct request_sock *req, { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk_listener); struct mptcp_subflow_request_sock *subflow_req = mptcp_subflow_rsk(req); - struct tcp_options_received rx_opt; + struct mptcp_options_received mp_opt; pr_debug("subflow_req=%p, listener=%p", subflow_req, listener); - memset(&rx_opt.mptcp, 0, sizeof(rx_opt.mptcp)); - mptcp_get_options(skb, &rx_opt); + mptcp_get_options(skb, &mp_opt); subflow_req->mp_capable = 0; subflow_req->mp_join = 0; @@ -142,16 +142,16 @@ static void subflow_init_req(struct request_sock *req, return; #endif - if (rx_opt.mptcp.mp_capable) { + if (mp_opt.mp_capable) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MPCAPABLEPASSIVE); - if (rx_opt.mptcp.mp_join) + if (mp_opt.mp_join) return; - } else if (rx_opt.mptcp.mp_join) { + } else if (mp_opt.mp_join) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINSYNRX); } - if (rx_opt.mptcp.mp_capable && listener->request_mptcp) { + if (mp_opt.mp_capable && listener->request_mptcp) { int err; err = mptcp_token_new_request(req); @@ -159,13 +159,13 @@ static void subflow_init_req(struct request_sock *req, subflow_req->mp_capable = 1; subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; - } else if (rx_opt.mptcp.mp_join && listener->request_mptcp) { + } else if (mp_opt.mp_join && listener->request_mptcp) { subflow_req->ssn_offset = TCP_SKB_CB(skb)->seq; subflow_req->mp_join = 1; - subflow_req->backup = rx_opt.mptcp.backup; - subflow_req->remote_id = rx_opt.mptcp.join_id; - subflow_req->token = rx_opt.mptcp.token; - subflow_req->remote_nonce = rx_opt.mptcp.nonce; + subflow_req->backup = mp_opt.backup; + subflow_req->remote_id = mp_opt.join_id; + subflow_req->token = mp_opt.token; + subflow_req->remote_nonce = mp_opt.nonce; pr_debug("token=%u, remote_nonce=%u", subflow_req->token, subflow_req->remote_nonce); if (!subflow_token_join_request(req, skb)) { @@ -202,7 +202,7 @@ static void subflow_v6_init_req(struct request_sock *req, /* validate received truncated hmac and create hmac for third ACK */ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow) { - u8 hmac[MPTCPOPT_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; u64 thmac; subflow_generate_hmac(subflow->remote_key, subflow->local_key, @@ -221,29 +221,55 @@ static bool subflow_thmac_valid(struct mptcp_subflow_context *subflow) static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) { struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk); + struct mptcp_options_received mp_opt; struct sock *parent = subflow->conn; + struct tcp_sock *tp = tcp_sk(sk); subflow->icsk_af_ops->sk_rx_dst_set(sk, skb); - if (inet_sk_state_load(parent) != TCP_ESTABLISHED) { + if (inet_sk_state_load(parent) == TCP_SYN_SENT) { inet_sk_state_store(parent, TCP_ESTABLISHED); parent->sk_state_change(parent); } - if (subflow->conn_finished || !tcp_sk(sk)->is_mptcp) + /* be sure no special action on any packet other than syn-ack */ + if (subflow->conn_finished) + return; + + subflow->conn_finished = 1; + + mptcp_get_options(skb, &mp_opt); + if (subflow->request_mptcp && mp_opt.mp_capable) { + subflow->mp_capable = 1; + subflow->can_ack = 1; + subflow->remote_key = mp_opt.sndr_key; + pr_debug("subflow=%p, remote_key=%llu", subflow, + subflow->remote_key); + } else if (subflow->request_join && mp_opt.mp_join) { + subflow->mp_join = 1; + subflow->thmac = mp_opt.thmac; + subflow->remote_nonce = mp_opt.nonce; + pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, + subflow->thmac, subflow->remote_nonce); + } else if (subflow->request_mptcp) { + tp->is_mptcp = 0; + } + + if (!tp->is_mptcp) return; if (subflow->mp_capable) { pr_debug("subflow=%p, remote_key=%llu", mptcp_subflow_ctx(sk), subflow->remote_key); mptcp_finish_connect(sk); - subflow->conn_finished = 1; if (skb) { pr_debug("synack seq=%u", TCP_SKB_CB(skb)->seq); subflow->ssn_offset = TCP_SKB_CB(skb)->seq; } } else if (subflow->mp_join) { + u8 hmac[SHA256_DIGEST_SIZE]; + pr_debug("subflow=%p, thmac=%llu, remote_nonce=%u", subflow, subflow->thmac, subflow->remote_nonce); @@ -256,7 +282,9 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) subflow_generate_hmac(subflow->local_key, subflow->remote_key, subflow->local_nonce, subflow->remote_nonce, - subflow->hmac); + hmac); + + memcpy(subflow->hmac, hmac, MPTCPOPT_HMAC_LEN); if (skb) subflow->ssn_offset = TCP_SKB_CB(skb)->seq; @@ -264,7 +292,6 @@ static void subflow_finish_connect(struct sock *sk, const struct sk_buff *skb) if (!mptcp_finish_join(sk)) goto do_reset; - subflow->conn_finished = 1; MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_JOINSYNACKRX); } else { do_reset: @@ -322,10 +349,10 @@ drop: /* validate hmac received in third ACK */ static bool subflow_hmac_valid(const struct request_sock *req, - const struct tcp_options_received *rx_opt) + const struct mptcp_options_received *mp_opt) { const struct mptcp_subflow_request_sock *subflow_req; - u8 hmac[MPTCPOPT_HMAC_LEN]; + u8 hmac[SHA256_DIGEST_SIZE]; struct mptcp_sock *msk; bool ret; @@ -339,7 +366,7 @@ static bool subflow_hmac_valid(const struct request_sock *req, subflow_req->local_nonce, hmac); ret = true; - if (crypto_memneq(hmac, rx_opt->mptcp.hmac, sizeof(hmac))) + if (crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN)) ret = false; sock_put((struct sock *)msk); @@ -395,7 +422,7 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, { struct mptcp_subflow_context *listener = mptcp_subflow_ctx(sk); struct mptcp_subflow_request_sock *subflow_req; - struct tcp_options_received opt_rx; + struct mptcp_options_received mp_opt; bool fallback_is_fatal = false; struct sock *new_msk = NULL; bool fallback = false; @@ -403,7 +430,10 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, pr_debug("listener=%p, req=%p, conn=%p", listener, req, listener->conn); - opt_rx.mptcp.mp_capable = 0; + /* we need later a valid 'mp_capable' value even when options are not + * parsed + */ + mp_opt.mp_capable = 0; if (tcp_rsk(req)->is_mptcp == 0) goto create_child; @@ -418,22 +448,21 @@ static struct sock *subflow_syn_recv_sock(const struct sock *sk, goto create_msk; } - mptcp_get_options(skb, &opt_rx); - if (!opt_rx.mptcp.mp_capable) { + mptcp_get_options(skb, &mp_opt); + if (!mp_opt.mp_capable) { fallback = true; goto create_child; } create_msk: - new_msk = mptcp_sk_clone(listener->conn, &opt_rx, req); + new_msk = mptcp_sk_clone(listener->conn, &mp_opt, req); if (!new_msk) fallback = true; } else if (subflow_req->mp_join) { fallback_is_fatal = true; - opt_rx.mptcp.mp_join = 0; - mptcp_get_options(skb, &opt_rx); - if (!opt_rx.mptcp.mp_join || - !subflow_hmac_valid(req, &opt_rx)) { + mptcp_get_options(skb, &mp_opt); + if (!mp_opt.mp_join || + !subflow_hmac_valid(req, &mp_opt)) { SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKMAC); return NULL; } @@ -473,9 +502,9 @@ create_child: /* with OoO packets we can reach here without ingress * mpc option */ - ctx->remote_key = opt_rx.mptcp.sndr_key; - ctx->fully_established = opt_rx.mptcp.mp_capable; - ctx->can_ack = opt_rx.mptcp.mp_capable; + ctx->remote_key = mp_opt.sndr_key; + ctx->fully_established = mp_opt.mp_capable; + ctx->can_ack = mp_opt.mp_capable; } else if (ctx->mp_join) { struct mptcp_sock *owner; @@ -499,7 +528,7 @@ out: /* check for expected invariant - should never trigger, just help * catching eariler subtle bugs */ - WARN_ON_ONCE(*own_req && child && tcp_sk(child)->is_mptcp && + WARN_ON_ONCE(child && *own_req && tcp_sk(child)->is_mptcp && (!mptcp_subflow_ctx(child) || !mptcp_subflow_ctx(child)->conn)); return child; @@ -988,6 +1017,16 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock) if (err) return err; + /* the newly created socket really belongs to the owning MPTCP master + * socket, even if for additional subflows the allocation is performed + * by a kernel workqueue. Adjust inode references, so that the + * procfs/diag interaces really show this one belonging to the correct + * user. + */ + SOCK_INODE(sf)->i_ino = SOCK_INODE(sk->sk_socket)->i_ino; + SOCK_INODE(sf)->i_uid = SOCK_INODE(sk->sk_socket)->i_uid; + SOCK_INODE(sf)->i_gid = SOCK_INODE(sk->sk_socket)->i_gid; + subflow = mptcp_subflow_ctx(sf->sk); pr_debug("subflow=%p", subflow); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index c4582eb71766..1d57b95d3481 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1519,9 +1519,9 @@ __nf_conntrack_alloc(struct net *net, ct->status = 0; ct->timeout = 0; write_pnet(&ct->ct_net, net); - memset(&ct->__nfct_init_offset[0], 0, + memset(&ct->__nfct_init_offset, 0, offsetof(struct nf_conn, proto) - - offsetof(struct nf_conn, __nfct_init_offset[0])); + offsetof(struct nf_conn, __nfct_init_offset)); nf_ct_zone_add(ct, zone); @@ -2139,8 +2139,19 @@ get_next_corpse(int (*iter)(struct nf_conn *i, void *data), nf_conntrack_lock(lockp); if (*bucket < nf_conntrack_htable_size) { hlist_nulls_for_each_entry(h, n, &nf_conntrack_hash[*bucket], hnnode) { - if (NF_CT_DIRECTION(h) != IP_CT_DIR_ORIGINAL) + if (NF_CT_DIRECTION(h) != IP_CT_DIR_REPLY) continue; + /* All nf_conn objects are added to hash table twice, one + * for original direction tuple, once for the reply tuple. + * + * Exception: In the IPS_NAT_CLASH case, only the reply + * tuple is added (the original tuple already existed for + * a different object). + * + * We only need to call the iterator once for each + * conntrack, so we just use the 'reply' direction + * tuple while iterating. + */ ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) goto found; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 4344e572b7f9..42da6e337276 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -284,7 +284,7 @@ static void flow_offload_del(struct nf_flowtable *flow_table, if (nf_flow_has_expired(flow)) flow_offload_fixup_ct(flow->ct); - else if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) + else flow_offload_fixup_ct_timeout(flow->ct); flow_offload_free(flow); @@ -361,8 +361,10 @@ static void nf_flow_offload_gc_step(struct flow_offload *flow, void *data) { struct nf_flowtable *flow_table = data; - if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct) || - test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { + if (nf_flow_has_expired(flow) || nf_ct_is_dying(flow->ct)) + set_bit(NF_FLOW_TEARDOWN, &flow->flags); + + if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) { if (test_bit(NF_FLOW_HW, &flow->flags)) { if (!test_bit(NF_FLOW_HW_DYING, &flow->flags)) nf_flow_offload_del(flow_table, flow); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index e3b099c14eff..2276a73ccba2 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -817,6 +817,7 @@ static void flow_offload_work_handler(struct work_struct *work) WARN_ON_ONCE(1); } + clear_bit(NF_FLOW_HW_PENDING, &offload->flow->flags); kfree(offload); } @@ -831,9 +832,14 @@ nf_flow_offload_work_alloc(struct nf_flowtable *flowtable, { struct flow_offload_work *offload; + if (test_and_set_bit(NF_FLOW_HW_PENDING, &flow->flags)) + return NULL; + offload = kmalloc(sizeof(struct flow_offload_work), GFP_ATOMIC); - if (!offload) + if (!offload) { + clear_bit(NF_FLOW_HW_PENDING, &flow->flags); return NULL; + } offload->cmd = cmd; offload->flow = flow; @@ -1056,7 +1062,7 @@ static struct flow_indr_block_entry block_ing_entry = { int nf_flow_table_offload_init(void) { nf_flow_offload_wq = alloc_workqueue("nf_flow_table_offload", - WQ_UNBOUND | WQ_MEM_RECLAIM, 0); + WQ_UNBOUND, 0); if (!nf_flow_offload_wq) return -ENOMEM; diff --git a/net/netfilter/nf_nat_proto.c b/net/netfilter/nf_nat_proto.c index 3d816a1e5442..59151dc07fdc 100644 --- a/net/netfilter/nf_nat_proto.c +++ b/net/netfilter/nf_nat_proto.c @@ -68,15 +68,13 @@ static bool udp_manip_pkt(struct sk_buff *skb, enum nf_nat_manip_type maniptype) { struct udphdr *hdr; - bool do_csum; if (skb_ensure_writable(skb, hdroff + sizeof(*hdr))) return false; hdr = (struct udphdr *)(skb->data + hdroff); - do_csum = hdr->check || skb->ip_summed == CHECKSUM_PARTIAL; + __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, !!hdr->check); - __udp_manip_pkt(skb, iphdroff, hdr, tuple, maniptype, do_csum); return true; } diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 9f5dea0064ea..916a3c7f9eaf 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -165,12 +165,12 @@ static bool nf_osf_match_one(const struct sk_buff *skb, static const struct tcphdr *nf_osf_hdr_ctx_init(struct nf_osf_hdr_ctx *ctx, const struct sk_buff *skb, const struct iphdr *ip, - unsigned char *opts) + unsigned char *opts, + struct tcphdr *_tcph) { const struct tcphdr *tcp; - struct tcphdr _tcph; - tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), &_tcph); + tcp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(struct tcphdr), _tcph); if (!tcp) return NULL; @@ -205,10 +205,11 @@ nf_osf_match(const struct sk_buff *skb, u_int8_t family, int fmatch = FMATCH_WRONG; struct nf_osf_hdr_ctx ctx; const struct tcphdr *tcp; + struct tcphdr _tcph; memset(&ctx, 0, sizeof(ctx)); - tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts); + tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts, &_tcph); if (!tcp) return false; @@ -265,10 +266,11 @@ bool nf_osf_find(const struct sk_buff *skb, const struct nf_osf_finger *kf; struct nf_osf_hdr_ctx ctx; const struct tcphdr *tcp; + struct tcphdr _tcph; memset(&ctx, 0, sizeof(ctx)); - tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts); + tcp = nf_osf_hdr_ctx_init(&ctx, skb, ip, opts, &_tcph); if (!tcp) return false; diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 3ffef454d469..62f416bc0579 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -79,6 +79,10 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set parent = rcu_dereference_raw(parent->rb_left); continue; } + + if (nft_set_elem_expired(&rbe->ext)) + return false; + if (nft_rbtree_interval_end(rbe)) { if (nft_set_is_anonymous(set)) return false; @@ -94,6 +98,7 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && + !nft_set_elem_expired(&interval->ext) && nft_rbtree_interval_start(interval)) { *ext = &interval->ext; return true; @@ -154,6 +159,9 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, continue; } + if (nft_set_elem_expired(&rbe->ext)) + return false; + if (!nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) || (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END) == (flags & NFT_SET_ELEM_INTERVAL_END)) { @@ -170,6 +178,7 @@ static bool __nft_rbtree_get(const struct net *net, const struct nft_set *set, if (set->flags & NFT_SET_INTERVAL && interval != NULL && nft_set_elem_active(&interval->ext, genmask) && + !nft_set_elem_expired(&interval->ext) && ((!nft_rbtree_interval_end(interval) && !(flags & NFT_SET_ELEM_INTERVAL_END)) || (nft_rbtree_interval_end(interval) && @@ -418,6 +427,8 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, if (iter->count < iter->skip) goto cont; + if (nft_set_elem_expired(&rbe->ext)) + goto cont; if (!nft_set_elem_active(&rbe->ext, iter->genmask)) goto cont; diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 409a3ae47ce2..5e1239cef000 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -734,6 +734,12 @@ int netlbl_catmap_getlong(struct netlbl_lsm_catmap *catmap, if ((off & (BITS_PER_LONG - 1)) != 0) return -EINVAL; + /* a null catmap is equivalent to an empty one */ + if (!catmap) { + *offset = (u32)-1; + return 0; + } + if (off < catmap->startbit) { off = catmap->startbit; *offset = off; diff --git a/net/qrtr/qrtr.c b/net/qrtr/qrtr.c index 7ed31b5e77e4..2d8d6131bc5f 100644 --- a/net/qrtr/qrtr.c +++ b/net/qrtr/qrtr.c @@ -854,7 +854,7 @@ static int qrtr_bcast_enqueue(struct qrtr_node *node, struct sk_buff *skb, } mutex_unlock(&qrtr_node_lock); - qrtr_local_enqueue(node, skb, type, from, to); + qrtr_local_enqueue(NULL, skb, type, from, to); return 0; } diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile index 6ffb7e9887ce..ddd0f95713a9 100644 --- a/net/rxrpc/Makefile +++ b/net/rxrpc/Makefile @@ -25,6 +25,7 @@ rxrpc-y := \ peer_event.o \ peer_object.o \ recvmsg.o \ + rtt.o \ security.o \ sendmsg.o \ skbuff.o \ diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 3eb1ab40ca5c..9fe264bec70c 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -7,6 +7,7 @@ #include <linux/atomic.h> #include <linux/seqlock.h> +#include <linux/win_minmax.h> #include <net/net_namespace.h> #include <net/netns/generic.h> #include <net/sock.h> @@ -311,11 +312,14 @@ struct rxrpc_peer { #define RXRPC_RTT_CACHE_SIZE 32 spinlock_t rtt_input_lock; /* RTT lock for input routine */ ktime_t rtt_last_req; /* Time of last RTT request */ - u64 rtt; /* Current RTT estimate (in nS) */ - u64 rtt_sum; /* Sum of cache contents */ - u64 rtt_cache[RXRPC_RTT_CACHE_SIZE]; /* Determined RTT cache */ - u8 rtt_cursor; /* next entry at which to insert */ - u8 rtt_usage; /* amount of cache actually used */ + unsigned int rtt_count; /* Number of samples we've got */ + + u32 srtt_us; /* smoothed round trip time << 3 in usecs */ + u32 mdev_us; /* medium deviation */ + u32 mdev_max_us; /* maximal mdev for the last rtt period */ + u32 rttvar_us; /* smoothed mdev_max */ + u32 rto_j; /* Retransmission timeout in jiffies */ + u8 backoff; /* Backoff timeout */ u8 cong_cwnd; /* Congestion window size */ }; @@ -1041,7 +1045,6 @@ extern unsigned long rxrpc_idle_ack_delay; extern unsigned int rxrpc_rx_window_size; extern unsigned int rxrpc_rx_mtu; extern unsigned int rxrpc_rx_jumbo_max; -extern unsigned long rxrpc_resend_timeout; extern const s8 rxrpc_ack_priority[]; @@ -1069,8 +1072,6 @@ void rxrpc_send_keepalive(struct rxrpc_peer *); * peer_event.c */ void rxrpc_error_report(struct sock *); -void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, - rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); void rxrpc_peer_keepalive_worker(struct work_struct *); /* @@ -1103,6 +1104,14 @@ void rxrpc_notify_socket(struct rxrpc_call *); int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int); /* + * rtt.c + */ +void rxrpc_peer_add_rtt(struct rxrpc_call *, enum rxrpc_rtt_rx_trace, + rxrpc_serial_t, rxrpc_serial_t, ktime_t, ktime_t); +unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *, bool); +void rxrpc_peer_init_rtt(struct rxrpc_peer *); + +/* * rxkad.c */ #ifdef CONFIG_RXKAD diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c index 70e44abf106c..b7611cc159e5 100644 --- a/net/rxrpc/call_accept.c +++ b/net/rxrpc/call_accept.c @@ -248,7 +248,7 @@ static void rxrpc_send_ping(struct rxrpc_call *call, struct sk_buff *skb) struct rxrpc_skb_priv *sp = rxrpc_skb(skb); ktime_t now = skb->tstamp; - if (call->peer->rtt_usage < 3 || + if (call->peer->rtt_count < 3 || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), now)) rxrpc_propose_ACK(call, RXRPC_ACK_PING, sp->hdr.serial, true, true, diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index cedbbb3a7c2e..2a65ac41055f 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -111,8 +111,8 @@ static void __rxrpc_propose_ACK(struct rxrpc_call *call, u8 ack_reason, } else { unsigned long now = jiffies, ack_at; - if (call->peer->rtt_usage > 0) - ack_at = nsecs_to_jiffies(call->peer->rtt); + if (call->peer->srtt_us != 0) + ack_at = usecs_to_jiffies(call->peer->srtt_us >> 3); else ack_at = expiry; @@ -157,24 +157,18 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call) static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) { struct sk_buff *skb; - unsigned long resend_at; + unsigned long resend_at, rto_j; rxrpc_seq_t cursor, seq, top; - ktime_t now, max_age, oldest, ack_ts, timeout, min_timeo; + ktime_t now, max_age, oldest, ack_ts; int ix; u8 annotation, anno_type, retrans = 0, unacked = 0; _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); - if (call->peer->rtt_usage > 1) - timeout = ns_to_ktime(call->peer->rtt * 3 / 2); - else - timeout = ms_to_ktime(rxrpc_resend_timeout); - min_timeo = ns_to_ktime((1000000000 / HZ) * 4); - if (ktime_before(timeout, min_timeo)) - timeout = min_timeo; + rto_j = call->peer->rto_j; now = ktime_get_real(); - max_age = ktime_sub(now, timeout); + max_age = ktime_sub(now, jiffies_to_usecs(rto_j)); spin_lock_bh(&call->lock); @@ -219,7 +213,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) } resend_at = nsecs_to_jiffies(ktime_to_ns(ktime_sub(now, oldest))); - resend_at += jiffies + rxrpc_resend_timeout; + resend_at += jiffies + rto_j; WRITE_ONCE(call->resend_at, resend_at); if (unacked) @@ -234,7 +228,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) rxrpc_timer_set_for_resend); spin_unlock_bh(&call->lock); ack_ts = ktime_sub(now, call->acks_latest_ts); - if (ktime_to_ns(ack_ts) < call->peer->rtt) + if (ktime_to_us(ack_ts) < (call->peer->srtt_us >> 3)) goto out; rxrpc_propose_ACK(call, RXRPC_ACK_PING, 0, true, false, rxrpc_propose_ack_ping_for_lost_ack); diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c index 69e09d69c896..3be4177baf70 100644 --- a/net/rxrpc/input.c +++ b/net/rxrpc/input.c @@ -91,11 +91,11 @@ static void rxrpc_congestion_management(struct rxrpc_call *call, /* We analyse the number of packets that get ACK'd per RTT * period and increase the window if we managed to fill it. */ - if (call->peer->rtt_usage == 0) + if (call->peer->rtt_count == 0) goto out; if (ktime_before(skb->tstamp, - ktime_add_ns(call->cong_tstamp, - call->peer->rtt))) + ktime_add_us(call->cong_tstamp, + call->peer->srtt_us >> 3))) goto out_no_clear_ca; change = rxrpc_cong_rtt_window_end; call->cong_tstamp = skb->tstamp; @@ -803,6 +803,30 @@ static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks, } /* + * Return true if the ACK is valid - ie. it doesn't appear to have regressed + * with respect to the ack state conveyed by preceding ACKs. + */ +static bool rxrpc_is_ack_valid(struct rxrpc_call *call, + rxrpc_seq_t first_pkt, rxrpc_seq_t prev_pkt) +{ + rxrpc_seq_t base = READ_ONCE(call->ackr_first_seq); + + if (after(first_pkt, base)) + return true; /* The window advanced */ + + if (before(first_pkt, base)) + return false; /* firstPacket regressed */ + + if (after_eq(prev_pkt, call->ackr_prev_seq)) + return true; /* previousPacket hasn't regressed. */ + + /* Some rx implementations put a serial number in previousPacket. */ + if (after_eq(prev_pkt, base + call->tx_winsize)) + return false; + return true; +} + +/* * Process an ACK packet. * * ack.firstPacket is the sequence number of the first soft-ACK'd/NAK'd packet @@ -865,9 +889,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) } /* Discard any out-of-order or duplicate ACKs (outside lock). */ - if (before(first_soft_ack, call->ackr_first_seq) || - before(prev_pkt, call->ackr_prev_seq)) + if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { + trace_rxrpc_rx_discard_ack(call->debug_id, sp->hdr.serial, + first_soft_ack, call->ackr_first_seq, + prev_pkt, call->ackr_prev_seq); return; + } buf.info.rxMTU = 0; ioffset = offset + nr_acks + 3; @@ -878,9 +905,12 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb) spin_lock(&call->input_lock); /* Discard any out-of-order or duplicate ACKs (inside lock). */ - if (before(first_soft_ack, call->ackr_first_seq) || - before(prev_pkt, call->ackr_prev_seq)) + if (!rxrpc_is_ack_valid(call, first_soft_ack, prev_pkt)) { + trace_rxrpc_rx_discard_ack(call->debug_id, sp->hdr.serial, + first_soft_ack, call->ackr_first_seq, + prev_pkt, call->ackr_prev_seq); goto out; + } call->acks_latest_ts = skb->tstamp; call->ackr_first_seq = first_soft_ack; diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c index 214405f75346..d4144fd86f84 100644 --- a/net/rxrpc/misc.c +++ b/net/rxrpc/misc.c @@ -63,11 +63,6 @@ unsigned int rxrpc_rx_mtu = 5692; */ unsigned int rxrpc_rx_jumbo_max = 4; -/* - * Time till packet resend (in milliseconds). - */ -unsigned long rxrpc_resend_timeout = 4 * HZ; - const s8 rxrpc_ack_priority[] = { [0] = 0, [RXRPC_ACK_DELAY] = 1, diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 90e263c6aa69..f8b632a5c619 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -369,7 +369,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb, (test_and_clear_bit(RXRPC_CALL_EV_ACK_LOST, &call->events) || retrans || call->cong_mode == RXRPC_CALL_SLOW_START || - (call->peer->rtt_usage < 3 && sp->hdr.seq & 1) || + (call->peer->rtt_count < 3 && sp->hdr.seq & 1) || ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000), ktime_get_real()))) whdr.flags |= RXRPC_REQUEST_ACK; @@ -423,13 +423,10 @@ done: if (whdr.flags & RXRPC_REQUEST_ACK) { call->peer->rtt_last_req = skb->tstamp; trace_rxrpc_rtt_tx(call, rxrpc_rtt_tx_data, serial); - if (call->peer->rtt_usage > 1) { + if (call->peer->rtt_count > 1) { unsigned long nowj = jiffies, ack_lost_at; - ack_lost_at = nsecs_to_jiffies(2 * call->peer->rtt); - if (ack_lost_at < 1) - ack_lost_at = 1; - + ack_lost_at = rxrpc_get_rto_backoff(call->peer, retrans); ack_lost_at += nowj; WRITE_ONCE(call->ack_lost_at, ack_lost_at); rxrpc_reduce_call_timer(call, ack_lost_at, nowj, diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index 923b263c401b..b1449d971883 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -296,52 +296,6 @@ static void rxrpc_distribute_error(struct rxrpc_peer *peer, int error, } /* - * Add RTT information to cache. This is called in softirq mode and has - * exclusive access to the peer RTT data. - */ -void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, - rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, - ktime_t send_time, ktime_t resp_time) -{ - struct rxrpc_peer *peer = call->peer; - s64 rtt; - u64 sum = peer->rtt_sum, avg; - u8 cursor = peer->rtt_cursor, usage = peer->rtt_usage; - - rtt = ktime_to_ns(ktime_sub(resp_time, send_time)); - if (rtt < 0) - return; - - spin_lock(&peer->rtt_input_lock); - - /* Replace the oldest datum in the RTT buffer */ - sum -= peer->rtt_cache[cursor]; - sum += rtt; - peer->rtt_cache[cursor] = rtt; - peer->rtt_cursor = (cursor + 1) & (RXRPC_RTT_CACHE_SIZE - 1); - peer->rtt_sum = sum; - if (usage < RXRPC_RTT_CACHE_SIZE) { - usage++; - peer->rtt_usage = usage; - } - - spin_unlock(&peer->rtt_input_lock); - - /* Now recalculate the average */ - if (usage == RXRPC_RTT_CACHE_SIZE) { - avg = sum / RXRPC_RTT_CACHE_SIZE; - } else { - avg = sum; - do_div(avg, usage); - } - - /* Don't need to update this under lock */ - peer->rtt = avg; - trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, rtt, - usage, avg); -} - -/* * Perform keep-alive pings. */ static void rxrpc_peer_keepalive_dispatch(struct rxrpc_net *rxnet, diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c index 452163eadb98..ca29976bb193 100644 --- a/net/rxrpc/peer_object.c +++ b/net/rxrpc/peer_object.c @@ -225,6 +225,8 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp) spin_lock_init(&peer->rtt_input_lock); peer->debug_id = atomic_inc_return(&rxrpc_debug_id); + rxrpc_peer_init_rtt(peer); + if (RXRPC_TX_SMSS > 2190) peer->cong_cwnd = 2; else if (RXRPC_TX_SMSS > 1095) @@ -497,14 +499,14 @@ void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call, EXPORT_SYMBOL(rxrpc_kernel_get_peer); /** - * rxrpc_kernel_get_rtt - Get a call's peer RTT + * rxrpc_kernel_get_srtt - Get a call's peer smoothed RTT * @sock: The socket on which the call is in progress. * @call: The call to query * - * Get the call's peer RTT. + * Get the call's peer smoothed RTT. */ -u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call) +u32 rxrpc_kernel_get_srtt(struct socket *sock, struct rxrpc_call *call) { - return call->peer->rtt; + return call->peer->srtt_us >> 3; } -EXPORT_SYMBOL(rxrpc_kernel_get_rtt); +EXPORT_SYMBOL(rxrpc_kernel_get_srtt); diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c index b9d053e42821..8b179e3c802a 100644 --- a/net/rxrpc/proc.c +++ b/net/rxrpc/proc.c @@ -222,7 +222,7 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) seq_puts(seq, "Proto Local " " Remote " - " Use CW MTU LastUse RTT Rc\n" + " Use CW MTU LastUse RTT RTO\n" ); return 0; } @@ -236,15 +236,15 @@ static int rxrpc_peer_seq_show(struct seq_file *seq, void *v) now = ktime_get_seconds(); seq_printf(seq, "UDP %-47.47s %-47.47s %3u" - " %3u %5u %6llus %12llu %2u\n", + " %3u %5u %6llus %8u %8u\n", lbuff, rbuff, atomic_read(&peer->usage), peer->cong_cwnd, peer->mtu, now - peer->last_tx_at, - peer->rtt, - peer->rtt_cursor); + peer->srtt_us >> 3, + jiffies_to_usecs(peer->rto_j)); return 0; } diff --git a/net/rxrpc/rtt.c b/net/rxrpc/rtt.c new file mode 100644 index 000000000000..928d8b34a3ee --- /dev/null +++ b/net/rxrpc/rtt.c @@ -0,0 +1,195 @@ +// SPDX-License-Identifier: GPL-2.0 +/* RTT/RTO calculation. + * + * Adapted from TCP for AF_RXRPC by David Howells (dhowells@redhat.com) + * + * https://tools.ietf.org/html/rfc6298 + * https://tools.ietf.org/html/rfc1122#section-4.2.3.1 + * http://ccr.sigcomm.org/archive/1995/jan95/ccr-9501-partridge87.pdf + */ + +#include <linux/net.h> +#include "ar-internal.h" + +#define RXRPC_RTO_MAX ((unsigned)(120 * HZ)) +#define RXRPC_TIMEOUT_INIT ((unsigned)(1*HZ)) /* RFC6298 2.1 initial RTO value */ +#define rxrpc_jiffies32 ((u32)jiffies) /* As rxrpc_jiffies32 */ +#define rxrpc_min_rtt_wlen 300 /* As sysctl_tcp_min_rtt_wlen */ + +static u32 rxrpc_rto_min_us(struct rxrpc_peer *peer) +{ + return 200; +} + +static u32 __rxrpc_set_rto(const struct rxrpc_peer *peer) +{ + return _usecs_to_jiffies((peer->srtt_us >> 3) + peer->rttvar_us); +} + +static u32 rxrpc_bound_rto(u32 rto) +{ + return min(rto, RXRPC_RTO_MAX); +} + +/* + * Called to compute a smoothed rtt estimate. The data fed to this + * routine either comes from timestamps, or from segments that were + * known _not_ to have been retransmitted [see Karn/Partridge + * Proceedings SIGCOMM 87]. The algorithm is from the SIGCOMM 88 + * piece by Van Jacobson. + * NOTE: the next three routines used to be one big routine. + * To save cycles in the RFC 1323 implementation it was better to break + * it up into three procedures. -- erics + */ +static void rxrpc_rtt_estimator(struct rxrpc_peer *peer, long sample_rtt_us) +{ + long m = sample_rtt_us; /* RTT */ + u32 srtt = peer->srtt_us; + + /* The following amusing code comes from Jacobson's + * article in SIGCOMM '88. Note that rtt and mdev + * are scaled versions of rtt and mean deviation. + * This is designed to be as fast as possible + * m stands for "measurement". + * + * On a 1990 paper the rto value is changed to: + * RTO = rtt + 4 * mdev + * + * Funny. This algorithm seems to be very broken. + * These formulae increase RTO, when it should be decreased, increase + * too slowly, when it should be increased quickly, decrease too quickly + * etc. I guess in BSD RTO takes ONE value, so that it is absolutely + * does not matter how to _calculate_ it. Seems, it was trap + * that VJ failed to avoid. 8) + */ + if (srtt != 0) { + m -= (srtt >> 3); /* m is now error in rtt est */ + srtt += m; /* rtt = 7/8 rtt + 1/8 new */ + if (m < 0) { + m = -m; /* m is now abs(error) */ + m -= (peer->mdev_us >> 2); /* similar update on mdev */ + /* This is similar to one of Eifel findings. + * Eifel blocks mdev updates when rtt decreases. + * This solution is a bit different: we use finer gain + * for mdev in this case (alpha*beta). + * Like Eifel it also prevents growth of rto, + * but also it limits too fast rto decreases, + * happening in pure Eifel. + */ + if (m > 0) + m >>= 3; + } else { + m -= (peer->mdev_us >> 2); /* similar update on mdev */ + } + + peer->mdev_us += m; /* mdev = 3/4 mdev + 1/4 new */ + if (peer->mdev_us > peer->mdev_max_us) { + peer->mdev_max_us = peer->mdev_us; + if (peer->mdev_max_us > peer->rttvar_us) + peer->rttvar_us = peer->mdev_max_us; + } + } else { + /* no previous measure. */ + srtt = m << 3; /* take the measured time to be rtt */ + peer->mdev_us = m << 1; /* make sure rto = 3*rtt */ + peer->rttvar_us = max(peer->mdev_us, rxrpc_rto_min_us(peer)); + peer->mdev_max_us = peer->rttvar_us; + } + + peer->srtt_us = max(1U, srtt); +} + +/* + * Calculate rto without backoff. This is the second half of Van Jacobson's + * routine referred to above. + */ +static void rxrpc_set_rto(struct rxrpc_peer *peer) +{ + u32 rto; + + /* 1. If rtt variance happened to be less 50msec, it is hallucination. + * It cannot be less due to utterly erratic ACK generation made + * at least by solaris and freebsd. "Erratic ACKs" has _nothing_ + * to do with delayed acks, because at cwnd>2 true delack timeout + * is invisible. Actually, Linux-2.4 also generates erratic + * ACKs in some circumstances. + */ + rto = __rxrpc_set_rto(peer); + + /* 2. Fixups made earlier cannot be right. + * If we do not estimate RTO correctly without them, + * all the algo is pure shit and should be replaced + * with correct one. It is exactly, which we pretend to do. + */ + + /* NOTE: clamping at RXRPC_RTO_MIN is not required, current algo + * guarantees that rto is higher. + */ + peer->rto_j = rxrpc_bound_rto(rto); +} + +static void rxrpc_ack_update_rtt(struct rxrpc_peer *peer, long rtt_us) +{ + if (rtt_us < 0) + return; + + //rxrpc_update_rtt_min(peer, rtt_us); + rxrpc_rtt_estimator(peer, rtt_us); + rxrpc_set_rto(peer); + + /* RFC6298: only reset backoff on valid RTT measurement. */ + peer->backoff = 0; +} + +/* + * Add RTT information to cache. This is called in softirq mode and has + * exclusive access to the peer RTT data. + */ +void rxrpc_peer_add_rtt(struct rxrpc_call *call, enum rxrpc_rtt_rx_trace why, + rxrpc_serial_t send_serial, rxrpc_serial_t resp_serial, + ktime_t send_time, ktime_t resp_time) +{ + struct rxrpc_peer *peer = call->peer; + s64 rtt_us; + + rtt_us = ktime_to_us(ktime_sub(resp_time, send_time)); + if (rtt_us < 0) + return; + + spin_lock(&peer->rtt_input_lock); + rxrpc_ack_update_rtt(peer, rtt_us); + if (peer->rtt_count < 3) + peer->rtt_count++; + spin_unlock(&peer->rtt_input_lock); + + trace_rxrpc_rtt_rx(call, why, send_serial, resp_serial, + peer->srtt_us >> 3, peer->rto_j); +} + +/* + * Get the retransmission timeout to set in jiffies, backing it off each time + * we retransmit. + */ +unsigned long rxrpc_get_rto_backoff(struct rxrpc_peer *peer, bool retrans) +{ + u64 timo_j; + u8 backoff = READ_ONCE(peer->backoff); + + timo_j = peer->rto_j; + timo_j <<= backoff; + if (retrans && timo_j * 2 <= RXRPC_RTO_MAX) + WRITE_ONCE(peer->backoff, backoff + 1); + + if (timo_j < 1) + timo_j = 1; + + return timo_j; +} + +void rxrpc_peer_init_rtt(struct rxrpc_peer *peer) +{ + peer->rto_j = RXRPC_TIMEOUT_INIT; + peer->mdev_us = jiffies_to_usecs(RXRPC_TIMEOUT_INIT); + peer->backoff = 0; + //minmax_reset(&peer->rtt_min, rxrpc_jiffies32, ~0U); +} diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 098f1f9ec53b..52a24d4ef5d8 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -1148,7 +1148,7 @@ static int rxkad_verify_response(struct rxrpc_connection *conn, ret = rxkad_decrypt_ticket(conn, skb, ticket, ticket_len, &session_key, &expiry, _abort_code); if (ret < 0) - goto temporary_error_free_resp; + goto temporary_error_free_ticket; /* use the session key from inside the ticket to decrypt the * response */ @@ -1230,7 +1230,6 @@ protocol_error: temporary_error_free_ticket: kfree(ticket); -temporary_error_free_resp: kfree(response); temporary_error: /* Ignore the response packet if we got a temporary error such as diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0fcf157aa09f..5e9c43d4a314 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -66,15 +66,14 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, struct rxrpc_call *call) { rxrpc_seq_t tx_start, tx_win; - signed long rtt2, timeout; - u64 rtt; + signed long rtt, timeout; - rtt = READ_ONCE(call->peer->rtt); - rtt2 = nsecs_to_jiffies64(rtt) * 2; - if (rtt2 < 2) - rtt2 = 2; + rtt = READ_ONCE(call->peer->srtt_us) >> 3; + rtt = usecs_to_jiffies(rtt) * 2; + if (rtt < 2) + rtt = 2; - timeout = rtt2; + timeout = rtt; tx_start = READ_ONCE(call->tx_hard_ack); for (;;) { @@ -92,7 +91,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx, return -EINTR; if (tx_win != tx_start) { - timeout = rtt2; + timeout = rtt; tx_start = tx_win; } @@ -271,16 +270,9 @@ static int rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call, _debug("need instant resend %d", ret); rxrpc_instant_resend(call, ix); } else { - unsigned long now = jiffies, resend_at; + unsigned long now = jiffies; + unsigned long resend_at = now + call->peer->rto_j; - if (call->peer->rtt_usage > 1) - resend_at = nsecs_to_jiffies(call->peer->rtt * 3 / 2); - else - resend_at = rxrpc_resend_timeout; - if (resend_at < 1) - resend_at = 1; - - resend_at += now; WRITE_ONCE(call->resend_at, resend_at); rxrpc_reduce_call_timer(call, resend_at, now, rxrpc_timer_set_for_send); diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c index 2bbb38161851..18dade4e6f9a 100644 --- a/net/rxrpc/sysctl.c +++ b/net/rxrpc/sysctl.c @@ -71,15 +71,6 @@ static struct ctl_table rxrpc_sysctl_table[] = { .extra1 = (void *)&one_jiffy, .extra2 = (void *)&max_jiffies, }, - { - .procname = "resend_timeout", - .data = &rxrpc_resend_timeout, - .maxlen = sizeof(unsigned long), - .mode = 0644, - .proc_handler = proc_doulongvec_ms_jiffies_minmax, - .extra1 = (void *)&one_jiffy, - .extra2 = (void *)&max_jiffies, - }, /* Non-time values */ { diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 55bd1429678f..0a7ecc292bd3 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2070,6 +2070,7 @@ replay: err = PTR_ERR(block); goto errout; } + block->classid = parent; chain_index = tca[TCA_CHAIN] ? nla_get_u32(tca[TCA_CHAIN]) : 0; if (chain_index > TC_ACT_EXT_VAL_MASK) { @@ -2612,12 +2613,10 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) return skb->len; parent = tcm->tcm_parent; - if (!parent) { + if (!parent) q = dev->qdisc; - parent = q->handle; - } else { + else q = qdisc_lookup(dev, TC_H_MAJ(tcm->tcm_parent)); - } if (!q) goto out; cops = q->ops->cl_ops; @@ -2633,6 +2632,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) block = cops->tcf_block(q, cl, NULL); if (!block) goto out; + parent = block->classid; if (tcf_block_shared(block)) q = NULL; } @@ -3523,6 +3523,16 @@ static void tcf_sample_get_group(struct flow_action_entry *entry, #endif } +static enum flow_action_hw_stats tc_act_hw_stats(u8 hw_stats) +{ + if (WARN_ON_ONCE(hw_stats > TCA_ACT_HW_STATS_ANY)) + return FLOW_ACTION_HW_STATS_DONT_CARE; + else if (!hw_stats) + return FLOW_ACTION_HW_STATS_DISABLED; + + return hw_stats; +} + int tc_setup_flow_action(struct flow_action *flow_action, const struct tcf_exts *exts) { @@ -3546,7 +3556,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, if (err) goto err_out_locked; - entry->hw_stats = act->hw_stats; + entry->hw_stats = tc_act_hw_stats(act->hw_stats); if (is_tcf_gact_ok(act)) { entry->id = FLOW_ACTION_ACCEPT; @@ -3614,7 +3624,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->mangle.mask = tcf_pedit_mask(act, k); entry->mangle.val = tcf_pedit_val(act, k); entry->mangle.offset = tcf_pedit_offset(act, k); - entry->hw_stats = act->hw_stats; + entry->hw_stats = tc_act_hw_stats(act->hw_stats); entry = &flow_action->entries[++j]; } } else if (is_tcf_csum(act)) { diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c index a36974e9c601..1bcf8fbfd40e 100644 --- a/net/sched/sch_choke.c +++ b/net/sched/sch_choke.c @@ -323,7 +323,8 @@ static void choke_reset(struct Qdisc *sch) sch->q.qlen = 0; sch->qstats.backlog = 0; - memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); + if (q->tab) + memset(q->tab, 0, (q->tab_mask + 1) * sizeof(struct sk_buff *)); q->head = q->tail = 0; red_restart(&q->vars); } diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 968519ff36e9..436160be9c18 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -416,7 +416,7 @@ static int fq_codel_change(struct Qdisc *sch, struct nlattr *opt, q->quantum = max(256U, nla_get_u32(tb[TCA_FQ_CODEL_QUANTUM])); if (tb[TCA_FQ_CODEL_DROP_BATCH_SIZE]) - q->drop_batch_size = min(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])); + q->drop_batch_size = max(1U, nla_get_u32(tb[TCA_FQ_CODEL_DROP_BATCH_SIZE])); if (tb[TCA_FQ_CODEL_MEMORY_LIMIT]) q->memory_limit = min(1U << 31, nla_get_u32(tb[TCA_FQ_CODEL_MEMORY_LIMIT])); diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index c787d4d46017..5a6def5e4e6d 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -637,6 +637,15 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt) if (ctl->divisor && (!is_power_of_2(ctl->divisor) || ctl->divisor > 65536)) return -EINVAL; + + /* slot->allot is a short, make sure quantum is not too big. */ + if (ctl->quantum) { + unsigned int scaled = SFQ_ALLOT_SIZE(ctl->quantum); + + if (scaled <= 0 || scaled > SHRT_MAX) + return -EINVAL; + } + if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max, ctl_v1->Wlog)) return -EINVAL; diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index 0fb10abf7579..7a5e4c454715 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -169,6 +169,9 @@ static int skbprio_change(struct Qdisc *sch, struct nlattr *opt, { struct tc_skbprio_qopt *ctl = nla_data(opt); + if (opt->nla_len != nla_attr_size(sizeof(*ctl))) + return -EINVAL; + sch->limit = ctl->limit; return 0; } diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index 2bc29463e1dc..9f36fe911d08 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -1523,9 +1523,17 @@ static int sctp_cmd_interpreter(enum sctp_event_type event_type, timeout = asoc->timeouts[cmd->obj.to]; BUG_ON(!timeout); - timer->expires = jiffies + timeout; - sctp_association_hold(asoc); - add_timer(timer); + /* + * SCTP has a hard time with timer starts. Because we process + * timer starts as side effects, it can be hard to tell if we + * have already started a timer or not, which leads to BUG + * halts when we call add_timer. So here, instead of just starting + * a timer, if the timer is already started, and just mod + * the timer with the shorter of the two expiration times + */ + if (!timer_pending(timer)) + sctp_association_hold(asoc); + timer_reduce(timer, jiffies + timeout); break; case SCTP_CMD_TIMER_RESTART: diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 26788f4a3b9e..e86620fbd90f 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -1856,12 +1856,13 @@ static enum sctp_disposition sctp_sf_do_dupcook_a( /* Update the content of current association. */ sctp_add_cmd_sf(commands, SCTP_CMD_UPDATE_ASSOC, SCTP_ASOC(new_asoc)); sctp_add_cmd_sf(commands, SCTP_CMD_EVENT_ULP, SCTP_ULPEVENT(ev)); - if (sctp_state(asoc, SHUTDOWN_PENDING) && + if ((sctp_state(asoc, SHUTDOWN_PENDING) || + sctp_state(asoc, SHUTDOWN_SENT)) && (sctp_sstate(asoc->base.sk, CLOSING) || sock_flag(asoc->base.sk, SOCK_DEAD))) { - /* if were currently in SHUTDOWN_PENDING, but the socket - * has been closed by user, don't transition to ESTABLISHED. - * Instead trigger SHUTDOWN bundled with COOKIE_ACK. + /* If the socket has been closed by user, don't + * transition to ESTABLISHED. Instead trigger SHUTDOWN + * bundled with COOKIE_ACK. */ sctp_add_cmd_sf(commands, SCTP_CMD_REPLY, SCTP_CHUNK(repl)); return sctp_sf_do_9_2_start_shutdown(net, ep, asoc, diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 25fbd8d9de74..ac5cac0dd24b 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -2032,7 +2032,6 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, struct xdr_buf *rcv_buf = &rqstp->rq_rcv_buf; struct kvec *head = rqstp->rq_rcv_buf.head; struct rpc_auth *auth = cred->cr_auth; - unsigned int savedlen = rcv_buf->len; u32 offset, opaque_len, maj_stat; __be32 *p; @@ -2043,9 +2042,9 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, offset = (u8 *)(p) - (u8 *)head->iov_base; if (offset + opaque_len > rcv_buf->len) goto unwrap_failed; - rcv_buf->len = offset + opaque_len; - maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, rcv_buf); + maj_stat = gss_unwrap(ctx->gc_gss_ctx, offset, + offset + opaque_len, rcv_buf); if (maj_stat == GSS_S_CONTEXT_EXPIRED) clear_bit(RPCAUTH_CRED_UPTODATE, &cred->cr_flags); if (maj_stat != GSS_S_COMPLETE) @@ -2059,10 +2058,9 @@ gss_unwrap_resp_priv(struct rpc_task *task, struct rpc_cred *cred, */ xdr_init_decode(xdr, rcv_buf, p, rqstp); - auth->au_rslack = auth->au_verfsize + 2 + - XDR_QUADLEN(savedlen - rcv_buf->len); - auth->au_ralign = auth->au_verfsize + 2 + - XDR_QUADLEN(savedlen - rcv_buf->len); + auth->au_rslack = auth->au_verfsize + 2 + ctx->gc_gss_ctx->slack; + auth->au_ralign = auth->au_verfsize + 2 + ctx->gc_gss_ctx->align; + return 0; unwrap_failed: trace_rpcgss_unwrap_failed(task); diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c index 6f2d30d7b766..e7180da1fc6a 100644 --- a/net/sunrpc/auth_gss/gss_krb5_crypto.c +++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c @@ -851,8 +851,8 @@ out_err: } u32 -gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, - u32 *headskip, u32 *tailskip) +gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len, + struct xdr_buf *buf, u32 *headskip, u32 *tailskip) { struct xdr_buf subbuf; u32 ret = 0; @@ -881,7 +881,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, /* create a segment skipping the header and leaving out the checksum */ xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN, - (buf->len - offset - GSS_KRB5_TOK_HDR_LEN - + (len - offset - GSS_KRB5_TOK_HDR_LEN - kctx->gk5e->cksumlength)); nblocks = (subbuf.len + blocksize - 1) / blocksize; @@ -926,7 +926,7 @@ gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf, goto out_err; /* Get the packet's hmac value */ - ret = read_bytes_from_xdr_buf(buf, buf->len - kctx->gk5e->cksumlength, + ret = read_bytes_from_xdr_buf(buf, len - kctx->gk5e->cksumlength, pkt_hmac, kctx->gk5e->cksumlength); if (ret) goto out_err; diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c index 6c1920eed771..cf0fd170ac18 100644 --- a/net/sunrpc/auth_gss/gss_krb5_wrap.c +++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c @@ -261,7 +261,9 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset, } static u32 -gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) +gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len, + struct xdr_buf *buf, unsigned int *slack, + unsigned int *align) { int signalg; int sealalg; @@ -279,12 +281,13 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) u32 conflen = kctx->gk5e->conflen; int crypt_offset; u8 *cksumkey; + unsigned int saved_len = buf->len; dprintk("RPC: gss_unwrap_kerberos\n"); ptr = (u8 *)buf->head[0].iov_base + offset; if (g_verify_token_header(&kctx->mech_used, &bodysize, &ptr, - buf->len - offset)) + len - offset)) return GSS_S_DEFECTIVE_TOKEN; if ((ptr[0] != ((KG_TOK_WRAP_MSG >> 8) & 0xff)) || @@ -324,6 +327,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) (!kctx->initiate && direction != 0)) return GSS_S_BAD_SIG; + buf->len = len; if (kctx->enctype == ENCTYPE_ARCFOUR_HMAC) { struct crypto_sync_skcipher *cipher; int err; @@ -376,11 +380,15 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) data_len = (buf->head[0].iov_base + buf->head[0].iov_len) - data_start; memmove(orig_start, data_start, data_len); buf->head[0].iov_len -= (data_start - orig_start); - buf->len -= (data_start - orig_start); + buf->len = len - (data_start - orig_start); if (gss_krb5_remove_padding(buf, blocksize)) return GSS_S_DEFECTIVE_TOKEN; + /* slack must include room for krb5 padding */ + *slack = XDR_QUADLEN(saved_len - buf->len); + /* The GSS blob always precedes the RPC message payload */ + *align = *slack; return GSS_S_COMPLETE; } @@ -486,7 +494,9 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset, } static u32 -gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) +gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len, + struct xdr_buf *buf, unsigned int *slack, + unsigned int *align) { time64_t now; u8 *ptr; @@ -532,7 +542,7 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) if (rrc != 0) rotate_left(offset + 16, buf, rrc); - err = (*kctx->gk5e->decrypt_v2)(kctx, offset, buf, + err = (*kctx->gk5e->decrypt_v2)(kctx, offset, len, buf, &headskip, &tailskip); if (err) return GSS_S_FAILURE; @@ -542,7 +552,7 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) * it against the original */ err = read_bytes_from_xdr_buf(buf, - buf->len - GSS_KRB5_TOK_HDR_LEN - tailskip, + len - GSS_KRB5_TOK_HDR_LEN - tailskip, decrypted_hdr, GSS_KRB5_TOK_HDR_LEN); if (err) { dprintk("%s: error %u getting decrypted_hdr\n", __func__, err); @@ -568,18 +578,19 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, struct xdr_buf *buf) * Note that buf->head[0].iov_len may indicate the available * head buffer space rather than that actually occupied. */ - movelen = min_t(unsigned int, buf->head[0].iov_len, buf->len); + movelen = min_t(unsigned int, buf->head[0].iov_len, len); movelen -= offset + GSS_KRB5_TOK_HDR_LEN + headskip; - if (offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen > - buf->head[0].iov_len) - return GSS_S_FAILURE; + BUG_ON(offset + GSS_KRB5_TOK_HDR_LEN + headskip + movelen > + buf->head[0].iov_len); memmove(ptr, ptr + GSS_KRB5_TOK_HDR_LEN + headskip, movelen); buf->head[0].iov_len -= GSS_KRB5_TOK_HDR_LEN + headskip; - buf->len -= GSS_KRB5_TOK_HDR_LEN + headskip; + buf->len = len - GSS_KRB5_TOK_HDR_LEN + headskip; /* Trim off the trailing "extra count" and checksum blob */ - buf->len -= ec + GSS_KRB5_TOK_HDR_LEN + tailskip; + xdr_buf_trim(buf, ec + GSS_KRB5_TOK_HDR_LEN + tailskip); + *align = XDR_QUADLEN(GSS_KRB5_TOK_HDR_LEN + headskip); + *slack = *align + XDR_QUADLEN(ec + GSS_KRB5_TOK_HDR_LEN + tailskip); return GSS_S_COMPLETE; } @@ -603,7 +614,8 @@ gss_wrap_kerberos(struct gss_ctx *gctx, int offset, } u32 -gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf) +gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, + int len, struct xdr_buf *buf) { struct krb5_ctx *kctx = gctx->internal_ctx_id; @@ -613,9 +625,11 @@ gss_unwrap_kerberos(struct gss_ctx *gctx, int offset, struct xdr_buf *buf) case ENCTYPE_DES_CBC_RAW: case ENCTYPE_DES3_CBC_RAW: case ENCTYPE_ARCFOUR_HMAC: - return gss_unwrap_kerberos_v1(kctx, offset, buf); + return gss_unwrap_kerberos_v1(kctx, offset, len, buf, + &gctx->slack, &gctx->align); case ENCTYPE_AES128_CTS_HMAC_SHA1_96: case ENCTYPE_AES256_CTS_HMAC_SHA1_96: - return gss_unwrap_kerberos_v2(kctx, offset, buf); + return gss_unwrap_kerberos_v2(kctx, offset, len, buf, + &gctx->slack, &gctx->align); } } diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index db550bfc2642..69316ab1b9fa 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -411,10 +411,11 @@ gss_wrap(struct gss_ctx *ctx_id, u32 gss_unwrap(struct gss_ctx *ctx_id, int offset, + int len, struct xdr_buf *buf) { return ctx_id->mech_type->gm_ops - ->gss_unwrap(ctx_id, offset, buf); + ->gss_unwrap(ctx_id, offset, len, buf); } diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c index 54ae5be62f6a..50d93c49ef1a 100644 --- a/net/sunrpc/auth_gss/svcauth_gss.c +++ b/net/sunrpc/auth_gss/svcauth_gss.c @@ -906,7 +906,7 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g if (svc_getnl(&buf->head[0]) != seq) goto out; /* trim off the mic and padding at the end before returning */ - buf->len -= 4 + round_up_to_quad(mic.len); + xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4); stat = 0; out: kfree(mic.data); @@ -934,7 +934,7 @@ static int unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx) { u32 priv_len, maj_stat; - int pad, saved_len, remaining_len, offset; + int pad, remaining_len, offset; clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags); @@ -954,12 +954,8 @@ unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gs buf->len -= pad; fix_priv_head(buf, pad); - /* Maybe it would be better to give gss_unwrap a length parameter: */ - saved_len = buf->len; - buf->len = priv_len; - maj_stat = gss_unwrap(ctx, 0, buf); + maj_stat = gss_unwrap(ctx, 0, priv_len, buf); pad = priv_len - buf->len; - buf->len = saved_len; buf->len -= pad; /* The upper layers assume the buffer is aligned on 4-byte boundaries. * In the krb5p case, at least, the data ends up offset, so we need to diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 8350d3a2e9a7..61b21dafd7c0 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -889,7 +889,9 @@ static void rpc_free_client_work(struct work_struct *work) * here. */ rpc_clnt_debugfs_unregister(clnt); + rpc_free_clid(clnt); rpc_clnt_remove_pipedir(clnt); + xprt_put(rcu_dereference_raw(clnt->cl_xprt)); kfree(clnt); rpciod_down(); @@ -907,10 +909,8 @@ rpc_free_client(struct rpc_clnt *clnt) rpc_unregister_client(clnt); rpc_free_iostats(clnt->cl_metrics); clnt->cl_metrics = NULL; - xprt_put(rcu_dereference_raw(clnt->cl_xprt)); xprt_iter_destroy(&clnt->cl_xpi); put_cred(clnt->cl_cred); - rpc_free_clid(clnt); INIT_WORK(&clnt->cl_work, rpc_free_client_work); schedule_work(&clnt->cl_work); @@ -2433,6 +2433,11 @@ rpc_check_timeout(struct rpc_task *task) { struct rpc_clnt *clnt = task->tk_client; + if (RPC_SIGNALLED(task)) { + rpc_call_rpcerror(task, -ERESTARTSYS); + return; + } + if (xprt_adjust_timeout(task->tk_rqstp) == 0) return; diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 15b58c5144f9..6f7d82fb1eb0 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1150,6 +1150,47 @@ xdr_buf_subsegment(struct xdr_buf *buf, struct xdr_buf *subbuf, } EXPORT_SYMBOL_GPL(xdr_buf_subsegment); +/** + * xdr_buf_trim - lop at most "len" bytes off the end of "buf" + * @buf: buf to be trimmed + * @len: number of bytes to reduce "buf" by + * + * Trim an xdr_buf by the given number of bytes by fixing up the lengths. Note + * that it's possible that we'll trim less than that amount if the xdr_buf is + * too small, or if (for instance) it's all in the head and the parser has + * already read too far into it. + */ +void xdr_buf_trim(struct xdr_buf *buf, unsigned int len) +{ + size_t cur; + unsigned int trim = len; + + if (buf->tail[0].iov_len) { + cur = min_t(size_t, buf->tail[0].iov_len, trim); + buf->tail[0].iov_len -= cur; + trim -= cur; + if (!trim) + goto fix_len; + } + + if (buf->page_len) { + cur = min_t(unsigned int, buf->page_len, trim); + buf->page_len -= cur; + trim -= cur; + if (!trim) + goto fix_len; + } + + if (buf->head[0].iov_len) { + cur = min_t(size_t, buf->head[0].iov_len, trim); + buf->head[0].iov_len -= cur; + trim -= cur; + } +fix_len: + buf->len -= (len - trim); +} +EXPORT_SYMBOL_GPL(xdr_buf_trim); + static void __read_bytes_from_xdr_buf(struct xdr_buf *subbuf, void *obj, unsigned int len) { unsigned int this_len; diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 87466607097f..e370ad0edd76 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1739,22 +1739,21 @@ static int tipc_sk_anc_data_recv(struct msghdr *m, struct sk_buff *skb, return 0; } -static void tipc_sk_send_ack(struct tipc_sock *tsk) +static struct sk_buff *tipc_sk_build_ack(struct tipc_sock *tsk) { struct sock *sk = &tsk->sk; - struct net *net = sock_net(sk); struct sk_buff *skb = NULL; struct tipc_msg *msg; u32 peer_port = tsk_peer_port(tsk); u32 dnode = tsk_peer_node(tsk); if (!tipc_sk_connected(sk)) - return; + return NULL; skb = tipc_msg_create(CONN_MANAGER, CONN_ACK, INT_H_SIZE, 0, dnode, tsk_own_node(tsk), peer_port, tsk->portid, TIPC_OK); if (!skb) - return; + return NULL; msg = buf_msg(skb); msg_set_conn_ack(msg, tsk->rcv_unacked); tsk->rcv_unacked = 0; @@ -1764,7 +1763,19 @@ static void tipc_sk_send_ack(struct tipc_sock *tsk) tsk->rcv_win = tsk_adv_blocks(tsk->sk.sk_rcvbuf); msg_set_adv_win(msg, tsk->rcv_win); } - tipc_node_xmit_skb(net, skb, dnode, msg_link_selector(msg)); + return skb; +} + +static void tipc_sk_send_ack(struct tipc_sock *tsk) +{ + struct sk_buff *skb; + + skb = tipc_sk_build_ack(tsk); + if (!skb) + return; + + tipc_node_xmit_skb(sock_net(&tsk->sk), skb, tsk_peer_node(tsk), + msg_link_selector(buf_msg(skb))); } static int tipc_wait_for_rcvmsg(struct socket *sock, long *timeop) @@ -1938,7 +1949,6 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m, bool peek = flags & MSG_PEEK; int offset, required, copy, copied = 0; int hlen, dlen, err, rc; - bool ack = false; long timeout; /* Catch invalid receive attempts */ @@ -1983,7 +1993,6 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m, /* Copy data if msg ok, otherwise return error/partial data */ if (likely(!err)) { - ack = msg_ack_required(hdr); offset = skb_cb->bytes_read; copy = min_t(int, dlen - offset, buflen - copied); rc = skb_copy_datagram_msg(skb, hlen + offset, m, copy); @@ -2011,7 +2020,7 @@ static int tipc_recvstream(struct socket *sock, struct msghdr *m, /* Send connection flow control advertisement when applicable */ tsk->rcv_unacked += tsk_inc(tsk, hlen + dlen); - if (ack || tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) + if (tsk->rcv_unacked >= tsk->rcv_win / TIPC_ACK_RATE) tipc_sk_send_ack(tsk); /* Exit if all requested data or FIN/error received */ @@ -2105,9 +2114,11 @@ static void tipc_sk_proto_rcv(struct sock *sk, * tipc_sk_filter_connect - check incoming message for a connection-based socket * @tsk: TIPC socket * @skb: pointer to message buffer. + * @xmitq: for Nagle ACK if any * Returns true if message should be added to receive queue, false otherwise */ -static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) +static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { struct sock *sk = &tsk->sk; struct net *net = sock_net(sk); @@ -2171,8 +2182,17 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb) if (!skb_queue_empty(&sk->sk_write_queue)) tipc_sk_push_backlog(tsk); /* Accept only connection-based messages sent by peer */ - if (likely(con_msg && !err && pport == oport && pnode == onode)) + if (likely(con_msg && !err && pport == oport && + pnode == onode)) { + if (msg_ack_required(hdr)) { + struct sk_buff *skb; + + skb = tipc_sk_build_ack(tsk); + if (skb) + __skb_queue_tail(xmitq, skb); + } return true; + } if (!tsk_peer_msg(tsk, hdr)) return false; if (!err) @@ -2267,7 +2287,7 @@ static void tipc_sk_filter_rcv(struct sock *sk, struct sk_buff *skb, while ((skb = __skb_dequeue(&inputq))) { hdr = buf_msg(skb); limit = rcvbuf_limit(sk, skb); - if ((sk_conn && !tipc_sk_filter_connect(tsk, skb)) || + if ((sk_conn && !tipc_sk_filter_connect(tsk, skb, xmitq)) || (!sk_conn && msg_connected(hdr)) || (!grp && msg_in_group(hdr))) err = TIPC_ERR_NO_PORT; diff --git a/net/tipc/subscr.h b/net/tipc/subscr.h index aa015c233898..6ebbec1bedd1 100644 --- a/net/tipc/subscr.h +++ b/net/tipc/subscr.h @@ -96,6 +96,16 @@ void tipc_sub_get(struct tipc_subscription *subscription); (swap_ ? swab32(val__) : val__); \ }) +/* tipc_sub_write - write val_ to field_ of struct sub_ in user endian format + */ +#define tipc_sub_write(sub_, field_, val_) \ + ({ \ + struct tipc_subscr *sub__ = sub_; \ + u32 val__ = val_; \ + int swap_ = !((sub__)->filter & TIPC_FILTER_MASK); \ + (sub__)->field_ = swap_ ? swab32(val__) : val__; \ + }) + /* tipc_evt_write - write val_ to field_ of struct evt_ in user endian format */ #define tipc_evt_write(evt_, field_, val_) \ diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c index 3a12fc18239b..446af7bbd13e 100644 --- a/net/tipc/topsrv.c +++ b/net/tipc/topsrv.c @@ -237,8 +237,8 @@ static void tipc_conn_delete_sub(struct tipc_conn *con, struct tipc_subscr *s) if (!s || !memcmp(s, &sub->evt.s, sizeof(*s))) { tipc_sub_unsubscribe(sub); atomic_dec(&tn->subscription_count); - } else if (s) { - break; + if (s) + break; } } spin_unlock_bh(&con->sub_lock); @@ -362,9 +362,10 @@ static int tipc_conn_rcv_sub(struct tipc_topsrv *srv, { struct tipc_net *tn = tipc_net(srv->net); struct tipc_subscription *sub; + u32 s_filter = tipc_sub_read(s, filter); - if (tipc_sub_read(s, filter) & TIPC_SUB_CANCEL) { - s->filter &= __constant_ntohl(~TIPC_SUB_CANCEL); + if (s_filter & TIPC_SUB_CANCEL) { + tipc_sub_write(s, filter, s_filter & ~TIPC_SUB_CANCEL); tipc_conn_delete_sub(con, s); return 0; } @@ -400,12 +401,15 @@ static int tipc_conn_rcv_from_sock(struct tipc_conn *con) return -EWOULDBLOCK; if (ret == sizeof(s)) { read_lock_bh(&sk->sk_callback_lock); - ret = tipc_conn_rcv_sub(srv, con, &s); + /* RACE: the connection can be closed in the meantime */ + if (likely(connected(con))) + ret = tipc_conn_rcv_sub(srv, con, &s); read_unlock_bh(&sk->sk_callback_lock); + if (!ret) + return 0; } - if (ret < 0) - tipc_conn_close(con); + tipc_conn_close(con); return ret; } diff --git a/net/tipc/udp_media.c b/net/tipc/udp_media.c index d6620ad53546..28a283f26a8d 100644 --- a/net/tipc/udp_media.c +++ b/net/tipc/udp_media.c @@ -161,9 +161,11 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, struct udp_bearer *ub, struct udp_media_addr *src, struct udp_media_addr *dst, struct dst_cache *cache) { - struct dst_entry *ndst = dst_cache_get(cache); + struct dst_entry *ndst; int ttl, err = 0; + local_bh_disable(); + ndst = dst_cache_get(cache); if (dst->proto == htons(ETH_P_IP)) { struct rtable *rt = (struct rtable *)ndst; @@ -210,9 +212,11 @@ static int tipc_udp_xmit(struct net *net, struct sk_buff *skb, src->port, dst->port, false); #endif } + local_bh_enable(); return err; tx_error: + local_bh_enable(); kfree_skb(skb); return err; } diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index c98e602a1a2d..2d399b6c4075 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -780,7 +780,7 @@ static int tls_push_record(struct sock *sk, int flags, static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, bool full_record, u8 record_type, - size_t *copied, int flags) + ssize_t *copied, int flags) { struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx); @@ -796,10 +796,13 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk, psock = sk_psock_get(sk); if (!psock || !policy) { err = tls_push_record(sk, flags, record_type); - if (err && err != -EINPROGRESS) { + if (err && sk->sk_err == EBADMSG) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); + err = -sk->sk_err; } + if (psock) + sk_psock_put(sk, psock); return err; } more_data: @@ -822,9 +825,10 @@ more_data: switch (psock->eval) { case __SK_PASS: err = tls_push_record(sk, flags, record_type); - if (err && err != -EINPROGRESS) { + if (err && sk->sk_err == EBADMSG) { *copied -= sk_msg_free(sk, msg); tls_free_open_rec(sk); + err = -sk->sk_err; goto out_err; } break; @@ -914,7 +918,8 @@ int tls_sw_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) unsigned char record_type = TLS_RECORD_TYPE_DATA; bool is_kvec = iov_iter_is_kvec(&msg->msg_iter); bool eor = !(msg->msg_flags & MSG_MORE); - size_t try_to_copy, copied = 0; + size_t try_to_copy; + ssize_t copied = 0; struct sk_msg *msg_pl, *msg_en; struct tls_rec *rec; int required_size; @@ -1116,7 +1121,7 @@ send_end: release_sock(sk); mutex_unlock(&tls_ctx->tx_lock); - return copied ? copied : ret; + return copied > 0 ? copied : ret; } static int tls_sw_do_sendpage(struct sock *sk, struct page *page, @@ -1130,7 +1135,7 @@ static int tls_sw_do_sendpage(struct sock *sk, struct page *page, struct sk_msg *msg_pl; struct tls_rec *rec; int num_async = 0; - size_t copied = 0; + ssize_t copied = 0; bool full_record; int record_room; int ret = 0; @@ -1232,7 +1237,7 @@ wait_for_memory: } sendpage_end: ret = sk_stream_error(sk, flags, ret); - return copied ? copied : ret; + return copied > 0 ? copied : ret; } int tls_sw_sendpage_locked(struct sock *sk, struct page *page, @@ -2081,8 +2086,9 @@ static void tls_data_ready(struct sock *sk) strp_data_ready(&ctx->strp); psock = sk_psock_get(sk); - if (psock && !list_empty(&psock->ingress_msg)) { - ctx->saved_data_ready(sk); + if (psock) { + if (!list_empty(&psock->ingress_msg)) + ctx->saved_data_ready(sk); sk_psock_put(sk, psock); } } diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 709038a4783e..69efc891885f 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -157,7 +157,11 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque) void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt) { + if (pkt->tap_delivered) + return; + vsock_deliver_tap(virtio_transport_build_skb, pkt); + pkt->tap_delivered = true; } EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt); diff --git a/net/x25/x25_subr.c b/net/x25/x25_subr.c index 8aa415a38814..0285aaa1e93c 100644 --- a/net/x25/x25_subr.c +++ b/net/x25/x25_subr.c @@ -357,6 +357,12 @@ void x25_disconnect(struct sock *sk, int reason, unsigned char cause, sk->sk_state_change(sk); sock_set_flag(sk, SOCK_DEAD); } + if (x25->neighbour) { + read_lock_bh(&x25_list_lock); + x25_neigh_put(x25->neighbour); + x25->neighbour = NULL; + read_unlock_bh(&x25_list_lock); + } } /* |