Diffstat (limited to 'net'): 263 files changed, 6728 insertions(+), 4654 deletions(-)
diff --git a/net/Kconfig.debug b/net/Kconfig.debug index e6ae11cc2fb7..5e3fffe707dd 100644 --- a/net/Kconfig.debug +++ b/net/Kconfig.debug @@ -2,7 +2,7 @@ config NET_DEV_REFCNT_TRACKER bool "Enable net device refcount tracking" - depends on DEBUG_KERNEL && STACKTRACE_SUPPORT + depends on DEBUG_KERNEL && STACKTRACE_SUPPORT && NET select REF_TRACKER default n help @@ -11,7 +11,7 @@ config NET_DEV_REFCNT_TRACKER config NET_NS_REFCNT_TRACKER bool "Enable networking namespace refcount tracking" - depends on DEBUG_KERNEL && STACKTRACE_SUPPORT + depends on DEBUG_KERNEL && STACKTRACE_SUPPORT && NET select REF_TRACKER default n help diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c index b6db999abf75..f1741fbfb617 100644 --- a/net/batman-adv/bat_v_elp.c +++ b/net/batman-adv/bat_v_elp.c @@ -125,7 +125,6 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh) /* if not a wifi interface, check if this device provides data via * ethtool (e.g. an Ethernet adapter) */ - memset(&link_settings, 0, sizeof(link_settings)); rtnl_lock(); ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings); rtnl_unlock(); diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c index b8f8da7ee3de..41c1ad33d009 100644 --- a/net/batman-adv/hard-interface.c +++ b/net/batman-adv/hard-interface.c @@ -10,6 +10,7 @@ #include <linux/atomic.h> #include <linux/byteorder/generic.h> #include <linux/container_of.h> +#include <linux/errno.h> #include <linux/gfp.h> #include <linux/if.h> #include <linux/if_arp.h> @@ -700,6 +701,9 @@ int batadv_hardif_enable_interface(struct batadv_hard_iface *hard_iface, int max_header_len = batadv_max_header_len(); int ret; + if (hard_iface->net_dev->mtu < ETH_MIN_MTU + max_header_len) + return -EINVAL; + if (hard_iface->if_status != BATADV_IF_NOT_IN_USE) goto out; diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h index 23f3d53f4b51..c48803b32bb0 100644 --- a/net/batman-adv/main.h +++ b/net/batman-adv/main.h @@ -13,7 +13,7 @@ #define BATADV_DRIVER_DEVICE "batman-adv" #ifndef BATADV_SOURCE_VERSION -#define BATADV_SOURCE_VERSION "2022.2" +#define BATADV_SOURCE_VERSION "2022.3" #endif /* B.A.T.M.A.N. 
parameters */ diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h index 31c8f922651d..5dd52bc5cabb 100644 --- a/net/batman-adv/trace.h +++ b/net/batman-adv/trace.h @@ -9,8 +9,6 @@ #include "main.h" -#include <linux/bug.h> -#include <linux/kernel.h> #include <linux/netdevice.h> #include <linux/percpu.h> #include <linux/printk.h> diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 2be5d4a712c5..758cd797a063 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1740,45 +1740,6 @@ struct batadv_priv { #endif }; -/** - * struct batadv_socket_client - layer2 icmp socket client data - */ -struct batadv_socket_client { - /** - * @queue_list: packet queue for packets destined for this socket client - */ - struct list_head queue_list; - - /** @queue_len: number of packets in the packet queue (queue_list) */ - unsigned int queue_len; - - /** @index: socket client's index in the batadv_socket_client_hash */ - unsigned char index; - - /** @lock: lock protecting queue_list, queue_len & index */ - spinlock_t lock; - - /** @queue_wait: socket client's wait queue */ - wait_queue_head_t queue_wait; - - /** @bat_priv: pointer to soft_iface this client belongs to */ - struct batadv_priv *bat_priv; -}; - -/** - * struct batadv_socket_packet - layer2 icmp packet for socket client - */ -struct batadv_socket_packet { - /** @list: list node for &batadv_socket_client.queue_list */ - struct list_head list; - - /** @icmp_len: size of the layer2 icmp packet */ - size_t icmp_len; - - /** @icmp_packet: layer2 icmp packet */ - u8 icmp_packet[BATADV_ICMP_MAX_PACKET_SIZE]; -}; - #ifdef CONFIG_BATMAN_ADV_BLA /** diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 9777e7b109ee..7a59c4487050 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -44,6 +44,11 @@ struct sco_param { u8 retrans_effort; }; +struct conn_handle_t { + struct hci_conn *conn; + __u16 handle; +}; + static const struct sco_param esco_param_cvsd[] = { { EDR_ESCO_MASK & ~ESCO_2EV3, 0x000a, 0x01 }, /* S3 */ { EDR_ESCO_MASK & ~ESCO_2EV3, 0x0007, 0x01 }, /* S2 */ @@ -316,17 +321,60 @@ static bool find_next_esco_param(struct hci_conn *conn, return conn->attempt <= size; } -static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle) +static int configure_datapath_sync(struct hci_dev *hdev, struct bt_codec *codec) { - struct hci_dev *hdev = conn->hdev; + int err; + __u8 vnd_len, *vnd_data = NULL; + struct hci_op_configure_data_path *cmd = NULL; + + err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len, + &vnd_data); + if (err < 0) + goto error; + + cmd = kzalloc(sizeof(*cmd) + vnd_len, GFP_KERNEL); + if (!cmd) { + err = -ENOMEM; + goto error; + } + + err = hdev->get_data_path_id(hdev, &cmd->data_path_id); + if (err < 0) + goto error; + + cmd->vnd_len = vnd_len; + memcpy(cmd->vnd_data, vnd_data, vnd_len); + + cmd->direction = 0x00; + __hci_cmd_sync_status(hdev, HCI_CONFIGURE_DATA_PATH, + sizeof(*cmd) + vnd_len, cmd, HCI_CMD_TIMEOUT); + + cmd->direction = 0x01; + err = __hci_cmd_sync_status(hdev, HCI_CONFIGURE_DATA_PATH, + sizeof(*cmd) + vnd_len, cmd, + HCI_CMD_TIMEOUT); +error: + + kfree(cmd); + kfree(vnd_data); + return err; +} + +static int hci_enhanced_setup_sync(struct hci_dev *hdev, void *data) +{ + struct conn_handle_t *conn_handle = data; + struct hci_conn *conn = conn_handle->conn; + __u16 handle = conn_handle->handle; struct hci_cp_enhanced_setup_sync_conn cp; const struct sco_param *param; + kfree(conn_handle); + bt_dev_dbg(hdev, "hcon %p", conn); 
/* for offload use case, codec needs to configured before opening SCO */ if (conn->codec.data_path) - hci_req_configure_datapath(hdev, &conn->codec); + configure_datapath_sync(hdev, &conn->codec); conn->state = BT_CONNECT; conn->out = true; @@ -344,7 +392,7 @@ static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle) case BT_CODEC_MSBC: if (!find_next_esco_param(conn, esco_param_msbc, ARRAY_SIZE(esco_param_msbc))) - return false; + return -EINVAL; param = &esco_param_msbc[conn->attempt - 1]; cp.tx_coding_format.id = 0x05; @@ -396,11 +444,11 @@ static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle) if (lmp_esco_capable(conn->link)) { if (!find_next_esco_param(conn, esco_param_cvsd, ARRAY_SIZE(esco_param_cvsd))) - return false; + return -EINVAL; param = &esco_param_cvsd[conn->attempt - 1]; } else { if (conn->attempt > ARRAY_SIZE(sco_param_cvsd)) - return false; + return -EINVAL; param = &sco_param_cvsd[conn->attempt - 1]; } cp.tx_coding_format.id = 2; @@ -423,7 +471,7 @@ static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle) cp.out_transport_unit_size = 16; break; default: - return false; + return -EINVAL; } cp.retrans_effort = param->retrans_effort; @@ -431,9 +479,9 @@ static bool hci_enhanced_setup_sync_conn(struct hci_conn *conn, __u16 handle) cp.max_latency = __cpu_to_le16(param->max_latency); if (hci_send_cmd(hdev, HCI_OP_ENHANCED_SETUP_SYNC_CONN, sizeof(cp), &cp) < 0) - return false; + return -EIO; - return true; + return 0; } static bool hci_setup_sync_conn(struct hci_conn *conn, __u16 handle) @@ -490,8 +538,24 @@ static bool hci_setup_sync_conn(struct hci_conn *conn, __u16 handle) bool hci_setup_sync(struct hci_conn *conn, __u16 handle) { - if (enhanced_sync_conn_capable(conn->hdev)) - return hci_enhanced_setup_sync_conn(conn, handle); + int result; + struct conn_handle_t *conn_handle; + + if (enhanced_sync_conn_capable(conn->hdev)) { + conn_handle = kzalloc(sizeof(*conn_handle), GFP_KERNEL); + + if (!conn_handle) + return false; + + conn_handle->conn = conn; + conn_handle->handle = handle; + result = hci_cmd_sync_queue(conn->hdev, hci_enhanced_setup_sync, + conn_handle, NULL); + if (result < 0) + kfree(conn_handle); + + return result == 0; + } return hci_setup_sync_conn(conn, handle); } @@ -2696,3 +2760,79 @@ u32 hci_conn_get_phy(struct hci_conn *conn) return phys; } + +int hci_abort_conn(struct hci_conn *conn, u8 reason) +{ + int r = 0; + + switch (conn->state) { + case BT_CONNECTED: + case BT_CONFIG: + if (conn->type == AMP_LINK) { + struct hci_cp_disconn_phy_link cp; + + cp.phy_handle = HCI_PHY_HANDLE(conn->handle); + cp.reason = reason; + r = hci_send_cmd(conn->hdev, HCI_OP_DISCONN_PHY_LINK, + sizeof(cp), &cp); + } else { + struct hci_cp_disconnect dc; + + dc.handle = cpu_to_le16(conn->handle); + dc.reason = reason; + r = hci_send_cmd(conn->hdev, HCI_OP_DISCONNECT, + sizeof(dc), &dc); + } + + conn->state = BT_DISCONN; + + break; + case BT_CONNECT: + if (conn->type == LE_LINK) { + if (test_bit(HCI_CONN_SCANNING, &conn->flags)) + break; + r = hci_send_cmd(conn->hdev, + HCI_OP_LE_CREATE_CONN_CANCEL, 0, NULL); + } else if (conn->type == ACL_LINK) { + if (conn->hdev->hci_ver < BLUETOOTH_VER_1_2) + break; + r = hci_send_cmd(conn->hdev, + HCI_OP_CREATE_CONN_CANCEL, + 6, &conn->dst); + } + break; + case BT_CONNECT2: + if (conn->type == ACL_LINK) { + struct hci_cp_reject_conn_req rej; + + bacpy(&rej.bdaddr, &conn->dst); + rej.reason = reason; + + r = hci_send_cmd(conn->hdev, + HCI_OP_REJECT_CONN_REQ, + sizeof(rej), &rej); 
+ } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { + struct hci_cp_reject_sync_conn_req rej; + + bacpy(&rej.bdaddr, &conn->dst); + + /* SCO rejection has its own limited set of + * allowed error values (0x0D-0x0F) which isn't + * compatible with most values passed to this + * function. To be safe hard-code one of the + * values that's suitable for SCO. + */ + rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; + + r = hci_send_cmd(conn->hdev, + HCI_OP_REJECT_SYNC_CONN_REQ, + sizeof(rej), &rej); + } + break; + default: + conn->state = BT_CLOSED; + break; + } + + return r; +} diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index b3a5a3cc9372..0540555b3704 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -597,6 +597,15 @@ static int hci_dev_do_reset(struct hci_dev *hdev) /* Cancel these to avoid queueing non-chained pending work */ hci_dev_set_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE); + /* Wait for + * + * if (!hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) + * queue_delayed_work(&hdev->{cmd,ncmd}_timer) + * + * inside RCU section to see the flag or complete scheduling. + */ + synchronize_rcu(); + /* Explicitly cancel works in case scheduled after setting the flag. */ cancel_delayed_work(&hdev->cmd_timer); cancel_delayed_work(&hdev->ncmd_timer); @@ -714,7 +723,7 @@ static void hci_update_passive_scan_state(struct hci_dev *hdev, u8 scan) hci_dev_set_flag(hdev, HCI_BREDR_ENABLED); if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - hci_req_update_adv_data(hdev, hdev->cur_adv_instance); + hci_update_adv_data(hdev, hdev->cur_adv_instance); mgmt_new_settings(hdev); } @@ -1706,7 +1715,8 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, u32 flags, u16 adv_data_len, u8 *adv_data, u16 scan_rsp_len, u8 *scan_rsp_data, u16 timeout, u16 duration, s8 tx_power, - u32 min_interval, u32 max_interval) + u32 min_interval, u32 max_interval, + u8 mesh_handle) { struct adv_info *adv; @@ -1717,7 +1727,7 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, memset(adv->per_adv_data, 0, sizeof(adv->per_adv_data)); } else { if (hdev->adv_instance_cnt >= hdev->le_num_of_adv_sets || - instance < 1 || instance > hdev->le_num_of_adv_sets) + instance < 1 || instance > hdev->le_num_of_adv_sets + 1) return ERR_PTR(-EOVERFLOW); adv = kzalloc(sizeof(*adv), GFP_KERNEL); @@ -1734,6 +1744,11 @@ struct adv_info *hci_add_adv_instance(struct hci_dev *hdev, u8 instance, adv->min_interval = min_interval; adv->max_interval = max_interval; adv->tx_power = tx_power; + /* Defining a mesh_handle changes the timing units to ms, + * rather than seconds, and ties the instance to the requested + * mesh_tx queue. + */ + adv->mesh = mesh_handle; hci_set_adv_instance_data(hdev, instance, adv_data_len, adv_data, scan_rsp_len, scan_rsp_data); @@ -1762,7 +1777,7 @@ struct adv_info *hci_add_per_instance(struct hci_dev *hdev, u8 instance, adv = hci_add_adv_instance(hdev, instance, flags, 0, NULL, 0, NULL, 0, 0, HCI_ADV_TX_POWER_NO_PREFERENCE, - min_interval, max_interval); + min_interval, max_interval, 0); if (IS_ERR(adv)) return adv; @@ -2391,6 +2406,10 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action, container_of(nb, struct hci_dev, suspend_notifier); int ret = 0; + /* Userspace has full control of this device. Do nothing. 
*/ + if (hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) + return NOTIFY_DONE; + if (action == PM_SUSPEND_PREPARE) ret = hci_suspend_dev(hdev); else if (action == PM_POST_SUSPEND) @@ -2486,6 +2505,7 @@ struct hci_dev *hci_alloc_dev_priv(int sizeof_priv) mutex_init(&hdev->lock); mutex_init(&hdev->req_lock); + INIT_LIST_HEAD(&hdev->mesh_pending); INIT_LIST_HEAD(&hdev->mgmt_pending); INIT_LIST_HEAD(&hdev->reject_list); INIT_LIST_HEAD(&hdev->accept_list); @@ -3469,15 +3489,27 @@ static inline int __get_blocks(struct hci_dev *hdev, struct sk_buff *skb) return DIV_ROUND_UP(skb->len - HCI_ACL_HDR_SIZE, hdev->block_len); } -static void __check_timeout(struct hci_dev *hdev, unsigned int cnt) +static void __check_timeout(struct hci_dev *hdev, unsigned int cnt, u8 type) { - if (!hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) { - /* ACL tx timeout must be longer than maximum - * link supervision timeout (40.9 seconds) */ - if (!cnt && time_after(jiffies, hdev->acl_last_tx + - HCI_ACL_TX_TIMEOUT)) - hci_link_tx_to(hdev, ACL_LINK); + unsigned long last_tx; + + if (hci_dev_test_flag(hdev, HCI_UNCONFIGURED)) + return; + + switch (type) { + case LE_LINK: + last_tx = hdev->le_last_tx; + break; + default: + last_tx = hdev->acl_last_tx; + break; } + + /* tx timeout must be longer than maximum link supervision timeout + * (40.9 seconds) + */ + if (!cnt && time_after(jiffies, last_tx + HCI_ACL_TX_TIMEOUT)) + hci_link_tx_to(hdev, type); } /* Schedule SCO */ @@ -3535,7 +3567,7 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev) struct sk_buff *skb; int quote; - __check_timeout(hdev, cnt); + __check_timeout(hdev, cnt, ACL_LINK); while (hdev->acl_cnt && (chan = hci_chan_sent(hdev, ACL_LINK, "e))) { @@ -3578,8 +3610,6 @@ static void hci_sched_acl_blk(struct hci_dev *hdev) int quote; u8 type; - __check_timeout(hdev, cnt); - BT_DBG("%s", hdev->name); if (hdev->dev_type == HCI_AMP) @@ -3587,6 +3617,8 @@ static void hci_sched_acl_blk(struct hci_dev *hdev) else type = ACL_LINK; + __check_timeout(hdev, cnt, type); + while (hdev->block_cnt > 0 && (chan = hci_chan_sent(hdev, type, "e))) { u32 priority = (skb_peek(&chan->data_q))->priority; @@ -3660,7 +3692,7 @@ static void hci_sched_le(struct hci_dev *hdev) cnt = hdev->le_pkts ? 
hdev->le_cnt : hdev->acl_cnt; - __check_timeout(hdev, cnt); + __check_timeout(hdev, cnt, LE_LINK); tmp = cnt; while (cnt && (chan = hci_chan_sent(hdev, LE_LINK, "e))) { @@ -4056,12 +4088,14 @@ static void hci_cmd_work(struct work_struct *work) if (res < 0) __hci_cmd_sync_cancel(hdev, -res); + rcu_read_lock(); if (test_bit(HCI_RESET, &hdev->flags) || hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) cancel_delayed_work(&hdev->cmd_timer); else - schedule_delayed_work(&hdev->cmd_timer, - HCI_CMD_TIMEOUT); + queue_delayed_work(hdev->workqueue, &hdev->cmd_timer, + HCI_CMD_TIMEOUT); + rcu_read_unlock(); } else { skb_queue_head(&hdev->cmd_q, skb); queue_work(hdev->workqueue, &hdev->cmd_work); diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index 902b40a90b91..3f401ec5bb0c 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -1245,7 +1245,7 @@ void hci_debugfs_create_conn(struct hci_conn *conn) struct hci_dev *hdev = conn->hdev; char name[6]; - if (IS_ERR_OR_NULL(hdev->debugfs)) + if (IS_ERR_OR_NULL(hdev->debugfs) || conn->debugfs) return; snprintf(name, sizeof(name), "%u", conn->handle); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 6643c9c20fa4..faca701bce2a 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -712,6 +712,47 @@ static u8 hci_cc_read_local_version(struct hci_dev *hdev, void *data, return rp->status; } +static u8 hci_cc_read_enc_key_size(struct hci_dev *hdev, void *data, + struct sk_buff *skb) +{ + struct hci_rp_read_enc_key_size *rp = data; + struct hci_conn *conn; + u16 handle; + u8 status = rp->status; + + bt_dev_dbg(hdev, "status 0x%2.2x", status); + + handle = le16_to_cpu(rp->handle); + + hci_dev_lock(hdev); + + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (!conn) { + status = 0xFF; + goto done; + } + + /* While unexpected, the read_enc_key_size command may fail. The most + * secure approach is to then assume the key size is 0 to force a + * disconnection. 
+ */ + if (status) { + bt_dev_err(hdev, "failed to read key size for handle %u", + handle); + conn->enc_key_size = 0; + } else { + conn->enc_key_size = rp->key_size; + status = 0; + } + + hci_encrypt_cfm(conn, 0); + +done: + hci_dev_unlock(hdev); + + return status; +} + static u8 hci_cc_read_local_commands(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -1715,6 +1756,8 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable) hci_dev_set_flag(hdev, HCI_LE_SCAN); if (hdev->le_scan_type == LE_SCAN_ACTIVE) clear_pending_adv_report(hdev); + if (hci_dev_test_flag(hdev, HCI_MESH)) + hci_discovery_set_state(hdev, DISCOVERY_FINDING); break; case LE_SCAN_DISABLE: @@ -1729,7 +1772,7 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable) d->last_adv_addr_type, NULL, d->last_adv_rssi, d->last_adv_flags, d->last_adv_data, - d->last_adv_data_len, NULL, 0); + d->last_adv_data_len, NULL, 0, 0); } /* Cancel this timer so that we don't try to disable scanning @@ -1745,6 +1788,9 @@ static void le_set_scan_enable_complete(struct hci_dev *hdev, u8 enable) */ if (hci_dev_test_and_clear_flag(hdev, HCI_LE_SCAN_INTERRUPTED)) hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + else if (!hci_dev_test_flag(hdev, HCI_LE_ADV) && + hdev->discovery.state == DISCOVERY_FINDING) + queue_work(hdev->workqueue, &hdev->reenable_adv_work); break; @@ -2152,7 +2198,7 @@ static u8 hci_cc_set_ext_adv_param(struct hci_dev *hdev, void *data, adv_instance->tx_power = rp->tx_power; } /* Update adv data as tx power is known now */ - hci_req_update_adv_data(hdev, cp->handle); + hci_update_adv_data(hdev, cp->handle); hci_dev_unlock(hdev); @@ -3071,7 +3117,7 @@ static void hci_inquiry_result_evt(struct hci_dev *hdev, void *edata, mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, HCI_RSSI_INVALID, - flags, NULL, 0, NULL, 0); + flags, NULL, 0, NULL, 0, 0); } hci_dev_unlock(hdev); @@ -3534,47 +3580,6 @@ unlock: hci_dev_unlock(hdev); } -static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status, - u16 opcode, struct sk_buff *skb) -{ - const struct hci_rp_read_enc_key_size *rp; - struct hci_conn *conn; - u16 handle; - - BT_DBG("%s status 0x%02x", hdev->name, status); - - if (!skb || skb->len < sizeof(*rp)) { - bt_dev_err(hdev, "invalid read key size response"); - return; - } - - rp = (void *)skb->data; - handle = le16_to_cpu(rp->handle); - - hci_dev_lock(hdev); - - conn = hci_conn_hash_lookup_handle(hdev, handle); - if (!conn) - goto unlock; - - /* While unexpected, the read_enc_key_size command may fail. The most - * secure approach is to then assume the key size is 0 to force a - * disconnection. - */ - if (rp->status) { - bt_dev_err(hdev, "failed to read key size for handle %u", - handle); - conn->enc_key_size = 0; - } else { - conn->enc_key_size = rp->key_size; - } - - hci_encrypt_cfm(conn, 0); - -unlock: - hci_dev_unlock(hdev); -} - static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { @@ -3639,7 +3644,6 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, /* Try reading the encryption key size for encrypted ACL links */ if (!ev->status && ev->encrypt && conn->type == ACL_LINK) { struct hci_cp_read_enc_key_size cp; - struct hci_request req; /* Only send HCI_Read_Encryption_Key_Size if the * controller really supports it. 
If it doesn't, assume @@ -3650,12 +3654,9 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, void *data, goto notify; } - hci_req_init(&req, hdev); - cp.handle = cpu_to_le16(conn->handle); - hci_req_add(&req, HCI_OP_READ_ENC_KEY_SIZE, sizeof(cp), &cp); - - if (hci_req_run_skb(&req, read_enc_key_size_complete)) { + if (hci_send_cmd(hdev, HCI_OP_READ_ENC_KEY_SIZE, + sizeof(cp), &cp)) { bt_dev_err(hdev, "sending read key size failed"); conn->enc_key_size = HCI_LINK_KEY_SIZE; goto notify; @@ -3766,16 +3767,18 @@ static inline void handle_cmd_cnt_and_timer(struct hci_dev *hdev, u8 ncmd) { cancel_delayed_work(&hdev->cmd_timer); + rcu_read_lock(); if (!test_bit(HCI_RESET, &hdev->flags)) { if (ncmd) { cancel_delayed_work(&hdev->ncmd_timer); atomic_set(&hdev->cmd_cnt, 1); } else { if (!hci_dev_test_flag(hdev, HCI_CMD_DRAIN_WORKQUEUE)) - schedule_delayed_work(&hdev->ncmd_timer, - HCI_NCMD_TIMEOUT); + queue_delayed_work(hdev->workqueue, &hdev->ncmd_timer, + HCI_NCMD_TIMEOUT); } } + rcu_read_unlock(); } static u8 hci_cc_le_read_buffer_size_v2(struct hci_dev *hdev, void *data, @@ -4037,6 +4040,8 @@ static const struct hci_cc { sizeof(struct hci_rp_read_local_amp_info)), HCI_CC(HCI_OP_READ_CLOCK, hci_cc_read_clock, sizeof(struct hci_rp_read_clock)), + HCI_CC(HCI_OP_READ_ENC_KEY_SIZE, hci_cc_read_enc_key_size, + sizeof(struct hci_rp_read_enc_key_size)), HCI_CC(HCI_OP_READ_INQ_RSP_TX_POWER, hci_cc_read_inq_rsp_tx_power, sizeof(struct hci_rp_read_inq_rsp_tx_power)), HCI_CC(HCI_OP_READ_DEF_ERR_DATA_REPORTING, @@ -4829,7 +4834,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, info->rssi, - flags, NULL, 0, NULL, 0); + flags, NULL, 0, NULL, 0, 0); } } else if (skb->len == array_size(ev->num, sizeof(struct inquiry_info_rssi))) { @@ -4860,7 +4865,7 @@ static void hci_inquiry_result_with_rssi_evt(struct hci_dev *hdev, void *edata, mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, info->rssi, - flags, NULL, 0, NULL, 0); + flags, NULL, 0, NULL, 0, 0); } } else { bt_dev_err(hdev, "Malformed HCI Event: 0x%2.2x", @@ -5116,7 +5121,7 @@ static void hci_extended_inquiry_result_evt(struct hci_dev *hdev, void *edata, mgmt_device_found(hdev, &info->bdaddr, ACL_LINK, 0x00, info->dev_class, info->rssi, - flags, info->data, eir_len, NULL, 0); + flags, info->data, eir_len, NULL, 0, 0); } hci_dev_unlock(hdev); @@ -6172,7 +6177,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev, static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, u8 bdaddr_type, bdaddr_t *direct_addr, u8 direct_addr_type, s8 rssi, u8 *data, u8 len, - bool ext_adv) + bool ext_adv, bool ctl_time, u64 instant) { struct discovery_state *d = &hdev->discovery; struct smp_irk *irk; @@ -6220,7 +6225,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, * important to see if the address is matching the local * controller address. 
*/ - if (direct_addr) { + if (!hci_dev_test_flag(hdev, HCI_MESH) && direct_addr) { direct_addr_type = ev_bdaddr_type(hdev, direct_addr_type, &bdaddr_resolved); @@ -6268,6 +6273,18 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, conn->le_adv_data_len = len; } + if (type == LE_ADV_NONCONN_IND || type == LE_ADV_SCAN_IND) + flags = MGMT_DEV_FOUND_NOT_CONNECTABLE; + else + flags = 0; + + /* All scan results should be sent up for Mesh systems */ + if (hci_dev_test_flag(hdev, HCI_MESH)) { + mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, + rssi, flags, data, len, NULL, 0, instant); + return; + } + /* Passive scanning shouldn't trigger any device found events, * except for devices marked as CONN_REPORT for which we do send * device found events, or advertisement monitoring requested. @@ -6281,12 +6298,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, idr_is_empty(&hdev->adv_monitors_idr)) return; - if (type == LE_ADV_NONCONN_IND || type == LE_ADV_SCAN_IND) - flags = MGMT_DEV_FOUND_NOT_CONNECTABLE; - else - flags = 0; mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, - rssi, flags, data, len, NULL, 0); + rssi, flags, data, len, NULL, 0, 0); return; } @@ -6305,11 +6318,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, * and just sends a scan response event, then it is marked as * not connectable as well. */ - if (type == LE_ADV_NONCONN_IND || type == LE_ADV_SCAN_IND || - type == LE_ADV_SCAN_RSP) + if (type == LE_ADV_SCAN_RSP) flags = MGMT_DEV_FOUND_NOT_CONNECTABLE; - else - flags = 0; /* If there's nothing pending either store the data from this * event or send an immediate device found event if the data @@ -6326,7 +6336,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, } mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, - rssi, flags, data, len, NULL, 0); + rssi, flags, data, len, NULL, 0, 0); return; } @@ -6345,7 +6355,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, d->last_adv_addr_type, NULL, d->last_adv_rssi, d->last_adv_flags, d->last_adv_data, - d->last_adv_data_len, NULL, 0); + d->last_adv_data_len, NULL, 0, 0); /* If the new report will trigger a SCAN_REQ store it for * later merging. 
@@ -6362,7 +6372,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, */ clear_pending_adv_report(hdev); mgmt_device_found(hdev, bdaddr, LE_LINK, bdaddr_type, NULL, - rssi, flags, data, len, NULL, 0); + rssi, flags, data, len, NULL, 0, 0); return; } @@ -6372,7 +6382,7 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr, */ mgmt_device_found(hdev, &d->last_adv_addr, LE_LINK, d->last_adv_addr_type, NULL, rssi, d->last_adv_flags, - d->last_adv_data, d->last_adv_data_len, data, len); + d->last_adv_data, d->last_adv_data_len, data, len, 0); clear_pending_adv_report(hdev); } @@ -6380,6 +6390,7 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { struct hci_ev_le_advertising_report *ev = data; + u64 instant = jiffies; if (!ev->num) return; @@ -6404,7 +6415,8 @@ static void hci_le_adv_report_evt(struct hci_dev *hdev, void *data, rssi = info->data[info->length]; process_adv_report(hdev, info->type, &info->bdaddr, info->bdaddr_type, NULL, 0, rssi, - info->data, info->length, false); + info->data, info->length, false, + false, instant); } else { bt_dev_err(hdev, "Dropping invalid advertising data"); } @@ -6461,6 +6473,7 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { struct hci_ev_le_ext_adv_report *ev = data; + u64 instant = jiffies; if (!ev->num) return; @@ -6487,7 +6500,8 @@ static void hci_le_ext_adv_report_evt(struct hci_dev *hdev, void *data, process_adv_report(hdev, legacy_evt_type, &info->bdaddr, info->bdaddr_type, NULL, 0, info->rssi, info->data, info->length, - !(evt_type & LE_EXT_ADV_LEGACY_PDU)); + !(evt_type & LE_EXT_ADV_LEGACY_PDU), + false, instant); } } @@ -6710,6 +6724,7 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data, struct sk_buff *skb) { struct hci_ev_le_direct_adv_report *ev = data; + u64 instant = jiffies; int i; if (!hci_le_ev_skb_pull(hdev, skb, HCI_EV_LE_DIRECT_ADV_REPORT, @@ -6727,7 +6742,7 @@ static void hci_le_direct_adv_report_evt(struct hci_dev *hdev, void *data, process_adv_report(hdev, info->type, &info->bdaddr, info->bdaddr_type, &info->direct_addr, info->direct_addr_type, info->rssi, NULL, 0, - false); + false, false, instant); } hci_dev_unlock(hdev); @@ -6776,6 +6791,13 @@ static void hci_le_cis_estabilished_evt(struct hci_dev *hdev, void *data, goto unlock; } + if (conn->type != ISO_LINK) { + bt_dev_err(hdev, + "Invalid connection link type handle 0x%4.4x", + handle); + goto unlock; + } + if (conn->role == HCI_ROLE_SLAVE) { __le32 interval; @@ -6896,6 +6918,13 @@ static void hci_le_create_big_complete_evt(struct hci_dev *hdev, void *data, if (!conn) goto unlock; + if (conn->type != ISO_LINK) { + bt_dev_err(hdev, + "Invalid connection link type handle 0x%2.2x", + ev->handle); + goto unlock; + } + if (ev->num_bis) conn->handle = __le16_to_cpu(ev->bis_handle[0]); diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index e64d558e5d69..5a0296a4352e 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -269,42 +269,10 @@ void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, const void *param) { + bt_dev_err(req->hdev, "HCI_REQ-0x%4.4x", opcode); hci_req_add_ev(req, opcode, plen, param, 0); } -void __hci_req_write_fast_connectable(struct hci_request *req, bool enable) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_write_page_scan_activity acp; - u8 type; - - if 
(!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - return; - - if (hdev->hci_ver < BLUETOOTH_VER_1_2) - return; - - if (enable) { - type = PAGE_SCAN_TYPE_INTERLACED; - - /* 160 msec page scan interval */ - acp.interval = cpu_to_le16(0x0100); - } else { - type = hdev->def_page_scan_type; - acp.interval = cpu_to_le16(hdev->def_page_scan_int); - } - - acp.window = cpu_to_le16(hdev->def_page_scan_window); - - if (__cpu_to_le16(hdev->page_scan_interval) != acp.interval || - __cpu_to_le16(hdev->page_scan_window) != acp.window) - hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_ACTIVITY, - sizeof(acp), &acp); - - if (hdev->page_scan_type != type) - hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); -} - static void start_interleave_scan(struct hci_dev *hdev) { hdev->interleave_scan_state = INTERLEAVE_SCAN_NO_FILTER; @@ -357,45 +325,6 @@ static bool __hci_update_interleaved_scan(struct hci_dev *hdev) return false; } -void __hci_req_update_name(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_write_local_name cp; - - memcpy(cp.name, hdev->dev_name, sizeof(cp.name)); - - hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); -} - -void __hci_req_update_eir(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_write_eir cp; - - if (!hdev_is_powered(hdev)) - return; - - if (!lmp_ext_inq_capable(hdev)) - return; - - if (!hci_dev_test_flag(hdev, HCI_SSP_ENABLED)) - return; - - if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) - return; - - memset(&cp, 0, sizeof(cp)); - - eir_create(hdev, cp.data); - - if (memcmp(cp.data, hdev->eir, sizeof(cp.data)) == 0) - return; - - memcpy(hdev->eir, cp.data, sizeof(cp.data)); - - hci_req_add(req, HCI_OP_WRITE_EIR, sizeof(cp), &cp); -} - void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn) { struct hci_dev *hdev = req->hdev; @@ -721,6 +650,96 @@ static inline bool hci_is_le_conn_scanning(struct hci_dev *hdev) return false; } +static void set_random_addr(struct hci_request *req, bdaddr_t *rpa); +static int hci_update_random_address(struct hci_request *req, + bool require_privacy, bool use_rpa, + u8 *own_addr_type) +{ + struct hci_dev *hdev = req->hdev; + int err; + + /* If privacy is enabled use a resolvable private address. If + * current RPA has expired or there is something else than + * the current RPA in use, then generate a new one. + */ + if (use_rpa) { + /* If Controller supports LL Privacy use own address type is + * 0x03 + */ + if (use_ll_privacy(hdev)) + *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; + else + *own_addr_type = ADDR_LE_DEV_RANDOM; + + if (rpa_valid(hdev)) + return 0; + + err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); + if (err < 0) { + bt_dev_err(hdev, "failed to generate new RPA"); + return err; + } + + set_random_addr(req, &hdev->rpa); + + return 0; + } + + /* In case of required privacy without resolvable private address, + * use an non-resolvable private address. This is useful for active + * scanning and non-connectable advertising. + */ + if (require_privacy) { + bdaddr_t nrpa; + + while (true) { + /* The non-resolvable private address is generated + * from random six bytes with the two most significant + * bits cleared. + */ + get_random_bytes(&nrpa, 6); + nrpa.b[5] &= 0x3f; + + /* The non-resolvable private address shall not be + * equal to the public address. 
+ */ + if (bacmp(&hdev->bdaddr, &nrpa)) + break; + } + + *own_addr_type = ADDR_LE_DEV_RANDOM; + set_random_addr(req, &nrpa); + return 0; + } + + /* If forcing static address is in use or there is no public + * address use the static address as random address (but skip + * the HCI command if the current random address is already the + * static one. + * + * In case BR/EDR has been disabled on a dual-mode controller + * and a static address has been configured, then use that + * address instead of the public BR/EDR address. + */ + if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || + !bacmp(&hdev->bdaddr, BDADDR_ANY) || + (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && + bacmp(&hdev->static_addr, BDADDR_ANY))) { + *own_addr_type = ADDR_LE_DEV_RANDOM; + if (bacmp(&hdev->static_addr, &hdev->random_addr)) + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, + &hdev->static_addr); + return 0; + } + + /* Neither privacy nor static address is being used so use a + * public address. + */ + *own_addr_type = ADDR_LE_DEV_PUBLIC; + + return 0; +} + /* Ensure to call hci_req_add_le_scan_disable() first to disable the * controller based address resolution to be able to reconfigure * resolving list. @@ -810,366 +829,6 @@ void hci_req_add_le_passive_scan(struct hci_request *req) addr_resolv); } -static void cancel_adv_timeout(struct hci_dev *hdev) -{ - if (hdev->adv_instance_timeout) { - hdev->adv_instance_timeout = 0; - cancel_delayed_work(&hdev->adv_instance_expire); - } -} - -static bool adv_cur_instance_is_scannable(struct hci_dev *hdev) -{ - return hci_adv_instance_is_scannable(hdev, hdev->cur_adv_instance); -} - -void __hci_req_disable_advertising(struct hci_request *req) -{ - if (ext_adv_capable(req->hdev)) { - __hci_req_disable_ext_adv_instance(req, 0x00); - } else { - u8 enable = 0x00; - - hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); - } -} - -static bool adv_use_rpa(struct hci_dev *hdev, uint32_t flags) -{ - /* If privacy is not enabled don't use RPA */ - if (!hci_dev_test_flag(hdev, HCI_PRIVACY)) - return false; - - /* If basic privacy mode is enabled use RPA */ - if (!hci_dev_test_flag(hdev, HCI_LIMITED_PRIVACY)) - return true; - - /* If limited privacy mode is enabled don't use RPA if we're - * both discoverable and bondable. - */ - if ((flags & MGMT_ADV_FLAG_DISCOV) && - hci_dev_test_flag(hdev, HCI_BONDABLE)) - return false; - - /* We're neither bondable nor discoverable in the limited - * privacy mode, therefore use RPA. - */ - return true; -} - -static bool is_advertising_allowed(struct hci_dev *hdev, bool connectable) -{ - /* If there is no connection we are OK to advertise. */ - if (hci_conn_num(hdev, LE_LINK) == 0) - return true; - - /* Check le_states if there is any connection in peripheral role. */ - if (hdev->conn_hash.le_num_peripheral > 0) { - /* Peripheral connection state and non connectable mode bit 20. - */ - if (!connectable && !(hdev->le_states[2] & 0x10)) - return false; - - /* Peripheral connection state and connectable mode bit 38 - * and scannable bit 21. - */ - if (connectable && (!(hdev->le_states[4] & 0x40) || - !(hdev->le_states[2] & 0x20))) - return false; - } - - /* Check le_states if there is any connection in central role. */ - if (hci_conn_num(hdev, LE_LINK) != hdev->conn_hash.le_num_peripheral) { - /* Central connection state and non connectable mode bit 18. */ - if (!connectable && !(hdev->le_states[2] & 0x02)) - return false; - - /* Central connection state and connectable mode bit 35 and - * scannable 19. 
- */ - if (connectable && (!(hdev->le_states[4] & 0x08) || - !(hdev->le_states[2] & 0x08))) - return false; - } - - return true; -} - -void __hci_req_enable_advertising(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct adv_info *adv; - struct hci_cp_le_set_adv_param cp; - u8 own_addr_type, enable = 0x01; - bool connectable; - u16 adv_min_interval, adv_max_interval; - u32 flags; - - flags = hci_adv_instance_flags(hdev, hdev->cur_adv_instance); - adv = hci_find_adv_instance(hdev, hdev->cur_adv_instance); - - /* If the "connectable" instance flag was not set, then choose between - * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. - */ - connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) || - mgmt_get_connectable(hdev); - - if (!is_advertising_allowed(hdev, connectable)) - return; - - if (hci_dev_test_flag(hdev, HCI_LE_ADV)) - __hci_req_disable_advertising(req); - - /* Clear the HCI_LE_ADV bit temporarily so that the - * hci_update_random_address knows that it's safe to go ahead - * and write a new random address. The flag will be set back on - * as soon as the SET_ADV_ENABLE HCI command completes. - */ - hci_dev_clear_flag(hdev, HCI_LE_ADV); - - /* Set require_privacy to true only when non-connectable - * advertising is used. In that case it is fine to use a - * non-resolvable private address. - */ - if (hci_update_random_address(req, !connectable, - adv_use_rpa(hdev, flags), - &own_addr_type) < 0) - return; - - memset(&cp, 0, sizeof(cp)); - - if (adv) { - adv_min_interval = adv->min_interval; - adv_max_interval = adv->max_interval; - } else { - adv_min_interval = hdev->le_adv_min_interval; - adv_max_interval = hdev->le_adv_max_interval; - } - - if (connectable) { - cp.type = LE_ADV_IND; - } else { - if (adv_cur_instance_is_scannable(hdev)) - cp.type = LE_ADV_SCAN_IND; - else - cp.type = LE_ADV_NONCONN_IND; - - if (!hci_dev_test_flag(hdev, HCI_DISCOVERABLE) || - hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) { - adv_min_interval = DISCOV_LE_FAST_ADV_INT_MIN; - adv_max_interval = DISCOV_LE_FAST_ADV_INT_MAX; - } - } - - cp.min_interval = cpu_to_le16(adv_min_interval); - cp.max_interval = cpu_to_le16(adv_max_interval); - cp.own_address_type = own_addr_type; - cp.channel_map = hdev->le_adv_channel_map; - - hci_req_add(req, HCI_OP_LE_SET_ADV_PARAM, sizeof(cp), &cp); - - hci_req_add(req, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable); -} - -void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance) -{ - struct hci_dev *hdev = req->hdev; - u8 len; - - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - return; - - if (ext_adv_capable(hdev)) { - struct { - struct hci_cp_le_set_ext_scan_rsp_data cp; - u8 data[HCI_MAX_EXT_AD_LENGTH]; - } pdu; - - memset(&pdu, 0, sizeof(pdu)); - - len = eir_create_scan_rsp(hdev, instance, pdu.data); - - if (hdev->scan_rsp_data_len == len && - !memcmp(pdu.data, hdev->scan_rsp_data, len)) - return; - - memcpy(hdev->scan_rsp_data, pdu.data, len); - hdev->scan_rsp_data_len = len; - - pdu.cp.handle = instance; - pdu.cp.length = len; - pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; - pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG; - - hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_RSP_DATA, - sizeof(pdu.cp) + len, &pdu.cp); - } else { - struct hci_cp_le_set_scan_rsp_data cp; - - memset(&cp, 0, sizeof(cp)); - - len = eir_create_scan_rsp(hdev, instance, cp.data); - - if (hdev->scan_rsp_data_len == len && - !memcmp(cp.data, hdev->scan_rsp_data, len)) - return; - - memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data)); - 
hdev->scan_rsp_data_len = len; - - cp.length = len; - - hci_req_add(req, HCI_OP_LE_SET_SCAN_RSP_DATA, sizeof(cp), &cp); - } -} - -void __hci_req_update_adv_data(struct hci_request *req, u8 instance) -{ - struct hci_dev *hdev = req->hdev; - u8 len; - - if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) - return; - - if (ext_adv_capable(hdev)) { - struct { - struct hci_cp_le_set_ext_adv_data cp; - u8 data[HCI_MAX_EXT_AD_LENGTH]; - } pdu; - - memset(&pdu, 0, sizeof(pdu)); - - len = eir_create_adv_data(hdev, instance, pdu.data); - - /* There's nothing to do if the data hasn't changed */ - if (hdev->adv_data_len == len && - memcmp(pdu.data, hdev->adv_data, len) == 0) - return; - - memcpy(hdev->adv_data, pdu.data, len); - hdev->adv_data_len = len; - - pdu.cp.length = len; - pdu.cp.handle = instance; - pdu.cp.operation = LE_SET_ADV_DATA_OP_COMPLETE; - pdu.cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG; - - hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_DATA, - sizeof(pdu.cp) + len, &pdu.cp); - } else { - struct hci_cp_le_set_adv_data cp; - - memset(&cp, 0, sizeof(cp)); - - len = eir_create_adv_data(hdev, instance, cp.data); - - /* There's nothing to do if the data hasn't changed */ - if (hdev->adv_data_len == len && - memcmp(cp.data, hdev->adv_data, len) == 0) - return; - - memcpy(hdev->adv_data, cp.data, sizeof(cp.data)); - hdev->adv_data_len = len; - - cp.length = len; - - hci_req_add(req, HCI_OP_LE_SET_ADV_DATA, sizeof(cp), &cp); - } -} - -int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance) -{ - struct hci_request req; - - hci_req_init(&req, hdev); - __hci_req_update_adv_data(&req, instance); - - return hci_req_run(&req, NULL); -} - -static void enable_addr_resolution_complete(struct hci_dev *hdev, u8 status, - u16 opcode) -{ - BT_DBG("%s status %u", hdev->name, status); -} - -void hci_req_disable_address_resolution(struct hci_dev *hdev) -{ - struct hci_request req; - __u8 enable = 0x00; - - if (!hci_dev_test_flag(hdev, HCI_LL_RPA_RESOLUTION)) - return; - - hci_req_init(&req, hdev); - - hci_req_add(&req, HCI_OP_LE_SET_ADDR_RESOLV_ENABLE, 1, &enable); - - hci_req_run(&req, enable_addr_resolution_complete); -} - -static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) -{ - bt_dev_dbg(hdev, "status %u", status); -} - -void hci_req_reenable_advertising(struct hci_dev *hdev) -{ - struct hci_request req; - - if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && - list_empty(&hdev->adv_instances)) - return; - - hci_req_init(&req, hdev); - - if (hdev->cur_adv_instance) { - __hci_req_schedule_adv_instance(&req, hdev->cur_adv_instance, - true); - } else { - if (ext_adv_capable(hdev)) { - __hci_req_start_ext_adv(&req, 0x00); - } else { - __hci_req_update_adv_data(&req, 0x00); - __hci_req_update_scan_rsp_data(&req, 0x00); - __hci_req_enable_advertising(&req); - } - } - - hci_req_run(&req, adv_enable_complete); -} - -static void adv_timeout_expire(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - adv_instance_expire.work); - - struct hci_request req; - u8 instance; - - bt_dev_dbg(hdev, ""); - - hci_dev_lock(hdev); - - hdev->adv_instance_timeout = 0; - - instance = hdev->cur_adv_instance; - if (instance == 0x00) - goto unlock; - - hci_req_init(&req, hdev); - - hci_req_clear_adv_instance(hdev, NULL, &req, instance, false); - - if (list_empty(&hdev->adv_instances)) - __hci_req_disable_advertising(&req); - - hci_req_run(&req, NULL); - -unlock: - hci_dev_unlock(hdev); -} - static int hci_req_add_le_interleaved_scan(struct hci_request *req, unsigned long opt) { @@ 
-1226,84 +885,6 @@ static void interleave_scan_work(struct work_struct *work) &hdev->interleave_scan, timeout); } -int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, - bool use_rpa, struct adv_info *adv_instance, - u8 *own_addr_type, bdaddr_t *rand_addr) -{ - int err; - - bacpy(rand_addr, BDADDR_ANY); - - /* If privacy is enabled use a resolvable private address. If - * current RPA has expired then generate a new one. - */ - if (use_rpa) { - /* If Controller supports LL Privacy use own address type is - * 0x03 - */ - if (use_ll_privacy(hdev)) - *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; - else - *own_addr_type = ADDR_LE_DEV_RANDOM; - - if (adv_instance) { - if (adv_rpa_valid(adv_instance)) - return 0; - } else { - if (rpa_valid(hdev)) - return 0; - } - - err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); - if (err < 0) { - bt_dev_err(hdev, "failed to generate new RPA"); - return err; - } - - bacpy(rand_addr, &hdev->rpa); - - return 0; - } - - /* In case of required privacy without resolvable private address, - * use an non-resolvable private address. This is useful for - * non-connectable advertising. - */ - if (require_privacy) { - bdaddr_t nrpa; - - while (true) { - /* The non-resolvable private address is generated - * from random six bytes with the two most significant - * bits cleared. - */ - get_random_bytes(&nrpa, 6); - nrpa.b[5] &= 0x3f; - - /* The non-resolvable private address shall not be - * equal to the public address. - */ - if (bacmp(&hdev->bdaddr, &nrpa)) - break; - } - - *own_addr_type = ADDR_LE_DEV_RANDOM; - bacpy(rand_addr, &nrpa); - - return 0; - } - - /* No privacy so use a public address. */ - *own_addr_type = ADDR_LE_DEV_PUBLIC; - - return 0; -} - -void __hci_req_clear_ext_adv_sets(struct hci_request *req) -{ - hci_req_add(req, HCI_OP_LE_CLEAR_ADV_SETS, 0, NULL); -} - static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) { struct hci_dev *hdev = req->hdev; @@ -1328,933 +909,8 @@ static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); } -int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance) -{ - struct hci_cp_le_set_ext_adv_params cp; - struct hci_dev *hdev = req->hdev; - bool connectable; - u32 flags; - bdaddr_t random_addr; - u8 own_addr_type; - int err; - struct adv_info *adv; - bool secondary_adv, require_privacy; - - if (instance > 0) { - adv = hci_find_adv_instance(hdev, instance); - if (!adv) - return -EINVAL; - } else { - adv = NULL; - } - - flags = hci_adv_instance_flags(hdev, instance); - - /* If the "connectable" instance flag was not set, then choose between - * ADV_IND and ADV_NONCONN_IND based on the global connectable setting. - */ - connectable = (flags & MGMT_ADV_FLAG_CONNECTABLE) || - mgmt_get_connectable(hdev); - - if (!is_advertising_allowed(hdev, connectable)) - return -EPERM; - - /* Set require_privacy to true only when non-connectable - * advertising is used. In that case it is fine to use a - * non-resolvable private address. - */ - require_privacy = !connectable; - - /* Don't require privacy for periodic adv? 
*/ - if (adv && adv->periodic) - require_privacy = false; - - err = hci_get_random_address(hdev, require_privacy, - adv_use_rpa(hdev, flags), adv, - &own_addr_type, &random_addr); - if (err < 0) - return err; - - memset(&cp, 0, sizeof(cp)); - - if (adv) { - hci_cpu_to_le24(adv->min_interval, cp.min_interval); - hci_cpu_to_le24(adv->max_interval, cp.max_interval); - cp.tx_power = adv->tx_power; - } else { - hci_cpu_to_le24(hdev->le_adv_min_interval, cp.min_interval); - hci_cpu_to_le24(hdev->le_adv_max_interval, cp.max_interval); - cp.tx_power = HCI_ADV_TX_POWER_NO_PREFERENCE; - } - - secondary_adv = (flags & MGMT_ADV_FLAG_SEC_MASK); - - if (connectable) { - if (secondary_adv) - cp.evt_properties = cpu_to_le16(LE_EXT_ADV_CONN_IND); - else - cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_IND); - } else if (hci_adv_instance_is_scannable(hdev, instance) || - (flags & MGMT_ADV_PARAM_SCAN_RSP)) { - if (secondary_adv) - cp.evt_properties = cpu_to_le16(LE_EXT_ADV_SCAN_IND); - else - cp.evt_properties = cpu_to_le16(LE_LEGACY_ADV_SCAN_IND); - } else { - /* Secondary and periodic cannot use legacy PDUs */ - if (secondary_adv || (adv && adv->periodic)) - cp.evt_properties = cpu_to_le16(LE_EXT_ADV_NON_CONN_IND); - else - cp.evt_properties = cpu_to_le16(LE_LEGACY_NONCONN_IND); - } - - cp.own_addr_type = own_addr_type; - cp.channel_map = hdev->le_adv_channel_map; - cp.handle = instance; - - if (flags & MGMT_ADV_FLAG_SEC_2M) { - cp.primary_phy = HCI_ADV_PHY_1M; - cp.secondary_phy = HCI_ADV_PHY_2M; - } else if (flags & MGMT_ADV_FLAG_SEC_CODED) { - cp.primary_phy = HCI_ADV_PHY_CODED; - cp.secondary_phy = HCI_ADV_PHY_CODED; - } else { - /* In all other cases use 1M */ - cp.primary_phy = HCI_ADV_PHY_1M; - cp.secondary_phy = HCI_ADV_PHY_1M; - } - - hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_PARAMS, sizeof(cp), &cp); - - if ((own_addr_type == ADDR_LE_DEV_RANDOM || - own_addr_type == ADDR_LE_DEV_RANDOM_RESOLVED) && - bacmp(&random_addr, BDADDR_ANY)) { - struct hci_cp_le_set_adv_set_rand_addr cp; - - /* Check if random address need to be updated */ - if (adv) { - if (!bacmp(&random_addr, &adv->random_addr)) - return 0; - } else { - if (!bacmp(&random_addr, &hdev->random_addr)) - return 0; - /* Instance 0x00 doesn't have an adv_info, instead it - * uses hdev->random_addr to track its address so - * whenever it needs to be updated this also set the - * random address since hdev->random_addr is shared with - * scan state machine. - */ - set_random_addr(req, &random_addr); - } - - memset(&cp, 0, sizeof(cp)); - - cp.handle = instance; - bacpy(&cp.bdaddr, &random_addr); - - hci_req_add(req, - HCI_OP_LE_SET_ADV_SET_RAND_ADDR, - sizeof(cp), &cp); - } - - return 0; -} - -int __hci_req_enable_ext_advertising(struct hci_request *req, u8 instance) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_ext_adv_enable *cp; - struct hci_cp_ext_adv_set *adv_set; - u8 data[sizeof(*cp) + sizeof(*adv_set) * 1]; - struct adv_info *adv_instance; - - if (instance > 0) { - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return -EINVAL; - } else { - adv_instance = NULL; - } - - cp = (void *) data; - adv_set = (void *) cp->data; - - memset(cp, 0, sizeof(*cp)); - - cp->enable = 0x01; - cp->num_of_sets = 0x01; - - memset(adv_set, 0, sizeof(*adv_set)); - - adv_set->handle = instance; - - /* Set duration per instance since controller is responsible for - * scheduling it. 
- */ - if (adv_instance && adv_instance->duration) { - u16 duration = adv_instance->timeout * MSEC_PER_SEC; - - /* Time = N * 10 ms */ - adv_set->duration = cpu_to_le16(duration / 10); - } - - hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_ENABLE, - sizeof(*cp) + sizeof(*adv_set) * cp->num_of_sets, - data); - - return 0; -} - -int __hci_req_disable_ext_adv_instance(struct hci_request *req, u8 instance) -{ - struct hci_dev *hdev = req->hdev; - struct hci_cp_le_set_ext_adv_enable *cp; - struct hci_cp_ext_adv_set *adv_set; - u8 data[sizeof(*cp) + sizeof(*adv_set) * 1]; - u8 req_size; - - /* If request specifies an instance that doesn't exist, fail */ - if (instance > 0 && !hci_find_adv_instance(hdev, instance)) - return -EINVAL; - - memset(data, 0, sizeof(data)); - - cp = (void *)data; - adv_set = (void *)cp->data; - - /* Instance 0x00 indicates all advertising instances will be disabled */ - cp->num_of_sets = !!instance; - cp->enable = 0x00; - - adv_set->handle = instance; - - req_size = sizeof(*cp) + sizeof(*adv_set) * cp->num_of_sets; - hci_req_add(req, HCI_OP_LE_SET_EXT_ADV_ENABLE, req_size, data); - - return 0; -} - -int __hci_req_remove_ext_adv_instance(struct hci_request *req, u8 instance) -{ - struct hci_dev *hdev = req->hdev; - - /* If request specifies an instance that doesn't exist, fail */ - if (instance > 0 && !hci_find_adv_instance(hdev, instance)) - return -EINVAL; - - hci_req_add(req, HCI_OP_LE_REMOVE_ADV_SET, sizeof(instance), &instance); - - return 0; -} - -int __hci_req_start_ext_adv(struct hci_request *req, u8 instance) -{ - struct hci_dev *hdev = req->hdev; - struct adv_info *adv_instance = hci_find_adv_instance(hdev, instance); - int err; - - /* If instance isn't pending, the chip knows about it, and it's safe to - * disable - */ - if (adv_instance && !adv_instance->pending) - __hci_req_disable_ext_adv_instance(req, instance); - - err = __hci_req_setup_ext_adv_instance(req, instance); - if (err < 0) - return err; - - __hci_req_update_scan_rsp_data(req, instance); - __hci_req_enable_ext_advertising(req, instance); - - return 0; -} - -int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, - bool force) -{ - struct hci_dev *hdev = req->hdev; - struct adv_info *adv_instance = NULL; - u16 timeout; - - if (hci_dev_test_flag(hdev, HCI_ADVERTISING) || - list_empty(&hdev->adv_instances)) - return -EPERM; - - if (hdev->adv_instance_timeout) - return -EBUSY; - - adv_instance = hci_find_adv_instance(hdev, instance); - if (!adv_instance) - return -ENOENT; - - /* A zero timeout means unlimited advertising. As long as there is - * only one instance, duration should be ignored. We still set a timeout - * in case further instances are being added later on. - * - * If the remaining lifetime of the instance is more than the duration - * then the timeout corresponds to the duration, otherwise it will be - * reduced to the remaining instance lifetime. - */ - if (adv_instance->timeout == 0 || - adv_instance->duration <= adv_instance->remaining_time) - timeout = adv_instance->duration; - else - timeout = adv_instance->remaining_time; - - /* The remaining time is being reduced unless the instance is being - * advertised without time limit. 
- */ - if (adv_instance->timeout) - adv_instance->remaining_time = - adv_instance->remaining_time - timeout; - - /* Only use work for scheduling instances with legacy advertising */ - if (!ext_adv_capable(hdev)) { - hdev->adv_instance_timeout = timeout; - queue_delayed_work(hdev->req_workqueue, - &hdev->adv_instance_expire, - msecs_to_jiffies(timeout * 1000)); - } - - /* If we're just re-scheduling the same instance again then do not - * execute any HCI commands. This happens when a single instance is - * being advertised. - */ - if (!force && hdev->cur_adv_instance == instance && - hci_dev_test_flag(hdev, HCI_LE_ADV)) - return 0; - - hdev->cur_adv_instance = instance; - if (ext_adv_capable(hdev)) { - __hci_req_start_ext_adv(req, instance); - } else { - __hci_req_update_adv_data(req, instance); - __hci_req_update_scan_rsp_data(req, instance); - __hci_req_enable_advertising(req); - } - - return 0; -} - -/* For a single instance: - * - force == true: The instance will be removed even when its remaining - * lifetime is not zero. - * - force == false: the instance will be deactivated but kept stored unless - * the remaining lifetime is zero. - * - * For instance == 0x00: - * - force == true: All instances will be removed regardless of their timeout - * setting. - * - force == false: Only instances that have a timeout will be removed. - */ -void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, - struct hci_request *req, u8 instance, - bool force) -{ - struct adv_info *adv_instance, *n, *next_instance = NULL; - int err; - u8 rem_inst; - - /* Cancel any timeout concerning the removed instance(s). */ - if (!instance || hdev->cur_adv_instance == instance) - cancel_adv_timeout(hdev); - - /* Get the next instance to advertise BEFORE we remove - * the current one. This can be the same instance again - * if there is only one instance. - */ - if (instance && hdev->cur_adv_instance == instance) - next_instance = hci_get_next_instance(hdev, instance); - - if (instance == 0x00) { - list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, - list) { - if (!(force || adv_instance->timeout)) - continue; - - rem_inst = adv_instance->instance; - err = hci_remove_adv_instance(hdev, rem_inst); - if (!err) - mgmt_advertising_removed(sk, hdev, rem_inst); - } - } else { - adv_instance = hci_find_adv_instance(hdev, instance); - - if (force || (adv_instance && adv_instance->timeout && - !adv_instance->remaining_time)) { - /* Don't advertise a removed instance. */ - if (next_instance && - next_instance->instance == instance) - next_instance = NULL; - - err = hci_remove_adv_instance(hdev, instance); - if (!err) - mgmt_advertising_removed(sk, hdev, instance); - } - } - - if (!req || !hdev_is_powered(hdev) || - hci_dev_test_flag(hdev, HCI_ADVERTISING)) - return; - - if (next_instance && !ext_adv_capable(hdev)) - __hci_req_schedule_adv_instance(req, next_instance->instance, - false); -} - -int hci_update_random_address(struct hci_request *req, bool require_privacy, - bool use_rpa, u8 *own_addr_type) -{ - struct hci_dev *hdev = req->hdev; - int err; - - /* If privacy is enabled use a resolvable private address. If - * current RPA has expired or there is something else than - * the current RPA in use, then generate a new one. 
- */ - if (use_rpa) { - /* If Controller supports LL Privacy use own address type is - * 0x03 - */ - if (use_ll_privacy(hdev)) - *own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED; - else - *own_addr_type = ADDR_LE_DEV_RANDOM; - - if (rpa_valid(hdev)) - return 0; - - err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); - if (err < 0) { - bt_dev_err(hdev, "failed to generate new RPA"); - return err; - } - - set_random_addr(req, &hdev->rpa); - - return 0; - } - - /* In case of required privacy without resolvable private address, - * use an non-resolvable private address. This is useful for active - * scanning and non-connectable advertising. - */ - if (require_privacy) { - bdaddr_t nrpa; - - while (true) { - /* The non-resolvable private address is generated - * from random six bytes with the two most significant - * bits cleared. - */ - get_random_bytes(&nrpa, 6); - nrpa.b[5] &= 0x3f; - - /* The non-resolvable private address shall not be - * equal to the public address. - */ - if (bacmp(&hdev->bdaddr, &nrpa)) - break; - } - - *own_addr_type = ADDR_LE_DEV_RANDOM; - set_random_addr(req, &nrpa); - return 0; - } - - /* If forcing static address is in use or there is no public - * address use the static address as random address (but skip - * the HCI command if the current random address is already the - * static one. - * - * In case BR/EDR has been disabled on a dual-mode controller - * and a static address has been configured, then use that - * address instead of the public BR/EDR address. - */ - if (hci_dev_test_flag(hdev, HCI_FORCE_STATIC_ADDR) || - !bacmp(&hdev->bdaddr, BDADDR_ANY) || - (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) && - bacmp(&hdev->static_addr, BDADDR_ANY))) { - *own_addr_type = ADDR_LE_DEV_RANDOM; - if (bacmp(&hdev->static_addr, &hdev->random_addr)) - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, - &hdev->static_addr); - return 0; - } - - /* Neither privacy nor static address is being used so use a - * public address. 
- */ - *own_addr_type = ADDR_LE_DEV_PUBLIC; - - return 0; -} - -static bool disconnected_accept_list_entries(struct hci_dev *hdev) -{ - struct bdaddr_list *b; - - list_for_each_entry(b, &hdev->accept_list, list) { - struct hci_conn *conn; - - conn = hci_conn_hash_lookup_ba(hdev, ACL_LINK, &b->bdaddr); - if (!conn) - return true; - - if (conn->state != BT_CONNECTED && conn->state != BT_CONFIG) - return true; - } - - return false; -} - -void __hci_req_update_scan(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - u8 scan; - - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - return; - - if (!hdev_is_powered(hdev)) - return; - - if (mgmt_powering_down(hdev)) - return; - - if (hdev->scanning_paused) - return; - - if (hci_dev_test_flag(hdev, HCI_CONNECTABLE) || - disconnected_accept_list_entries(hdev)) - scan = SCAN_PAGE; - else - scan = SCAN_DISABLED; - - if (hci_dev_test_flag(hdev, HCI_DISCOVERABLE)) - scan |= SCAN_INQUIRY; - - if (test_bit(HCI_PSCAN, &hdev->flags) == !!(scan & SCAN_PAGE) && - test_bit(HCI_ISCAN, &hdev->flags) == !!(scan & SCAN_INQUIRY)) - return; - - hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); -} - -static u8 get_service_classes(struct hci_dev *hdev) -{ - struct bt_uuid *uuid; - u8 val = 0; - - list_for_each_entry(uuid, &hdev->uuids, list) - val |= uuid->svc_hint; - - return val; -} - -void __hci_req_update_class(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - u8 cod[3]; - - bt_dev_dbg(hdev, ""); - - if (!hdev_is_powered(hdev)) - return; - - if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED)) - return; - - if (hci_dev_test_flag(hdev, HCI_SERVICE_CACHE)) - return; - - cod[0] = hdev->minor_class; - cod[1] = hdev->major_class; - cod[2] = get_service_classes(hdev); - - if (hci_dev_test_flag(hdev, HCI_LIMITED_DISCOVERABLE)) - cod[1] |= 0x20; - - if (memcmp(cod, hdev->dev_class, 3) == 0) - return; - - hci_req_add(req, HCI_OP_WRITE_CLASS_OF_DEV, sizeof(cod), cod); -} - -void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, - u8 reason) -{ - switch (conn->state) { - case BT_CONNECTED: - case BT_CONFIG: - if (conn->type == AMP_LINK) { - struct hci_cp_disconn_phy_link cp; - - cp.phy_handle = HCI_PHY_HANDLE(conn->handle); - cp.reason = reason; - hci_req_add(req, HCI_OP_DISCONN_PHY_LINK, sizeof(cp), - &cp); - } else { - struct hci_cp_disconnect dc; - - dc.handle = cpu_to_le16(conn->handle); - dc.reason = reason; - hci_req_add(req, HCI_OP_DISCONNECT, sizeof(dc), &dc); - } - - conn->state = BT_DISCONN; - - break; - case BT_CONNECT: - if (conn->type == LE_LINK) { - if (test_bit(HCI_CONN_SCANNING, &conn->flags)) - break; - hci_req_add(req, HCI_OP_LE_CREATE_CONN_CANCEL, - 0, NULL); - } else if (conn->type == ACL_LINK) { - if (req->hdev->hci_ver < BLUETOOTH_VER_1_2) - break; - hci_req_add(req, HCI_OP_CREATE_CONN_CANCEL, - 6, &conn->dst); - } - break; - case BT_CONNECT2: - if (conn->type == ACL_LINK) { - struct hci_cp_reject_conn_req rej; - - bacpy(&rej.bdaddr, &conn->dst); - rej.reason = reason; - - hci_req_add(req, HCI_OP_REJECT_CONN_REQ, - sizeof(rej), &rej); - } else if (conn->type == SCO_LINK || conn->type == ESCO_LINK) { - struct hci_cp_reject_sync_conn_req rej; - - bacpy(&rej.bdaddr, &conn->dst); - - /* SCO rejection has its own limited set of - * allowed error values (0x0D-0x0F) which isn't - * compatible with most values passed to this - * function. To be safe hard-code one of the - * values that's suitable for SCO. 
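In __hci_req_update_class() above, the three cod[] bytes map onto the 24-bit Class of Device field, and the 0x20 OR-ed into cod[1] is the Limited Discoverable service bit (bit 13 of the full field). A sketch of that packing, using only helpers from the hunk:

    /* Sketch: Class of Device packing as done above. The field is sent
     * little-endian: cod[0] minor class, cod[1] major class, cod[2]
     * service classes; bit 5 of cod[1] == bit 13 == Limited Discoverable.
     */
    static void build_cod(struct hci_dev *hdev, u8 cod[3], bool limited)
    {
        cod[0] = hdev->minor_class;
        cod[1] = hdev->major_class;
        cod[2] = get_service_classes(hdev);

        if (limited)
            cod[1] |= 0x20;
    }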
- */ - rej.reason = HCI_ERROR_REJ_LIMITED_RESOURCES; - - hci_req_add(req, HCI_OP_REJECT_SYNC_CONN_REQ, - sizeof(rej), &rej); - } - break; - default: - conn->state = BT_CLOSED; - break; - } -} - -static void abort_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) -{ - if (status) - bt_dev_dbg(hdev, "Failed to abort connection: status 0x%2.2x", status); -} - -int hci_abort_conn(struct hci_conn *conn, u8 reason) -{ - struct hci_request req; - int err; - - hci_req_init(&req, conn->hdev); - - __hci_abort_conn(&req, conn, reason); - - err = hci_req_run(&req, abort_conn_complete); - if (err && err != -ENODATA) { - bt_dev_err(conn->hdev, "failed to run HCI request: err %d", err); - return err; - } - - return 0; -} - -static int le_scan_disable(struct hci_request *req, unsigned long opt) -{ - hci_req_add_le_scan_disable(req, false); - return 0; -} - -static int bredr_inquiry(struct hci_request *req, unsigned long opt) -{ - u8 length = opt; - const u8 giac[3] = { 0x33, 0x8b, 0x9e }; - const u8 liac[3] = { 0x00, 0x8b, 0x9e }; - struct hci_cp_inquiry cp; - - if (test_bit(HCI_INQUIRY, &req->hdev->flags)) - return 0; - - bt_dev_dbg(req->hdev, ""); - - hci_dev_lock(req->hdev); - hci_inquiry_cache_flush(req->hdev); - hci_dev_unlock(req->hdev); - - memset(&cp, 0, sizeof(cp)); - - if (req->hdev->discovery.limited) - memcpy(&cp.lap, liac, sizeof(cp.lap)); - else - memcpy(&cp.lap, giac, sizeof(cp.lap)); - - cp.length = length; - - hci_req_add(req, HCI_OP_INQUIRY, sizeof(cp), &cp); - - return 0; -} - -static void le_scan_disable_work(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - le_scan_disable.work); - u8 status; - - bt_dev_dbg(hdev, ""); - - if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) - return; - - cancel_delayed_work(&hdev->le_scan_restart); - - hci_req_sync(hdev, le_scan_disable, 0, HCI_CMD_TIMEOUT, &status); - if (status) { - bt_dev_err(hdev, "failed to disable LE scan: status 0x%02x", - status); - return; - } - - hdev->discovery.scan_start = 0; - - /* If we were running LE only scan, change discovery state. If - * we were running both LE and BR/EDR inquiry simultaneously, - * and BR/EDR inquiry is already finished, stop discovery, - * otherwise BR/EDR inquiry will stop discovery when finished. - * If we will resolve remote device name, do not change - * discovery state. - */ - - if (hdev->discovery.type == DISCOV_TYPE_LE) - goto discov_stopped; - - if (hdev->discovery.type != DISCOV_TYPE_INTERLEAVED) - return; - - if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) { - if (!test_bit(HCI_INQUIRY, &hdev->flags) && - hdev->discovery.state != DISCOVERY_RESOLVING) - goto discov_stopped; - - return; - } - - hci_req_sync(hdev, bredr_inquiry, DISCOV_INTERLEAVED_INQUIRY_LEN, - HCI_CMD_TIMEOUT, &status); - if (status) { - bt_dev_err(hdev, "inquiry failed: status 0x%02x", status); - goto discov_stopped; - } - - return; - -discov_stopped: - hci_dev_lock(hdev); - hci_discovery_set_state(hdev, DISCOVERY_STOPPED); - hci_dev_unlock(hdev); -} - -static int le_scan_restart(struct hci_request *req, unsigned long opt) -{ - struct hci_dev *hdev = req->hdev; - - /* If controller is not scanning we are done. 
*/ - if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) - return 0; - - if (hdev->scanning_paused) { - bt_dev_dbg(hdev, "Scanning is paused for suspend"); - return 0; - } - - hci_req_add_le_scan_disable(req, false); - - if (use_ext_scan(hdev)) { - struct hci_cp_le_set_ext_scan_enable ext_enable_cp; - - memset(&ext_enable_cp, 0, sizeof(ext_enable_cp)); - ext_enable_cp.enable = LE_SCAN_ENABLE; - ext_enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - - hci_req_add(req, HCI_OP_LE_SET_EXT_SCAN_ENABLE, - sizeof(ext_enable_cp), &ext_enable_cp); - } else { - struct hci_cp_le_set_scan_enable cp; - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_ENABLE; - cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); - } - - return 0; -} - -static void le_scan_restart_work(struct work_struct *work) -{ - struct hci_dev *hdev = container_of(work, struct hci_dev, - le_scan_restart.work); - unsigned long timeout, duration, scan_start, now; - u8 status; - - bt_dev_dbg(hdev, ""); - - hci_req_sync(hdev, le_scan_restart, 0, HCI_CMD_TIMEOUT, &status); - if (status) { - bt_dev_err(hdev, "failed to restart LE scan: status %d", - status); - return; - } - - hci_dev_lock(hdev); - - if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || - !hdev->discovery.scan_start) - goto unlock; - - /* When the scan was started, hdev->le_scan_disable has been queued - * after duration from scan_start. During scan restart this job - * has been canceled, and we need to queue it again after proper - * timeout, to make sure that scan does not run indefinitely. - */ - duration = hdev->discovery.scan_duration; - scan_start = hdev->discovery.scan_start; - now = jiffies; - if (now - scan_start <= duration) { - int elapsed; - - if (now >= scan_start) - elapsed = now - scan_start; - else - elapsed = ULONG_MAX - scan_start + now; - - timeout = duration - elapsed; - } else { - timeout = 0; - } - - queue_delayed_work(hdev->req_workqueue, - &hdev->le_scan_disable, timeout); - -unlock: - hci_dev_unlock(hdev); -} - -bool hci_req_stop_discovery(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct discovery_state *d = &hdev->discovery; - struct hci_cp_remote_name_req_cancel cp; - struct inquiry_entry *e; - bool ret = false; - - bt_dev_dbg(hdev, "state %u", hdev->discovery.state); - - if (d->state == DISCOVERY_FINDING || d->state == DISCOVERY_STOPPING) { - if (test_bit(HCI_INQUIRY, &hdev->flags)) - hci_req_add(req, HCI_OP_INQUIRY_CANCEL, 0, NULL); - - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { - cancel_delayed_work(&hdev->le_scan_disable); - cancel_delayed_work(&hdev->le_scan_restart); - hci_req_add_le_scan_disable(req, false); - } - - ret = true; - } else { - /* Passive scanning */ - if (hci_dev_test_flag(hdev, HCI_LE_SCAN)) { - hci_req_add_le_scan_disable(req, false); - ret = true; - } - } - - /* No further actions needed for LE-only discovery */ - if (d->type == DISCOV_TYPE_LE) - return ret; - - if (d->state == DISCOVERY_RESOLVING || d->state == DISCOVERY_STOPPING) { - e = hci_inquiry_cache_lookup_resolve(hdev, BDADDR_ANY, - NAME_PENDING); - if (!e) - return ret; - - bacpy(&cp.bdaddr, &e->data.bdaddr); - hci_req_add(req, HCI_OP_REMOTE_NAME_REQ_CANCEL, sizeof(cp), - &cp); - ret = true; - } - - return ret; -} - -static void config_data_path_complete(struct hci_dev *hdev, u8 status, - u16 opcode) -{ - bt_dev_dbg(hdev, "status %u", status); -} - -int hci_req_configure_datapath(struct hci_dev *hdev, struct bt_codec *codec) -{ - struct hci_request req; - int err; - __u8 
vnd_len, *vnd_data = NULL; - struct hci_op_configure_data_path *cmd = NULL; - - hci_req_init(&req, hdev); - - err = hdev->get_codec_config_data(hdev, ESCO_LINK, codec, &vnd_len, - &vnd_data); - if (err < 0) - goto error; - - cmd = kzalloc(sizeof(*cmd) + vnd_len, GFP_KERNEL); - if (!cmd) { - err = -ENOMEM; - goto error; - } - - err = hdev->get_data_path_id(hdev, &cmd->data_path_id); - if (err < 0) - goto error; - - cmd->vnd_len = vnd_len; - memcpy(cmd->vnd_data, vnd_data, vnd_len); - - cmd->direction = 0x00; - hci_req_add(&req, HCI_CONFIGURE_DATA_PATH, sizeof(*cmd) + vnd_len, cmd); - - cmd->direction = 0x01; - hci_req_add(&req, HCI_CONFIGURE_DATA_PATH, sizeof(*cmd) + vnd_len, cmd); - - err = hci_req_run(&req, config_data_path_complete); -error: - - kfree(cmd); - kfree(vnd_data); - return err; -} - void hci_request_setup(struct hci_dev *hdev) { - INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); - INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); - INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); INIT_DELAYED_WORK(&hdev->interleave_scan, interleave_scan_work); } @@ -2262,13 +918,5 @@ void hci_request_cancel_all(struct hci_dev *hdev) { __hci_cmd_sync_cancel(hdev, ENODEV); - cancel_delayed_work_sync(&hdev->le_scan_disable); - cancel_delayed_work_sync(&hdev->le_scan_restart); - - if (hdev->adv_instance_timeout) { - cancel_delayed_work_sync(&hdev->adv_instance_expire); - hdev->adv_instance_timeout = 0; - } - cancel_interleave_scan(hdev); } diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 39d001fa3acf..b9c5a9823837 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -68,63 +68,10 @@ int __hci_req_sync(struct hci_dev *hdev, int (*func)(struct hci_request *req, struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param); -void __hci_req_write_fast_connectable(struct hci_request *req, bool enable); -void __hci_req_update_name(struct hci_request *req); -void __hci_req_update_eir(struct hci_request *req); - void hci_req_add_le_scan_disable(struct hci_request *req, bool rpa_le_conn); void hci_req_add_le_passive_scan(struct hci_request *req); void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next); -void hci_req_disable_address_resolution(struct hci_dev *hdev); -void hci_req_reenable_advertising(struct hci_dev *hdev); -void __hci_req_enable_advertising(struct hci_request *req); -void __hci_req_disable_advertising(struct hci_request *req); -void __hci_req_update_adv_data(struct hci_request *req, u8 instance); -int hci_req_update_adv_data(struct hci_dev *hdev, u8 instance); -int hci_req_start_per_adv(struct hci_dev *hdev, u8 instance, u32 flags, - u16 min_interval, u16 max_interval, - u16 sync_interval); -void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance); - -int __hci_req_schedule_adv_instance(struct hci_request *req, u8 instance, - bool force); -void hci_req_clear_adv_instance(struct hci_dev *hdev, struct sock *sk, - struct hci_request *req, u8 instance, - bool force); - -int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance); -int __hci_req_setup_per_adv_instance(struct hci_request *req, u8 instance, - u16 min_interval, u16 max_interval); -int __hci_req_start_ext_adv(struct hci_request *req, u8 instance); -int __hci_req_start_per_adv(struct hci_request *req, u8 instance, u32 flags, - u16 min_interval, u16 max_interval, - u16 sync_interval); -int __hci_req_enable_ext_advertising(struct hci_request *req, 
u8 instance); -int __hci_req_enable_per_advertising(struct hci_request *req, u8 instance); -int __hci_req_disable_ext_adv_instance(struct hci_request *req, u8 instance); -int __hci_req_remove_ext_adv_instance(struct hci_request *req, u8 instance); -void __hci_req_clear_ext_adv_sets(struct hci_request *req); -int hci_get_random_address(struct hci_dev *hdev, bool require_privacy, - bool use_rpa, struct adv_info *adv_instance, - u8 *own_addr_type, bdaddr_t *rand_addr); - -void __hci_req_update_class(struct hci_request *req); - -/* Returns true if HCI commands were queued */ -bool hci_req_stop_discovery(struct hci_request *req); - -int hci_req_configure_datapath(struct hci_dev *hdev, struct bt_codec *codec); - -void __hci_req_update_scan(struct hci_request *req); - -int hci_update_random_address(struct hci_request *req, bool require_privacy, - bool use_rpa, u8 *own_addr_type); - -int hci_abort_conn(struct hci_conn *conn, u8 reason); -void __hci_abort_conn(struct hci_request *req, struct hci_conn *conn, - u8 reason); - void hci_request_setup(struct hci_dev *hdev); void hci_request_cancel_all(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 0d015d4a8e41..06581223238c 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -887,7 +887,6 @@ static int hci_sock_release(struct socket *sock) */ hci_dev_do_close(hdev); hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); - hci_register_suspend_notifier(hdev); mgmt_index_added(hdev); } @@ -1216,7 +1215,6 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, } mgmt_index_removed(hdev); - hci_unregister_suspend_notifier(hdev); err = hci_dev_open(hdev->id); if (err) { @@ -1231,7 +1229,6 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, err = 0; } else { hci_dev_clear_flag(hdev, HCI_USER_CHANNEL); - hci_register_suspend_notifier(hdev); mgmt_index_added(hdev); hci_dev_put(hdev); goto done; @@ -2065,6 +2062,7 @@ static int hci_sock_getsockopt(struct socket *sock, int level, int optname, static void hci_sock_destruct(struct sock *sk) { + mgmt_cleanup(sk); skb_queue_purge(&sk->sk_receive_queue); skb_queue_purge(&sk->sk_write_queue); } diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c index 187786454d98..76c3107c9f91 100644 --- a/net/bluetooth/hci_sync.c +++ b/net/bluetooth/hci_sync.c @@ -246,7 +246,7 @@ int __hci_cmd_sync_status_sk(struct hci_dev *hdev, u16 opcode, u32 plen, skb = __hci_cmd_sync_sk(hdev, opcode, plen, param, event, timeout, sk); if (IS_ERR(skb)) { bt_dev_err(hdev, "Opcode 0x%4x failed: %ld", opcode, - PTR_ERR(skb)); + PTR_ERR(skb)); return PTR_ERR(skb); } @@ -321,6 +321,307 @@ static void hci_cmd_sync_cancel_work(struct work_struct *work) wake_up_interruptible(&hdev->req_wait_q); } +static int hci_scan_disable_sync(struct hci_dev *hdev); +static int scan_disable_sync(struct hci_dev *hdev, void *data) +{ + return hci_scan_disable_sync(hdev); +} + +static int hci_inquiry_sync(struct hci_dev *hdev, u8 length); +static int interleaved_inquiry_sync(struct hci_dev *hdev, void *data) +{ + return hci_inquiry_sync(hdev, DISCOV_INTERLEAVED_INQUIRY_LEN); +} + +static void le_scan_disable(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + le_scan_disable.work); + int status; + + bt_dev_dbg(hdev, ""); + hci_dev_lock(hdev); + + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) + goto _return; + + cancel_delayed_work(&hdev->le_scan_restart); + + status = hci_cmd_sync_queue(hdev, scan_disable_sync, NULL, NULL); + if 
(status) { + bt_dev_err(hdev, "failed to disable LE scan: %d", status); + goto _return; + } + + hdev->discovery.scan_start = 0; + + /* If we were running LE only scan, change discovery state. If + * we were running both LE and BR/EDR inquiry simultaneously, + * and BR/EDR inquiry is already finished, stop discovery, + * otherwise BR/EDR inquiry will stop discovery when finished. + * If we will resolve remote device name, do not change + * discovery state. + */ + + if (hdev->discovery.type == DISCOV_TYPE_LE) + goto discov_stopped; + + if (hdev->discovery.type != DISCOV_TYPE_INTERLEAVED) + goto _return; + + if (test_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks)) { + if (!test_bit(HCI_INQUIRY, &hdev->flags) && + hdev->discovery.state != DISCOVERY_RESOLVING) + goto discov_stopped; + + goto _return; + } + + status = hci_cmd_sync_queue(hdev, interleaved_inquiry_sync, NULL, NULL); + if (status) { + bt_dev_err(hdev, "inquiry failed: status %d", status); + goto discov_stopped; + } + + goto _return; + +discov_stopped: + hci_discovery_set_state(hdev, DISCOVERY_STOPPED); + +_return: + hci_dev_unlock(hdev); +} + +static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val, + u8 filter_dup); +static int hci_le_scan_restart_sync(struct hci_dev *hdev) +{ + /* If controller is not scanning we are done. */ + if (!hci_dev_test_flag(hdev, HCI_LE_SCAN)) + return 0; + + if (hdev->scanning_paused) { + bt_dev_dbg(hdev, "Scanning is paused for suspend"); + return 0; + } + + hci_le_set_scan_enable_sync(hdev, LE_SCAN_DISABLE, 0x00); + return hci_le_set_scan_enable_sync(hdev, LE_SCAN_ENABLE, + LE_SCAN_FILTER_DUP_ENABLE); +} + +static int le_scan_restart_sync(struct hci_dev *hdev, void *data) +{ + return hci_le_scan_restart_sync(hdev); +} + +static void le_scan_restart(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + le_scan_restart.work); + unsigned long timeout, duration, scan_start, now; + int status; + + bt_dev_dbg(hdev, ""); + + hci_dev_lock(hdev); + + status = hci_cmd_sync_queue(hdev, le_scan_restart_sync, NULL, NULL); + if (status) { + bt_dev_err(hdev, "failed to restart LE scan: status %d", + status); + goto unlock; + } + + if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || + !hdev->discovery.scan_start) + goto unlock; + + /* When the scan was started, hdev->le_scan_disable has been queued + * after duration from scan_start. During scan restart this job + * has been canceled, and we need to queue it again after proper + * timeout, to make sure that scan does not run indefinitely. 
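The timeout arithmetic that follows recomputes how much of the scan window is left. Since jiffies values are unsigned long, now - scan_start is already wrap-safe, so the computation reduces to something like the sketch below (note the original declares elapsed as int, which can truncate on 64-bit; unsigned long is the safer type):

    /* Sketch: remaining scan time; unsigned subtraction handles a
     * jiffies wrap by itself, making the explicit ULONG_MAX branch
     * below unnecessary.
     */
    static unsigned long scan_timeout_left(unsigned long now,
                                           unsigned long scan_start,
                                           unsigned long duration)
    {
        unsigned long elapsed = now - scan_start;

        return elapsed <= duration ? duration - elapsed : 0;
    }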
+ */ + duration = hdev->discovery.scan_duration; + scan_start = hdev->discovery.scan_start; + now = jiffies; + if (now - scan_start <= duration) { + int elapsed; + + if (now >= scan_start) + elapsed = now - scan_start; + else + elapsed = ULONG_MAX - scan_start + now; + + timeout = duration - elapsed; + } else { + timeout = 0; + } + + queue_delayed_work(hdev->req_workqueue, + &hdev->le_scan_disable, timeout); + +unlock: + hci_dev_unlock(hdev); +} + +static int reenable_adv_sync(struct hci_dev *hdev, void *data) +{ + bt_dev_dbg(hdev, ""); + + if (!hci_dev_test_flag(hdev, HCI_ADVERTISING) && + list_empty(&hdev->adv_instances)) + return 0; + + if (hdev->cur_adv_instance) { + return hci_schedule_adv_instance_sync(hdev, + hdev->cur_adv_instance, + true); + } else { + if (ext_adv_capable(hdev)) { + hci_start_ext_adv_sync(hdev, 0x00); + } else { + hci_update_adv_data_sync(hdev, 0x00); + hci_update_scan_rsp_data_sync(hdev, 0x00); + hci_enable_advertising_sync(hdev); + } + } + + return 0; +} + +static void reenable_adv(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + reenable_adv_work); + int status; + + bt_dev_dbg(hdev, ""); + + hci_dev_lock(hdev); + + status = hci_cmd_sync_queue(hdev, reenable_adv_sync, NULL, NULL); + if (status) + bt_dev_err(hdev, "failed to reenable ADV: %d", status); + + hci_dev_unlock(hdev); +} + +static void cancel_adv_timeout(struct hci_dev *hdev) +{ + if (hdev->adv_instance_timeout) { + hdev->adv_instance_timeout = 0; + cancel_delayed_work(&hdev->adv_instance_expire); + } +} + +/* For a single instance: + * - force == true: The instance will be removed even when its remaining + * lifetime is not zero. + * - force == false: the instance will be deactivated but kept stored unless + * the remaining lifetime is zero. + * + * For instance == 0x00: + * - force == true: All instances will be removed regardless of their timeout + * setting. + * - force == false: Only instances that have a timeout will be removed. + */ +int hci_clear_adv_instance_sync(struct hci_dev *hdev, struct sock *sk, + u8 instance, bool force) +{ + struct adv_info *adv_instance, *n, *next_instance = NULL; + int err; + u8 rem_inst; + + /* Cancel any timeout concerning the removed instance(s). */ + if (!instance || hdev->cur_adv_instance == instance) + cancel_adv_timeout(hdev); + + /* Get the next instance to advertise BEFORE we remove + * the current one. This can be the same instance again + * if there is only one instance. + */ + if (instance && hdev->cur_adv_instance == instance) + next_instance = hci_get_next_instance(hdev, instance); + + if (instance == 0x00) { + list_for_each_entry_safe(adv_instance, n, &hdev->adv_instances, + list) { + if (!(force || adv_instance->timeout)) + continue; + + rem_inst = adv_instance->instance; + err = hci_remove_adv_instance(hdev, rem_inst); + if (!err) + mgmt_advertising_removed(sk, hdev, rem_inst); + } + } else { + adv_instance = hci_find_adv_instance(hdev, instance); + + if (force || (adv_instance && adv_instance->timeout && + !adv_instance->remaining_time)) { + /* Don't advertise a removed instance. 
*/ + if (next_instance && + next_instance->instance == instance) + next_instance = NULL; + + err = hci_remove_adv_instance(hdev, instance); + if (!err) + mgmt_advertising_removed(sk, hdev, instance); + } + } + + if (!hdev_is_powered(hdev) || hci_dev_test_flag(hdev, HCI_ADVERTISING)) + return 0; + + if (next_instance && !ext_adv_capable(hdev)) + return hci_schedule_adv_instance_sync(hdev, + next_instance->instance, + false); + + return 0; +} + +static int adv_timeout_expire_sync(struct hci_dev *hdev, void *data) +{ + u8 instance = *(u8 *)data; + + kfree(data); + + hci_clear_adv_instance_sync(hdev, NULL, instance, false); + + if (list_empty(&hdev->adv_instances)) + return hci_disable_advertising_sync(hdev); + + return 0; +} + +static void adv_timeout_expire(struct work_struct *work) +{ + u8 *inst_ptr; + struct hci_dev *hdev = container_of(work, struct hci_dev, + adv_instance_expire.work); + + bt_dev_dbg(hdev, ""); + + hci_dev_lock(hdev); + + hdev->adv_instance_timeout = 0; + + if (hdev->cur_adv_instance == 0x00) + goto unlock; + + inst_ptr = kmalloc(1, GFP_KERNEL); + if (!inst_ptr) + goto unlock; + + *inst_ptr = hdev->cur_adv_instance; + hci_cmd_sync_queue(hdev, adv_timeout_expire_sync, inst_ptr, NULL); + +unlock: + hci_dev_unlock(hdev); +} + void hci_cmd_sync_init(struct hci_dev *hdev) { INIT_WORK(&hdev->cmd_sync_work, hci_cmd_sync_work); @@ -328,6 +629,10 @@ void hci_cmd_sync_init(struct hci_dev *hdev) mutex_init(&hdev->cmd_sync_work_lock); INIT_WORK(&hdev->cmd_sync_cancel_work, hci_cmd_sync_cancel_work); + INIT_WORK(&hdev->reenable_adv_work, reenable_adv); + INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable); + INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart); + INIT_DELAYED_WORK(&hdev->adv_instance_expire, adv_timeout_expire); } void hci_cmd_sync_clear(struct hci_dev *hdev) @@ -335,6 +640,7 @@ void hci_cmd_sync_clear(struct hci_dev *hdev) struct hci_cmd_sync_work_entry *entry, *tmp; cancel_work_sync(&hdev->cmd_sync_work); + cancel_work_sync(&hdev->reenable_adv_work); list_for_each_entry_safe(entry, tmp, &hdev->cmd_sync_work_list, list) { if (entry->destroy) @@ -1333,14 +1639,6 @@ int hci_le_terminate_big_sync(struct hci_dev *hdev, u8 handle, u8 reason) sizeof(cp), &cp, HCI_CMD_TIMEOUT); } -static void cancel_adv_timeout(struct hci_dev *hdev) -{ - if (hdev->adv_instance_timeout) { - hdev->adv_instance_timeout = 0; - cancel_delayed_work(&hdev->adv_instance_expire); - } -} - static int hci_set_ext_adv_data_sync(struct hci_dev *hdev, u8 instance) { struct { @@ -1492,10 +1790,13 @@ static int hci_clear_adv_sets_sync(struct hci_dev *hdev, struct sock *sk) static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force) { struct adv_info *adv, *n; + int err = 0; if (ext_adv_capable(hdev)) /* Remove all existing sets */ - return hci_clear_adv_sets_sync(hdev, sk); + err = hci_clear_adv_sets_sync(hdev, sk); + if (ext_adv_capable(hdev)) + return err; /* This is safe as long as there is no command send while the lock is * held. @@ -1523,11 +1824,13 @@ static int hci_clear_adv_sync(struct hci_dev *hdev, struct sock *sk, bool force) static int hci_remove_adv_sync(struct hci_dev *hdev, u8 instance, struct sock *sk) { - int err; + int err = 0; /* If we use extended advertising, instance has to be removed first. 
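adv_timeout_expire() above shows the recurring recipe for handing a scalar to hci_cmd_sync_queue(): heap-allocate it, queue the sync callback, and kfree() inside that callback, since it runs deferred from the cmd_sync work. A stripped-down sketch; my_instance_sync and do_instance_work are hypothetical names, not part of this patch:

    /* Sketch: one byte of context handed to the cmd_sync machinery.
     * The kfree() belongs in the callback because execution is
     * deferred to hdev->cmd_sync_work.
     */
    static int my_instance_sync(struct hci_dev *hdev, void *data)
    {
        u8 instance = *(u8 *)data;

        kfree(data);

        return do_instance_work(hdev, instance); /* hypothetical */
    }

    static int queue_instance_work(struct hci_dev *hdev, u8 instance)
    {
        u8 *inst_ptr = kmalloc(1, GFP_KERNEL);

        if (!inst_ptr)
            return -ENOMEM;

        *inst_ptr = instance;

        return hci_cmd_sync_queue(hdev, my_instance_sync, inst_ptr, NULL);
    }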
*/ if (ext_adv_capable(hdev)) - return hci_remove_ext_adv_instance_sync(hdev, instance, sk); + err = hci_remove_ext_adv_instance_sync(hdev, instance, sk); + if (ext_adv_capable(hdev)) + return err; /* This is safe as long as there is no command send while the lock is * held. @@ -1626,13 +1929,16 @@ int hci_read_tx_power_sync(struct hci_dev *hdev, __le16 handle, u8 type) int hci_disable_advertising_sync(struct hci_dev *hdev) { u8 enable = 0x00; + int err = 0; /* If controller is not advertising we are done. */ if (!hci_dev_test_flag(hdev, HCI_LE_ADV)) return 0; if (ext_adv_capable(hdev)) - return hci_disable_ext_adv_instance_sync(hdev, 0x00); + err = hci_disable_ext_adv_instance_sync(hdev, 0x00); + if (ext_adv_capable(hdev)) + return err; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_ADV_ENABLE, sizeof(enable), &enable, HCI_CMD_TIMEOUT); @@ -1645,7 +1951,11 @@ static int hci_le_set_ext_scan_enable_sync(struct hci_dev *hdev, u8 val, memset(&cp, 0, sizeof(cp)); cp.enable = val; - cp.filter_dup = filter_dup; + + if (hci_dev_test_flag(hdev, HCI_MESH)) + cp.filter_dup = LE_SCAN_FILTER_DUP_DISABLE; + else + cp.filter_dup = filter_dup; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_EXT_SCAN_ENABLE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); @@ -1661,7 +1971,11 @@ static int hci_le_set_scan_enable_sync(struct hci_dev *hdev, u8 val, memset(&cp, 0, sizeof(cp)); cp.enable = val; - cp.filter_dup = filter_dup; + + if (val && hci_dev_test_flag(hdev, HCI_MESH)) + cp.filter_dup = LE_SCAN_FILTER_DUP_DISABLE; + else + cp.filter_dup = filter_dup; return __hci_cmd_sync_status(hdev, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp, HCI_CMD_TIMEOUT); @@ -2300,6 +2614,7 @@ static int hci_passive_scan_sync(struct hci_dev *hdev) u8 own_addr_type; u8 filter_policy; u16 window, interval; + u8 filter_dups = LE_SCAN_FILTER_DUP_ENABLE; int err; if (hdev->scanning_paused) { @@ -2362,11 +2677,16 @@ static int hci_passive_scan_sync(struct hci_dev *hdev) interval = hdev->le_scan_interval; } + /* Disable all filtering for Mesh */ + if (hci_dev_test_flag(hdev, HCI_MESH)) { + filter_policy = 0; + filter_dups = LE_SCAN_FILTER_DUP_DISABLE; + } + bt_dev_dbg(hdev, "LE passive scan with acceptlist = %d", filter_policy); return hci_start_scan_sync(hdev, LE_SCAN_PASSIVE, interval, window, - own_addr_type, filter_policy, - LE_SCAN_FILTER_DUP_ENABLE); + own_addr_type, filter_policy, filter_dups); } /* This function controls the passive scanning based on hdev->pend_le_conns @@ -2416,7 +2736,8 @@ int hci_update_passive_scan_sync(struct hci_dev *hdev) bt_dev_dbg(hdev, "ADV monitoring is %s", hci_is_adv_monitoring(hdev) ? "on" : "off"); - if (list_empty(&hdev->pend_le_conns) && + if (!hci_dev_test_flag(hdev, HCI_MESH) && + list_empty(&hdev->pend_le_conns) && list_empty(&hdev->pend_le_reports) && !hci_is_adv_monitoring(hdev) && !hci_dev_test_flag(hdev, HCI_PA_SYNC)) { @@ -3018,12 +3339,6 @@ static const struct hci_init_stage amp_init2[] = { /* Read Buffer Size (ACL mtu, max pkt, etc.) 
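The next two hunks move the commands[41] & 0x20 test from the ACL buffer-size read down to the LE one, where it belongs: hdev->commands[] mirrors the controller's Supported Commands bitmask, one bit per HCI command, and per the usage here this particular bit advertises LE Read Buffer Size v2. The check itself is just:

    /* Sketch: Supported Commands probe; octet 41, bit 5 (0x20) flags
     * LE Read Buffer Size v2, per the hunks that follow.
     */
    static bool le_read_buffer_size_v2_capable(struct hci_dev *hdev)
    {
        return hdev->commands[41] & 0x20;
    }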
*/ static int hci_read_buffer_size_sync(struct hci_dev *hdev) { - /* Use Read LE Buffer Size V2 if supported */ - if (hdev->commands[41] & 0x20) - return __hci_cmd_sync_status(hdev, - HCI_OP_LE_READ_BUFFER_SIZE_V2, - 0, NULL, HCI_CMD_TIMEOUT); - return __hci_cmd_sync_status(hdev, HCI_OP_READ_BUFFER_SIZE, 0, NULL, HCI_CMD_TIMEOUT); } @@ -3237,6 +3552,12 @@ static const struct hci_init_stage hci_init2[] = { /* Read LE Buffer Size */ static int hci_le_read_buffer_size_sync(struct hci_dev *hdev) { + /* Use Read LE Buffer Size V2 if supported */ + if (hdev->commands[41] & 0x20) + return __hci_cmd_sync_status(hdev, + HCI_OP_LE_READ_BUFFER_SIZE_V2, + 0, NULL, HCI_CMD_TIMEOUT); + return __hci_cmd_sync_status(hdev, HCI_OP_LE_READ_BUFFER_SIZE, 0, NULL, HCI_CMD_TIMEOUT); } @@ -4355,6 +4676,7 @@ int hci_dev_open_sync(struct hci_dev *hdev) hci_dev_test_flag(hdev, HCI_MGMT) && hdev->dev_type == HCI_PRIMARY) { ret = hci_powered_update_sync(hdev); + mgmt_power_on(hdev, ret); } } else { /* Init failed, cleanup */ @@ -4406,6 +4728,31 @@ static void hci_pend_le_actions_clear(struct hci_dev *hdev) BT_DBG("All LE pending actions cleared"); } +static int hci_dev_shutdown(struct hci_dev *hdev) +{ + int err = 0; + /* Similar to how we first do setup and then set the exclusive access + * bit for userspace, we must first unset userchannel and then clean up. + * Otherwise, the kernel can't properly use the hci channel to clean up + * the controller (some shutdown routines require sending additional + * commands to the controller for example). + */ + bool was_userchannel = + hci_dev_test_and_clear_flag(hdev, HCI_USER_CHANNEL); + + if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) && + test_bit(HCI_UP, &hdev->flags)) { + /* Execute vendor specific shutdown routine */ + if (hdev->shutdown) + err = hdev->shutdown(hdev); + } + + if (was_userchannel) + hci_dev_set_flag(hdev, HCI_USER_CHANNEL); + + return err; +} + int hci_dev_close_sync(struct hci_dev *hdev) { bool auto_off; @@ -4415,17 +4762,18 @@ int hci_dev_close_sync(struct hci_dev *hdev) cancel_delayed_work(&hdev->power_off); cancel_delayed_work(&hdev->ncmd_timer); + cancel_delayed_work(&hdev->le_scan_disable); + cancel_delayed_work(&hdev->le_scan_restart); hci_request_cancel_all(hdev); - if (!hci_dev_test_flag(hdev, HCI_UNREGISTER) && - !hci_dev_test_flag(hdev, HCI_USER_CHANNEL) && - test_bit(HCI_UP, &hdev->flags)) { - /* Execute vendor specific shutdown routine */ - if (hdev->shutdown) - err = hdev->shutdown(hdev); + if (hdev->adv_instance_timeout) { + cancel_delayed_work_sync(&hdev->adv_instance_expire); + hdev->adv_instance_timeout = 0; } + err = hci_dev_shutdown(hdev); + if (!test_and_clear_bit(HCI_UP, &hdev->flags)) { cancel_delayed_work_sync(&hdev->cmd_timer); return err; @@ -5023,7 +5371,7 @@ static int hci_active_scan_sync(struct hci_dev *hdev, uint16_t interval) /* Pause advertising since active scanning disables address resolution * which advertising depend on in order to generate its RPAs. 
 */
-	if (use_ll_privacy(hdev)) {
+	if (use_ll_privacy(hdev) && hci_dev_test_flag(hdev, HCI_PRIVACY)) {
 		err = hci_pause_advertising_sync(hdev);
 		if (err) {
 			bt_dev_err(hdev, "pause advertising failed: %d", err);
@@ -5737,3 +6085,96 @@ int hci_le_pa_terminate_sync(struct hci_dev *hdev, u16 handle)
 	return __hci_cmd_sync_status(hdev, HCI_OP_LE_PA_TERM_SYNC,
 				     sizeof(cp), &cp, HCI_CMD_TIMEOUT);
 }
+
+int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
+			   bool use_rpa, struct adv_info *adv_instance,
+			   u8 *own_addr_type, bdaddr_t *rand_addr)
+{
+	int err;
+
+	bacpy(rand_addr, BDADDR_ANY);
+
+	/* If privacy is enabled, use a resolvable private address. If
+	 * the current RPA has expired, generate a new one.
+	 */
+	if (use_rpa) {
+		/* If the controller supports LL Privacy, use own address
+		 * type 0x03
+		 */
+		if (use_ll_privacy(hdev))
+			*own_addr_type = ADDR_LE_DEV_RANDOM_RESOLVED;
+		else
+			*own_addr_type = ADDR_LE_DEV_RANDOM;
+
+		if (adv_instance) {
+			if (adv_rpa_valid(adv_instance))
+				return 0;
+		} else {
+			if (rpa_valid(hdev))
+				return 0;
+		}
+
+		err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa);
+		if (err < 0) {
+			bt_dev_err(hdev, "failed to generate new RPA");
+			return err;
+		}
+
+		bacpy(rand_addr, &hdev->rpa);
+
+		return 0;
+	}
+
+	/* In case of required privacy without resolvable private address,
+	 * use a non-resolvable private address. This is useful for
+	 * non-connectable advertising.
+	 */
+	if (require_privacy) {
+		bdaddr_t nrpa;
+
+		while (true) {
+			/* The non-resolvable private address is generated
+			 * from random six bytes with the two most significant
+			 * bits cleared.
+			 */
+			get_random_bytes(&nrpa, 6);
+			nrpa.b[5] &= 0x3f;
+
+			/* The non-resolvable private address shall not be
+			 * equal to the public address.
+			 */
+			if (bacmp(&hdev->bdaddr, &nrpa))
+				break;
+		}
+
+		*own_addr_type = ADDR_LE_DEV_RANDOM;
+		bacpy(rand_addr, &nrpa);
+
+		return 0;
+	}
+
+	/* No privacy so use a public address.
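The nrpa.b[5] &= 0x3f in hci_get_random_address() above clears the two most significant bits of the address, which is what makes it non-resolvable: LE random addresses encode their sub-type in those bits. A classification sketch (bdaddr_t is stored little-endian, so b[5] is the most significant byte):

    /* Sketch: LE random address sub-types from the top two bits of the
     * most significant byte: 00 non-resolvable private, 01 resolvable
     * private, 11 static random (10 is reserved).
     */
    static const char *random_addr_kind(const bdaddr_t *a)
    {
        switch (a->b[5] >> 6) {
        case 0x00: return "non-resolvable private";
        case 0x01: return "resolvable private";
        case 0x03: return "static random";
        }

        return "reserved";
    }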
*/ + *own_addr_type = ADDR_LE_DEV_PUBLIC; + + return 0; +} + +static int _update_adv_data_sync(struct hci_dev *hdev, void *data) +{ + u8 instance = *(u8 *)data; + + kfree(data); + + return hci_update_adv_data_sync(hdev, instance); +} + +int hci_update_adv_data(struct hci_dev *hdev, u8 instance) +{ + u8 *inst_ptr = kmalloc(1, GFP_KERNEL); + + if (!inst_ptr) + return -ENOMEM; + + *inst_ptr = instance; + return hci_cmd_sync_queue(hdev, _update_adv_data_sync, inst_ptr, NULL); +} diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 4e3e0451b08c..08542dfc2dc5 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -48,6 +48,9 @@ void hci_conn_add_sysfs(struct hci_conn *conn) BT_DBG("conn %p", conn); + if (device_is_registered(&conn->dev)) + return; + dev_set_name(&conn->dev, "%s:%d", hdev->name, conn->handle); if (device_add(&conn->dev) < 0) { diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index 2c9de67daadc..1f34b82ca0ec 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -61,6 +61,9 @@ static void l2cap_send_disconn_req(struct l2cap_chan *chan, int err); static void l2cap_tx(struct l2cap_chan *chan, struct l2cap_ctrl *control, struct sk_buff_head *skbs, u8 event); +static void l2cap_retrans_timeout(struct work_struct *work); +static void l2cap_monitor_timeout(struct work_struct *work); +static void l2cap_ack_timeout(struct work_struct *work); static inline u8 bdaddr_type(u8 link_type, u8 bdaddr_type) { @@ -476,6 +479,9 @@ struct l2cap_chan *l2cap_chan_create(void) write_unlock(&chan_list_lock); INIT_DELAYED_WORK(&chan->chan_timer, l2cap_chan_timeout); + INIT_DELAYED_WORK(&chan->retrans_timer, l2cap_retrans_timeout); + INIT_DELAYED_WORK(&chan->monitor_timer, l2cap_monitor_timeout); + INIT_DELAYED_WORK(&chan->ack_timer, l2cap_ack_timeout); chan->state = BT_OPEN; @@ -3320,10 +3326,6 @@ int l2cap_ertm_init(struct l2cap_chan *chan) chan->rx_state = L2CAP_RX_STATE_RECV; chan->tx_state = L2CAP_TX_STATE_XMIT; - INIT_DELAYED_WORK(&chan->retrans_timer, l2cap_retrans_timeout); - INIT_DELAYED_WORK(&chan->monitor_timer, l2cap_monitor_timeout); - INIT_DELAYED_WORK(&chan->ack_timer, l2cap_ack_timeout); - skb_queue_head_init(&chan->srej_q); err = l2cap_seq_list_init(&chan->srej_list, chan->tx_win); @@ -4307,6 +4309,12 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, } } + chan = l2cap_chan_hold_unless_zero(chan); + if (!chan) { + err = -EBADSLT; + goto unlock; + } + err = 0; l2cap_chan_lock(chan); @@ -4336,6 +4344,7 @@ static int l2cap_connect_create_rsp(struct l2cap_conn *conn, } l2cap_chan_unlock(chan); + l2cap_chan_put(chan); unlock: mutex_unlock(&conn->chan_lock); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 72e6595a71cc..a92e7e485feb 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -129,6 +129,10 @@ static const u16 mgmt_commands[] = { MGMT_OP_ADD_EXT_ADV_PARAMS, MGMT_OP_ADD_EXT_ADV_DATA, MGMT_OP_ADD_ADV_PATTERNS_MONITOR_RSSI, + MGMT_OP_SET_MESH_RECEIVER, + MGMT_OP_MESH_READ_FEATURES, + MGMT_OP_MESH_SEND, + MGMT_OP_MESH_SEND_CANCEL, }; static const u16 mgmt_events[] = { @@ -1048,9 +1052,66 @@ static void discov_off(struct work_struct *work) hci_dev_unlock(hdev); } +static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev); + +static void mesh_send_complete(struct hci_dev *hdev, + struct mgmt_mesh_tx *mesh_tx, bool silent) +{ + u8 handle = mesh_tx->handle; + + if (!silent) + mgmt_event(MGMT_EV_MESH_PACKET_CMPLT, hdev, &handle, + sizeof(handle), NULL); + 
+ mgmt_mesh_remove(mesh_tx); +} + +static int mesh_send_done_sync(struct hci_dev *hdev, void *data) +{ + struct mgmt_mesh_tx *mesh_tx; + + hci_dev_clear_flag(hdev, HCI_MESH_SENDING); + hci_disable_advertising_sync(hdev); + mesh_tx = mgmt_mesh_next(hdev, NULL); + + if (mesh_tx) + mesh_send_complete(hdev, mesh_tx, false); + + return 0; +} + +static int mesh_send_sync(struct hci_dev *hdev, void *data); +static void mesh_send_start_complete(struct hci_dev *hdev, void *data, int err); +static void mesh_next(struct hci_dev *hdev, void *data, int err) +{ + struct mgmt_mesh_tx *mesh_tx = mgmt_mesh_next(hdev, NULL); + + if (!mesh_tx) + return; + + err = hci_cmd_sync_queue(hdev, mesh_send_sync, mesh_tx, + mesh_send_start_complete); + + if (err < 0) + mesh_send_complete(hdev, mesh_tx, false); + else + hci_dev_set_flag(hdev, HCI_MESH_SENDING); +} + +static void mesh_send_done(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + mesh_send_done.work); + + if (!hci_dev_test_flag(hdev, HCI_MESH_SENDING)) + return; + + hci_cmd_sync_queue(hdev, mesh_send_done_sync, NULL, mesh_next); +} + static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) { - if (hci_dev_test_and_set_flag(hdev, HCI_MGMT)) + if (hci_dev_test_flag(hdev, HCI_MGMT)) return; BT_INFO("MGMT ver %d.%d", MGMT_VERSION, MGMT_REVISION); @@ -1058,6 +1119,7 @@ static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) INIT_DELAYED_WORK(&hdev->discov_off, discov_off); INIT_DELAYED_WORK(&hdev->service_cache, service_cache_off); INIT_DELAYED_WORK(&hdev->rpa_expired, rpa_expired); + INIT_DELAYED_WORK(&hdev->mesh_send_done, mesh_send_done); /* Non-mgmt controlled devices get this bit set * implicitly so that pairing works for them, however @@ -1065,6 +1127,8 @@ static void mgmt_init_hdev(struct sock *sk, struct hci_dev *hdev) * it */ hci_dev_clear_flag(hdev, HCI_BONDABLE); + + hci_dev_set_flag(hdev, HCI_MGMT); } static int read_controller_info(struct sock *sk, struct hci_dev *hdev, @@ -2058,6 +2122,8 @@ static int set_le_sync(struct hci_dev *hdev, void *data) int err; if (!val) { + hci_clear_adv_instance_sync(hdev, NULL, 0x00, true); + if (hci_dev_test_flag(hdev, HCI_LE_ADV)) hci_disable_advertising_sync(hdev); @@ -2092,6 +2158,317 @@ static int set_le_sync(struct hci_dev *hdev, void *data) return err; } +static void set_mesh_complete(struct hci_dev *hdev, void *data, int err) +{ + struct mgmt_pending_cmd *cmd = data; + u8 status = mgmt_status(err); + struct sock *sk = cmd->sk; + + if (status) { + mgmt_pending_foreach(MGMT_OP_SET_MESH_RECEIVER, hdev, + cmd_status_rsp, &status); + return; + } + + mgmt_pending_remove(cmd); + mgmt_cmd_complete(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, 0, NULL, 0); +} + +static int set_mesh_sync(struct hci_dev *hdev, void *data) +{ + struct mgmt_pending_cmd *cmd = data; + struct mgmt_cp_set_mesh *cp = cmd->param; + size_t len = cmd->param_len; + + memset(hdev->mesh_ad_types, 0, sizeof(hdev->mesh_ad_types)); + + if (cp->enable) + hci_dev_set_flag(hdev, HCI_MESH); + else + hci_dev_clear_flag(hdev, HCI_MESH); + + len -= sizeof(*cp); + + /* If filters don't fit, forward all adv pkts */ + if (len <= sizeof(hdev->mesh_ad_types)) + memcpy(hdev->mesh_ad_types, cp->ad_types, len); + + hci_update_passive_scan_sync(hdev); + return 0; +} + +static int set_mesh(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) +{ + struct mgmt_cp_set_mesh *cp = data; + struct mgmt_pending_cmd *cmd; + int err = 0; + + bt_dev_dbg(hdev, "sock %p", sk); + + if (!lmp_le_capable(hdev) || + 
!hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_NOT_SUPPORTED); + + if (cp->enable != 0x00 && cp->enable != 0x01) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_INVALID_PARAMS); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_add(sk, MGMT_OP_SET_MESH_RECEIVER, hdev, data, len); + if (!cmd) + err = -ENOMEM; + else + err = hci_cmd_sync_queue(hdev, set_mesh_sync, cmd, + set_mesh_complete); + + if (err < 0) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_MESH_RECEIVER, + MGMT_STATUS_FAILED); + + if (cmd) + mgmt_pending_remove(cmd); + } + + hci_dev_unlock(hdev); + return err; +} + +static void mesh_send_start_complete(struct hci_dev *hdev, void *data, int err) +{ + struct mgmt_mesh_tx *mesh_tx = data; + struct mgmt_cp_mesh_send *send = (void *)mesh_tx->param; + unsigned long mesh_send_interval; + u8 mgmt_err = mgmt_status(err); + + /* Report any errors here, but don't report completion */ + + if (mgmt_err) { + hci_dev_clear_flag(hdev, HCI_MESH_SENDING); + /* Send Complete Error Code for handle */ + mesh_send_complete(hdev, mesh_tx, false); + return; + } + + mesh_send_interval = msecs_to_jiffies((send->cnt) * 25); + queue_delayed_work(hdev->req_workqueue, &hdev->mesh_send_done, + mesh_send_interval); +} + +static int mesh_send_sync(struct hci_dev *hdev, void *data) +{ + struct mgmt_mesh_tx *mesh_tx = data; + struct mgmt_cp_mesh_send *send = (void *)mesh_tx->param; + struct adv_info *adv, *next_instance; + u8 instance = hdev->le_num_of_adv_sets + 1; + u16 timeout, duration; + int err = 0; + + if (hdev->le_num_of_adv_sets <= hdev->adv_instance_cnt) + return MGMT_STATUS_BUSY; + + timeout = 1000; + duration = send->cnt * INTERVAL_TO_MS(hdev->le_adv_max_interval); + adv = hci_add_adv_instance(hdev, instance, 0, + send->adv_data_len, send->adv_data, + 0, NULL, + timeout, duration, + HCI_ADV_TX_POWER_NO_PREFERENCE, + hdev->le_adv_min_interval, + hdev->le_adv_max_interval, + mesh_tx->handle); + + if (!IS_ERR(adv)) + mesh_tx->instance = instance; + else + err = PTR_ERR(adv); + + if (hdev->cur_adv_instance == instance) { + /* If the currently advertised instance is being changed then + * cancel the current advertising and schedule the next + * instance. If there is only one instance then the overridden + * advertising data will be visible right away. 
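Two timing budgets drive the mesh TX path above: the advertising duration in mesh_send_sync() scales the controller interval (LE advertising intervals count 0.625 ms units, which is presumably what INTERVAL_TO_MS converts, since that macro is not shown in this patch), while mesh_send_start_complete() re-arms mesh_send_done after a flat 25 ms per requested transmission. Roughly:

    /* Sketch of the two budgets, assuming 0.625 ms interval units. */
    static unsigned int mesh_adv_duration_ms(u8 cnt, u16 max_interval)
    {
        return cnt * (max_interval * 625U / 1000U); /* per mesh_send_sync() */
    }

    static unsigned long mesh_done_delay(u8 cnt)
    {
        return msecs_to_jiffies(cnt * 25U); /* per mesh_send_start_complete() */
    }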
+ */ + cancel_adv_timeout(hdev); + + next_instance = hci_get_next_instance(hdev, instance); + if (next_instance) + instance = next_instance->instance; + else + instance = 0; + } else if (hdev->adv_instance_timeout) { + /* Immediately advertise the new instance if no other, or + * let it go naturally from queue if ADV is already happening + */ + instance = 0; + } + + if (instance) + return hci_schedule_adv_instance_sync(hdev, instance, true); + + return err; +} + +static void send_count(struct mgmt_mesh_tx *mesh_tx, void *data) +{ + struct mgmt_rp_mesh_read_features *rp = data; + + if (rp->used_handles >= rp->max_handles) + return; + + rp->handles[rp->used_handles++] = mesh_tx->handle; +} + +static int mesh_features(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_rp_mesh_read_features rp; + + if (!lmp_le_capable(hdev) || + !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_READ_FEATURES, + MGMT_STATUS_NOT_SUPPORTED); + + memset(&rp, 0, sizeof(rp)); + rp.index = cpu_to_le16(hdev->id); + if (hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + rp.max_handles = MESH_HANDLES_MAX; + + hci_dev_lock(hdev); + + if (rp.max_handles) + mgmt_mesh_foreach(hdev, send_count, &rp, sk); + + mgmt_cmd_complete(sk, hdev->id, MGMT_OP_MESH_READ_FEATURES, 0, &rp, + rp.used_handles + sizeof(rp) - MESH_HANDLES_MAX); + + hci_dev_unlock(hdev); + return 0; +} + +static int send_cancel(struct hci_dev *hdev, void *data) +{ + struct mgmt_pending_cmd *cmd = data; + struct mgmt_cp_mesh_send_cancel *cancel = (void *)cmd->param; + struct mgmt_mesh_tx *mesh_tx; + + if (!cancel->handle) { + do { + mesh_tx = mgmt_mesh_next(hdev, cmd->sk); + + if (mesh_tx) + mesh_send_complete(hdev, mesh_tx, false); + } while (mesh_tx); + } else { + mesh_tx = mgmt_mesh_find(hdev, cancel->handle); + + if (mesh_tx && mesh_tx->sk == cmd->sk) + mesh_send_complete(hdev, mesh_tx, false); + } + + mgmt_cmd_complete(cmd->sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL, + 0, NULL, 0); + mgmt_pending_free(cmd); + + return 0; +} + +static int mesh_send_cancel(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_pending_cmd *cmd; + int err; + + if (!lmp_le_capable(hdev) || + !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL, + MGMT_STATUS_NOT_SUPPORTED); + + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL, + MGMT_STATUS_REJECTED); + + hci_dev_lock(hdev); + cmd = mgmt_pending_new(sk, MGMT_OP_MESH_SEND_CANCEL, hdev, data, len); + if (!cmd) + err = -ENOMEM; + else + err = hci_cmd_sync_queue(hdev, send_cancel, cmd, NULL); + + if (err < 0) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND_CANCEL, + MGMT_STATUS_FAILED); + + if (cmd) + mgmt_pending_free(cmd); + } + + hci_dev_unlock(hdev); + return err; +} + +static int mesh_send(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) +{ + struct mgmt_mesh_tx *mesh_tx; + struct mgmt_cp_mesh_send *send = data; + struct mgmt_rp_mesh_read_features rp; + bool sending; + int err = 0; + + if (!lmp_le_capable(hdev) || + !hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, + MGMT_STATUS_NOT_SUPPORTED); + if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) || + len <= MGMT_MESH_SEND_SIZE || + len > (MGMT_MESH_SEND_SIZE + 31)) + return mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, + MGMT_STATUS_REJECTED); + + hci_dev_lock(hdev); + + memset(&rp, 0, 
sizeof(rp)); + rp.max_handles = MESH_HANDLES_MAX; + + mgmt_mesh_foreach(hdev, send_count, &rp, sk); + + if (rp.max_handles <= rp.used_handles) { + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, + MGMT_STATUS_BUSY); + goto done; + } + + sending = hci_dev_test_flag(hdev, HCI_MESH_SENDING); + mesh_tx = mgmt_mesh_add(sk, hdev, send, len); + + if (!mesh_tx) + err = -ENOMEM; + else if (!sending) + err = hci_cmd_sync_queue(hdev, mesh_send_sync, mesh_tx, + mesh_send_start_complete); + + if (err < 0) { + bt_dev_err(hdev, "Send Mesh Failed %d", err); + err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_MESH_SEND, + MGMT_STATUS_FAILED); + + if (mesh_tx) { + if (sending) + mgmt_mesh_remove(mesh_tx); + } + } else { + hci_dev_set_flag(hdev, HCI_MESH_SENDING); + + mgmt_cmd_complete(sk, hdev->id, MGMT_OP_MESH_SEND, 0, + &mesh_tx->handle, 1); + } + +done: + hci_dev_unlock(hdev); + return err; +} + static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; @@ -2131,9 +2508,6 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) val = !!cp->val; enabled = lmp_host_le_capable(hdev); - if (!val) - hci_req_clear_adv_instance(hdev, NULL, NULL, 0x00, true); - if (!hdev_is_powered(hdev) || val == enabled) { bool changed = false; @@ -3186,6 +3560,18 @@ unlock: return err; } +static int abort_conn_sync(struct hci_dev *hdev, void *data) +{ + struct hci_conn *conn; + u16 handle = PTR_ERR(data); + + conn = hci_conn_hash_lookup_handle(hdev, handle); + if (!conn) + return 0; + + return hci_abort_conn_sync(hdev, conn, HCI_ERROR_REMOTE_USER_TERM); +} + static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { @@ -3236,7 +3622,8 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data, le_addr_type(addr->type)); if (conn->conn_reason == CONN_REASON_PAIR_DEVICE) - hci_abort_conn(conn, HCI_ERROR_REMOTE_USER_TERM); + hci_cmd_sync_queue(hdev, abort_conn_sync, ERR_PTR(conn->handle), + NULL); unlock: hci_dev_unlock(hdev); @@ -3991,17 +4378,28 @@ static const u8 iso_socket_uuid[16] = { 0x6a, 0x49, 0xe0, 0x05, 0x88, 0xf1, 0xba, 0x6f, }; +/* 2ce463d7-7a03-4d8d-bf05-5f24e8f36e76 */ +static const u8 mgmt_mesh_uuid[16] = { + 0x76, 0x6e, 0xf3, 0xe8, 0x24, 0x5f, 0x05, 0xbf, + 0x8d, 0x4d, 0x03, 0x7a, 0xd7, 0x63, 0xe4, 0x2c, +}; + static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, void *data, u16 data_len) { - char buf[122]; /* Enough space for 6 features: 2 + 20 * 6 */ - struct mgmt_rp_read_exp_features_info *rp = (void *)buf; + struct mgmt_rp_read_exp_features_info *rp; + size_t len; u16 idx = 0; u32 flags; + int status; bt_dev_dbg(hdev, "sock %p", sk); - memset(&buf, 0, sizeof(buf)); + /* Enough space for 7 features */ + len = sizeof(*rp) + (sizeof(rp->features[0]) * 7); + rp = kzalloc(len, GFP_KERNEL); + if (!rp) + return -ENOMEM; #ifdef CONFIG_BT_FEATURE_DEBUG if (!hdev) { @@ -4065,6 +4463,17 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, idx++; } + if (hdev && lmp_le_capable(hdev)) { + if (hci_dev_test_flag(hdev, HCI_MESH_EXPERIMENTAL)) + flags = BIT(0); + else + flags = 0; + + memcpy(rp->features[idx].uuid, mgmt_mesh_uuid, 16); + rp->features[idx].flags = cpu_to_le32(flags); + idx++; + } + rp->feature_count = cpu_to_le16(idx); /* After reading the experimental features information, enable @@ -4072,9 +4481,12 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev, */ hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); - return 
mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE, - MGMT_OP_READ_EXP_FEATURES_INFO, - 0, rp, sizeof(*rp) + (20 * idx)); + status = mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE, + MGMT_OP_READ_EXP_FEATURES_INFO, + 0, rp, sizeof(*rp) + (20 * idx)); + + kfree(rp); + return status; } static int exp_ll_privacy_feature_changed(bool enabled, struct hci_dev *hdev, @@ -4202,6 +4614,63 @@ static int set_debug_func(struct sock *sk, struct hci_dev *hdev, } #endif +static int set_mgmt_mesh_func(struct sock *sk, struct hci_dev *hdev, + struct mgmt_cp_set_exp_feature *cp, u16 data_len) +{ + struct mgmt_rp_set_exp_feature rp; + bool val, changed; + int err; + + /* Command requires to use the controller index */ + if (!hdev) + return mgmt_cmd_status(sk, MGMT_INDEX_NONE, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_INDEX); + + /* Changes can only be made when controller is powered down */ + if (hdev_is_powered(hdev)) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_REJECTED); + + /* Parameters are limited to a single octet */ + if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); + + /* Only boolean on/off is supported */ + if (cp->param[0] != 0x00 && cp->param[0] != 0x01) + return mgmt_cmd_status(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, + MGMT_STATUS_INVALID_PARAMS); + + val = !!cp->param[0]; + + if (val) { + changed = !hci_dev_test_and_set_flag(hdev, + HCI_MESH_EXPERIMENTAL); + } else { + hci_dev_clear_flag(hdev, HCI_MESH); + changed = hci_dev_test_and_clear_flag(hdev, + HCI_MESH_EXPERIMENTAL); + } + + memcpy(rp.uuid, mgmt_mesh_uuid, 16); + rp.flags = cpu_to_le32(val ? BIT(0) : 0); + + hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS); + + err = mgmt_cmd_complete(sk, hdev->id, + MGMT_OP_SET_EXP_FEATURE, 0, + &rp, sizeof(rp)); + + if (changed) + exp_feature_changed(hdev, mgmt_mesh_uuid, val, sk); + + return err; +} + static int set_rpa_resolution_func(struct sock *sk, struct hci_dev *hdev, struct mgmt_cp_set_exp_feature *cp, u16 data_len) @@ -4517,6 +4986,7 @@ static const struct mgmt_exp_feature { #ifdef CONFIG_BT_FEATURE_DEBUG EXP_FEAT(debug_uuid, set_debug_func), #endif + EXP_FEAT(mgmt_mesh_uuid, set_mgmt_mesh_func), EXP_FEAT(rpa_resolution_uuid, set_rpa_resolution_func), EXP_FEAT(quality_report_uuid, set_quality_report_func), EXP_FEAT(offload_codecs_uuid, set_offload_codec_func), @@ -5981,6 +6451,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data, if (!hdev_is_powered(hdev) || (val == hci_dev_test_flag(hdev, HCI_ADVERTISING) && (cp->val == 0x02) == hci_dev_test_flag(hdev, HCI_ADVERTISING_CONNECTABLE)) || + hci_dev_test_flag(hdev, HCI_MESH) || hci_conn_num(hdev, LE_LINK) > 0 || (hci_dev_test_flag(hdev, HCI_LE_SCAN) && hdev->le_scan_type == LE_SCAN_ACTIVE)) { @@ -7909,8 +8380,7 @@ static u32 get_supported_adv_flags(struct hci_dev *hdev) /* In extended adv TX_POWER returned from Set Adv Param * will be always valid. 
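One easy-to-miss convention in the experimental-feature plumbing above: the UUID byte arrays (mgmt_mesh_uuid and friends) are stored in reversed byte order relative to their canonical string, which is why the array for 2ce463d7-7a03-4d8d-bf05-5f24e8f36e76 begins 0x76, 0x6e, 0xf3, .... A sketch that recovers the string form; the helper name is made up for illustration:

    /* Sketch: render an mgmt feature UUID in canonical order by walking
     * the stored array from last byte to first. Dashes omitted for
     * brevity; buf must hold at least 33 bytes.
     */
    static void mgmt_uuid_to_hex(const u8 uuid[16], char *buf)
    {
        int i;

        for (i = 0; i < 16; i++)
            sprintf(buf + 2 * i, "%02x", uuid[15 - i]);
    }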
*/ - if ((hdev->adv_tx_power != HCI_TX_POWER_INVALID) || - ext_adv_capable(hdev)) + if (hdev->adv_tx_power != HCI_TX_POWER_INVALID || ext_adv_capable(hdev)) flags |= MGMT_ADV_FLAG_TX_POWER; if (ext_adv_capable(hdev)) { @@ -7963,8 +8433,14 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev, instance = rp->instance; list_for_each_entry(adv_instance, &hdev->adv_instances, list) { - *instance = adv_instance->instance; - instance++; + /* Only instances 1-le_num_of_adv_sets are externally visible */ + if (adv_instance->instance <= hdev->adv_instance_cnt) { + *instance = adv_instance->instance; + instance++; + } else { + rp->num_instances--; + rp_len--; + } } hci_dev_unlock(hdev); @@ -8226,7 +8702,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev, timeout, duration, HCI_ADV_TX_POWER_NO_PREFERENCE, hdev->le_adv_min_interval, - hdev->le_adv_max_interval); + hdev->le_adv_max_interval, 0); if (IS_ERR(adv)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_ADVERTISING, MGMT_STATUS_FAILED); @@ -8430,7 +8906,7 @@ static int add_ext_adv_params(struct sock *sk, struct hci_dev *hdev, /* Create advertising instance with no advertising or response data */ adv = hci_add_adv_instance(hdev, cp->instance, flags, 0, NULL, 0, NULL, timeout, duration, tx_power, min_interval, - max_interval); + max_interval, 0); if (IS_ERR(adv)) { err = mgmt_cmd_status(sk, hdev->id, MGMT_OP_ADD_EXT_ADV_PARAMS, @@ -8876,8 +9352,13 @@ static const struct hci_mgmt_handler mgmt_handlers[] = { { add_ext_adv_data, MGMT_ADD_EXT_ADV_DATA_SIZE, HCI_MGMT_VAR_LEN }, { add_adv_patterns_monitor_rssi, - MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE, + MGMT_ADD_ADV_PATTERNS_MONITOR_RSSI_SIZE }, + { set_mesh, MGMT_SET_MESH_RECEIVER_SIZE, + HCI_MGMT_VAR_LEN }, + { mesh_features, MGMT_MESH_READ_FEATURES_SIZE }, + { mesh_send, MGMT_MESH_SEND_SIZE, HCI_MGMT_VAR_LEN }, + { mesh_send_cancel, MGMT_MESH_SEND_CANCEL_SIZE }, }; void mgmt_index_added(struct hci_dev *hdev) @@ -9817,14 +10298,86 @@ static void mgmt_adv_monitor_device_found(struct hci_dev *hdev, kfree_skb(skb); } +static void mesh_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, + u8 addr_type, s8 rssi, u32 flags, u8 *eir, + u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len, + u64 instant) +{ + struct sk_buff *skb; + struct mgmt_ev_mesh_device_found *ev; + int i, j; + + if (!hdev->mesh_ad_types[0]) + goto accepted; + + /* Scan for requested AD types */ + if (eir_len > 0) { + for (i = 0; i + 1 < eir_len; i += eir[i] + 1) { + for (j = 0; j < sizeof(hdev->mesh_ad_types); j++) { + if (!hdev->mesh_ad_types[j]) + break; + + if (hdev->mesh_ad_types[j] == eir[i + 1]) + goto accepted; + } + } + } + + if (scan_rsp_len > 0) { + for (i = 0; i + 1 < scan_rsp_len; i += scan_rsp[i] + 1) { + for (j = 0; j < sizeof(hdev->mesh_ad_types); j++) { + if (!hdev->mesh_ad_types[j]) + break; + + if (hdev->mesh_ad_types[j] == scan_rsp[i + 1]) + goto accepted; + } + } + } + + return; + +accepted: + skb = mgmt_alloc_skb(hdev, MGMT_EV_MESH_DEVICE_FOUND, + sizeof(*ev) + eir_len + scan_rsp_len); + if (!skb) + return; + + ev = skb_put(skb, sizeof(*ev)); + + bacpy(&ev->addr.bdaddr, bdaddr); + ev->addr.type = link_to_bdaddr(LE_LINK, addr_type); + ev->rssi = rssi; + ev->flags = cpu_to_le32(flags); + ev->instant = cpu_to_le64(instant); + + if (eir_len > 0) + /* Copy EIR or advertising data into event */ + skb_put_data(skb, eir, eir_len); + + if (scan_rsp_len > 0) + /* Append scan response data to event */ + skb_put_data(skb, scan_rsp, scan_rsp_len); + + ev->eir_len = cpu_to_le16(eir_len + 
scan_rsp_len); + + mgmt_event_skb(skb, NULL); +} + void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, - u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) + u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len, + u64 instant) { struct sk_buff *skb; struct mgmt_ev_device_found *ev; bool report_device = hci_discovery_active(hdev); + if (hci_dev_test_flag(hdev, HCI_MESH) && link_type == LE_LINK) + mesh_device_found(hdev, bdaddr, addr_type, rssi, flags, + eir, eir_len, scan_rsp, scan_rsp_len, + instant); + /* Don't send events for a non-kernel initiated discovery. With * LE one exception is if we have pend_le_reports > 0 in which * case we're doing passive scanning and want these events. @@ -9983,3 +10536,22 @@ void mgmt_exit(void) { hci_mgmt_chan_unregister(&chan); } + +void mgmt_cleanup(struct sock *sk) +{ + struct mgmt_mesh_tx *mesh_tx; + struct hci_dev *hdev; + + read_lock(&hci_dev_list_lock); + + list_for_each_entry(hdev, &hci_dev_list, list) { + do { + mesh_tx = mgmt_mesh_next(hdev, sk); + + if (mesh_tx) + mesh_send_complete(hdev, mesh_tx, true); + } while (mesh_tx); + } + + read_unlock(&hci_dev_list_lock); +} diff --git a/net/bluetooth/mgmt_util.c b/net/bluetooth/mgmt_util.c index b69cfed62088..0115f783bde8 100644 --- a/net/bluetooth/mgmt_util.c +++ b/net/bluetooth/mgmt_util.c @@ -314,3 +314,77 @@ void mgmt_pending_remove(struct mgmt_pending_cmd *cmd) list_del(&cmd->list); mgmt_pending_free(cmd); } + +void mgmt_mesh_foreach(struct hci_dev *hdev, + void (*cb)(struct mgmt_mesh_tx *mesh_tx, void *data), + void *data, struct sock *sk) +{ + struct mgmt_mesh_tx *mesh_tx, *tmp; + + list_for_each_entry_safe(mesh_tx, tmp, &hdev->mgmt_pending, list) { + if (!sk || mesh_tx->sk == sk) + cb(mesh_tx, data); + } +} + +struct mgmt_mesh_tx *mgmt_mesh_next(struct hci_dev *hdev, struct sock *sk) +{ + struct mgmt_mesh_tx *mesh_tx; + + if (list_empty(&hdev->mesh_pending)) + return NULL; + + list_for_each_entry(mesh_tx, &hdev->mesh_pending, list) { + if (!sk || mesh_tx->sk == sk) + return mesh_tx; + } + + return NULL; +} + +struct mgmt_mesh_tx *mgmt_mesh_find(struct hci_dev *hdev, u8 handle) +{ + struct mgmt_mesh_tx *mesh_tx; + + if (list_empty(&hdev->mesh_pending)) + return NULL; + + list_for_each_entry(mesh_tx, &hdev->mesh_pending, list) { + if (mesh_tx->handle == handle) + return mesh_tx; + } + + return NULL; +} + +struct mgmt_mesh_tx *mgmt_mesh_add(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len) +{ + struct mgmt_mesh_tx *mesh_tx; + + mesh_tx = kzalloc(sizeof(*mesh_tx), GFP_KERNEL); + if (!mesh_tx) + return NULL; + + hdev->mesh_send_ref++; + if (!hdev->mesh_send_ref) + hdev->mesh_send_ref++; + + mesh_tx->handle = hdev->mesh_send_ref; + mesh_tx->index = hdev->id; + memcpy(mesh_tx->param, data, len); + mesh_tx->param_len = len; + mesh_tx->sk = sk; + sock_hold(sk); + + list_add_tail(&mesh_tx->list, &hdev->mesh_pending); + + return mesh_tx; +} + +void mgmt_mesh_remove(struct mgmt_mesh_tx *mesh_tx) +{ + list_del(&mesh_tx->list); + sock_put(mesh_tx->sk); + kfree(mesh_tx); +} diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h index 98e40395a383..6a8b7e84293d 100644 --- a/net/bluetooth/mgmt_util.h +++ b/net/bluetooth/mgmt_util.h @@ -20,6 +20,16 @@ SOFTWARE IS DISCLAIMED. 
*/ +struct mgmt_mesh_tx { + struct list_head list; + int index; + size_t param_len; + struct sock *sk; + u8 handle; + u8 instance; + u8 param[sizeof(struct mgmt_cp_mesh_send) + 29]; +}; + struct mgmt_pending_cmd { struct list_head list; u16 opcode; @@ -59,3 +69,11 @@ struct mgmt_pending_cmd *mgmt_pending_new(struct sock *sk, u16 opcode, void *data, u16 len); void mgmt_pending_free(struct mgmt_pending_cmd *cmd); void mgmt_pending_remove(struct mgmt_pending_cmd *cmd); +void mgmt_mesh_foreach(struct hci_dev *hdev, + void (*cb)(struct mgmt_mesh_tx *mesh_tx, void *data), + void *data, struct sock *sk); +struct mgmt_mesh_tx *mgmt_mesh_find(struct hci_dev *hdev, u8 handle); +struct mgmt_mesh_tx *mgmt_mesh_next(struct hci_dev *hdev, struct sock *sk); +struct mgmt_mesh_tx *mgmt_mesh_add(struct sock *sk, struct hci_dev *hdev, + void *data, u16 len); +void mgmt_mesh_remove(struct mgmt_mesh_tx *mesh_tx); diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 4bf4ea6cbb5e..21e24da4847f 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -902,7 +902,10 @@ static int rfcomm_sock_shutdown(struct socket *sock, int how) lock_sock(sk); if (!sk->sk_shutdown) { sk->sk_shutdown = SHUTDOWN_MASK; + + release_sock(sk); __rfcomm_sock_close(sk); + lock_sock(sk); if (sock_flag(sk, SOCK_LINGER) && sk->sk_lingertime && !(current->flags & PF_EXITING)) diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c index efbd93e92ce2..228fd5b20f10 100644 --- a/net/bridge/br_if.c +++ b/net/bridge/br_if.c @@ -40,12 +40,21 @@ static int port_cost(struct net_device *dev) switch (ecmd.base.speed) { case SPEED_10000: return 2; - case SPEED_1000: + case SPEED_5000: + return 3; + case SPEED_2500: return 4; + case SPEED_1000: + return 5; case SPEED_100: return 19; case SPEED_10: return 100; + case SPEED_UNKNOWN: + return 100; + default: + if (ecmd.base.speed > SPEED_10000) + return 1; } } diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index ff4779036649..f20f4373ff40 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -384,6 +384,7 @@ static int br_nf_pre_routing_finish(struct net *net, struct sock *sk, struct sk_ /* - Bridged-and-DNAT'ed traffic doesn't * require ip_forwarding. 
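 *
 * (Editor's aside, not part of the patch: skb_dst_set() does not release
 * a dst that is already attached, so the fix below drops the stale entry
 * before installing the new one:
 *
 *	skb_dst_drop(skb);		// put the old route, if any
 *	skb_dst_set(skb, &rt->dst);	// attach the DNAT'ed route
 *
 * without the drop, the old dst reference would leak.)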
*/ if (rt->dst.dev == dev) { + skb_dst_drop(skb); skb_dst_set(skb, &rt->dst); goto bridged_dnat; } @@ -413,6 +414,7 @@ bridged_dnat: kfree_skb(skb); return 0; } + skb_dst_drop(skb); skb_dst_set_noref(skb, &rt->dst); } diff --git a/net/bridge/br_netfilter_ipv6.c b/net/bridge/br_netfilter_ipv6.c index e4e0c836c3f5..6b07f30675bb 100644 --- a/net/bridge/br_netfilter_ipv6.c +++ b/net/bridge/br_netfilter_ipv6.c @@ -197,6 +197,7 @@ static int br_nf_pre_routing_finish_ipv6(struct net *net, struct sock *sk, struc kfree_skb(skb); return 0; } + skb_dst_drop(skb); skb_dst_set_noref(skb, &rt->dst); } diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index 8f6639e095a0..ce5dfa3babd2 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -1040,8 +1040,10 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl, goto free_iterate; } - if (repl->valid_hooks != t->valid_hooks) + if (repl->valid_hooks != t->valid_hooks) { + ret = -EINVAL; goto free_unlock; + } if (repl->num_counters && repl->num_counters != t->private->nentries) { ret = -EINVAL; diff --git a/net/can/af_can.c b/net/can/af_can.c index 1fb49d51b25d..9503ab10f9b8 100644 --- a/net/can/af_can.c +++ b/net/can/af_can.c @@ -199,27 +199,26 @@ static int can_create(struct net *net, struct socket *sock, int protocol, int can_send(struct sk_buff *skb, int loop) { struct sk_buff *newskb = NULL; - struct canfd_frame *cfd = (struct canfd_frame *)skb->data; struct can_pkg_stats *pkg_stats = dev_net(skb->dev)->can.pkg_stats; int err = -EINVAL; - if (skb->len == CAN_MTU) { + if (can_is_canxl_skb(skb)) { + skb->protocol = htons(ETH_P_CANXL); + } else if (can_is_can_skb(skb)) { skb->protocol = htons(ETH_P_CAN); - if (unlikely(cfd->len > CAN_MAX_DLEN)) - goto inval_skb; - } else if (skb->len == CANFD_MTU) { + } else if (can_is_canfd_skb(skb)) { + struct canfd_frame *cfd = (struct canfd_frame *)skb->data; + skb->protocol = htons(ETH_P_CANFD); - if (unlikely(cfd->len > CANFD_MAX_DLEN)) - goto inval_skb; + + /* set CAN FD flag for CAN FD frames by default */ + cfd->flags |= CANFD_FDF; } else { goto inval_skb; } - /* Make sure the CAN frame can pass the selected CAN netdevice. - * As structs can_frame and canfd_frame are similar, we can provide - * CAN FD frames to legacy CAN drivers as long as the length is <= 8 - */ - if (unlikely(skb->len > skb->dev->mtu && cfd->len > CAN_MAX_DLEN)) { + /* Make sure the CAN frame can pass the selected CAN netdevice. */ + if (unlikely(skb->len > skb->dev->mtu)) { err = -EMSGSIZE; goto inval_skb; } @@ -678,53 +677,46 @@ static void can_receive(struct sk_buff *skb, struct net_device *dev) static int can_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - - if (unlikely(dev->type != ARPHRD_CAN || skb->len != CAN_MTU)) { + if (unlikely(dev->type != ARPHRD_CAN || (!can_is_can_skb(skb)))) { pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d\n", dev->type, skb->len); - goto free_skb; - } - /* This check is made separately since cfd->len would be uninitialized if skb->len = 0. 
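 *
 * (Editor's note: the rewrite below folds this length check and the
 * preceding MTU comparison into a single helper, conceptually
 *
 *	can_is_can_skb(skb) == (skb->len == CAN_MTU &&
 *				cfd->len <= CAN_MAX_DLEN)
 *
 * so can_rcv() needs only one helper call to reject malformed skbs.)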
*/ - if (unlikely(cfd->len > CAN_MAX_DLEN)) { - pr_warn_once("PF_CAN: dropped non conform CAN skbuff: dev type %d, len %d, datalen %d\n", - dev->type, skb->len, cfd->len); - goto free_skb; + kfree_skb(skb); + return NET_RX_DROP; } can_receive(skb, dev); return NET_RX_SUCCESS; - -free_skb: - kfree_skb(skb); - return NET_RX_DROP; } static int canfd_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { - struct canfd_frame *cfd = (struct canfd_frame *)skb->data; - - if (unlikely(dev->type != ARPHRD_CAN || skb->len != CANFD_MTU)) { + if (unlikely(dev->type != ARPHRD_CAN || (!can_is_canfd_skb(skb)))) { pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d\n", dev->type, skb->len); - goto free_skb; - } - /* This check is made separately since cfd->len would be uninitialized if skb->len = 0. */ - if (unlikely(cfd->len > CANFD_MAX_DLEN)) { - pr_warn_once("PF_CAN: dropped non conform CAN FD skbuff: dev type %d, len %d, datalen %d\n", - dev->type, skb->len, cfd->len); - goto free_skb; + kfree_skb(skb); + return NET_RX_DROP; } can_receive(skb, dev); return NET_RX_SUCCESS; +} -free_skb: - kfree_skb(skb); - return NET_RX_DROP; +static int canxl_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + if (unlikely(dev->type != ARPHRD_CAN || (!can_is_canxl_skb(skb)))) { + pr_warn_once("PF_CAN: dropped non conform CAN XL skbuff: dev type %d, len %d\n", + dev->type, skb->len); + + kfree_skb(skb); + return NET_RX_DROP; + } + + can_receive(skb, dev); + return NET_RX_SUCCESS; } /* af_can protocol functions */ @@ -851,6 +843,11 @@ static struct packet_type canfd_packet __read_mostly = { .func = canfd_rcv, }; +static struct packet_type canxl_packet __read_mostly = { + .type = cpu_to_be16(ETH_P_CANXL), + .func = canxl_rcv, +}; + static const struct net_proto_family can_family_ops = { .family = PF_CAN, .create = can_create, @@ -890,6 +887,7 @@ static __init int can_init(void) dev_add_pack(&can_packet); dev_add_pack(&canfd_packet); + dev_add_pack(&canxl_packet); return 0; diff --git a/net/can/bcm.c b/net/can/bcm.c index e60161bec850..27706f6ace34 100644 --- a/net/can/bcm.c +++ b/net/can/bcm.c @@ -274,6 +274,7 @@ static void bcm_can_tx(struct bcm_op *op) struct sk_buff *skb; struct net_device *dev; struct canfd_frame *cf = op->frames + op->cfsiz * op->currframe; + int err; /* no target device? => exit */ if (!op->ifindex) @@ -298,11 +299,11 @@ static void bcm_can_tx(struct bcm_op *op) /* send with loopback */ skb->dev = dev; can_skb_set_owner(skb, op->sk); - can_send(skb, 1); + err = can_send(skb, 1); + if (!err) + op->frames_abs++; - /* update statistics */ op->currframe++; - op->frames_abs++; /* reached last frame? 
*/ if (op->currframe >= op->nframes) @@ -648,8 +649,13 @@ static void bcm_rx_handler(struct sk_buff *skb, void *data) return; /* make sure to handle the correct frame type (CAN / CAN FD) */ - if (skb->len != op->cfsiz) - return; + if (op->flags & CAN_FD_FRAME) { + if (!can_is_canfd_skb(skb)) + return; + } else { + if (!can_is_can_skb(skb)) + return; + } /* disable timeout */ hrtimer_cancel(&op->timer); @@ -1744,15 +1750,27 @@ static int __init bcm_module_init(void) pr_info("can: broadcast manager protocol\n"); + err = register_pernet_subsys(&canbcm_pernet_ops); + if (err) + return err; + + err = register_netdevice_notifier(&canbcm_notifier); + if (err) + goto register_notifier_failed; + err = can_proto_register(&bcm_can_proto); if (err < 0) { printk(KERN_ERR "can: registration of bcm protocol failed\n"); - return err; + goto register_proto_failed; } - register_pernet_subsys(&canbcm_pernet_ops); - register_netdevice_notifier(&canbcm_notifier); return 0; + +register_proto_failed: + unregister_netdevice_notifier(&canbcm_notifier); +register_notifier_failed: + unregister_pernet_subsys(&canbcm_pernet_ops); + return err; } static void __exit bcm_module_exit(void) diff --git a/net/can/gw.c b/net/can/gw.c index 1ea4cc527db3..23a3d89cad81 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -463,10 +463,10 @@ static void can_can_gw_rcv(struct sk_buff *skb, void *data) /* process strictly Classic CAN or CAN FD frames */ if (gwj->flags & CGW_FLAGS_CAN_FD) { - if (skb->len != CANFD_MTU) + if (!can_is_canfd_skb(skb)) return; } else { - if (skb->len != CAN_MTU) + if (!can_is_can_skb(skb)) return; } diff --git a/net/can/isotp.c b/net/can/isotp.c index 43a27d19cdac..a9d1357f8489 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -669,7 +669,7 @@ static void isotp_rcv(struct sk_buff *skb, void *data) if (cf->len <= CAN_MAX_DLEN) { isotp_rcv_sf(sk, cf, SF_PCI_SZ4 + ae, skb, sf_dl); } else { - if (skb->len == CANFD_MTU) { + if (can_is_canfd_skb(skb)) { /* We have a CAN FD frame and CAN_DL is greater than 8: * Only frames with the SF_DL == 0 ESC value are valid. * diff --git a/net/can/j1939/main.c b/net/can/j1939/main.c index 8452b0fbb78c..144c86b0e3ff 100644 --- a/net/can/j1939/main.c +++ b/net/can/j1939/main.c @@ -42,6 +42,10 @@ static void j1939_can_recv(struct sk_buff *iskb, void *data) struct j1939_sk_buff_cb *skcb, *iskcb; struct can_frame *cf; + /* make sure we only get Classical CAN frames */ + if (!can_is_can_skb(iskb)) + return; + /* create a copy of the skb * j1939 only delivers the real data bytes, * the header goes into sockaddr. 
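(Editor's interlude: the CAN hunks above all replace raw skb->len
comparisons with frame-type helpers that also understand the new CAN XL
layout. A minimal sketch of the dispatch they enable, illustrative
only; the in-tree can_is_can_skb()/can_is_canfd_skb()/can_is_canxl_skb()
helpers additionally validate the embedded length fields:

	/* Hypothetical classifier built on the helpers used above. */
	static __be16 sketch_can_proto(const struct sk_buff *skb)
	{
		if (can_is_canxl_skb(skb))
			return htons(ETH_P_CANXL);
		if (can_is_canfd_skb(skb))
			return htons(ETH_P_CANFD);
		if (can_is_can_skb(skb))
			return htons(ETH_P_CAN);
		return 0;	/* malformed: reject, as can_send() does */
	}

This mirrors the can_send() rewrite: classify once, set skb->protocol
accordingly, and let each per-type receive handler reject anything that
does not match its expected layout. The raw.c changes below apply the
same helpers on the socket side.)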
diff --git a/net/can/raw.c b/net/can/raw.c index d1bd9cc51ebe..3eb7d3e2b541 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -50,6 +50,7 @@ #include <linux/skbuff.h> #include <linux/can.h> #include <linux/can/core.h> +#include <linux/can/dev.h> /* for can_is_canxl_dev_mtu() */ #include <linux/can/skb.h> #include <linux/can/raw.h> #include <net/sock.h> @@ -87,6 +88,7 @@ struct raw_sock { int loopback; int recv_own_msgs; int fd_frames; + int xl_frames; int join_filters; int count; /* number of active filters */ struct can_filter dfilter; /* default/single filter */ @@ -129,21 +131,21 @@ static void raw_rcv(struct sk_buff *oskb, void *data) if (!ro->recv_own_msgs && oskb->sk == sk) return; - /* do not pass non-CAN2.0 frames to a legacy socket */ - if (!ro->fd_frames && oskb->len != CAN_MTU) + /* make sure to not pass oversized frames to the socket */ + if ((can_is_canfd_skb(oskb) && !ro->fd_frames && !ro->xl_frames) || + (can_is_canxl_skb(oskb) && !ro->xl_frames)) return; /* eliminate multiple filter matches for the same skb */ if (this_cpu_ptr(ro->uniq)->skb == oskb && this_cpu_ptr(ro->uniq)->skbcnt == can_skb_prv(oskb)->skbcnt) { - if (ro->join_filters) { - this_cpu_inc(ro->uniq->join_rx_count); - /* drop frame until all enabled filters matched */ - if (this_cpu_ptr(ro->uniq)->join_rx_count < ro->count) - return; - } else { + if (!ro->join_filters) + return; + + this_cpu_inc(ro->uniq->join_rx_count); + /* drop frame until all enabled filters matched */ + if (this_cpu_ptr(ro->uniq)->join_rx_count < ro->count) return; - } } else { this_cpu_ptr(ro->uniq)->skb = oskb; this_cpu_ptr(ro->uniq)->skbcnt = can_skb_prv(oskb)->skbcnt; @@ -346,6 +348,7 @@ static int raw_init(struct sock *sk) ro->loopback = 1; ro->recv_own_msgs = 0; ro->fd_frames = 0; + ro->xl_frames = 0; ro->join_filters = 0; /* alloc_percpu provides zero'ed memory */ @@ -669,6 +672,15 @@ static int raw_setsockopt(struct socket *sock, int level, int optname, break; + case CAN_RAW_XL_FRAMES: + if (optlen != sizeof(ro->xl_frames)) + return -EINVAL; + + if (copy_from_sockptr(&ro->xl_frames, optval, optlen)) + return -EFAULT; + + break; + case CAN_RAW_JOIN_FILTERS: if (optlen != sizeof(ro->join_filters)) return -EINVAL; @@ -751,6 +763,12 @@ static int raw_getsockopt(struct socket *sock, int level, int optname, val = &ro->fd_frames; break; + case CAN_RAW_XL_FRAMES: + if (len > sizeof(int)) + len = sizeof(int); + val = &ro->xl_frames; + break; + case CAN_RAW_JOIN_FILTERS: if (len > sizeof(int)) len = sizeof(int); @@ -776,7 +794,11 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) struct sk_buff *skb; struct net_device *dev; int ifindex; - int err; + int err = -EINVAL; + + /* check for valid CAN frame sizes */ + if (size < CANXL_HDR_SIZE + CANXL_MIN_DLEN || size > CANXL_MTU) + return -EINVAL; if (msg->msg_name) { DECLARE_SOCKADDR(struct sockaddr_can *, addr, msg->msg_name); @@ -796,15 +818,6 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) if (!dev) return -ENXIO; - err = -EINVAL; - if (ro->fd_frames && dev->mtu == CANFD_MTU) { - if (unlikely(size != CANFD_MTU && size != CAN_MTU)) - goto put_dev; - } else { - if (unlikely(size != CAN_MTU)) - goto put_dev; - } - skb = sock_alloc_send_skb(sk, size + sizeof(struct can_skb_priv), msg->msg_flags & MSG_DONTWAIT, &err); if (!skb) @@ -814,10 +827,27 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size) can_skb_prv(skb)->ifindex = dev->ifindex; can_skb_prv(skb)->skbcnt = 0; + /* fill the skb before testing for valid 
CAN frames */ err = memcpy_from_msg(skb_put(skb, size), msg, size); if (err < 0) goto free_skb; + err = -EINVAL; + if (ro->xl_frames && can_is_canxl_dev_mtu(dev->mtu)) { + /* CAN XL, CAN FD and Classical CAN */ + if (!can_is_canxl_skb(skb) && !can_is_canfd_skb(skb) && + !can_is_can_skb(skb)) + goto free_skb; + } else if (ro->fd_frames && dev->mtu == CANFD_MTU) { + /* CAN FD and Classical CAN */ + if (!can_is_canfd_skb(skb) && !can_is_can_skb(skb)) + goto free_skb; + } else { + /* Classical CAN */ + if (!can_is_can_skb(skb)) + goto free_skb; + } + sockcm_init(&sockc, sk); if (msg->msg_controllen) { err = sock_cmsg_send(sk, msg, &sockc); @@ -942,12 +972,20 @@ static __init int raw_module_init(void) pr_info("can: raw protocol\n"); + err = register_netdevice_notifier(&canraw_notifier); + if (err) + return err; + err = can_proto_register(&raw_can_proto); - if (err < 0) + if (err < 0) { pr_err("can: registration of raw protocol failed\n"); - else - register_netdevice_notifier(&canraw_notifier); + goto register_proto_failed; + } + + return 0; +register_proto_failed: + unregister_netdevice_notifier(&canraw_notifier); return err; } diff --git a/net/compat.c b/net/compat.c index fe9be3c56ef7..385f04a6be2f 100644 --- a/net/compat.c +++ b/net/compat.c @@ -52,6 +52,7 @@ int __get_compat_msghdr(struct msghdr *kmsg, kmsg->msg_namelen = sizeof(struct sockaddr_storage); kmsg->msg_control_is_user = true; + kmsg->msg_get_inq = 0; kmsg->msg_control_user = compat_ptr(msg->msg_control); kmsg->msg_controllen = msg->msg_controllen; diff --git a/net/core/.gitignore b/net/core/.gitignore deleted file mode 100644 index df1e74372cce..000000000000 --- a/net/core/.gitignore +++ /dev/null @@ -1 +0,0 @@ -dropreason_str.c diff --git a/net/core/Makefile b/net/core/Makefile index e8ce3bd283a6..5857cec87b83 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -5,7 +5,7 @@ obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ gen_stats.o gen_estimator.o net_namespace.o secure_seq.o \ - flow_dissector.o dropreason_str.o + flow_dissector.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o @@ -40,23 +40,3 @@ obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o obj-$(CONFIG_BPF_SYSCALL) += sock_map.o obj-$(CONFIG_BPF_SYSCALL) += bpf_sk_storage.o obj-$(CONFIG_OF) += of_net.o - -clean-files := dropreason_str.c - -quiet_cmd_dropreason_str = GEN $@ -cmd_dropreason_str = awk -F ',' 'BEGIN{ print "\#include <net/dropreason.h>\n"; \ - print "const char * const drop_reasons[] = {" }\ - /^enum skb_drop/ { dr=1; }\ - /^\};/ { dr=0; }\ - /^\tSKB_DROP_REASON_/ {\ - if (dr) {\ - sub(/\tSKB_DROP_REASON_/, "", $$1);\ - printf "\t[SKB_DROP_REASON_%s] = \"%s\",\n", $$1, $$1;\ - }\ - }\ - END{ print "};" }' $< > $@ - -$(obj)/dropreason_str.c: $(srctree)/include/net/dropreason.h - $(call cmd,dropreason_str) - -$(obj)/dropreason_str.o: $(obj)/dropreason_str.c diff --git a/net/core/datagram.c b/net/core/datagram.c index 7255531f63ae..e4ff2db40c98 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -677,7 +677,7 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, page_ref_sub(last_head, refs); refs = 0; } - skb_fill_page_desc(skb, frag++, head, start, size); + skb_fill_page_desc_noacc(skb, frag++, head, start, size); } if (refs) page_ref_sub(last_head, refs); diff --git a/net/core/dev.c b/net/core/dev.c index d66c73c1c734..fa53830d0683 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6358,23 +6358,6 @@ int dev_set_threaded(struct net_device *dev, bool threaded) } EXPORT_SYMBOL(dev_set_threaded); -/* Double check that 
napi_get_frags() allocates skbs with - * skb->head being backed by slab, not a page fragment. - * This is to make sure bug fixed in 3226b158e67c - * ("net: avoid 32 x truesize under-estimation for tiny skbs") - * does not accidentally come back. - */ -static void napi_get_frags_check(struct napi_struct *napi) -{ - struct sk_buff *skb; - - local_bh_disable(); - skb = napi_get_frags(napi); - WARN_ON_ONCE(skb && skb->head_frag); - napi_free_frags(napi); - local_bh_enable(); -} - void netif_napi_add_weight(struct net_device *dev, struct napi_struct *napi, int (*poll)(struct napi_struct *, int), int weight) { diff --git a/net/core/devlink.c b/net/core/devlink.c index 7776dc82f88d..89baa7c0938b 100644 --- a/net/core/devlink.c +++ b/net/core/devlink.c @@ -371,6 +371,13 @@ static struct devlink *devlink_get_from_attrs(struct net *net, return ERR_PTR(-ENODEV); } +#define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) \ + WARN_ON_ONCE(!(devlink_port)->registered) +#define ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port) \ + WARN_ON_ONCE((devlink_port)->registered) +#define ASSERT_DEVLINK_PORT_INITIALIZED(devlink_port) \ + WARN_ON_ONCE(!(devlink_port)->initialized) + static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink, unsigned int port_index) { @@ -9848,6 +9855,44 @@ static void devlink_port_type_warn_cancel(struct devlink_port *devlink_port) } /** + * devlink_port_init() - Init devlink port + * + * @devlink: devlink + * @devlink_port: devlink port + * + * Initialize essential stuff that is needed for functions + * that may be called before devlink port registration. + * Call to this function is optional and not needed + * in case the driver does not use such functions. + */ +void devlink_port_init(struct devlink *devlink, + struct devlink_port *devlink_port) +{ + if (devlink_port->initialized) + return; + devlink_port->devlink = devlink; + INIT_LIST_HEAD(&devlink_port->region_list); + devlink_port->initialized = true; +} +EXPORT_SYMBOL_GPL(devlink_port_init); + +/** + * devlink_port_fini() - Deinitialize devlink port + * + * @devlink_port: devlink port + * + * Deinitialize essential stuff that is in use for functions + * that may be called after devlink port unregistration. + * Call to this function is optional and not needed + * in case the driver does not use such functions.
+ */ +void devlink_port_fini(struct devlink_port *devlink_port) +{ + WARN_ON(!list_empty(&devlink_port->region_list)); +} +EXPORT_SYMBOL_GPL(devlink_port_fini); + +/** * devl_port_register() - Register devlink port * * @devlink: devlink @@ -9869,14 +9914,15 @@ int devl_port_register(struct devlink *devlink, if (devlink_port_index_exists(devlink, port_index)) return -EEXIST; - WARN_ON(devlink_port->devlink); - devlink_port->devlink = devlink; + ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port); + + devlink_port_init(devlink, devlink_port); + devlink_port->registered = true; devlink_port->index = port_index; spin_lock_init(&devlink_port->type_lock); INIT_LIST_HEAD(&devlink_port->reporter_list); mutex_init(&devlink_port->reporters_lock); list_add_tail(&devlink_port->list, &devlink->port_list); - INIT_LIST_HEAD(&devlink_port->region_list); INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn); devlink_port_type_warn_schedule(devlink_port); @@ -9926,8 +9972,8 @@ void devl_port_unregister(struct devlink_port *devlink_port) devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL); list_del(&devlink_port->list); WARN_ON(!list_empty(&devlink_port->reporter_list)); - WARN_ON(!list_empty(&devlink_port->region_list)); mutex_destroy(&devlink_port->reporters_lock); + devlink_port->registered = false; } EXPORT_SYMBOL_GPL(devl_port_unregister); @@ -9952,8 +9998,8 @@ static void __devlink_port_type_set(struct devlink_port *devlink_port, enum devlink_port_type type, void *type_dev) { - if (WARN_ON(!devlink_port->devlink)) - return; + ASSERT_DEVLINK_PORT_REGISTERED(devlink_port); + devlink_port_type_warn_cancel(devlink_port); spin_lock_bh(&devlink_port->type_lock); devlink_port->type = type; @@ -10072,8 +10118,8 @@ void devlink_port_attrs_set(struct devlink_port *devlink_port, { int ret; - if (WARN_ON(devlink_port->devlink)) - return; + ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port); + devlink_port->attrs = *attrs; ret = __devlink_port_attrs_set(devlink_port, attrs->flavour); if (ret) @@ -10096,8 +10142,8 @@ void devlink_port_attrs_pci_pf_set(struct devlink_port *devlink_port, u32 contro struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; - if (WARN_ON(devlink_port->devlink)) - return; + ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port); + ret = __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_PF); if (ret) @@ -10123,8 +10169,8 @@ void devlink_port_attrs_pci_vf_set(struct devlink_port *devlink_port, u32 contro struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; - if (WARN_ON(devlink_port->devlink)) - return; + ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port); + ret = __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_VF); if (ret) @@ -10151,8 +10197,8 @@ void devlink_port_attrs_pci_sf_set(struct devlink_port *devlink_port, u32 contro struct devlink_port_attrs *attrs = &devlink_port->attrs; int ret; - if (WARN_ON(devlink_port->devlink)) - return; + ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port); + ret = __devlink_port_attrs_set(devlink_port, DEVLINK_PORT_FLAVOUR_PCI_SF); if (ret) @@ -10267,8 +10313,8 @@ EXPORT_SYMBOL_GPL(devl_rate_nodes_destroy); void devlink_port_linecard_set(struct devlink_port *devlink_port, struct devlink_linecard *linecard) { - if (WARN_ON(devlink_port->devlink)) - return; + ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port); + devlink_port->linecard = linecard; } EXPORT_SYMBOL_GPL(devlink_port_linecard_set); @@ -11339,6 +11385,8 @@ devlink_port_region_create(struct devlink_port *port, struct devlink_region *region; 
int err = 0; + ASSERT_DEVLINK_PORT_INITIALIZED(port); + if (WARN_ON(!ops) || WARN_ON(!ops->destructor)) return ERR_PTR(-EINVAL); diff --git a/net/core/filter.c b/net/core/filter.c index ac4c45c02da5..bb0136e7a8e4 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -6412,6 +6412,7 @@ static const struct bpf_func_proto bpf_lwt_seg6_adjust_srh_proto = { static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, int dif, int sdif, u8 family, u8 proto) { + struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo; bool refcounted = false; struct sock *sk = NULL; @@ -6420,7 +6421,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, __be32 dst4 = tuple->ipv4.daddr; if (proto == IPPROTO_TCP) - sk = __inet_lookup(net, &tcp_hashinfo, NULL, 0, + sk = __inet_lookup(net, hinfo, NULL, 0, src4, tuple->ipv4.sport, dst4, tuple->ipv4.dport, dif, sdif, &refcounted); @@ -6434,7 +6435,7 @@ static struct sock *sk_lookup(struct net *net, struct bpf_sock_tuple *tuple, struct in6_addr *dst6 = (struct in6_addr *)&tuple->ipv6.daddr; if (proto == IPPROTO_TCP) - sk = __inet6_lookup(net, &tcp_hashinfo, NULL, 0, + sk = __inet6_lookup(net, hinfo, NULL, 0, src6, tuple->ipv6.sport, dst6, ntohs(tuple->ipv6.dport), dif, sdif, &refcounted); diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 990429c69ccd..25cd35f5922e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -204,6 +204,30 @@ static void __skb_flow_dissect_icmp(const struct sk_buff *skb, skb_flow_get_icmp_tci(skb, key_icmp, data, thoff, hlen); } +static void __skb_flow_dissect_l2tpv3(const struct sk_buff *skb, + struct flow_dissector *flow_dissector, + void *target_container, const void *data, + int nhoff, int hlen) +{ + struct flow_dissector_key_l2tpv3 *key_l2tpv3; + struct { + __be32 session_id; + } *hdr, _hdr; + + if (!dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_L2TPV3)) + return; + + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); + if (!hdr) + return; + + key_l2tpv3 = skb_flow_dissector_target(flow_dissector, + FLOW_DISSECTOR_KEY_L2TPV3, + target_container); + + key_l2tpv3->session_id = hdr->session_id; +} + void skb_flow_dissect_meta(const struct sk_buff *skb, struct flow_dissector *flow_dissector, void *target_container) @@ -1177,8 +1201,8 @@ proto_again: nhoff += sizeof(*vlan); } - if (dissector_uses_key(flow_dissector, - FLOW_DISSECTOR_KEY_NUM_OF_VLANS)) { + if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_NUM_OF_VLANS) && + !(key_control->flags & FLOW_DIS_ENCAPSULATION)) { struct flow_dissector_key_num_of_vlans *key_nvs; key_nvs = skb_flow_dissector_target(flow_dissector, @@ -1501,6 +1525,10 @@ ip_proto_again: __skb_flow_dissect_icmp(skb, flow_dissector, target_container, data, nhoff, hlen); break; + case IPPROTO_L2TP: + __skb_flow_dissect_l2tpv3(skb, flow_dissector, target_container, + data, nhoff, hlen); + break; default: break; @@ -1615,9 +1643,8 @@ static inline void __flow_hash_consistentify(struct flow_keys *keys) switch (keys->control.addr_type) { case FLOW_DISSECTOR_KEY_IPV4_ADDRS: - addr_diff = (__force u32)keys->addrs.v4addrs.dst - - (__force u32)keys->addrs.v4addrs.src; - if (addr_diff < 0) + if ((__force u32)keys->addrs.v4addrs.dst < + (__force u32)keys->addrs.v4addrs.src) swap(keys->addrs.v4addrs.src, keys->addrs.v4addrs.dst); if ((__force u16)keys->ports.dst < diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index 8cfb63528d18..abe423fd5736 100644 --- a/net/core/flow_offload.c +++ 
b/net/core/flow_offload.c @@ -237,6 +237,13 @@ void flow_rule_match_pppoe(const struct flow_rule *rule, } EXPORT_SYMBOL(flow_rule_match_pppoe); +void flow_rule_match_l2tpv3(const struct flow_rule *rule, + struct flow_match_l2tpv3 *out) +{ + FLOW_DISSECTOR_MATCH(rule, FLOW_DISSECTOR_KEY_L2TPV3, out); +} +EXPORT_SYMBOL(flow_rule_match_l2tpv3); + struct flow_block_cb *flow_block_cb_alloc(flow_setup_cb_t *cb, void *cb_ident, void *cb_priv, void (*release)(void *cb_priv)) diff --git a/net/core/gro.c b/net/core/gro.c index b4190eb08467..bc9451743307 100644 --- a/net/core/gro.c +++ b/net/core/gro.c @@ -160,6 +160,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) unsigned int gro_max_size; unsigned int new_truesize; struct sk_buff *lp; + int segs; /* pairs with WRITE_ONCE() in netif_set_gro_max_size() */ gro_max_size = READ_ONCE(p->dev->gro_max_size); @@ -175,6 +176,7 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb) return -E2BIG; } + segs = NAPI_GRO_CB(skb)->count; lp = NAPI_GRO_CB(p)->last; pinfo = skb_shinfo(lp); @@ -265,7 +267,7 @@ merge: lp = p; done: - NAPI_GRO_CB(p)->count++; + NAPI_GRO_CB(p)->count += segs; p->data_len += len; p->truesize += delta_truesize; p->len += len; @@ -496,8 +498,15 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff BUILD_BUG_ON(!IS_ALIGNED(offsetof(struct napi_gro_cb, zeroed), sizeof(u32))); /* Avoid slow unaligned acc */ *(u32 *)&NAPI_GRO_CB(skb)->zeroed = 0; - NAPI_GRO_CB(skb)->flush = skb_is_gso(skb) || skb_has_frag_list(skb); + NAPI_GRO_CB(skb)->flush = skb_has_frag_list(skb); NAPI_GRO_CB(skb)->is_atomic = 1; + NAPI_GRO_CB(skb)->count = 1; + if (unlikely(skb_is_gso(skb))) { + NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs; + /* Only support TCP at the moment. 
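 *
 * (Editor's aside: paired with the segment accounting above, a packet
 * that arrives already GSO no longer counts as a single segment; the
 * merge path now effectively does
 *
 *	NAPI_GRO_CB(p)->count += NAPI_GRO_CB(skb)->count;
 *
 * so an input carrying gso_segs == N contributes N to the aggregate.)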
*/ + if (!skb_is_gso_tcp(skb)) + NAPI_GRO_CB(skb)->flush = 1; + } /* Setup for GRO checksum validation */ switch (skb->ip_summed) { @@ -545,10 +554,10 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff else gro_list->count++; - NAPI_GRO_CB(skb)->count = 1; NAPI_GRO_CB(skb)->age = jiffies; NAPI_GRO_CB(skb)->last = skb; - skb_shinfo(skb)->gso_size = skb_gro_len(skb); + if (!skb_is_gso(skb)) + skb_shinfo(skb)->gso_size = skb_gro_len(skb); list_add(&skb->list, &gro_list->list); ret = GRO_HELD; @@ -660,6 +669,7 @@ static void napi_reuse_skb(struct napi_struct *napi, struct sk_buff *skb) skb->encapsulation = 0; skb_shinfo(skb)->gso_type = 0; + skb_shinfo(skb)->gso_size = 0; if (unlikely(skb->slow_gro)) { skb_orphan(skb); skb_ext_reset(skb); diff --git a/net/core/gro_cells.c b/net/core/gro_cells.c index 21619c70a82b..ed5ec5de47f6 100644 --- a/net/core/gro_cells.c +++ b/net/core/gro_cells.c @@ -81,8 +81,7 @@ int gro_cells_init(struct gro_cells *gcells, struct net_device *dev) set_bit(NAPI_STATE_NO_BUSY_POLL, &cell->napi.state); - netif_napi_add(dev, &cell->napi, gro_cell_poll, - NAPI_POLL_WEIGHT); + netif_napi_add(dev, &cell->napi, gro_cell_poll); napi_enable(&cell->napi); } return 0; diff --git a/net/core/lwtunnel.c b/net/core/lwtunnel.c index 9ccd64e8a666..6fac2f0ef074 100644 --- a/net/core/lwtunnel.c +++ b/net/core/lwtunnel.c @@ -50,6 +50,7 @@ static const char *lwtunnel_encap_str(enum lwtunnel_encap_types encap_type) return "IOAM6"; case LWTUNNEL_ENCAP_IP6: case LWTUNNEL_ENCAP_IP: + case LWTUNNEL_ENCAP_XFRM: case LWTUNNEL_ENCAP_NONE: case __LWTUNNEL_ENCAP_MAX: /* should not have got here */ diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index d61afd21aab5..8409d41405df 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -59,7 +59,7 @@ static ssize_t netdev_show(const struct device *dev, #define NETDEVICE_SHOW(field, format_string) \ static ssize_t format_##field(const struct net_device *dev, char *buf) \ { \ - return sprintf(buf, format_string, dev->field); \ + return sysfs_emit(buf, format_string, dev->field); \ } \ static ssize_t field##_show(struct device *dev, \ struct device_attribute *attr, char *buf) \ @@ -118,13 +118,13 @@ static ssize_t iflink_show(struct device *dev, struct device_attribute *attr, { struct net_device *ndev = to_net_dev(dev); - return sprintf(buf, fmt_dec, dev_get_iflink(ndev)); + return sysfs_emit(buf, fmt_dec, dev_get_iflink(ndev)); } static DEVICE_ATTR_RO(iflink); static ssize_t format_name_assign_type(const struct net_device *dev, char *buf) { - return sprintf(buf, fmt_dec, dev->name_assign_type); + return sysfs_emit(buf, fmt_dec, dev->name_assign_type); } static ssize_t name_assign_type_show(struct device *dev, @@ -194,7 +194,7 @@ static ssize_t carrier_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); if (netif_running(netdev)) - return sprintf(buf, fmt_dec, !!netif_carrier_ok(netdev)); + return sysfs_emit(buf, fmt_dec, !!netif_carrier_ok(netdev)); return -EINVAL; } @@ -219,7 +219,7 @@ static ssize_t speed_show(struct device *dev, struct ethtool_link_ksettings cmd; if (!__ethtool_get_link_ksettings(netdev, &cmd)) - ret = sprintf(buf, fmt_dec, cmd.base.speed); + ret = sysfs_emit(buf, fmt_dec, cmd.base.speed); } rtnl_unlock(); return ret; @@ -258,7 +258,7 @@ static ssize_t duplex_show(struct device *dev, duplex = "unknown"; break; } - ret = sprintf(buf, "%s\n", duplex); + ret = sysfs_emit(buf, "%s\n", duplex); } } rtnl_unlock(); @@ -272,7 +272,7 @@ static ssize_t testing_show(struct device 
*dev, struct net_device *netdev = to_net_dev(dev); if (netif_running(netdev)) - return sprintf(buf, fmt_dec, !!netif_testing(netdev)); + return sysfs_emit(buf, fmt_dec, !!netif_testing(netdev)); return -EINVAL; } @@ -284,7 +284,7 @@ static ssize_t dormant_show(struct device *dev, struct net_device *netdev = to_net_dev(dev); if (netif_running(netdev)) - return sprintf(buf, fmt_dec, !!netif_dormant(netdev)); + return sysfs_emit(buf, fmt_dec, !!netif_dormant(netdev)); return -EINVAL; } @@ -315,7 +315,7 @@ static ssize_t operstate_show(struct device *dev, if (operstate >= ARRAY_SIZE(operstates)) return -EINVAL; /* should not happen */ - return sprintf(buf, "%s\n", operstates[operstate]); + return sysfs_emit(buf, "%s\n", operstates[operstate]); } static DEVICE_ATTR_RO(operstate); @@ -325,9 +325,9 @@ static ssize_t carrier_changes_show(struct device *dev, { struct net_device *netdev = to_net_dev(dev); - return sprintf(buf, fmt_dec, - atomic_read(&netdev->carrier_up_count) + - atomic_read(&netdev->carrier_down_count)); + return sysfs_emit(buf, fmt_dec, + atomic_read(&netdev->carrier_up_count) + + atomic_read(&netdev->carrier_down_count)); } static DEVICE_ATTR_RO(carrier_changes); @@ -337,7 +337,7 @@ static ssize_t carrier_up_count_show(struct device *dev, { struct net_device *netdev = to_net_dev(dev); - return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_up_count)); + return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_up_count)); } static DEVICE_ATTR_RO(carrier_up_count); @@ -347,7 +347,7 @@ static ssize_t carrier_down_count_show(struct device *dev, { struct net_device *netdev = to_net_dev(dev); - return sprintf(buf, fmt_dec, atomic_read(&netdev->carrier_down_count)); + return sysfs_emit(buf, fmt_dec, atomic_read(&netdev->carrier_down_count)); } static DEVICE_ATTR_RO(carrier_down_count); @@ -462,7 +462,7 @@ static ssize_t ifalias_show(struct device *dev, ret = dev_get_alias(netdev, tmp, sizeof(tmp)); if (ret > 0) - ret = sprintf(buf, "%s\n", tmp); + ret = sysfs_emit(buf, "%s\n", tmp); return ret; } static DEVICE_ATTR_RW(ifalias); @@ -514,7 +514,7 @@ static ssize_t phys_port_id_show(struct device *dev, ret = dev_get_phys_port_id(netdev, &ppid); if (!ret) - ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id); + ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); } rtnl_unlock(); @@ -543,7 +543,7 @@ static ssize_t phys_port_name_show(struct device *dev, ret = dev_get_phys_port_name(netdev, name, sizeof(name)); if (!ret) - ret = sprintf(buf, "%s\n", name); + ret = sysfs_emit(buf, "%s\n", name); } rtnl_unlock(); @@ -573,7 +573,7 @@ static ssize_t phys_switch_id_show(struct device *dev, ret = dev_get_port_parent_id(netdev, &ppid, false); if (!ret) - ret = sprintf(buf, "%*phN\n", ppid.id_len, ppid.id); + ret = sysfs_emit(buf, "%*phN\n", ppid.id_len, ppid.id); } rtnl_unlock(); @@ -591,7 +591,7 @@ static ssize_t threaded_show(struct device *dev, return restart_syscall(); if (dev_isalive(netdev)) - ret = sprintf(buf, fmt_dec, netdev->threaded); + ret = sysfs_emit(buf, fmt_dec, netdev->threaded); rtnl_unlock(); return ret; @@ -673,7 +673,7 @@ static ssize_t netstat_show(const struct device *d, struct rtnl_link_stats64 temp; const struct rtnl_link_stats64 *stats = dev_get_stats(dev, &temp); - ret = sprintf(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset)); + ret = sysfs_emit(buf, fmt_u64, *(u64 *)(((u8 *)stats) + offset)); } read_unlock(&dev_base_lock); return ret; @@ -824,7 +824,7 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue, char *buf) for (i = 0; i < map->len; i++) 
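	/* (Editor's aside on the sprintf() -> sysfs_emit() conversions in
	 * this file: sysfs_emit() is the bounds-checked helper for sysfs
	 * show() callbacks; it verifies that buf is the page-sized sysfs
	 * buffer and never formats past PAGE_SIZE, as in the hunk below:
	 *
	 *	len = sysfs_emit(buf, "%*pb\n", cpumask_pr_args(mask));
	 * ) */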
cpumask_set_cpu(map->cpus[i], mask); - len = snprintf(buf, PAGE_SIZE, "%*pb\n", cpumask_pr_args(mask)); + len = sysfs_emit(buf, "%*pb\n", cpumask_pr_args(mask)); rcu_read_unlock(); free_cpumask_var(mask); @@ -910,7 +910,7 @@ static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue, val = (unsigned long)flow_table->mask + 1; rcu_read_unlock(); - return sprintf(buf, "%lu\n", val); + return sysfs_emit(buf, "%lu\n", val); } static void rps_dev_flow_table_release(struct rcu_head *rcu) @@ -1208,7 +1208,7 @@ static ssize_t tx_timeout_show(struct netdev_queue *queue, char *buf) { unsigned long trans_timeout = atomic_long_read(&queue->trans_timeout); - return sprintf(buf, fmt_ulong, trans_timeout); + return sysfs_emit(buf, fmt_ulong, trans_timeout); } static unsigned int get_netdev_queue_index(struct netdev_queue *queue) @@ -1255,15 +1255,15 @@ static ssize_t traffic_class_show(struct netdev_queue *queue, * belongs to the root device it will be reported with just the * traffic class, so just "0" for TC 0 for example. */ - return num_tc < 0 ? sprintf(buf, "%d%d\n", tc, num_tc) : - sprintf(buf, "%d\n", tc); + return num_tc < 0 ? sysfs_emit(buf, "%d%d\n", tc, num_tc) : + sysfs_emit(buf, "%d\n", tc); } #ifdef CONFIG_XPS static ssize_t tx_maxrate_show(struct netdev_queue *queue, char *buf) { - return sprintf(buf, "%lu\n", queue->tx_maxrate); + return sysfs_emit(buf, "%lu\n", queue->tx_maxrate); } static ssize_t tx_maxrate_store(struct netdev_queue *queue, @@ -1317,7 +1317,7 @@ static struct netdev_queue_attribute queue_traffic_class __ro_after_init */ static ssize_t bql_show(char *buf, unsigned int value) { - return sprintf(buf, "%u\n", value); + return sysfs_emit(buf, "%u\n", value); } static ssize_t bql_set(const char *buf, const size_t count, @@ -1346,7 +1346,7 @@ static ssize_t bql_show_hold_time(struct netdev_queue *queue, { struct dql *dql = &queue->dql; - return sprintf(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time)); + return sysfs_emit(buf, "%u\n", jiffies_to_msecs(dql->slack_hold_time)); } static ssize_t bql_set_hold_time(struct netdev_queue *queue, @@ -1374,7 +1374,7 @@ static ssize_t bql_show_inflight(struct netdev_queue *queue, { struct dql *dql = &queue->dql; - return sprintf(buf, "%u\n", dql->num_queued - dql->num_completed); + return sysfs_emit(buf, "%u\n", dql->num_queued - dql->num_completed); } static struct netdev_queue_attribute bql_inflight_attribute __ro_after_init = diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 6b9f19122ec1..0ec2f5906a27 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -18,7 +18,6 @@ #include <linux/user_namespace.h> #include <linux/net_namespace.h> #include <linux/sched/task.h> -#include <linux/sched/mm.h> #include <linux/uidgid.h> #include <linux/cookie.h> @@ -1144,13 +1143,7 @@ static int __register_pernet_operations(struct list_head *list, * setup_net() and cleanup_net() are not possible. 
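 *
 * (Editor's note: the hunk below reverts the short-lived active-memcg
 * scoping around the per-net init, which had the shape
 *
 *	old = set_active_memcg(memcg);
 *	error = ops_init(ops, net);
 *	set_active_memcg(old);
 *
 * so allocations made by ops_init() are charged to the caller again.)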
*/ for_each_net(net) { - struct mem_cgroup *old, *memcg; - - memcg = mem_cgroup_or_root(get_mem_cgroup_from_obj(net)); - old = set_active_memcg(memcg); error = ops_init(ops, net); - set_active_memcg(old); - mem_cgroup_put(memcg); if (error) goto out_undo; list_add_tail(&net->exit_list, &net_exit_list); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index f5e87fe57c83..74864dc46a7e 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1057,6 +1057,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(4) /* IFLA_MASTER */ + nla_total_size(1) /* IFLA_CARRIER */ + nla_total_size(4) /* IFLA_PROMISCUITY */ + + nla_total_size(4) /* IFLA_ALLMULTI */ + nla_total_size(4) /* IFLA_NUM_TX_QUEUES */ + nla_total_size(4) /* IFLA_NUM_RX_QUEUES */ + nla_total_size(4) /* IFLA_GSO_MAX_SEGS */ @@ -1765,6 +1766,7 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, nla_put_u32(skb, IFLA_MAX_MTU, dev->max_mtu) || nla_put_u32(skb, IFLA_GROUP, dev->group) || nla_put_u32(skb, IFLA_PROMISCUITY, dev->promiscuity) || + nla_put_u32(skb, IFLA_ALLMULTI, dev->allmulti) || nla_put_u32(skb, IFLA_NUM_TX_QUEUES, dev->num_tx_queues) || nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) || nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) || @@ -1926,6 +1928,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_GRO_MAX_SIZE] = { .type = NLA_U32 }, [IFLA_TSO_MAX_SIZE] = { .type = NLA_REJECT }, [IFLA_TSO_MAX_SEGS] = { .type = NLA_REJECT }, + [IFLA_ALLMULTI] = { .type = NLA_REJECT }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -2774,13 +2777,6 @@ static int do_setlink(const struct sk_buff *skb, call_netdevice_notifiers(NETDEV_CHANGEADDR, dev); } - if (ifm->ifi_flags || ifm->ifi_change) { - err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), - extack); - if (err < 0) - goto errout; - } - if (tb[IFLA_MASTER]) { err = do_set_master(dev, nla_get_u32(tb[IFLA_MASTER]), extack); if (err) @@ -2788,6 +2784,13 @@ static int do_setlink(const struct sk_buff *skb, status |= DO_SETLINK_MODIFIED; } + if (ifm->ifi_flags || ifm->ifi_change) { + err = dev_change_flags(dev, rtnl_dev_combine_flags(dev, ifm), + extack); + if (err < 0) + goto errout; + } + if (tb[IFLA_CARRIER]) { err = dev_change_carrier(dev, nla_get_u8(tb[IFLA_CARRIER])); if (err) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 48ecfbf29174..1d9719e72f9d 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -91,7 +91,11 @@ static struct kmem_cache *skbuff_ext_cache __ro_after_init; int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS; EXPORT_SYMBOL(sysctl_max_skb_frags); -/* The array 'drop_reasons' is auto-generated in dropreason_str.c */ +#undef FN +#define FN(reason) [SKB_DROP_REASON_##reason] = #reason, +const char * const drop_reasons[] = { + DEFINE_DROP_REASON(FN, FN) +}; EXPORT_SYMBOL(drop_reasons); /** @@ -130,8 +134,66 @@ static void skb_under_panic(struct sk_buff *skb, unsigned int sz, void *addr) #define NAPI_SKB_CACHE_BULK 16 #define NAPI_SKB_CACHE_HALF (NAPI_SKB_CACHE_SIZE / 2) +#if PAGE_SIZE == SZ_4K + +#define NAPI_HAS_SMALL_PAGE_FRAG 1 +#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) ((nc).pfmemalloc) + +/* specialized page frag allocator using a single order 0 page + * and slicing it into 1K sized fragment. 
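 *
 * (Editor's aside: on 4K pages the allocator below carves four 1K
 * fragments out of each page and pre-charges the page refcount so
 * that, counting the initial reference, every fragment owns one:
 *
 *	page_ref_add(page, (PAGE_SIZE - SZ_1K) / SZ_1K);  // +3 on 4K
 *
 * which lets each fragment holder drop its reference independently.)
 *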
Constrained to systems + * with a very limited amount of 1K fragments fitting a single + * page - to avoid excessive truesize underestimation + */ + +struct page_frag_1k { + void *va; + u16 offset; + bool pfmemalloc; +}; + +static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp) +{ + struct page *page; + int offset; + + offset = nc->offset - SZ_1K; + if (likely(offset >= 0)) + goto use_frag; + + page = alloc_pages_node(NUMA_NO_NODE, gfp, 0); + if (!page) + return NULL; + + nc->va = page_address(page); + nc->pfmemalloc = page_is_pfmemalloc(page); + offset = PAGE_SIZE - SZ_1K; + page_ref_add(page, offset / SZ_1K); + +use_frag: + nc->offset = offset; + return nc->va + offset; +} +#else + +/* the small page is actually unused in this build; add dummy helpers + * to please the compiler and avoid later preprocessor's conditionals + */ +#define NAPI_HAS_SMALL_PAGE_FRAG 0 +#define NAPI_SMALL_PAGE_PFMEMALLOC(nc) false + +struct page_frag_1k { +}; + +static void *page_frag_alloc_1k(struct page_frag_1k *nc, gfp_t gfp_mask) +{ + return NULL; +} + +#endif + struct napi_alloc_cache { struct page_frag_cache page; + struct page_frag_1k page_small; unsigned int skb_count; void *skb_cache[NAPI_SKB_CACHE_SIZE]; }; @@ -139,6 +201,23 @@ struct napi_alloc_cache { static DEFINE_PER_CPU(struct page_frag_cache, netdev_alloc_cache); static DEFINE_PER_CPU(struct napi_alloc_cache, napi_alloc_cache); +/* Double check that napi_get_frags() allocates skbs with + * skb->head being backed by slab, not a page fragment. + * This is to make sure bug fixed in 3226b158e67c + * ("net: avoid 32 x truesize under-estimation for tiny skbs") + * does not accidentally come back. + */ +void napi_get_frags_check(struct napi_struct *napi) +{ + struct sk_buff *skb; + + local_bh_disable(); + skb = napi_get_frags(napi); + WARN_ON_ONCE(!NAPI_HAS_SMALL_PAGE_FRAG && skb && skb->head_frag); + napi_free_frags(napi); + local_bh_enable(); +} + void *__napi_alloc_frag_align(unsigned int fragsz, unsigned int align_mask) { struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache); @@ -557,6 +636,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, { struct napi_alloc_cache *nc; struct sk_buff *skb; + bool pfmemalloc; void *data; DEBUG_NET_WARN_ON_ONCE(!in_softirq()); @@ -564,8 +644,10 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, /* If requested length is either too small or too big, * we use kmalloc() for skb->head allocation. 
+ * When the small frag allocator is available, prefer it over kmalloc + * for small fragments */ - if (len <= SKB_WITH_OVERHEAD(1024) || + if ((!NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) || len > SKB_WITH_OVERHEAD(PAGE_SIZE) || (gfp_mask & (__GFP_DIRECT_RECLAIM | GFP_DMA))) { skb = __alloc_skb(len, gfp_mask, SKB_ALLOC_RX | SKB_ALLOC_NAPI, @@ -576,13 +658,33 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, } nc = this_cpu_ptr(&napi_alloc_cache); - len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - len = SKB_DATA_ALIGN(len); if (sk_memalloc_socks()) gfp_mask |= __GFP_MEMALLOC; - data = page_frag_alloc(&nc->page, len, gfp_mask); + if (NAPI_HAS_SMALL_PAGE_FRAG && len <= SKB_WITH_OVERHEAD(1024)) { + /* we are artificially inflating the allocation size, but + * that is not as bad as it may look like, as: + * - 'len' less than GRO_MAX_HEAD makes little sense + * - On most systems, larger 'len' values lead to fragment + * size above 512 bytes + * - kmalloc would use the kmalloc-1k slab for such values + * - Builds with smaller GRO_MAX_HEAD will very likely do + * little networking, as that implies no WiFi and no + * tunnels support, and 32 bits arches. + */ + len = SZ_1K; + + data = page_frag_alloc_1k(&nc->page_small, gfp_mask); + pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small); + } else { + len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + len = SKB_DATA_ALIGN(len); + + data = page_frag_alloc(&nc->page, len, gfp_mask); + pfmemalloc = nc->page.pfmemalloc; + } + if (unlikely(!data)) return NULL; @@ -592,7 +694,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len, return NULL; } - if (nc->page.pfmemalloc) + if (pfmemalloc) skb->pfmemalloc = 1; skb->head_frag = 1; @@ -1184,7 +1286,7 @@ EXPORT_SYMBOL_GPL(mm_unaccount_pinned_pages); static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) { - struct ubuf_info *uarg; + struct ubuf_info_msgzc *uarg; struct sk_buff *skb; WARN_ON_ONCE(!in_task()); @@ -1202,19 +1304,19 @@ static struct ubuf_info *msg_zerocopy_alloc(struct sock *sk, size_t size) return NULL; } - uarg->callback = msg_zerocopy_callback; + uarg->ubuf.callback = msg_zerocopy_callback; uarg->id = ((u32)atomic_inc_return(&sk->sk_zckey)) - 1; uarg->len = 1; uarg->bytelen = size; uarg->zerocopy = 1; - uarg->flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; - refcount_set(&uarg->refcnt, 1); + uarg->ubuf.flags = SKBFL_ZEROCOPY_FRAG | SKBFL_DONT_ORPHAN; + refcount_set(&uarg->ubuf.refcnt, 1); sock_hold(sk); - return uarg; + return &uarg->ubuf; } -static inline struct sk_buff *skb_from_uarg(struct ubuf_info *uarg) +static inline struct sk_buff *skb_from_uarg(struct ubuf_info_msgzc *uarg) { return container_of((void *)uarg, struct sk_buff, cb); } @@ -1223,6 +1325,7 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, struct ubuf_info *uarg) { if (uarg) { + struct ubuf_info_msgzc *uarg_zc; const u32 byte_limit = 1 << 19; /* limit to a few TSO */ u32 bytelen, next; @@ -1238,8 +1341,9 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t size, return NULL; } - bytelen = uarg->bytelen + size; - if (uarg->len == USHRT_MAX - 1 || bytelen > byte_limit) { + uarg_zc = uarg_to_msgzc(uarg); + bytelen = uarg_zc->bytelen + size; + if (uarg_zc->len == USHRT_MAX - 1 || bytelen > byte_limit) { /* TCP can create new skb to attach new uarg */ if (sk->sk_type == SOCK_STREAM) goto new_alloc; @@ -1247,11 +1351,11 @@ struct ubuf_info *msg_zerocopy_realloc(struct sock *sk, size_t 
size, } next = (u32)atomic_read(&sk->sk_zckey); - if ((u32)(uarg->id + uarg->len) == next) { - if (mm_account_pinned_pages(&uarg->mmp, size)) + if ((u32)(uarg_zc->id + uarg_zc->len) == next) { + if (mm_account_pinned_pages(&uarg_zc->mmp, size)) return NULL; - uarg->len++; - uarg->bytelen = bytelen; + uarg_zc->len++; + uarg_zc->bytelen = bytelen; atomic_set(&sk->sk_zckey, ++next); /* no extra ref when appending to datagram (MSG_MORE) */ @@ -1287,7 +1391,7 @@ static bool skb_zerocopy_notify_extend(struct sk_buff *skb, u32 lo, u16 len) return true; } -static void __msg_zerocopy_callback(struct ubuf_info *uarg) +static void __msg_zerocopy_callback(struct ubuf_info_msgzc *uarg) { struct sk_buff *tail, *skb = skb_from_uarg(uarg); struct sock_exterr_skb *serr; @@ -1340,19 +1444,21 @@ release: void msg_zerocopy_callback(struct sk_buff *skb, struct ubuf_info *uarg, bool success) { - uarg->zerocopy = uarg->zerocopy & success; + struct ubuf_info_msgzc *uarg_zc = uarg_to_msgzc(uarg); + + uarg_zc->zerocopy = uarg_zc->zerocopy & success; if (refcount_dec_and_test(&uarg->refcnt)) - __msg_zerocopy_callback(uarg); + __msg_zerocopy_callback(uarg_zc); } EXPORT_SYMBOL_GPL(msg_zerocopy_callback); void msg_zerocopy_put_abort(struct ubuf_info *uarg, bool have_uref) { - struct sock *sk = skb_from_uarg(uarg)->sk; + struct sock *sk = skb_from_uarg(uarg_to_msgzc(uarg))->sk; atomic_dec(&sk->sk_zckey); - uarg->len--; + uarg_to_msgzc(uarg)->len--; if (have_uref) msg_zerocopy_callback(NULL, uarg, true); diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 725891527814..5b1ce656baa1 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -29,7 +29,6 @@ static int int_3600 = 3600; static int min_sndbuf = SOCK_MIN_SNDBUF; static int min_rcvbuf = SOCK_MIN_RCVBUF; static int max_skb_frags = MAX_SKB_FRAGS; -static long long_max __maybe_unused = LONG_MAX; static int net_msg_warn; /* Unused, but still a sysctl */ diff --git a/net/core/xdp.c b/net/core/xdp.c index 24420209bf0e..844c9d99dc0e 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -375,19 +375,17 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, struct xdp_buff *xdp) { - struct xdp_mem_allocator *xa; struct page *page; switch (mem->type) { case MEM_TYPE_PAGE_POOL: - rcu_read_lock(); - /* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */ - xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params); page = virt_to_head_page(data); if (napi_direct && xdp_return_frame_no_direct()) napi_direct = false; - page_pool_put_full_page(xa->page_pool, page, napi_direct); - rcu_read_unlock(); + /* No need to check ((page->pp_magic & ~0x3UL) == PP_SIGNATURE) + * as mem->type knows this a page_pool page + */ + page_pool_put_full_page(page->pp, page, napi_direct); break; case MEM_TYPE_PAGE_SHARED: page_frag_free(data); diff --git a/net/dccp/proto.c b/net/dccp/proto.c index 7cd4a6cc99fc..c548ca3e9b0e 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1197,6 +1197,8 @@ static int __init dccp_init(void) INIT_HLIST_HEAD(&dccp_hashinfo.bhash2[i].chain); } + dccp_hashinfo.pernet = false; + rc = dccp_mib_init(); if (rc) goto out_free_dccp_bhash2; diff --git a/net/dsa/Makefile b/net/dsa/Makefile index af28c24ead18..bf57ef3bce2a 100644 --- a/net/dsa/Makefile +++ b/net/dsa/Makefile @@ -1,7 +1,15 @@ # SPDX-License-Identifier: GPL-2.0 # the core obj-$(CONFIG_NET_DSA) += dsa_core.o -dsa_core-y += dsa.o dsa2.o master.o port.o slave.o switch.o tag_8021q.o 
+dsa_core-y += \ + dsa.o \ + dsa2.o \ + master.o \ + netlink.o \ + port.o \ + slave.o \ + switch.o \ + tag_8021q.o # tagging formats obj-$(CONFIG_NET_DSA_TAG_AR9331) += tag_ar9331.o diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index be7b320cda76..64b14f655b23 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -536,8 +536,16 @@ static int __init dsa_init_module(void) dsa_tag_driver_register(&DSA_TAG_DRIVER_NAME(none_ops), THIS_MODULE); + rc = rtnl_link_register(&dsa_link_ops); + if (rc) + goto netlink_register_fail; + return 0; +netlink_register_fail: + dsa_tag_driver_unregister(&DSA_TAG_DRIVER_NAME(none_ops)); + dsa_slave_unregister_notifier(); + dev_remove_pack(&dsa_pack_type); register_notifier_fail: destroy_workqueue(dsa_owq); @@ -547,6 +555,7 @@ module_init(dsa_init_module); static void __exit dsa_cleanup_module(void) { + rtnl_link_unregister(&dsa_link_ops); dsa_tag_driver_unregister(&DSA_TAG_DRIVER_NAME(none_ops)); dsa_slave_unregister_notifier(); diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c index ed56c7a554b8..af0e2c0394ac 100644 --- a/net/dsa/dsa2.c +++ b/net/dsa/dsa2.c @@ -387,6 +387,20 @@ static struct dsa_port *dsa_tree_find_first_cpu(struct dsa_switch_tree *dst) return NULL; } +struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst) +{ + struct device_node *ethernet; + struct net_device *master; + struct dsa_port *cpu_dp; + + cpu_dp = dsa_tree_find_first_cpu(dst); + ethernet = of_parse_phandle(cpu_dp->dn, "ethernet", 0); + master = of_find_net_device_by_node(ethernet); + of_node_put(ethernet); + + return master; +} + /* Assign the default CPU port (the first one in the tree) to all ports of the * fabric which don't already have one as part of their own switch. */ @@ -447,6 +461,72 @@ static void dsa_tree_teardown_cpu_ports(struct dsa_switch_tree *dst) dp->cpu_dp = NULL; } +static int dsa_port_devlink_setup(struct dsa_port *dp) +{ + struct devlink_port *dlp = &dp->devlink_port; + struct dsa_switch_tree *dst = dp->ds->dst; + struct devlink_port_attrs attrs = {}; + struct devlink *dl = dp->ds->devlink; + struct dsa_switch *ds = dp->ds; + const unsigned char *id; + unsigned char len; + int err; + + memset(dlp, 0, sizeof(*dlp)); + devlink_port_init(dl, dlp); + + if (ds->ops->port_setup) { + err = ds->ops->port_setup(ds, dp->index); + if (err) + return err; + } + + id = (const unsigned char *)&dst->index; + len = sizeof(dst->index); + + attrs.phys.port_number = dp->index; + memcpy(attrs.switch_id.id, id, len); + attrs.switch_id.id_len = len; + + switch (dp->type) { + case DSA_PORT_TYPE_UNUSED: + attrs.flavour = DEVLINK_PORT_FLAVOUR_UNUSED; + break; + case DSA_PORT_TYPE_CPU: + attrs.flavour = DEVLINK_PORT_FLAVOUR_CPU; + break; + case DSA_PORT_TYPE_DSA: + attrs.flavour = DEVLINK_PORT_FLAVOUR_DSA; + break; + case DSA_PORT_TYPE_USER: + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + break; + } + + devlink_port_attrs_set(dlp, &attrs); + err = devlink_port_register(dl, dlp, dp->index); + if (err) { + if (ds->ops->port_teardown) + ds->ops->port_teardown(ds, dp->index); + return err; + } + + return 0; +} + +static void dsa_port_devlink_teardown(struct dsa_port *dp) +{ + struct devlink_port *dlp = &dp->devlink_port; + struct dsa_switch *ds = dp->ds; + + devlink_port_unregister(dlp); + + if (ds->ops->port_teardown) + ds->ops->port_teardown(ds, dp->index); + + devlink_port_fini(dlp); +} + static int dsa_port_setup(struct dsa_port *dp) { struct devlink_port *dlp = &dp->devlink_port; @@ -458,11 +538,9 @@ static int dsa_port_setup(struct dsa_port *dp) if (dp->setup) return 0; - if 
(ds->ops->port_setup) { - err = ds->ops->port_setup(ds, dp->index); - if (err) - return err; - } + err = dsa_port_devlink_setup(dp); + if (err) + return err; switch (dp->type) { case DSA_PORT_TYPE_UNUSED: @@ -519,8 +597,7 @@ static int dsa_port_setup(struct dsa_port *dp) if (err && dsa_port_link_registered) dsa_shared_port_link_unregister_of(dp); if (err) { - if (ds->ops->port_teardown) - ds->ops->port_teardown(ds, dp->index); + dsa_port_devlink_teardown(dp); return err; } @@ -529,59 +606,13 @@ static int dsa_port_setup(struct dsa_port *dp) return 0; } -static int dsa_port_devlink_setup(struct dsa_port *dp) -{ - struct devlink_port *dlp = &dp->devlink_port; - struct dsa_switch_tree *dst = dp->ds->dst; - struct devlink_port_attrs attrs = {}; - struct devlink *dl = dp->ds->devlink; - const unsigned char *id; - unsigned char len; - int err; - - id = (const unsigned char *)&dst->index; - len = sizeof(dst->index); - - attrs.phys.port_number = dp->index; - memcpy(attrs.switch_id.id, id, len); - attrs.switch_id.id_len = len; - memset(dlp, 0, sizeof(*dlp)); - - switch (dp->type) { - case DSA_PORT_TYPE_UNUSED: - attrs.flavour = DEVLINK_PORT_FLAVOUR_UNUSED; - break; - case DSA_PORT_TYPE_CPU: - attrs.flavour = DEVLINK_PORT_FLAVOUR_CPU; - break; - case DSA_PORT_TYPE_DSA: - attrs.flavour = DEVLINK_PORT_FLAVOUR_DSA; - break; - case DSA_PORT_TYPE_USER: - attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - break; - } - - devlink_port_attrs_set(dlp, &attrs); - err = devlink_port_register(dl, dlp, dp->index); - - if (!err) - dp->devlink_port_setup = true; - - return err; -} - static void dsa_port_teardown(struct dsa_port *dp) { struct devlink_port *dlp = &dp->devlink_port; - struct dsa_switch *ds = dp->ds; if (!dp->setup) return; - if (ds->ops->port_teardown) - ds->ops->port_teardown(ds, dp->index); - devlink_port_type_clear(dlp); switch (dp->type) { @@ -605,46 +636,15 @@ static void dsa_port_teardown(struct dsa_port *dp) break; } - dp->setup = false; -} - -static void dsa_port_devlink_teardown(struct dsa_port *dp) -{ - struct devlink_port *dlp = &dp->devlink_port; + dsa_port_devlink_teardown(dp); - if (dp->devlink_port_setup) - devlink_port_unregister(dlp); - dp->devlink_port_setup = false; + dp->setup = false; } -/* Destroy the current devlink port, and create a new one which has the UNUSED - * flavour. At this point, any call to ds->ops->port_setup has been already - * balanced out by a call to ds->ops->port_teardown, so we know that any - * devlink port regions the driver had are now unregistered. We then call its - * ds->ops->port_setup again, in order for the driver to re-create them on the - * new devlink port. - */ -static int dsa_port_reinit_as_unused(struct dsa_port *dp) +static int dsa_port_setup_as_unused(struct dsa_port *dp) { - struct dsa_switch *ds = dp->ds; - int err; - - dsa_port_devlink_teardown(dp); dp->type = DSA_PORT_TYPE_UNUSED; - err = dsa_port_devlink_setup(dp); - if (err) - return err; - - if (ds->ops->port_setup) { - /* On error, leave the devlink port registered, - * dsa_switch_teardown will clean it up later. 
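With dsa_port_devlink_setup() reordered as above, a driver's ->port_setup() now runs after devlink_port_init() but before devlink_port_register(), which is exactly the window in which devlink port regions may be attached. A minimal driver-side sketch of what that enables, in the spirit of in-tree users like mv88e6xxx; the foo_* names and the region geometry are hypothetical, only the devlink calls are real API:

static const struct devlink_port_region_ops foo_region_ops = {
	.name = "config",
	.destructor = kfree,
};

static int foo_port_setup(struct dsa_switch *ds, int port)
{
	struct dsa_port *dp = dsa_to_port(ds, port);
	struct foo_priv *priv = ds->priv;	/* hypothetical driver state */
	struct devlink_region *region;

	/* dp->devlink_port has already seen devlink_port_init(), so
	 * attaching a region here is legal even though the port is not
	 * registered with devlink yet.
	 */
	region = devlink_port_region_create(&dp->devlink_port,
					    &foo_region_ops, 1, 4096);
	if (IS_ERR(region))
		return PTR_ERR(region);

	priv->port_region[port] = region;

	return 0;
}

static void foo_port_teardown(struct dsa_switch *ds, int port)
{
	struct foo_priv *priv = ds->priv;

	devlink_region_destroy(priv->port_region[port]);
	priv->port_region[port] = NULL;
}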
- */ - err = ds->ops->port_setup(ds, dp->index); - if (err) - return err; - } - - return 0; + return dsa_port_setup(dp); } static int dsa_devlink_info_get(struct devlink *dl, @@ -868,7 +868,6 @@ static int dsa_switch_setup(struct dsa_switch *ds) { struct dsa_devlink_priv *dl_priv; struct device_node *dn; - struct dsa_port *dp; int err; if (ds->setup) @@ -891,18 +890,9 @@ static int dsa_switch_setup(struct dsa_switch *ds) dl_priv = devlink_priv(ds->devlink); dl_priv->ds = ds; - /* Setup devlink port instances now, so that the switch - * setup() can register regions etc, against the ports - */ - dsa_switch_for_each_port(dp, ds) { - err = dsa_port_devlink_setup(dp); - if (err) - goto unregister_devlink_ports; - } - err = dsa_switch_register_notifier(ds); if (err) - goto unregister_devlink_ports; + goto devlink_free; ds->configure_vlan_while_not_filtering = true; @@ -943,9 +933,7 @@ teardown: ds->ops->teardown(ds); unregister_notifier: dsa_switch_unregister_notifier(ds); -unregister_devlink_ports: - dsa_switch_for_each_port(dp, ds) - dsa_port_devlink_teardown(dp); +devlink_free: devlink_free(ds->devlink); ds->devlink = NULL; return err; @@ -953,8 +941,6 @@ unregister_devlink_ports: static void dsa_switch_teardown(struct dsa_switch *ds) { - struct dsa_port *dp; - if (!ds->setup) return; @@ -973,8 +959,6 @@ static void dsa_switch_teardown(struct dsa_switch *ds) dsa_switch_unregister_notifier(ds); if (ds->devlink) { - dsa_switch_for_each_port(dp, ds) - dsa_port_devlink_teardown(dp); devlink_free(ds->devlink); ds->devlink = NULL; } @@ -1027,7 +1011,7 @@ static int dsa_tree_setup_ports(struct dsa_switch_tree *dst) if (dsa_port_is_user(dp) || dsa_port_is_unused(dp)) { err = dsa_port_setup(dp); if (err) { - err = dsa_port_reinit_as_unused(dp); + err = dsa_port_setup_as_unused(dp); if (err) goto teardown; } @@ -1263,11 +1247,11 @@ int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, * attempts to change the tagging protocol. If we ever lift the IFF_UP * restriction, there needs to be another mutex which serializes this. 
*/ - list_for_each_entry(dp, &dst->ports, list) { - if (dsa_port_is_cpu(dp) && (dp->master->flags & IFF_UP)) + dsa_tree_for_each_user_port(dp, dst) { + if (dsa_port_to_master(dp)->flags & IFF_UP) goto out_unlock; - if (dsa_port_is_user(dp) && (dp->slave->flags & IFF_UP)) + if (dp->slave->flags & IFF_UP) goto out_unlock; } @@ -1312,6 +1296,12 @@ void dsa_tree_master_admin_state_change(struct dsa_switch_tree *dst, struct dsa_port *cpu_dp = master->dsa_ptr; bool notify = false; + /* Don't keep track of admin state on LAG DSA masters, + * but rather just of physical DSA masters + */ + if (netif_is_lag_master(master)) + return; + if ((dsa_port_master_is_operational(cpu_dp)) != (up && cpu_dp->master_oper_up)) notify = true; @@ -1329,6 +1319,12 @@ void dsa_tree_master_oper_state_change(struct dsa_switch_tree *dst, struct dsa_port *cpu_dp = master->dsa_ptr; bool notify = false; + /* Don't keep track of oper state on LAG DSA masters, + * but rather just of physical DSA masters + */ + if (netif_is_lag_master(master)) + return; + if ((dsa_port_master_is_operational(cpu_dp)) != (cpu_dp->master_admin_up && up)) notify = true; @@ -1797,7 +1793,7 @@ void dsa_switch_shutdown(struct dsa_switch *ds) rtnl_lock(); dsa_switch_for_each_user_port(dp, ds) { - master = dp->cpu_dp->master; + master = dsa_port_to_master(dp); slave_dev = dp->slave; netdev_upper_dev_unlink(master, slave_dev); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index 614fbba8fe39..6e65c7ffd6f3 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -88,6 +88,7 @@ struct dsa_notifier_lag_info { const struct dsa_port *dp; struct dsa_lag lag; struct netdev_lag_upper_info *info; + struct netlink_ext_ack *extack; }; /* DSA_NOTIFIER_VLAN_* */ @@ -184,6 +185,11 @@ static inline int dsa_tag_protocol_overhead(const struct dsa_device_ops *ops) /* master.c */ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp); void dsa_master_teardown(struct net_device *dev); +int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack); +void dsa_master_lag_teardown(struct net_device *lag_dev, + struct dsa_port *cpu_dp); static inline struct net_device *dsa_master_find_slave(struct net_device *dev, int device, int port) @@ -200,6 +206,9 @@ static inline struct net_device *dsa_master_find_slave(struct net_device *dev, return NULL; } +/* netlink.c */ +extern struct rtnl_link_ops dsa_link_ops __read_mostly; + /* port.c */ void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, const struct dsa_device_ops *tag_ops); @@ -285,6 +294,7 @@ int dsa_port_mrp_add_ring_role(const struct dsa_port *dp, int dsa_port_mrp_del_ring_role(const struct dsa_port *dp, const struct switchdev_obj_ring_role_mrp *mrp); int dsa_port_phylink_create(struct dsa_port *dp); +void dsa_port_phylink_destroy(struct dsa_port *dp); int dsa_shared_port_link_register_of(struct dsa_port *dp); void dsa_shared_port_link_unregister_of(struct dsa_port *dp); int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr); @@ -292,6 +302,8 @@ void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr); int dsa_port_tag_8021q_vlan_add(struct dsa_port *dp, u16 vid, bool broadcast); void dsa_port_tag_8021q_vlan_del(struct dsa_port *dp, u16 vid, bool broadcast); void dsa_port_set_host_flood(struct dsa_port *dp, bool uc, bool mc); +int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, + struct netlink_ext_ack *extack); /* slave.c */ extern const struct dsa_device_ops 
notag_netdev_ops; @@ -305,8 +317,12 @@ int dsa_slave_suspend(struct net_device *slave_dev); int dsa_slave_resume(struct net_device *slave_dev); int dsa_slave_register_notifier(void); void dsa_slave_unregister_notifier(void); +void dsa_slave_sync_ha(struct net_device *dev); +void dsa_slave_unsync_ha(struct net_device *dev); void dsa_slave_setup_tagger(struct net_device *slave); int dsa_slave_change_mtu(struct net_device *dev, int new_mtu); +int dsa_slave_change_master(struct net_device *dev, struct net_device *master, + struct netlink_ext_ack *extack); int dsa_slave_manage_vlan_filtering(struct net_device *dev, bool vlan_filtering); @@ -322,7 +338,7 @@ dsa_slave_to_master(const struct net_device *dev) { struct dsa_port *dp = dsa_slave_to_port(dev); - return dp->cpu_dp->master; + return dsa_port_to_master(dp); } /* If under a bridge with vlan_filtering=0, make sure to send pvid-tagged @@ -542,6 +558,7 @@ void dsa_lag_map(struct dsa_switch_tree *dst, struct dsa_lag *lag); void dsa_lag_unmap(struct dsa_switch_tree *dst, struct dsa_lag *lag); struct dsa_lag *dsa_tree_lag_find(struct dsa_switch_tree *dst, const struct net_device *lag_dev); +struct net_device *dsa_tree_find_first_master(struct dsa_switch_tree *dst); int dsa_tree_notify(struct dsa_switch_tree *dst, unsigned long e, void *v); int dsa_broadcast(unsigned long e, void *v); int dsa_tree_change_tag_proto(struct dsa_switch_tree *dst, diff --git a/net/dsa/master.c b/net/dsa/master.c index fb810edc8281..40367ab41cf8 100644 --- a/net/dsa/master.c +++ b/net/dsa/master.c @@ -226,6 +226,9 @@ static int dsa_master_ethtool_setup(struct net_device *dev) struct dsa_switch *ds = cpu_dp->ds; struct ethtool_ops *ops; + if (netif_is_lag_master(dev)) + return 0; + ops = devm_kzalloc(ds->dev, sizeof(*ops), GFP_KERNEL); if (!ops) return -ENOMEM; @@ -250,6 +253,9 @@ static void dsa_master_ethtool_teardown(struct net_device *dev) { struct dsa_port *cpu_dp = dev->dsa_ptr; + if (netif_is_lag_master(dev)) + return; + dev->ethtool_ops = cpu_dp->orig_ethtool_ops; cpu_dp->orig_ethtool_ops = NULL; } @@ -257,6 +263,9 @@ static void dsa_master_ethtool_teardown(struct net_device *dev) static void dsa_netdev_ops_set(struct net_device *dev, const struct dsa_netdevice_ops *ops) { + if (netif_is_lag_master(dev)) + return; + dev->dsa_ptr->netdev_ops = ops; } @@ -355,12 +364,14 @@ int dsa_master_setup(struct net_device *dev, struct dsa_port *cpu_dp) mtu = ETH_DATA_LEN + dsa_tag_protocol_overhead(tag_ops); /* The DSA master must use SET_NETDEV_DEV for this to work. 
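The dsa_port_to_master() accessor that these hunks switch to is defined in include/net/dsa.h by the same series, so it does not appear in this net/ diff. Reconstructed from that header for reference (treat it as a sketch rather than authoritative): it resolves to the LAG netdevice when the CPU port has been enslaved to one, and to the physical master otherwise:

static inline struct net_device *dsa_port_to_master(const struct dsa_port *dp)
{
	if (dp->cpu_port_in_lag)
		return dsa_port_lag_dev_get(dp->cpu_dp);

	return dp->cpu_dp->master;
}

This is why the open-coded dp->cpu_dp->master dereferences are being replaced throughout: once the CPU ports are bonded, they would silently keep returning a physical member port instead of the LAG.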
*/ - consumer_link = device_link_add(ds->dev, dev->dev.parent, - DL_FLAG_AUTOREMOVE_CONSUMER); - if (!consumer_link) - netdev_err(dev, - "Failed to create a device link to DSA switch %s\n", - dev_name(ds->dev)); + if (!netif_is_lag_master(dev)) { + consumer_link = device_link_add(ds->dev, dev->dev.parent, + DL_FLAG_AUTOREMOVE_CONSUMER); + if (!consumer_link) + netdev_err(dev, + "Failed to create a device link to DSA switch %s\n", + dev_name(ds->dev)); + } /* The switch driver may not implement ->port_change_mtu(), case in * which dsa_slave_change_mtu() will not update the master MTU either, @@ -417,3 +428,52 @@ void dsa_master_teardown(struct net_device *dev) */ wmb(); } + +int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack) +{ + bool master_setup = false; + int err; + + if (!netdev_uses_dsa(lag_dev)) { + err = dsa_master_setup(lag_dev, cpu_dp); + if (err) + return err; + + master_setup = true; + } + + err = dsa_port_lag_join(cpu_dp, lag_dev, uinfo, extack); + if (err) { + if (extack && !extack->_msg) + NL_SET_ERR_MSG_MOD(extack, + "CPU port failed to join LAG"); + goto out_master_teardown; + } + + return 0; + +out_master_teardown: + if (master_setup) + dsa_master_teardown(lag_dev); + return err; +} + +/* Tear down a master if there isn't any other user port on it, + * optionally also destroying LAG information. + */ +void dsa_master_lag_teardown(struct net_device *lag_dev, + struct dsa_port *cpu_dp) +{ + struct net_device *upper; + struct list_head *iter; + + dsa_port_lag_leave(cpu_dp, lag_dev); + + netdev_for_each_upper_dev_rcu(lag_dev, upper, iter) + if (dsa_slave_dev_check(upper)) + return; + + dsa_master_teardown(lag_dev); +} diff --git a/net/dsa/netlink.c b/net/dsa/netlink.c new file mode 100644 index 000000000000..ecf9ed1de185 --- /dev/null +++ b/net/dsa/netlink.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright 2022 NXP + */ +#include <linux/netdevice.h> +#include <net/rtnetlink.h> + +#include "dsa_priv.h" + +static const struct nla_policy dsa_policy[IFLA_DSA_MAX + 1] = { + [IFLA_DSA_MASTER] = { .type = NLA_U32 }, +}; + +static int dsa_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + int err; + + if (!data) + return 0; + + if (data[IFLA_DSA_MASTER]) { + u32 ifindex = nla_get_u32(data[IFLA_DSA_MASTER]); + struct net_device *master; + + master = __dev_get_by_index(dev_net(dev), ifindex); + if (!master) + return -EINVAL; + + err = dsa_slave_change_master(dev, master, extack); + if (err) + return err; + } + + return 0; +} + +static size_t dsa_get_size(const struct net_device *dev) +{ + return nla_total_size(sizeof(u32)) + /* IFLA_DSA_MASTER */ + 0; +} + +static int dsa_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct net_device *master = dsa_slave_to_master(dev); + + if (nla_put_u32(skb, IFLA_DSA_MASTER, master->ifindex)) + return -EMSGSIZE; + + return 0; +} + +struct rtnl_link_ops dsa_link_ops __read_mostly = { + .kind = "dsa", + .priv_size = sizeof(struct dsa_port), + .maxtype = IFLA_DSA_MAX, + .policy = dsa_policy, + .changelink = dsa_changelink, + .get_size = dsa_get_size, + .fill_info = dsa_fill_info, + .netns_refund = true, +}; diff --git a/net/dsa/port.c b/net/dsa/port.c index 7afc35db0c29..e4a0513816bb 100644 --- a/net/dsa/port.c +++ b/net/dsa/port.c @@ -7,6 +7,7 @@ */ #include <linux/if_bridge.h> +#include <linux/netdevice.h> #include <linux/notifier.h> #include 
<linux/of_mdio.h> #include <linux/of_net.h> @@ -634,6 +635,7 @@ int dsa_port_lag_join(struct dsa_port *dp, struct net_device *lag_dev, struct dsa_notifier_lag_info info = { .dp = dp, .info = uinfo, + .extack = extack, }; struct net_device *bridge_dev; int err; @@ -1026,7 +1028,7 @@ int dsa_port_standalone_host_fdb_add(struct dsa_port *dp, int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, const unsigned char *addr, u16 vid) { - struct dsa_port *cpu_dp = dp->cpu_dp; + struct net_device *master = dsa_port_to_master(dp); struct dsa_db db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, @@ -1037,8 +1039,8 @@ int dsa_port_bridge_host_fdb_add(struct dsa_port *dp, * requires rtnl_lock(), since we can't guarantee that is held here, * and we can't take it either. */ - if (cpu_dp->master->priv_flags & IFF_UNICAST_FLT) { - err = dev_uc_add(cpu_dp->master, addr); + if (master->priv_flags & IFF_UNICAST_FLT) { + err = dev_uc_add(master, addr); if (err) return err; } @@ -1077,15 +1079,15 @@ int dsa_port_standalone_host_fdb_del(struct dsa_port *dp, int dsa_port_bridge_host_fdb_del(struct dsa_port *dp, const unsigned char *addr, u16 vid) { - struct dsa_port *cpu_dp = dp->cpu_dp; + struct net_device *master = dsa_port_to_master(dp); struct dsa_db db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, }; int err; - if (cpu_dp->master->priv_flags & IFF_UNICAST_FLT) { - err = dev_uc_del(cpu_dp->master, addr); + if (master->priv_flags & IFF_UNICAST_FLT) { + err = dev_uc_del(master, addr); if (err) return err; } @@ -1208,14 +1210,14 @@ int dsa_port_standalone_host_mdb_add(const struct dsa_port *dp, int dsa_port_bridge_host_mdb_add(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb) { - struct dsa_port *cpu_dp = dp->cpu_dp; + struct net_device *master = dsa_port_to_master(dp); struct dsa_db db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, }; int err; - err = dev_mc_add(cpu_dp->master, mdb->addr); + err = dev_mc_add(master, mdb->addr); if (err) return err; @@ -1252,14 +1254,14 @@ int dsa_port_standalone_host_mdb_del(const struct dsa_port *dp, int dsa_port_bridge_host_mdb_del(const struct dsa_port *dp, const struct switchdev_obj_port_mdb *mdb) { - struct dsa_port *cpu_dp = dp->cpu_dp; + struct net_device *master = dsa_port_to_master(dp); struct dsa_db db = { .type = DSA_DB_BRIDGE, .bridge = *dp->bridge, }; int err; - err = dev_mc_del(cpu_dp->master, mdb->addr); + err = dev_mc_del(master, mdb->addr); if (err) return err; @@ -1294,19 +1296,19 @@ int dsa_port_host_vlan_add(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan, struct netlink_ext_ack *extack) { + struct net_device *master = dsa_port_to_master(dp); struct dsa_notifier_vlan_info info = { .dp = dp, .vlan = vlan, .extack = extack, }; - struct dsa_port *cpu_dp = dp->cpu_dp; int err; err = dsa_port_notify(dp, DSA_NOTIFIER_HOST_VLAN_ADD, &info); if (err && err != -EOPNOTSUPP) return err; - vlan_vid_add(cpu_dp->master, htons(ETH_P_8021Q), vlan->vid); + vlan_vid_add(master, htons(ETH_P_8021Q), vlan->vid); return err; } @@ -1314,18 +1316,18 @@ int dsa_port_host_vlan_add(struct dsa_port *dp, int dsa_port_host_vlan_del(struct dsa_port *dp, const struct switchdev_obj_port_vlan *vlan) { + struct net_device *master = dsa_port_to_master(dp); struct dsa_notifier_vlan_info info = { .dp = dp, .vlan = vlan, }; - struct dsa_port *cpu_dp = dp->cpu_dp; int err; err = dsa_port_notify(dp, DSA_NOTIFIER_HOST_VLAN_DEL, &info); if (err && err != -EOPNOTSUPP) return err; - vlan_vid_del(cpu_dp->master, htons(ETH_P_8021Q), vlan->vid); + 
vlan_vid_del(master, htons(ETH_P_8021Q), vlan->vid); return err; } @@ -1374,6 +1376,136 @@ int dsa_port_mrp_del_ring_role(const struct dsa_port *dp, return ds->ops->port_mrp_del_ring_role(ds, dp->index, mrp); } +static int dsa_port_assign_master(struct dsa_port *dp, + struct net_device *master, + struct netlink_ext_ack *extack, + bool fail_on_err) +{ + struct dsa_switch *ds = dp->ds; + int port = dp->index, err; + + err = ds->ops->port_change_master(ds, port, master, extack); + if (err && !fail_on_err) + dev_err(ds->dev, "port %d failed to assign master %s: %pe\n", + port, master->name, ERR_PTR(err)); + + if (err && fail_on_err) + return err; + + dp->cpu_dp = master->dsa_ptr; + dp->cpu_port_in_lag = netif_is_lag_master(master); + + return 0; +} + +/* Change the dp->cpu_dp affinity for a user port. Note that both cross-chip + * notifiers and drivers have implicit assumptions about user-to-CPU-port + * mappings, so we unfortunately cannot delay the deletion of the objects + * (switchdev, standalone addresses, standalone VLANs) on the old CPU port + * until the new CPU port has been set up. So we need to completely tear down + * the old CPU port before changing it, and restore it on errors during the + * bringup of the new one. + */ +int dsa_port_change_master(struct dsa_port *dp, struct net_device *master, + struct netlink_ext_ack *extack) +{ + struct net_device *bridge_dev = dsa_port_bridge_dev_get(dp); + struct net_device *old_master = dsa_port_to_master(dp); + struct net_device *dev = dp->slave; + struct dsa_switch *ds = dp->ds; + bool vlan_filtering; + int err, tmp; + + /* Bridges may hold host FDB, MDB and VLAN objects. These need to be + * migrated, so dynamically unoffload and later reoffload the bridge + * port. + */ + if (bridge_dev) { + dsa_port_pre_bridge_leave(dp, bridge_dev); + dsa_port_bridge_leave(dp, bridge_dev); + } + + /* The port might still be VLAN filtering even if it's no longer + * under a bridge, either due to ds->vlan_filtering_is_global or + * ds->needs_standalone_vlan_filtering. In turn this means VLANs + * on the CPU port. + */ + vlan_filtering = dsa_port_is_vlan_filtering(dp); + if (vlan_filtering) { + err = dsa_slave_manage_vlan_filtering(dev, false); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to remove standalone VLANs"); + goto rewind_old_bridge; + } + } + + /* Standalone addresses, and addresses of upper interfaces like + * VLAN, LAG, HSR need to be migrated. 
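dsa_port_assign_master() above is where the new ds->ops->port_change_master() hook is invoked. A hypothetical driver implementation follows, sketching the expected contract (the foo_* identifiers are illustrative, not from a real in-tree driver): redirect the user port's host-bound traffic to the CPU port behind the new master, which may itself sit behind a LAG of CPU ports:

static int foo_port_change_master(struct dsa_switch *ds, int port,
				  struct net_device *master,
				  struct netlink_ext_ack *extack)
{
	struct foo_priv *priv = ds->priv;	/* hypothetical */
	struct dsa_port *cpu_dp = master->dsa_ptr;

	/* For a LAG DSA master, master->dsa_ptr points at the first
	 * physical CPU port inside the LAG; a real driver would steer
	 * towards the hardware LAG rather than one member port.
	 */
	if (netif_is_lag_master(master) && !cpu_dp->lag) {
		NL_SET_ERR_MSG_MOD(extack,
				   "LAG master has no physical CPU ports");
		return -EOPNOTSUPP;
	}

	/* hypothetical register write retargeting traps and host FDB
	 * entries for this user port at the new CPU port
	 */
	foo_set_upstream_port(priv, port, cpu_dp->index);

	return 0;
}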
+ */ + dsa_slave_unsync_ha(dev); + + err = dsa_port_assign_master(dp, master, extack, true); + if (err) + goto rewind_old_addrs; + + dsa_slave_sync_ha(dev); + + if (vlan_filtering) { + err = dsa_slave_manage_vlan_filtering(dev, true); + if (err) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to restore standalone VLANs"); + goto rewind_new_addrs; + } + } + + if (bridge_dev) { + err = dsa_port_bridge_join(dp, bridge_dev, extack); + if (err && err == -EOPNOTSUPP) { + NL_SET_ERR_MSG_MOD(extack, + "Failed to reoffload bridge"); + goto rewind_new_vlan; + } + } + + return 0; + +rewind_new_vlan: + if (vlan_filtering) + dsa_slave_manage_vlan_filtering(dev, false); + +rewind_new_addrs: + dsa_slave_unsync_ha(dev); + + dsa_port_assign_master(dp, old_master, NULL, false); + +/* Restore the objects on the old CPU port */ +rewind_old_addrs: + dsa_slave_sync_ha(dev); + + if (vlan_filtering) { + tmp = dsa_slave_manage_vlan_filtering(dev, true); + if (tmp) { + dev_err(ds->dev, + "port %d failed to restore standalone VLANs: %pe\n", + dp->index, ERR_PTR(tmp)); + } + } + +rewind_old_bridge: + if (bridge_dev) { + tmp = dsa_port_bridge_join(dp, bridge_dev, extack); + if (tmp) { + dev_err(ds->dev, + "port %d failed to rejoin bridge %s: %pe\n", + dp->index, bridge_dev->name, ERR_PTR(tmp)); + } + } + + return err; +} + void dsa_port_set_tag_protocol(struct dsa_port *cpu_dp, const struct dsa_device_ops *tag_ops) { @@ -1529,6 +1661,7 @@ int dsa_port_phylink_create(struct dsa_port *dp) { struct dsa_switch *ds = dp->ds; phy_interface_t mode; + struct phylink *pl; int err; err = of_get_phy_mode(dp->dn, &mode); @@ -1545,16 +1678,24 @@ int dsa_port_phylink_create(struct dsa_port *dp) if (ds->ops->phylink_get_caps) ds->ops->phylink_get_caps(ds, dp->index, &dp->pl_config); - dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn), - mode, &dsa_port_phylink_mac_ops); - if (IS_ERR(dp->pl)) { + pl = phylink_create(&dp->pl_config, of_fwnode_handle(dp->dn), + mode, &dsa_port_phylink_mac_ops); + if (IS_ERR(pl)) { pr_err("error creating PHYLINK: %ld\n", PTR_ERR(dp->pl)); - return PTR_ERR(dp->pl); + return PTR_ERR(pl); } + dp->pl = pl; + return 0; } +void dsa_port_phylink_destroy(struct dsa_port *dp) +{ + phylink_destroy(dp->pl); + dp->pl = NULL; +} + static int dsa_shared_port_setup_phy_of(struct dsa_port *dp, bool enable) { struct dsa_switch *ds = dp->ds; @@ -1649,7 +1790,7 @@ static int dsa_shared_port_phylink_register(struct dsa_port *dp) return 0; err_phy_connect: - phylink_destroy(dp->pl); + dsa_port_phylink_destroy(dp); return err; } @@ -1851,8 +1992,7 @@ void dsa_shared_port_link_unregister_of(struct dsa_port *dp) rtnl_lock(); phylink_disconnect_phy(dp->pl); rtnl_unlock(); - phylink_destroy(dp->pl); - dp->pl = NULL; + dsa_port_phylink_destroy(dp); return; } diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 345106b1ed78..1a59918d3b30 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -164,6 +164,48 @@ static int dsa_slave_unsync_mc(struct net_device *dev, return dsa_slave_schedule_standalone_work(dev, DSA_MC_DEL, addr, 0); } +void dsa_slave_sync_ha(struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(dev); + + netdev_for_each_synced_mc_addr(ha, dev) + dsa_slave_sync_mc(dev, ha->addr); + + netdev_for_each_synced_uc_addr(ha, dev) + dsa_slave_sync_uc(dev, ha->addr); + + netif_addr_unlock_bh(dev); + + if (dsa_switch_supports_uc_filtering(ds) || + dsa_switch_supports_mc_filtering(ds)) + dsa_flush_workqueue(); +} + 
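For context, the per-address helpers iterated by dsa_slave_sync_ha() are pre-existing net/dsa/slave.c code; the unicast one is reproduced below from memory, so treat it as a sketch. Each helper mirrors the address onto the current DSA master and, when the switch filters in hardware, schedules deferred work on the DSA workqueue, which is why these functions end with a dsa_flush_workqueue() barrier before the master can safely change:

static int dsa_slave_sync_uc(struct net_device *dev,
			     const unsigned char *addr)
{
	struct net_device *master = dsa_slave_to_master(dev);
	struct dsa_port *dp = dsa_slave_to_port(dev);

	dev_uc_add(master, addr);

	if (!dsa_switch_supports_uc_filtering(dp->ds))
		return 0;

	return dsa_slave_schedule_standalone_work(dev, DSA_UC_ADD, addr, 0);
}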
+void dsa_slave_unsync_ha(struct net_device *dev) +{ + struct dsa_port *dp = dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + struct netdev_hw_addr *ha; + + netif_addr_lock_bh(dev); + + netdev_for_each_synced_uc_addr(ha, dev) + dsa_slave_unsync_uc(dev, ha->addr); + + netdev_for_each_synced_mc_addr(ha, dev) + dsa_slave_unsync_mc(dev, ha->addr); + + netif_addr_unlock_bh(dev); + + if (dsa_switch_supports_uc_filtering(ds) || + dsa_switch_supports_mc_filtering(ds)) + dsa_flush_workqueue(); +} + /* slave mii_bus handling ***************************************************/ static int dsa_slave_phy_read(struct mii_bus *bus, int addr, int reg) { @@ -1503,8 +1545,7 @@ static int dsa_slave_setup_tc_block(struct net_device *dev, static int dsa_slave_setup_ft_block(struct dsa_switch *ds, int port, void *type_data) { - struct dsa_port *cpu_dp = dsa_to_port(ds, port)->cpu_dp; - struct net_device *master = cpu_dp->master; + struct net_device *master = dsa_port_to_master(dsa_to_port(ds, port)); if (!master->netdev_ops->ndo_setup_tc) return -EOPNOTSUPP; @@ -2147,13 +2188,14 @@ static int dsa_slave_fill_forward_path(struct net_device_path_ctx *ctx, struct net_device_path *path) { struct dsa_port *dp = dsa_slave_to_port(ctx->dev); + struct net_device *master = dsa_port_to_master(dp); struct dsa_port *cpu_dp = dp->cpu_dp; path->dev = ctx->dev; path->type = DEV_PATH_DSA; path->dsa.proto = cpu_dp->tag_ops->proto; path->dsa.port = dp->index; - ctx->dev = cpu_dp->master; + ctx->dev = master; return 0; } @@ -2262,7 +2304,7 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) if (ret) { netdev_err(slave_dev, "failed to connect to PHY: %pe\n", ERR_PTR(ret)); - phylink_destroy(dp->pl); + dsa_port_phylink_destroy(dp); } return ret; @@ -2271,9 +2313,9 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev) void dsa_slave_setup_tagger(struct net_device *slave) { struct dsa_port *dp = dsa_slave_to_port(slave); + struct net_device *master = dsa_port_to_master(dp); struct dsa_slave_priv *p = netdev_priv(slave); const struct dsa_port *cpu_dp = dp->cpu_dp; - struct net_device *master = cpu_dp->master; const struct dsa_switch *ds = dp->ds; slave->needed_headroom = cpu_dp->tag_ops->needed_headroom; @@ -2330,8 +2372,7 @@ int dsa_slave_resume(struct net_device *slave_dev) int dsa_slave_create(struct dsa_port *port) { - const struct dsa_port *cpu_dp = port->cpu_dp; - struct net_device *master = cpu_dp->master; + struct net_device *master = dsa_port_to_master(port); struct dsa_switch *ds = port->ds; const char *name = port->name; struct net_device *slave_dev; @@ -2347,6 +2388,7 @@ int dsa_slave_create(struct dsa_port *port) if (slave_dev == NULL) return -ENOMEM; + slave_dev->rtnl_link_ops = &dsa_link_ops; slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; #if IS_ENABLED(CONFIG_DCB) slave_dev->dcbnl_ops = &dsa_slave_dcbnl_ops; @@ -2434,7 +2476,7 @@ out_phy: rtnl_lock(); phylink_disconnect_phy(p->dp->pl); rtnl_unlock(); - phylink_destroy(p->dp->pl); + dsa_port_phylink_destroy(p->dp); out_gcells: gro_cells_destroy(&p->gcells); out_free: @@ -2457,12 +2499,89 @@ void dsa_slave_destroy(struct net_device *slave_dev) phylink_disconnect_phy(dp->pl); rtnl_unlock(); - phylink_destroy(dp->pl); + dsa_port_phylink_destroy(dp); gro_cells_destroy(&p->gcells); free_percpu(slave_dev->tstats); free_netdev(slave_dev); } +int dsa_slave_change_master(struct net_device *dev, struct net_device *master, + struct netlink_ext_ack *extack) +{ + struct net_device *old_master = dsa_slave_to_master(dev); + struct dsa_port *dp = 
dsa_slave_to_port(dev); + struct dsa_switch *ds = dp->ds; + struct net_device *upper; + struct list_head *iter; + int err; + + if (master == old_master) + return 0; + + if (!ds->ops->port_change_master) { + NL_SET_ERR_MSG_MOD(extack, + "Driver does not support changing DSA master"); + return -EOPNOTSUPP; + } + + if (!netdev_uses_dsa(master)) { + NL_SET_ERR_MSG_MOD(extack, + "Interface not eligible as DSA master"); + return -EOPNOTSUPP; + } + + netdev_for_each_upper_dev_rcu(master, upper, iter) { + if (dsa_slave_dev_check(upper)) + continue; + if (netif_is_bridge_master(upper)) + continue; + NL_SET_ERR_MSG_MOD(extack, "Cannot join master with unknown uppers"); + return -EOPNOTSUPP; + } + + /* Since we allow live-changing the DSA master, plus we auto-open the + * DSA master when the user port opens => we need to ensure that the + * new DSA master is open too. + */ + if (dev->flags & IFF_UP) { + err = dev_open(master, extack); + if (err) + return err; + } + + netdev_upper_dev_unlink(old_master, dev); + + err = netdev_upper_dev_link(master, dev, extack); + if (err) + goto out_revert_old_master_unlink; + + err = dsa_port_change_master(dp, master, extack); + if (err) + goto out_revert_master_link; + + /* Update the MTU of the new CPU port through cross-chip notifiers */ + err = dsa_slave_change_mtu(dev, dev->mtu); + if (err && err != -EOPNOTSUPP) { + netdev_warn(dev, + "nonfatal error updating MTU with new master: %pe\n", + ERR_PTR(err)); + } + + /* If the port doesn't have its own MAC address and relies on the DSA + * master's one, inherit it again from the new DSA master. + */ + if (is_zero_ether_addr(dp->mac)) + eth_hw_addr_inherit(dev, master); + + return 0; + +out_revert_master_link: + netdev_upper_dev_unlink(master, dev); +out_revert_old_master_unlink: + netdev_upper_dev_link(old_master, dev, NULL); + return err; +} + bool dsa_slave_dev_check(const struct net_device *dev) { return dev->netdev_ops == &dsa_slave_netdev_ops; @@ -2699,11 +2818,45 @@ dsa_slave_prechangeupper_sanity_check(struct net_device *dev, return NOTIFY_DONE; } +/* To be eligible as a DSA master, a LAG must have all lower interfaces be + * eligible DSA masters. Additionally, all LAG slaves must be DSA masters of + * switches in the same switch tree. 
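User space reaches dsa_slave_change_master() through the rtnl_link_ops registered in net/dsa/netlink.c further up. A self-contained sketch of the equivalent request using libmnl; this program is not part of the patch set, and it assumes the IFLA_DSA_MASTER value from the series' UAPI header (linux/if_link.h). ACK processing is elided for brevity:

#include <net/if.h>
#include <linux/if_link.h>
#include <linux/rtnetlink.h>
#include <libmnl/libmnl.h>

static int dsa_set_master(const char *slave, const char *master)
{
	char buf[MNL_SOCKET_BUFFER_SIZE];
	struct nlattr *linkinfo, *data;
	struct mnl_socket *nl;
	struct nlmsghdr *nlh;
	struct ifinfomsg *ifi;
	int err;

	nlh = mnl_nlmsg_put_header(buf);
	nlh->nlmsg_type = RTM_NEWLINK;
	nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
	ifi = mnl_nlmsg_put_extra_header(nlh, sizeof(*ifi));
	ifi->ifi_family = AF_UNSPEC;
	ifi->ifi_index = if_nametoindex(slave);

	/* IFLA_LINKINFO/IFLA_INFO_KIND "dsa" routes the request to
	 * dsa_changelink(); IFLA_INFO_DATA carries IFLA_DSA_MASTER.
	 */
	linkinfo = mnl_attr_nest_start(nlh, IFLA_LINKINFO);
	mnl_attr_put_str(nlh, IFLA_INFO_KIND, "dsa");
	data = mnl_attr_nest_start(nlh, IFLA_INFO_DATA);
	mnl_attr_put_u32(nlh, IFLA_DSA_MASTER, if_nametoindex(master));
	mnl_attr_nest_end(nlh, data);
	mnl_attr_nest_end(nlh, linkinfo);

	nl = mnl_socket_open(NETLINK_ROUTE);
	if (!nl)
		return -1;

	err = 0;
	if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0 ||
	    mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0)
		err = -1;

	mnl_socket_close(nl);
	return err;
}

The same operation is expressible as "ip link set dev swp0 type dsa master eth1" once iproute2 knows about the "dsa" link kind.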
+ */ +static int dsa_lag_master_validate(struct net_device *lag_dev, + struct netlink_ext_ack *extack) +{ + struct net_device *lower1, *lower2; + struct list_head *iter1, *iter2; + + netdev_for_each_lower_dev(lag_dev, lower1, iter1) { + netdev_for_each_lower_dev(lag_dev, lower2, iter2) { + if (!netdev_uses_dsa(lower1) || + !netdev_uses_dsa(lower2)) { + NL_SET_ERR_MSG_MOD(extack, + "All LAG ports must be eligible as DSA masters"); + return notifier_from_errno(-EINVAL); + } + + if (lower1 == lower2) + continue; + + if (!dsa_port_tree_same(lower1->dsa_ptr, + lower2->dsa_ptr)) { + NL_SET_ERR_MSG_MOD(extack, + "LAG contains DSA masters of disjoint switch trees"); + return notifier_from_errno(-EINVAL); + } + } + } + + return NOTIFY_DONE; +} + static int dsa_master_prechangeupper_sanity_check(struct net_device *master, struct netdev_notifier_changeupper_info *info) { - struct netlink_ext_ack *extack; + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info); if (!netdev_uses_dsa(master)) return NOTIFY_DONE; @@ -2721,13 +2874,51 @@ dsa_master_prechangeupper_sanity_check(struct net_device *master, if (netif_is_bridge_master(info->upper_dev)) return NOTIFY_DONE; - extack = netdev_notifier_info_to_extack(&info->info); + /* Allow LAG uppers, subject to further restrictions in + * dsa_lag_master_prechangelower_sanity_check() + */ + if (netif_is_lag_master(info->upper_dev)) + return dsa_lag_master_validate(info->upper_dev, extack); NL_SET_ERR_MSG_MOD(extack, "DSA master cannot join unknown upper interfaces"); return notifier_from_errno(-EBUSY); } +static int +dsa_lag_master_prechangelower_sanity_check(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct netlink_ext_ack *extack = netdev_notifier_info_to_extack(&info->info); + struct net_device *lag_dev = info->upper_dev; + struct net_device *lower; + struct list_head *iter; + + if (!netdev_uses_dsa(lag_dev) || !netif_is_lag_master(lag_dev)) + return NOTIFY_DONE; + + if (!info->linking) + return NOTIFY_DONE; + + if (!netdev_uses_dsa(dev)) { + NL_SET_ERR_MSG(extack, + "Only DSA masters can join a LAG DSA master"); + return notifier_from_errno(-EINVAL); + } + + netdev_for_each_lower_dev(lag_dev, lower, iter) { + if (!dsa_port_tree_same(dev->dsa_ptr, lower->dsa_ptr)) { + NL_SET_ERR_MSG(extack, + "Interface is DSA master for a different switch tree than this LAG"); + return notifier_from_errno(-EINVAL); + } + + break; + } + + return NOTIFY_DONE; +} + /* Don't allow bridging of DSA masters, since the bridge layer rx_handler * prevents the DSA fake ethertype handler to be invoked, so we don't get the * chance to strip off and parse the DSA switch tag protocol header (the bridge @@ -2768,6 +2959,136 @@ dsa_bridge_prechangelower_sanity_check(struct net_device *new_lower, return NOTIFY_DONE; } +static void dsa_tree_migrate_ports_from_lag_master(struct dsa_switch_tree *dst, + struct net_device *lag_dev) +{ + struct net_device *new_master = dsa_tree_find_first_master(dst); + struct dsa_port *dp; + int err; + + dsa_tree_for_each_user_port(dp, dst) { + if (dsa_port_to_master(dp) != lag_dev) + continue; + + err = dsa_slave_change_master(dp->slave, new_master, NULL); + if (err) { + netdev_err(dp->slave, + "failed to restore master to %s: %pe\n", + new_master->name, ERR_PTR(err)); + } + } +} + +static int dsa_master_lag_join(struct net_device *master, + struct net_device *lag_dev, + struct netdev_lag_upper_info *uinfo, + struct netlink_ext_ack *extack) +{ + struct dsa_port *cpu_dp = master->dsa_ptr; + struct 
dsa_switch_tree *dst = cpu_dp->dst; + struct dsa_port *dp; + int err; + + err = dsa_master_lag_setup(lag_dev, cpu_dp, uinfo, extack); + if (err) + return err; + + dsa_tree_for_each_user_port(dp, dst) { + if (dsa_port_to_master(dp) != master) + continue; + + err = dsa_slave_change_master(dp->slave, lag_dev, extack); + if (err) + goto restore; + } + + return 0; + +restore: + dsa_tree_for_each_user_port_continue_reverse(dp, dst) { + if (dsa_port_to_master(dp) != lag_dev) + continue; + + err = dsa_slave_change_master(dp->slave, master, NULL); + if (err) { + netdev_err(dp->slave, + "failed to restore master to %s: %pe\n", + master->name, ERR_PTR(err)); + } + } + + dsa_master_lag_teardown(lag_dev, master->dsa_ptr); + + return err; +} + +static void dsa_master_lag_leave(struct net_device *master, + struct net_device *lag_dev) +{ + struct dsa_port *dp, *cpu_dp = lag_dev->dsa_ptr; + struct dsa_switch_tree *dst = cpu_dp->dst; + struct dsa_port *new_cpu_dp = NULL; + struct net_device *lower; + struct list_head *iter; + + netdev_for_each_lower_dev(lag_dev, lower, iter) { + if (netdev_uses_dsa(lower)) { + new_cpu_dp = lower->dsa_ptr; + break; + } + } + + if (new_cpu_dp) { + /* Update the CPU port of the user ports still under the LAG + * so that dsa_port_to_master() continues to work properly + */ + dsa_tree_for_each_user_port(dp, dst) + if (dsa_port_to_master(dp) == lag_dev) + dp->cpu_dp = new_cpu_dp; + + /* Update the index of the virtual CPU port to match the lowest + * physical CPU port + */ + lag_dev->dsa_ptr = new_cpu_dp; + wmb(); + } else { + /* If the LAG DSA master has no ports left, migrate back all + * user ports to the first physical CPU port + */ + dsa_tree_migrate_ports_from_lag_master(dst, lag_dev); + } + + /* This DSA master has left its LAG in any case, so let + * the CPU port leave the hardware LAG as well + */ + dsa_master_lag_teardown(lag_dev, master->dsa_ptr); +} + +static int dsa_master_changeupper(struct net_device *dev, + struct netdev_notifier_changeupper_info *info) +{ + struct netlink_ext_ack *extack; + int err = NOTIFY_DONE; + + if (!netdev_uses_dsa(dev)) + return err; + + extack = netdev_notifier_info_to_extack(&info->info); + + if (netif_is_lag_master(info->upper_dev)) { + if (info->linking) { + err = dsa_master_lag_join(dev, info->upper_dev, + info->upper_info, extack); + err = notifier_from_errno(err); + } else { + dsa_master_lag_leave(dev, info->upper_dev); + err = NOTIFY_OK; + } + } + + return err; +} + static int dsa_slave_netdevice_event(struct notifier_block *nb, unsigned long event, void *ptr) { @@ -2786,6 +3107,10 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, if (notifier_to_errno(err)) return err; + err = dsa_lag_master_prechangelower_sanity_check(dev, info); + if (notifier_to_errno(err)) + return err; + err = dsa_bridge_prechangelower_sanity_check(dev, info); if (notifier_to_errno(err)) return err; @@ -2811,6 +3136,10 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, if (notifier_to_errno(err)) return err; + err = dsa_master_changeupper(dev, ptr); + if (notifier_to_errno(err)) + return err; + break; } case NETDEV_CHANGELOWERSTATE: { @@ -2818,12 +3147,21 @@ static int dsa_slave_netdevice_event(struct notifier_block *nb, struct dsa_port *dp; int err; - if (!dsa_slave_dev_check(dev)) - break; + if (dsa_slave_dev_check(dev)) { + dp = dsa_slave_to_port(dev); - dp = dsa_slave_to_port(dev); + err = dsa_port_lag_change(dp, info->lower_state_info); + } + + /* Mirror LAG port events on DSA masters that are in + * a LAG towards their 
respective switch CPU ports + */ + if (netdev_uses_dsa(dev)) { + dp = dev->dsa_ptr; + + err = dsa_port_lag_change(dp, info->lower_state_info); + } - err = dsa_port_lag_change(dp, info->lower_state_info); return notifier_from_errno(err); } case NETDEV_CHANGE: diff --git a/net/dsa/switch.c b/net/dsa/switch.c index 4dfd68cf61c5..ce56acdba203 100644 --- a/net/dsa/switch.c +++ b/net/dsa/switch.c @@ -398,8 +398,15 @@ static int dsa_switch_host_fdb_add(struct dsa_switch *ds, dsa_switch_for_each_port(dp, ds) { if (dsa_port_host_address_match(dp, info->dp)) { - err = dsa_port_do_fdb_add(dp, info->addr, info->vid, - info->db); + if (dsa_port_is_cpu(dp) && info->dp->cpu_port_in_lag) { + err = dsa_switch_do_lag_fdb_add(ds, dp->lag, + info->addr, + info->vid, + info->db); + } else { + err = dsa_port_do_fdb_add(dp, info->addr, + info->vid, info->db); + } if (err) break; } @@ -419,8 +426,15 @@ static int dsa_switch_host_fdb_del(struct dsa_switch *ds, dsa_switch_for_each_port(dp, ds) { if (dsa_port_host_address_match(dp, info->dp)) { - err = dsa_port_do_fdb_del(dp, info->addr, info->vid, - info->db); + if (dsa_port_is_cpu(dp) && info->dp->cpu_port_in_lag) { + err = dsa_switch_do_lag_fdb_del(ds, dp->lag, + info->addr, + info->vid, + info->db); + } else { + err = dsa_port_do_fdb_del(dp, info->addr, + info->vid, info->db); + } if (err) break; } @@ -507,12 +521,12 @@ static int dsa_switch_lag_join(struct dsa_switch *ds, { if (info->dp->ds == ds && ds->ops->port_lag_join) return ds->ops->port_lag_join(ds, info->dp->index, info->lag, - info->info); + info->info, info->extack); if (info->dp->ds != ds && ds->ops->crosschip_lag_join) return ds->ops->crosschip_lag_join(ds, info->dp->ds->index, info->dp->index, info->lag, - info->info); + info->info, info->extack); return -EOPNOTSUPP; } diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c index b5f80bc45ceb..34e5ec5d3e23 100644 --- a/net/dsa/tag_8021q.c +++ b/net/dsa/tag_8021q.c @@ -330,7 +330,7 @@ static int dsa_tag_8021q_port_setup(struct dsa_switch *ds, int port) if (!dsa_port_is_user(dp)) return 0; - master = dp->cpu_dp->master; + master = dsa_port_to_master(dp); err = dsa_port_tag_8021q_vlan_add(dp, vid, false); if (err) { @@ -359,7 +359,7 @@ static void dsa_tag_8021q_port_teardown(struct dsa_switch *ds, int port) if (!dsa_port_is_user(dp)) return; - master = dp->cpu_dp->master; + master = dsa_port_to_master(dp); dsa_port_tag_8021q_vlan_del(dp, vid, false); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index 9298eb3251cb..57e7238a4136 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -571,6 +571,7 @@ static int ethtool_get_link_ksettings(struct net_device *dev, = __ETHTOOL_LINK_MODE_MASK_NU32; link_ksettings.base.master_slave_cfg = MASTER_SLAVE_CFG_UNSUPPORTED; link_ksettings.base.master_slave_state = MASTER_SLAVE_STATE_UNSUPPORTED; + link_ksettings.base.rate_matching = RATE_MATCH_NONE; return store_link_ksettings_for_user(useraddr, &link_ksettings); } diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c index 99b29b4fe947..126e06c713a3 100644 --- a/net/ethtool/linkmodes.c +++ b/net/ethtool/linkmodes.c @@ -70,6 +70,7 @@ static int linkmodes_reply_size(const struct ethnl_req_info *req_base, + nla_total_size(sizeof(u32)) /* LINKMODES_SPEED */ + nla_total_size(sizeof(u32)) /* LINKMODES_LANES */ + nla_total_size(sizeof(u8)) /* LINKMODES_DUPLEX */ + + nla_total_size(sizeof(u8)) /* LINKMODES_RATE_MATCHING */ + 0; ret = ethnl_bitset_size(ksettings->link_modes.advertising, ksettings->link_modes.supported, @@ -143,6 +144,10 @@ static int 
linkmodes_fill_reply(struct sk_buff *skb, lsettings->master_slave_state)) return -EMSGSIZE; + if (nla_put_u8(skb, ETHTOOL_A_LINKMODES_RATE_MATCHING, + lsettings->rate_matching)) + return -EMSGSIZE; + return 0; } diff --git a/net/ethtool/tunnels.c b/net/ethtool/tunnels.c index efde33536687..67fb414ca859 100644 --- a/net/ethtool/tunnels.c +++ b/net/ethtool/tunnels.c @@ -136,6 +136,8 @@ ethnl_tunnel_info_fill_reply(const struct ethnl_req_info *req_base, goto err_cancel_table; entry = nla_nest_start(skb, ETHTOOL_A_TUNNEL_UDP_TABLE_ENTRY); + if (!entry) + goto err_cancel_entry; if (nla_put_be16(skb, ETHTOOL_A_TUNNEL_UDP_ENTRY_PORT, htons(IANA_VXLAN_UDP_PORT)) || diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index 718fb77bb372..7889e1ef7fad 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -200,8 +200,9 @@ static int raw_bind(struct sock *sk, struct sockaddr *_uaddr, int len) int err = 0; struct net_device *dev = NULL; - if (len < sizeof(*uaddr)) - return -EINVAL; + err = ieee802154_sockaddr_check_size(uaddr, len); + if (err < 0) + return err; uaddr = (struct sockaddr_ieee802154 *)_uaddr; if (uaddr->family != AF_IEEE802154) @@ -493,7 +494,8 @@ static int dgram_bind(struct sock *sk, struct sockaddr *uaddr, int len) ro->bound = 0; - if (len < sizeof(*addr)) + err = ieee802154_sockaddr_check_size(addr, len); + if (err < 0) goto out; if (addr->family != AF_IEEE802154) @@ -564,8 +566,9 @@ static int dgram_connect(struct sock *sk, struct sockaddr *uaddr, struct dgram_sock *ro = dgram_sk(sk); int err = 0; - if (len < sizeof(*addr)) - return -EINVAL; + err = ieee802154_sockaddr_check_size(addr, len); + if (err < 0) + return err; if (addr->family != AF_IEEE802154) return -EINVAL; @@ -604,6 +607,7 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) struct ieee802154_mac_cb *cb; struct dgram_sock *ro = dgram_sk(sk); struct ieee802154_addr dst_addr; + DECLARE_SOCKADDR(struct sockaddr_ieee802154*, daddr, msg->msg_name); int hlen, tlen; int err; @@ -612,10 +616,20 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) return -EOPNOTSUPP; } - if (!ro->connected && !msg->msg_name) - return -EDESTADDRREQ; - else if (ro->connected && msg->msg_name) - return -EISCONN; + if (msg->msg_name) { + if (ro->connected) + return -EISCONN; + if (msg->msg_namelen < IEEE802154_MIN_NAMELEN) + return -EINVAL; + err = ieee802154_sockaddr_check_size(daddr, msg->msg_namelen); + if (err < 0) + return err; + ieee802154_addr_from_sa(&dst_addr, &daddr->addr); + } else { + if (!ro->connected) + return -EDESTADDRREQ; + dst_addr = ro->dst_addr; + } if (!ro->bound) dev = dev_getfirstbyhwtype(sock_net(sk), ARPHRD_IEEE802154); @@ -651,16 +665,6 @@ static int dgram_sendmsg(struct sock *sk, struct msghdr *msg, size_t size) cb = mac_cb_init(skb); cb->type = IEEE802154_FC_TYPE_DATA; cb->ackreq = ro->want_ack; - - if (msg->msg_name) { - DECLARE_SOCKADDR(struct sockaddr_ieee802154*, - daddr, msg->msg_name); - - ieee802154_addr_from_sa(&dst_addr, &daddr->addr); - } else { - dst_addr = ro->dst_addr; - } - cb->secen = ro->secen; cb->secen_override = ro->secen_override; cb->seclevel = ro->seclevel; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index d3ab1ae32ef5..e2c219382345 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1250,7 +1250,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) } prev_addr_hashbucket = - inet_bhashfn_portaddr(sk->sk_prot->h.hashinfo, sk, + inet_bhashfn_portaddr(tcp_or_dccp_get_hashinfo(sk), sk, sock_net(sk), 
inet->inet_num); inet->inet_saddr = inet->inet_rcv_saddr = new_saddr; diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index f8ad04470d3a..ee4e578c7f20 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -471,30 +471,38 @@ static int ah4_err(struct sk_buff *skb, u32 info) return 0; } -static int ah_init_state(struct xfrm_state *x) +static int ah_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { struct ah_data *ahp = NULL; struct xfrm_algo_desc *aalg_desc; struct crypto_ahash *ahash; - if (!x->aalg) + if (!x->aalg) { + NL_SET_ERR_MSG(extack, "AH requires a state with an AUTH algorithm"); goto error; + } - if (x->encap) + if (x->encap) { + NL_SET_ERR_MSG(extack, "AH is not compatible with encapsulation"); goto error; + } ahp = kzalloc(sizeof(*ahp), GFP_KERNEL); if (!ahp) return -ENOMEM; ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0); - if (IS_ERR(ahash)) + if (IS_ERR(ahash)) { + NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto error; + } ahp->ahash = ahash; if (crypto_ahash_setkey(ahash, x->aalg->alg_key, - (x->aalg->alg_key_len + 7) / 8)) + (x->aalg->alg_key_len + 7) / 8)) { + NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto error; + } /* * Lookup the algorithm description maintained by xfrm_algo, @@ -507,10 +515,7 @@ static int ah_init_state(struct xfrm_state *x) if (aalg_desc->uinfo.auth.icv_fullbits/8 != crypto_ahash_digestsize(ahash)) { - pr_info("%s: %s digestsize %u != %u\n", - __func__, x->aalg->alg_name, - crypto_ahash_digestsize(ahash), - aalg_desc->uinfo.auth.icv_fullbits / 8); + NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto error; } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 5c03eba787e5..52c8047efedb 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -134,6 +134,7 @@ static void esp_free_tcp_sk(struct rcu_head *head) static struct sock *esp_find_tcp_sk(struct xfrm_state *x) { struct xfrm_encap_tmpl *encap = x->encap; + struct net *net = xs_net(x); struct esp_tcp_sk *esk; __be16 sport, dport; struct sock *nsk; @@ -160,7 +161,7 @@ static struct sock *esp_find_tcp_sk(struct xfrm_state *x) } spin_unlock_bh(&x->lock); - sk = inet_lookup_established(xs_net(x), &tcp_hashinfo, x->id.daddr.a4, + sk = inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, x->id.daddr.a4, dport, x->props.saddr.a4, sport, 0); if (!sk) return ERR_PTR(-ENOENT); @@ -1007,16 +1008,17 @@ static void esp_destroy(struct xfrm_state *x) crypto_free_aead(aead); } -static int esp_init_aead(struct xfrm_state *x) +static int esp_init_aead(struct xfrm_state *x, struct netlink_ext_ack *extack) { char aead_name[CRYPTO_MAX_ALG_NAME]; struct crypto_aead *aead; int err; - err = -ENAMETOOLONG; if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", - x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) - goto error; + x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) { + NL_SET_ERR_MSG(extack, "Algorithm name is too long"); + return -ENAMETOOLONG; + } aead = crypto_alloc_aead(aead_name, 0, 0); err = PTR_ERR(aead); @@ -1034,11 +1036,15 @@ static int esp_init_aead(struct xfrm_state *x) if (err) goto error; + return 0; + error: + NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); return err; } -static int esp_init_authenc(struct xfrm_state *x) +static int esp_init_authenc(struct xfrm_state *x, + struct netlink_ext_ack *extack) { struct crypto_aead *aead; struct crypto_authenc_key_param *param; @@ -1049,10 +1055,6 @@ static int 
esp_init_authenc(struct xfrm_state *x) unsigned int keylen; int err; - err = -EINVAL; - if (!x->ealg) - goto error; - err = -ENAMETOOLONG; if ((x->props.flags & XFRM_STATE_ESN)) { @@ -1061,22 +1063,28 @@ static int esp_init_authenc(struct xfrm_state *x) x->geniv ?: "", x->geniv ? "(" : "", x->aalg ? x->aalg->alg_name : "digest_null", x->ealg->alg_name, - x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) + x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) { + NL_SET_ERR_MSG(extack, "Algorithm name is too long"); goto error; + } } else { if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "%s%sauthenc(%s,%s)%s", x->geniv ?: "", x->geniv ? "(" : "", x->aalg ? x->aalg->alg_name : "digest_null", x->ealg->alg_name, - x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) + x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) { + NL_SET_ERR_MSG(extack, "Algorithm name is too long"); goto error; + } } aead = crypto_alloc_aead(authenc_name, 0, 0); err = PTR_ERR(aead); - if (IS_ERR(aead)) + if (IS_ERR(aead)) { + NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto error; + } x->data = aead; @@ -1106,17 +1114,16 @@ static int esp_init_authenc(struct xfrm_state *x) err = -EINVAL; if (aalg_desc->uinfo.auth.icv_fullbits / 8 != crypto_aead_authsize(aead)) { - pr_info("ESP: %s digestsize %u != %u\n", - x->aalg->alg_name, - crypto_aead_authsize(aead), - aalg_desc->uinfo.auth.icv_fullbits / 8); + NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto free_key; } err = crypto_aead_setauthsize( aead, x->aalg->alg_trunc_len / 8); - if (err) + if (err) { + NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto free_key; + } } param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8); @@ -1131,7 +1138,7 @@ error: return err; } -static int esp_init_state(struct xfrm_state *x) +static int esp_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { struct crypto_aead *aead; u32 align; @@ -1139,10 +1146,14 @@ static int esp_init_state(struct xfrm_state *x) x->data = NULL; - if (x->aead) - err = esp_init_aead(x); - else - err = esp_init_authenc(x); + if (x->aead) { + err = esp_init_aead(x, extack); + } else if (x->ealg) { + err = esp_init_authenc(x, extack); + } else { + NL_SET_ERR_MSG(extack, "ESP: AEAD or CRYPT must be provided"); + err = -EINVAL; + } if (err) goto error; @@ -1160,6 +1171,7 @@ static int esp_init_state(struct xfrm_state *x) switch (encap->encap_type) { default: + NL_SET_ERR_MSG(extack, "Unsupported encapsulation type for ESP"); err = -EINVAL; goto error; case UDP_ENCAP_ESPINUDP: diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index f0038043b661..ebca860e113f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -285,16 +285,14 @@ inet_csk_find_open_port(const struct sock *sk, struct inet_bind_bucket **tb_ret, struct inet_bind2_bucket **tb2_ret, struct inet_bind_hashbucket **head2_ret, int *port_ret) { - struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; - int port = 0; + struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); + int i, low, high, attempt_half, port, l3mdev; struct inet_bind_hashbucket *head, *head2; struct net *net = sock_net(sk); - bool relax = false; - int i, low, high, attempt_half; struct inet_bind2_bucket *tb2; struct inet_bind_bucket *tb; u32 remaining, offset; - int l3mdev; + bool relax = false; l3mdev = inet_sk_bound_l3mdev(sk); ports_exhausted: @@ -469,17 +467,16 @@ void inet_csk_update_fastreuse(struct 
inet_bind_bucket *tb, */ int inet_csk_get_port(struct sock *sk, unsigned short snum) { + struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); bool reuse = sk->sk_reuse && sk->sk_state != TCP_LISTEN; - struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; - int ret = 1, port = snum; - struct net *net = sock_net(sk); bool found_port = false, check_bind_conflict = true; bool bhash_created = false, bhash2_created = false; struct inet_bind_hashbucket *head, *head2; struct inet_bind2_bucket *tb2 = NULL; struct inet_bind_bucket *tb = NULL; bool head2_lock_acquired = false; - int l3mdev; + int ret = 1, port = snum, l3mdev; + struct net *net = sock_net(sk); l3mdev = inet_sk_bound_l3mdev(sk); @@ -909,14 +906,15 @@ static void reqsk_migrate_reset(struct request_sock *req) /* return true if req was found in the ehash table */ static bool reqsk_queue_unlink(struct request_sock *req) { - struct inet_hashinfo *hashinfo = req_to_sk(req)->sk_prot->h.hashinfo; + struct sock *sk = req_to_sk(req); bool found = false; - if (sk_hashed(req_to_sk(req))) { + if (sk_hashed(sk)) { + struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); spinlock_t *lock = inet_ehash_lockp(hashinfo, req->rsk_hash); spin_lock(lock); - found = __sk_nulls_del_node_init_rcu(req_to_sk(req)); + found = __sk_nulls_del_node_init_rcu(sk); spin_unlock(lock); } if (timer_pending(&req->rsk_timer) && del_timer_sync(&req->rsk_timer)) diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 60d77e234a68..dc1c5629cd0d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -109,6 +109,7 @@ static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb, tb->l3mdev = l3mdev; tb->port = port; #if IS_ENABLED(CONFIG_IPV6) + tb->family = sk->sk_family; if (sk->sk_family == AF_INET6) tb->v6_rcv_saddr = sk->sk_v6_rcv_saddr; else @@ -146,6 +147,9 @@ static bool inet_bind2_bucket_addr_match(const struct inet_bind2_bucket *tb2, const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family != tb2->family) + return false; + if (sk->sk_family == AF_INET6) return ipv6_addr_equal(&tb2->v6_rcv_saddr, &sk->sk_v6_rcv_saddr); @@ -168,14 +172,15 @@ void inet_bind_hash(struct sock *sk, struct inet_bind_bucket *tb, */ static void __inet_put_port(struct sock *sk) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - const int bhash = inet_bhashfn(sock_net(sk), inet_sk(sk)->inet_num, - hashinfo->bhash_size); - struct inet_bind_hashbucket *head = &hashinfo->bhash[bhash]; - struct inet_bind_hashbucket *head2 = - inet_bhashfn_portaddr(hashinfo, sk, sock_net(sk), - inet_sk(sk)->inet_num); + struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); + struct inet_bind_hashbucket *head, *head2; + struct net *net = sock_net(sk); struct inet_bind_bucket *tb; + int bhash; + + bhash = inet_bhashfn(net, inet_sk(sk)->inet_num, hashinfo->bhash_size); + head = &hashinfo->bhash[bhash]; + head2 = inet_bhashfn_portaddr(hashinfo, sk, net, inet_sk(sk)->inet_num); spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; @@ -207,19 +212,19 @@ EXPORT_SYMBOL(inet_put_port); int __inet_inherit_port(const struct sock *sk, struct sock *child) { - struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; + struct inet_hashinfo *table = tcp_or_dccp_get_hashinfo(sk); unsigned short port = inet_sk(child)->inet_num; - const int bhash = inet_bhashfn(sock_net(sk), port, - table->bhash_size); - struct inet_bind_hashbucket *head = &table->bhash[bhash]; - struct inet_bind_hashbucket *head2 = - inet_bhashfn_portaddr(table, child, 
sock_net(sk), port); + struct inet_bind_hashbucket *head, *head2; bool created_inet_bind_bucket = false; - bool update_fastreuse = false; struct net *net = sock_net(sk); + bool update_fastreuse = false; struct inet_bind2_bucket *tb2; struct inet_bind_bucket *tb; - int l3mdev; + int bhash, l3mdev; + + bhash = inet_bhashfn(net, port, table->bhash_size); + head = &table->bhash[bhash]; + head2 = inet_bhashfn_portaddr(table, child, net, port); spin_lock(&head->lock); spin_lock(&head2->lock); @@ -385,7 +390,7 @@ static inline struct sock *inet_lookup_run_bpf(struct net *net, struct sock *sk, *reuse_sk; bool no_reuseport; - if (hashinfo != &tcp_hashinfo) + if (hashinfo != net->ipv4.tcp_death_row.hashinfo) return NULL; /* only TCP is supported */ no_reuseport = bpf_sk_lookup_run_v4(net, IPPROTO_TCP, saddr, sport, @@ -628,9 +633,9 @@ static bool inet_ehash_lookup_by_sk(struct sock *sk, */ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - struct hlist_nulls_head *list; + struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); struct inet_ehash_bucket *head; + struct hlist_nulls_head *list; spinlock_t *lock; bool ret = true; @@ -700,7 +705,7 @@ static int inet_reuseport_add_sock(struct sock *sk, int __inet_hash(struct sock *sk, struct sock *osk) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); struct inet_listen_hashbucket *ilb2; int err = 0; @@ -746,7 +751,7 @@ EXPORT_SYMBOL_GPL(inet_hash); void inet_unhash(struct sock *sk) { - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; + struct inet_hashinfo *hashinfo = tcp_or_dccp_get_hashinfo(sk); if (sk_unhashed(sk)) return; @@ -790,6 +795,9 @@ static bool inet_bind2_bucket_match(const struct inet_bind2_bucket *tb, int l3mdev, const struct sock *sk) { #if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family != tb->family) + return false; + if (sk->sk_family == AF_INET6) return net_eq(ib2_net(tb), net) && tb->port == port && tb->l3mdev == l3mdev && @@ -806,6 +814,9 @@ bool inet_bind2_bucket_match_addr_any(const struct inet_bind2_bucket *tb, const #if IS_ENABLED(CONFIG_IPV6) struct in6_addr addr_any = {}; + if (sk->sk_family != tb->family) + return false; + if (sk->sk_family == AF_INET6) return net_eq(ib2_net(tb), net) && tb->port == port && tb->l3mdev == l3mdev && @@ -833,7 +844,7 @@ inet_bind2_bucket_find(const struct inet_bind_hashbucket *head, const struct net struct inet_bind_hashbucket * inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, int port) { - struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; + struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); u32 hash; #if IS_ENABLED(CONFIG_IPV6) struct in6_addr addr_any = {}; @@ -849,7 +860,7 @@ inet_bhash2_addr_any_hashbucket(const struct sock *sk, const struct net *net, in int inet_bhash2_update_saddr(struct inet_bind_hashbucket *prev_saddr, struct sock *sk) { - struct inet_hashinfo *hinfo = sk->sk_prot->h.hashinfo; + struct inet_hashinfo *hinfo = tcp_or_dccp_get_hashinfo(sk); struct inet_bind2_bucket *tb2, *new_tb2; int l3mdev = inet_sk_bound_l3mdev(sk); struct inet_bind_hashbucket *head2; @@ -947,8 +958,8 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, if (likely(remaining > 1)) remaining &= ~1U; - net_get_random_once(table_perturb, - INET_TABLE_PERTURB_SIZE * sizeof(*table_perturb)); + get_random_slow_once(table_perturb, + INET_TABLE_PERTURB_SIZE * 
sizeof(*table_perturb)); index = port_offset & (INET_TABLE_PERTURB_SIZE - 1); offset = READ_ONCE(table_perturb[index]) + (port_offset >> 32); @@ -1144,3 +1155,50 @@ int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo) return 0; } EXPORT_SYMBOL_GPL(inet_ehash_locks_alloc); + +struct inet_hashinfo *inet_pernet_hashinfo_alloc(struct inet_hashinfo *hashinfo, + unsigned int ehash_entries) +{ + struct inet_hashinfo *new_hashinfo; + int i; + + new_hashinfo = kmemdup(hashinfo, sizeof(*hashinfo), GFP_KERNEL); + if (!new_hashinfo) + goto err; + + new_hashinfo->ehash = vmalloc_huge(ehash_entries * sizeof(struct inet_ehash_bucket), + GFP_KERNEL_ACCOUNT); + if (!new_hashinfo->ehash) + goto free_hashinfo; + + new_hashinfo->ehash_mask = ehash_entries - 1; + + if (inet_ehash_locks_alloc(new_hashinfo)) + goto free_ehash; + + for (i = 0; i < ehash_entries; i++) + INIT_HLIST_NULLS_HEAD(&new_hashinfo->ehash[i].chain, i); + + new_hashinfo->pernet = true; + + return new_hashinfo; + +free_ehash: + vfree(new_hashinfo->ehash); +free_hashinfo: + kfree(new_hashinfo); +err: + return NULL; +} +EXPORT_SYMBOL_GPL(inet_pernet_hashinfo_alloc); + +void inet_pernet_hashinfo_free(struct inet_hashinfo *hashinfo) +{ + if (!hashinfo->pernet) + return; + + inet_ehash_locks_free(hashinfo); + vfree(hashinfo->ehash); + kfree(hashinfo); +} +EXPORT_SYMBOL_GPL(inet_pernet_hashinfo_free); diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c index 47ccc343c9fb..71d3bb0abf6c 100644 --- a/net/ipv4/inet_timewait_sock.c +++ b/net/ipv4/inet_timewait_sock.c @@ -59,9 +59,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw) inet_twsk_bind_unhash(tw, hashinfo); spin_unlock(&bhead->lock); - if (refcount_dec_and_test(&tw->tw_dr->tw_refcount)) - kfree(tw->tw_dr); - + refcount_dec(&tw->tw_dr->tw_refcount); inet_twsk_put(tw); } diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 8201cd423ff9..1ae83ad629b2 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -1043,7 +1043,7 @@ static int __ip_append_data(struct sock *sk, paged = true; zc = true; } else { - uarg->zerocopy = 0; + uarg_to_msgzc(uarg)->zerocopy = 0; skb_zcopy_set(skb, uarg, &extra_uref); } } diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index cc1caab4a654..92c02c886fe7 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -1079,3 +1079,70 @@ EXPORT_SYMBOL(ip_tunnel_parse_protocol); const struct header_ops ip_tunnel_header_ops = { .parse_protocol = ip_tunnel_parse_protocol }; EXPORT_SYMBOL(ip_tunnel_header_ops); + +/* This function returns true when ENCAP attributes are present in the nl msg */ +bool ip_tunnel_netlink_encap_parms(struct nlattr *data[], + struct ip_tunnel_encap *encap) +{ + bool ret = false; + + memset(encap, 0, sizeof(*encap)); + + if (!data) + return ret; + + if (data[IFLA_IPTUN_ENCAP_TYPE]) { + ret = true; + encap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]); + } + + if (data[IFLA_IPTUN_ENCAP_FLAGS]) { + ret = true; + encap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]); + } + + if (data[IFLA_IPTUN_ENCAP_SPORT]) { + ret = true; + encap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); + } + + if (data[IFLA_IPTUN_ENCAP_DPORT]) { + ret = true; + encap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); + } + + return ret; +} +EXPORT_SYMBOL_GPL(ip_tunnel_netlink_encap_parms); + +void ip_tunnel_netlink_parms(struct nlattr *data[], + struct ip_tunnel_parm *parms) +{ + if (data[IFLA_IPTUN_LINK]) + parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); + + if 
(data[IFLA_IPTUN_LOCAL]) + parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]); + + if (data[IFLA_IPTUN_REMOTE]) + parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]); + + if (data[IFLA_IPTUN_TTL]) { + parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]); + if (parms->iph.ttl) + parms->iph.frag_off = htons(IP_DF); + } + + if (data[IFLA_IPTUN_TOS]) + parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]); + + if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) + parms->iph.frag_off = htons(IP_DF); + + if (data[IFLA_IPTUN_FLAGS]) + parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); + + if (data[IFLA_IPTUN_PROTO]) + parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); +} +EXPORT_SYMBOL_GPL(ip_tunnel_netlink_parms); diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 366094c1ce6c..5a4fb2539b08 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -117,7 +117,8 @@ out: return err; } -static int ipcomp4_init_state(struct xfrm_state *x) +static int ipcomp4_init_state(struct xfrm_state *x, + struct netlink_ext_ack *extack) { int err = -EINVAL; @@ -129,17 +130,20 @@ static int ipcomp4_init_state(struct xfrm_state *x) x->props.header_len += sizeof(struct iphdr); break; default: + NL_SET_ERR_MSG(extack, "Unsupported XFRM mode for IPcomp"); goto out; } - err = ipcomp_init_state(x); + err = ipcomp_init_state(x, extack); if (err) goto out; if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp_tunnel_attach(x); - if (err) + if (err) { + NL_SET_ERR_MSG(extack, "Kernel error: failed to initialize the associated state"); goto out; + } } err = 0; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 123ea63a04cb..180f9daf5bec 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -417,29 +417,7 @@ static void ipip_netlink_parms(struct nlattr *data[], if (!data) return; - if (data[IFLA_IPTUN_LINK]) - parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); - - if (data[IFLA_IPTUN_LOCAL]) - parms->iph.saddr = nla_get_in_addr(data[IFLA_IPTUN_LOCAL]); - - if (data[IFLA_IPTUN_REMOTE]) - parms->iph.daddr = nla_get_in_addr(data[IFLA_IPTUN_REMOTE]); - - if (data[IFLA_IPTUN_TTL]) { - parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]); - if (parms->iph.ttl) - parms->iph.frag_off = htons(IP_DF); - } - - if (data[IFLA_IPTUN_TOS]) - parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]); - - if (data[IFLA_IPTUN_PROTO]) - parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); - - if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) - parms->iph.frag_off = htons(IP_DF); + ip_tunnel_netlink_parms(data, parms); if (data[IFLA_IPTUN_COLLECT_METADATA]) *collect_md = true; @@ -448,40 +426,6 @@ static void ipip_netlink_parms(struct nlattr *data[], *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); } -/* This function returns true when ENCAP attributes are present in the nl msg */ -static bool ipip_netlink_encap_parms(struct nlattr *data[], - struct ip_tunnel_encap *ipencap) -{ - bool ret = false; - - memset(ipencap, 0, sizeof(*ipencap)); - - if (!data) - return ret; - - if (data[IFLA_IPTUN_ENCAP_TYPE]) { - ret = true; - ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]); - } - - if (data[IFLA_IPTUN_ENCAP_FLAGS]) { - ret = true; - ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]); - } - - if (data[IFLA_IPTUN_ENCAP_SPORT]) { - ret = true; - ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); - } - - if (data[IFLA_IPTUN_ENCAP_DPORT]) { - ret = true; - ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); - } - - return ret; -} - static int 
ipip_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -491,7 +435,7 @@ static int ipip_newlink(struct net *src_net, struct net_device *dev, struct ip_tunnel_encap ipencap; __u32 fwmark = 0; - if (ipip_netlink_encap_parms(data, &ipencap)) { + if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { int err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) @@ -512,7 +456,7 @@ static int ipip_changelink(struct net_device *dev, struct nlattr *tb[], bool collect_md; __u32 fwmark = t->fwmark; - if (ipip_netlink_encap_parms(data, &ipencap)) { + if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { int err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 95eefbe2e142..e04544ac4b45 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1004,7 +1004,9 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { + rcu_read_lock(); ip_mr_forward(net, mrt, skb->dev, skb, c, 0); + rcu_read_unlock(); } } } diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 8cd3224d913e..8183bbcabb4a 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -33,7 +33,6 @@ static bool rpfilter_lookup_reverse(struct net *net, struct flowi4 *fl4, const struct net_device *dev, u8 flags) { struct fib_result res; - int ret __maybe_unused; if (fib_lookup(net, fl4, &res, FIB_LOOKUP_IGNORE_LINKSTATE)) return false; diff --git a/net/ipv4/netfilter/nf_nat_h323.c b/net/ipv4/netfilter/nf_nat_h323.c index a334f0dcc2d0..faee20af4856 100644 --- a/net/ipv4/netfilter/nf_nat_h323.c +++ b/net/ipv4/netfilter/nf_nat_h323.c @@ -291,20 +291,7 @@ static int nat_t120(struct sk_buff *skb, struct nf_conn *ct, exp->expectfn = nf_nat_follow_master; exp->dir = !dir; - /* Try to get same port: if not, try to change it. */ - for (; nated_port != 0; nated_port++) { - int ret; - - exp->tuple.dst.u.tcp.port = htons(nated_port); - ret = nf_ct_expect_related(exp, 0); - if (ret == 0) - break; - else if (ret != -EBUSY) { - nated_port = 0; - break; - } - } - + nated_port = nf_nat_exp_find_port(exp, nated_port); if (nated_port == 0) { /* No port available */ net_notice_ratelimited("nf_nat_h323: out of TCP ports\n"); return 0; @@ -347,20 +334,7 @@ static int nat_h245(struct sk_buff *skb, struct nf_conn *ct, if (info->sig_port[dir] == port) nated_port = ntohs(info->sig_port[!dir]); - /* Try to get same port: if not, try to change it. */ - for (; nated_port != 0; nated_port++) { - int ret; - - exp->tuple.dst.u.tcp.port = htons(nated_port); - ret = nf_ct_expect_related(exp, 0); - if (ret == 0) - break; - else if (ret != -EBUSY) { - nated_port = 0; - break; - } - } - + nated_port = nf_nat_exp_find_port(exp, nated_port); if (nated_port == 0) { /* No port available */ net_notice_ratelimited("nf_nat_q931: out of TCP ports\n"); return 0; @@ -439,20 +413,7 @@ static int nat_q931(struct sk_buff *skb, struct nf_conn *ct, if (info->sig_port[dir] == port) nated_port = ntohs(info->sig_port[!dir]); - /* Try to get same port: if not, try to change it. 
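The four nat_t120()/nat_h245()/nat_q931()/nat_callforwarding() hunks in nf_nat_h323.c all delete the same open-coded port-search loop in favor of a single nf_nat_exp_find_port() call. The helper itself is added outside this section; the sketch below is reconstructed from the removed loop, so the real function may differ in detail:

#include <linux/errno.h>
#include <net/netfilter/nf_conntrack_expect.h>

/* Try to keep the requested port; otherwise walk upward until an
 * expectation can be registered. Returns 0 when the search wraps
 * around or a non-EBUSY error occurs -- the removed loop's behavior. */
static u16 demo_nat_exp_find_port(struct nf_conntrack_expect *exp, u16 port)
{
	for (; port != 0; port++) {
		int ret;

		exp->tuple.dst.u.tcp.port = htons(port);
		ret = nf_ct_expect_related(exp, 0);
		if (ret == 0)
			return port;
		if (ret != -EBUSY)
			return 0;
	}
	return 0;	/* wrapped around: out of ports */
}

Folding the loop into one helper keeps the "same port if possible, next free port otherwise" policy in a single place for all four H.323 expectation types.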
*/ - for (; nated_port != 0; nated_port++) { - int ret; - - exp->tuple.dst.u.tcp.port = htons(nated_port); - ret = nf_ct_expect_related(exp, 0); - if (ret == 0) - break; - else if (ret != -EBUSY) { - nated_port = 0; - break; - } - } - + nated_port = nf_nat_exp_find_port(exp, nated_port); if (nated_port == 0) { /* No port available */ net_notice_ratelimited("nf_nat_ras: out of TCP ports\n"); return 0; @@ -532,20 +493,7 @@ static int nat_callforwarding(struct sk_buff *skb, struct nf_conn *ct, exp->expectfn = ip_nat_callforwarding_expect; exp->dir = !dir; - /* Try to get same port: if not, try to change it. */ - for (nated_port = ntohs(port); nated_port != 0; nated_port++) { - int ret; - - exp->tuple.dst.u.tcp.port = htons(nated_port); - ret = nf_ct_expect_related(exp, 0); - if (ret == 0) - break; - else if (ret != -EBUSY) { - nated_port = 0; - break; - } - } - + nated_port = nf_nat_exp_find_port(exp, ntohs(port)); if (nated_port == 0) { /* No port available */ net_notice_ratelimited("nf_nat_q931: out of TCP ports\n"); return 0; diff --git a/net/ipv4/netfilter/nf_socket_ipv4.c b/net/ipv4/netfilter/nf_socket_ipv4.c index 2d42e4c35a20..a1350fc25838 100644 --- a/net/ipv4/netfilter/nf_socket_ipv4.c +++ b/net/ipv4/netfilter/nf_socket_ipv4.c @@ -71,8 +71,8 @@ nf_socket_get_sock_v4(struct net *net, struct sk_buff *skb, const int doff, { switch (protocol) { case IPPROTO_TCP: - return inet_lookup(net, &tcp_hashinfo, skb, doff, - saddr, sport, daddr, dport, + return inet_lookup(net, net->ipv4.tcp_death_row.hashinfo, + skb, doff, saddr, sport, daddr, dport, in->ifindex); case IPPROTO_UDP: return udp4_lib_lookup(net, saddr, sport, daddr, dport, diff --git a/net/ipv4/netfilter/nf_tproxy_ipv4.c b/net/ipv4/netfilter/nf_tproxy_ipv4.c index b2bae0b0e42a..b22b2c745c76 100644 --- a/net/ipv4/netfilter/nf_tproxy_ipv4.c +++ b/net/ipv4/netfilter/nf_tproxy_ipv4.c @@ -79,6 +79,7 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, const struct net_device *in, const enum nf_tproxy_lookup_t lookup_type) { + struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo; struct sock *sk; switch (protocol) { @@ -92,12 +93,10 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, switch (lookup_type) { case NF_TPROXY_LOOKUP_LISTENER: - sk = inet_lookup_listener(net, &tcp_hashinfo, skb, - ip_hdrlen(skb) + - __tcp_hdrlen(hp), - saddr, sport, - daddr, dport, - in->ifindex, 0); + sk = inet_lookup_listener(net, hinfo, skb, + ip_hdrlen(skb) + __tcp_hdrlen(hp), + saddr, sport, daddr, dport, + in->ifindex, 0); if (sk && !refcount_inc_not_zero(&sk->sk_refcnt)) sk = NULL; @@ -108,9 +107,8 @@ nf_tproxy_get_sock_v4(struct net *net, struct sk_buff *skb, */ break; case NF_TPROXY_LOOKUP_ESTABLISHED: - sk = inet_lookup_established(net, &tcp_hashinfo, - saddr, sport, daddr, dport, - in->ifindex); + sk = inet_lookup_established(net, hinfo, saddr, sport, + daddr, dport, in->ifindex); break; default: BUG(); diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index b75cac69bd7e..7ade04ff972d 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -83,6 +83,9 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, else oif = NULL; + if (priv->flags & NFTA_FIB_F_IIF) + fl4.flowi4_oif = l3mdev_master_ifindex_rcu(oif); + if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { nft_fib_store_result(dest, priv, nft_in(pkt)); diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 0088a4c64d77..5386f460bd20 100644 --- 
a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -59,7 +59,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v) socket_seq_show(seq); seq_printf(seq, "TCP: inuse %d orphan %d tw %d alloc %d mem %ld\n", sock_prot_inuse_get(net, &tcp_prot), orphans, - refcount_read(&net->ipv4.tcp_death_row->tw_refcount) - 1, + refcount_read(&net->ipv4.tcp_death_row.tw_refcount) - 1, sockets, proto_memory_allocated(&tcp_prot)); seq_printf(seq, "UDP: inuse %d mem %ld\n", sock_prot_inuse_get(net, &udp_prot), diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5490c285668b..9b8a6db7a66b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -39,6 +39,7 @@ static u32 u32_max_div_HZ = UINT_MAX / HZ; static int one_day_secs = 24 * 3600; static u32 fib_multipath_hash_fields_all_mask __maybe_unused = FIB_MULTIPATH_HASH_FIELD_ALL_MASK; +static unsigned int tcp_child_ehash_entries_max = 16 * 1024 * 1024; /* obsolete */ static int sysctl_tcp_low_latency __read_mostly; @@ -382,6 +383,29 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, return ret; } +static int proc_tcp_ehash_entries(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos) +{ + struct net *net = container_of(table->data, struct net, + ipv4.sysctl_tcp_child_ehash_entries); + struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo; + int tcp_ehash_entries; + struct ctl_table tbl; + + tcp_ehash_entries = hinfo->ehash_mask + 1; + + /* A negative number indicates that the child netns + * shares the global ehash. + */ + if (!net_eq(net, &init_net) && !hinfo->pernet) + tcp_ehash_entries *= -1; + + tbl.data = &tcp_ehash_entries; + tbl.maxlen = sizeof(int); + + return proc_dointvec(&tbl, write, buffer, lenp, ppos); +} + #ifdef CONFIG_IP_ROUTE_MULTIPATH static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write, void *buffer, size_t *lenp, @@ -530,10 +554,9 @@ static struct ctl_table ipv4_table[] = { }; static struct ctl_table ipv4_net_table[] = { - /* tcp_max_tw_buckets must be first in this table. 
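The new tcp_child_ehash_entries knob lets a parent netns size the TCP ehash that its child netns will allocate, while the read-only tcp_ehash_entries handler reports the table actually in use, negating the value when the netns still shares the global hash. A hedged userspace sketch of reading that convention (assuming the sysctl lands at the usual /proc/sys/net/ipv4/ path implied by the register_net_sysctl() call in this file):

#include <stdio.h>

int main(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/tcp_ehash_entries", "r");
	int entries;

	if (!f || fscanf(f, "%d", &entries) != 1)
		return 1;
	fclose(f);
	if (entries < 0)	/* negative: shared global ehash */
		printf("global ehash, %d entries\n", -entries);
	else			/* positive: private per-netns table */
		printf("per-netns ehash, %d entries\n", entries);
	return 0;
}

Note that a written tcp_child_ehash_entries value is rounded up to a power of two by tcp_set_hashinfo() further down, so the reported size may exceed what was requested.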
*/ { .procname = "tcp_max_tw_buckets", -/* .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, */ + .data = &init_net.ipv4.tcp_death_row.sysctl_max_tw_buckets, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec @@ -1322,6 +1345,21 @@ static struct ctl_table ipv4_net_table[] = { .extra2 = SYSCTL_ONE, }, { + .procname = "tcp_ehash_entries", + .data = &init_net.ipv4.sysctl_tcp_child_ehash_entries, + .mode = 0444, + .proc_handler = proc_tcp_ehash_entries, + }, + { + .procname = "tcp_child_ehash_entries", + .data = &init_net.ipv4.sysctl_tcp_child_ehash_entries, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_douintvec_minmax, + .extra1 = SYSCTL_ZERO, + .extra2 = &tcp_child_ehash_entries_max, + }, + { .procname = "udp_rmem_min", .data = &init_net.ipv4.sysctl_udp_rmem_min, .maxlen = sizeof(init_net.ipv4.sysctl_udp_rmem_min), @@ -1361,8 +1399,7 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) if (!table) goto err_alloc; - /* skip first entry (sysctl_max_tw_buckets) */ - for (i = 1; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) { + for (i = 0; i < ARRAY_SIZE(ipv4_net_table) - 1; i++) { if (table[i].data) { /* Update the variables to point into * the current struct net @@ -1377,8 +1414,6 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) } } - table[0].data = &net->ipv4.tcp_death_row->sysctl_max_tw_buckets; - net->ipv4.ipv4_hdr = register_net_sysctl(net, "net/ipv4", table); if (!net->ipv4.ipv4_hdr) goto err_reg; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 52b8879e7d20..648b5c54bb32 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1015,7 +1015,7 @@ new_segment: skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { get_page(page); - skb_fill_page_desc(skb, i, page, offset, copy); + skb_fill_page_desc_noacc(skb, i, page, offset, copy); } if (!(flags & MSG_NO_SHARED_FRAGS)) @@ -1162,9 +1162,8 @@ void tcp_free_fastopen_req(struct tcp_sock *tp) } } -static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, - int *copied, size_t size, - struct ubuf_info *uarg) +int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int *copied, + size_t size, struct ubuf_info *uarg) { struct tcp_sock *tp = tcp_sk(sk); struct inet_sock *inet = inet_sk(sk); @@ -1239,7 +1238,7 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) } zc = sk->sk_route_caps & NETIF_F_SG; if (!zc) - uarg->zerocopy = 0; + uarg_to_msgzc(uarg)->zerocopy = 0; } } @@ -1761,19 +1760,28 @@ int tcp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) if (sk->sk_state == TCP_LISTEN) return -ENOTCONN; - skb = tcp_recv_skb(sk, seq, &offset); - if (!skb) - return 0; + while ((skb = tcp_recv_skb(sk, seq, &offset)) != NULL) { + u8 tcp_flags; + int used; - __skb_unlink(skb, &sk->sk_receive_queue); - WARN_ON(!skb_set_owner_sk_safe(skb, sk)); - copied = recv_actor(sk, skb); - if (copied >= 0) { - seq += copied; - if (TCP_SKB_CB(skb)->tcp_flags & TCPHDR_FIN) + __skb_unlink(skb, &sk->sk_receive_queue); + WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk)); + tcp_flags = TCP_SKB_CB(skb)->tcp_flags; + used = recv_actor(sk, skb); + consume_skb(skb); + if (used < 0) { + if (!copied) + copied = used; + break; + } + seq += used; + copied += used; + + if (tcp_flags & TCPHDR_FIN) { ++seq; + break; + } } - consume_skb(skb); WRITE_ONCE(tp->copied_seq, seq); tcp_rcv_space_adjust(sk); @@ -4790,6 +4798,7 @@ void __init tcp_init(void) INIT_HLIST_HEAD(&tcp_hashinfo.bhash2[i].chain); } + tcp_hashinfo.pernet = false; cnt = tcp_hashinfo.ehash_mask + 1; 
sysctl_tcp_max_orphans = cnt / 2; diff --git a/net/ipv4/tcp_diag.c b/net/ipv4/tcp_diag.c index 75a1c985f49a..01b50fa79189 100644 --- a/net/ipv4/tcp_diag.c +++ b/net/ipv4/tcp_diag.c @@ -181,13 +181,21 @@ static size_t tcp_diag_get_aux_size(struct sock *sk, bool net_admin) static void tcp_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r) { - inet_diag_dump_icsk(&tcp_hashinfo, skb, cb, r); + struct inet_hashinfo *hinfo; + + hinfo = sock_net(cb->skb->sk)->ipv4.tcp_death_row.hashinfo; + + inet_diag_dump_icsk(hinfo, skb, cb, r); } static int tcp_diag_dump_one(struct netlink_callback *cb, const struct inet_diag_req_v2 *req) { - return inet_diag_dump_one_icsk(&tcp_hashinfo, cb, req); + struct inet_hashinfo *hinfo; + + hinfo = sock_net(cb->skb->sk)->ipv4.tcp_death_row.hashinfo; + + return inet_diag_dump_one_icsk(hinfo, cb, req); } #ifdef CONFIG_INET_DIAG_DESTROY @@ -195,9 +203,13 @@ static int tcp_diag_destroy(struct sk_buff *in_skb, const struct inet_diag_req_v2 *req) { struct net *net = sock_net(in_skb->sk); - struct sock *sk = inet_diag_find_one_icsk(net, &tcp_hashinfo, req); + struct inet_hashinfo *hinfo; + struct sock *sk; int err; + hinfo = net->ipv4.tcp_death_row.hashinfo; + sk = inet_diag_find_one_icsk(net, hinfo, req); + if (IS_ERR(sk)) return PTR_ERR(sk); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index b85a9f755da4..bc2ea12221f9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2513,6 +2513,21 @@ static inline bool tcp_may_undo(const struct tcp_sock *tp) return tp->undo_marker && (!tp->undo_retrans || tcp_packet_delayed(tp)); } +static bool tcp_is_non_sack_preventing_reopen(struct sock *sk) +{ + struct tcp_sock *tp = tcp_sk(sk); + + if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { + /* Hold old state until something *above* high_seq + * is ACKed. For Reno it is MUST to prevent false + * fast retransmits (RFC2582). SACK TCP is safe. */ + if (!tcp_any_retrans_done(sk)) + tp->retrans_stamp = 0; + return true; + } + return false; +} + /* People celebrate: "We love our President!" */ static bool tcp_try_undo_recovery(struct sock *sk) { @@ -2535,14 +2550,8 @@ static bool tcp_try_undo_recovery(struct sock *sk) } else if (tp->rack.reo_wnd_persist) { tp->rack.reo_wnd_persist--; } - if (tp->snd_una == tp->high_seq && tcp_is_reno(tp)) { - /* Hold old state until something *above* high_seq - * is ACKed. For Reno it is MUST to prevent false - * fast retransmits (RFC2582). SACK TCP is safe. 
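tcp_is_non_sack_preventing_reopen() factors the duplicated Reno "hold state" rule out of tcp_try_undo_recovery() so that tcp_try_undo_loss() can apply the same test. A self-contained userspace model of the rule, with toy_tp standing in for the relevant tcp_sock fields (names simplified, for illustration only):

#include <stdbool.h>
#include <stdio.h>

struct toy_tp {
	unsigned int snd_una;
	unsigned int high_seq;
	bool sack_enabled;
	bool retrans_done;
	unsigned int retrans_stamp;
};

/* Reno must hold the old CA state until something above high_seq is
 * cumulatively ACKed (RFC 2582); SACK flows may reopen immediately. */
static bool non_sack_preventing_reopen(struct toy_tp *tp)
{
	if (tp->snd_una == tp->high_seq && !tp->sack_enabled) {
		if (!tp->retrans_done)
			tp->retrans_stamp = 0;
		return true;
	}
	return false;
}

int main(void)
{
	struct toy_tp tp = { .snd_una = 100, .high_seq = 100 };

	printf("hold=%d\n", non_sack_preventing_reopen(&tp)); /* 1: Reno */
	tp.sack_enabled = true;
	printf("hold=%d\n", non_sack_preventing_reopen(&tp)); /* 0: SACK */
	return 0;
}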
*/ - if (!tcp_any_retrans_done(sk)) - tp->retrans_stamp = 0; + if (tcp_is_non_sack_preventing_reopen(sk)) return true; - } tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; return false; @@ -2578,6 +2587,8 @@ static bool tcp_try_undo_loss(struct sock *sk, bool frto_undo) NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPSPURIOUSRTOS); inet_csk(sk)->icsk_retransmits = 0; + if (tcp_is_non_sack_preventing_reopen(sk)) + return true; if (frto_undo || tcp_is_sack(tp)) { tcp_set_ca_state(sk, TCP_CA_Open); tp->is_sack_reneg = 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 01b31f5c7aba..6376ad915765 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -201,15 +201,16 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; + struct inet_timewait_death_row *tcp_death_row; __be32 daddr, nexthop, prev_sk_rcv_saddr; struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); + struct ip_options_rcu *inet_opt; + struct net *net = sock_net(sk); __be16 orig_sport, orig_dport; struct flowi4 *fl4; struct rtable *rt; int err; - struct ip_options_rcu *inet_opt; - struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row; if (addr_len < sizeof(struct sockaddr_in)) return -EINVAL; @@ -235,7 +236,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (IS_ERR(rt)) { err = PTR_ERR(rt); if (err == -ENETUNREACH) - IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTNOROUTES); + IP_INC_STATS(net, IPSTATS_MIB_OUTNOROUTES); return err; } @@ -247,11 +248,12 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (!inet_opt || !inet_opt->opt.srr) daddr = fl4->daddr; + tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; + if (!inet->inet_saddr) { if (inet_csk(sk)->icsk_bind2_hash) { - prev_addr_hashbucket = inet_bhashfn_portaddr(&tcp_hashinfo, - sk, sock_net(sk), - inet->inet_num); + prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo, + sk, net, inet->inet_num); prev_sk_rcv_saddr = sk->sk_rcv_saddr; } inet->inet_saddr = fl4->saddr; @@ -317,8 +319,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_daddr, inet->inet_sport, usin->sin_port)); - tp->tsoffset = secure_tcp_ts_off(sock_net(sk), - inet->inet_saddr, + tp->tsoffset = secure_tcp_ts_off(net, inet->inet_saddr, inet->inet_daddr); } @@ -494,9 +495,9 @@ int tcp_v4_err(struct sk_buff *skb, u32 info) int err; struct net *net = dev_net(skb->dev); - sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr, - th->dest, iph->saddr, ntohs(th->source), - inet_iif(skb), 0); + sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, + iph->daddr, th->dest, iph->saddr, + ntohs(th->source), inet_iif(skb), 0); if (!sk) { __ICMP_INC_STATS(net, ICMP_MIB_INERRORS); return -ENOENT; @@ -759,8 +760,8 @@ static void tcp_v4_send_reset(const struct sock *sk, struct sk_buff *skb) * Incoming packet is checked with md5 hash with finding key, * no RST generated if md5 hash doesn't match. 
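From here on, every socket lookup in tcp_ipv4.c resolves the hash table through net->ipv4.tcp_death_row.hashinfo rather than the global &tcp_hashinfo, so packets arriving in a netns with a private ehash are matched against the right table. The distilled pattern, lifted from the tcp_v4_early_demux() hunk below (a sketch, not a verbatim kernel function):

#include <net/inet_hashtables.h>
#include <net/ip.h>

static struct sock *demo_demux(struct net *net, struct sk_buff *skb,
			       const struct iphdr *iph,
			       const struct tcphdr *th)
{
	/* per-netns table if allocated, otherwise aliases tcp_hashinfo */
	struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo;

	return __inet_lookup_established(net, hinfo,
					 iph->saddr, th->source,
					 iph->daddr, ntohs(th->dest),
					 skb->skb_iif, inet_sdif(skb));
}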
*/ - sk1 = __inet_lookup_listener(net, &tcp_hashinfo, NULL, 0, - ip_hdr(skb)->saddr, + sk1 = __inet_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, + NULL, 0, ip_hdr(skb)->saddr, th->source, ip_hdr(skb)->daddr, ntohs(th->source), dif, sdif); /* don't send rst if it can't find key */ @@ -1728,6 +1729,7 @@ EXPORT_SYMBOL(tcp_v4_do_rcv); int tcp_v4_early_demux(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); const struct iphdr *iph; const struct tcphdr *th; struct sock *sk; @@ -1744,7 +1746,7 @@ int tcp_v4_early_demux(struct sk_buff *skb) if (th->doff < sizeof(struct tcphdr) / 4) return 0; - sk = __inet_lookup_established(dev_net(skb->dev), &tcp_hashinfo, + sk = __inet_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, iph->saddr, th->source, iph->daddr, ntohs(th->dest), skb->skb_iif, inet_sdif(skb)); @@ -1970,7 +1972,8 @@ int tcp_v4_rcv(struct sk_buff *skb) th = (const struct tcphdr *)skb->data; iph = ip_hdr(skb); lookup: - sk = __inet_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), th->source, + sk = __inet_lookup_skb(net->ipv4.tcp_death_row.hashinfo, + skb, __tcp_hdrlen(th), th->source, th->dest, sdif, &refcounted); if (!sk) goto no_tcp_socket; @@ -2152,9 +2155,9 @@ do_time_wait: } switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) { case TCP_TW_SYN: { - struct sock *sk2 = inet_lookup_listener(dev_net(skb->dev), - &tcp_hashinfo, skb, - __tcp_hdrlen(th), + struct sock *sk2 = inet_lookup_listener(net, + net->ipv4.tcp_death_row.hashinfo, + skb, __tcp_hdrlen(th), iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb), @@ -2304,15 +2307,16 @@ static bool seq_sk_match(struct seq_file *seq, const struct sock *sk) */ static void *listening_get_first(struct seq_file *seq) { + struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; struct tcp_iter_state *st = seq->private; st->offset = 0; - for (; st->bucket <= tcp_hashinfo.lhash2_mask; st->bucket++) { + for (; st->bucket <= hinfo->lhash2_mask; st->bucket++) { struct inet_listen_hashbucket *ilb2; struct hlist_nulls_node *node; struct sock *sk; - ilb2 = &tcp_hashinfo.lhash2[st->bucket]; + ilb2 = &hinfo->lhash2[st->bucket]; if (hlist_nulls_empty(&ilb2->nulls_head)) continue; @@ -2337,6 +2341,7 @@ static void *listening_get_next(struct seq_file *seq, void *cur) struct tcp_iter_state *st = seq->private; struct inet_listen_hashbucket *ilb2; struct hlist_nulls_node *node; + struct inet_hashinfo *hinfo; struct sock *sk = cur; ++st->num; @@ -2348,7 +2353,8 @@ static void *listening_get_next(struct seq_file *seq, void *cur) return sk; } - ilb2 = &tcp_hashinfo.lhash2[st->bucket]; + hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; + ilb2 = &hinfo->lhash2[st->bucket]; spin_unlock(&ilb2->lock); ++st->bucket; return listening_get_first(seq); @@ -2370,9 +2376,10 @@ static void *listening_get_idx(struct seq_file *seq, loff_t *pos) return rc; } -static inline bool empty_bucket(const struct tcp_iter_state *st) +static inline bool empty_bucket(struct inet_hashinfo *hinfo, + const struct tcp_iter_state *st) { - return hlist_nulls_empty(&tcp_hashinfo.ehash[st->bucket].chain); + return hlist_nulls_empty(&hinfo->ehash[st->bucket].chain); } /* @@ -2381,20 +2388,21 @@ static inline bool empty_bucket(const struct tcp_iter_state *st) */ static void *established_get_first(struct seq_file *seq) { + struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; struct tcp_iter_state *st = seq->private; st->offset = 0; - for (; st->bucket <= tcp_hashinfo.ehash_mask; ++st->bucket) { + for (; 
st->bucket <= hinfo->ehash_mask; ++st->bucket) { struct sock *sk; struct hlist_nulls_node *node; - spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket); + spinlock_t *lock = inet_ehash_lockp(hinfo, st->bucket); /* Lockless fast path for the common case of empty buckets */ - if (empty_bucket(st)) + if (empty_bucket(hinfo, st)) continue; spin_lock_bh(lock); - sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) { + sk_nulls_for_each(sk, node, &hinfo->ehash[st->bucket].chain) { if (seq_sk_match(seq, sk)) return sk; } @@ -2406,9 +2414,10 @@ static void *established_get_first(struct seq_file *seq) static void *established_get_next(struct seq_file *seq, void *cur) { - struct sock *sk = cur; - struct hlist_nulls_node *node; + struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; struct tcp_iter_state *st = seq->private; + struct hlist_nulls_node *node; + struct sock *sk = cur; ++st->num; ++st->offset; @@ -2420,7 +2429,7 @@ static void *established_get_next(struct seq_file *seq, void *cur) return sk; } - spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket)); ++st->bucket; return established_get_first(seq); } @@ -2458,6 +2467,7 @@ static void *tcp_get_idx(struct seq_file *seq, loff_t pos) static void *tcp_seek_last_pos(struct seq_file *seq) { + struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; struct tcp_iter_state *st = seq->private; int bucket = st->bucket; int offset = st->offset; @@ -2466,7 +2476,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq) switch (st->state) { case TCP_SEQ_STATE_LISTENING: - if (st->bucket > tcp_hashinfo.lhash2_mask) + if (st->bucket > hinfo->lhash2_mask) break; st->state = TCP_SEQ_STATE_LISTENING; rc = listening_get_first(seq); @@ -2478,7 +2488,7 @@ static void *tcp_seek_last_pos(struct seq_file *seq) st->state = TCP_SEQ_STATE_ESTABLISHED; fallthrough; case TCP_SEQ_STATE_ESTABLISHED: - if (st->bucket > tcp_hashinfo.ehash_mask) + if (st->bucket > hinfo->ehash_mask) break; rc = established_get_first(seq); while (offset-- && rc && bucket == st->bucket) @@ -2546,16 +2556,17 @@ EXPORT_SYMBOL(tcp_seq_next); void tcp_seq_stop(struct seq_file *seq, void *v) { + struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; struct tcp_iter_state *st = seq->private; switch (st->state) { case TCP_SEQ_STATE_LISTENING: if (v != SEQ_START_TOKEN) - spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock); + spin_unlock(&hinfo->lhash2[st->bucket].lock); break; case TCP_SEQ_STATE_ESTABLISHED: if (v) - spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket)); break; } } @@ -2750,6 +2761,7 @@ static int bpf_iter_tcp_realloc_batch(struct bpf_tcp_iter_state *iter, static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq, struct sock *start_sk) { + struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; struct bpf_tcp_iter_state *iter = seq->private; struct tcp_iter_state *st = &iter->state; struct hlist_nulls_node *node; @@ -2769,7 +2781,7 @@ static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq, expected++; } } - spin_unlock(&tcp_hashinfo.lhash2[st->bucket].lock); + spin_unlock(&hinfo->lhash2[st->bucket].lock); return expected; } @@ -2777,6 +2789,7 @@ static unsigned int bpf_iter_tcp_listening_batch(struct seq_file *seq, static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq, struct sock *start_sk) { + struct 
inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; struct bpf_tcp_iter_state *iter = seq->private; struct tcp_iter_state *st = &iter->state; struct hlist_nulls_node *node; @@ -2796,13 +2809,14 @@ static unsigned int bpf_iter_tcp_established_batch(struct seq_file *seq, expected++; } } - spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket)); + spin_unlock_bh(inet_ehash_lockp(hinfo, st->bucket)); return expected; } static struct sock *bpf_iter_tcp_batch(struct seq_file *seq) { + struct inet_hashinfo *hinfo = seq_file_net(seq)->ipv4.tcp_death_row.hashinfo; struct bpf_tcp_iter_state *iter = seq->private; struct tcp_iter_state *st = &iter->state; unsigned int expected; @@ -2818,7 +2832,7 @@ static struct sock *bpf_iter_tcp_batch(struct seq_file *seq) st->offset = 0; st->bucket++; if (st->state == TCP_SEQ_STATE_LISTENING && - st->bucket > tcp_hashinfo.lhash2_mask) { + st->bucket > hinfo->lhash2_mask) { st->state = TCP_SEQ_STATE_ESTABLISHED; st->bucket = 0; } @@ -3083,7 +3097,7 @@ struct proto tcp_prot = { .slab_flags = SLAB_TYPESAFE_BY_RCU, .twsk_prot = &tcp_timewait_sock_ops, .rsk_prot = &tcp_request_sock_ops, - .h.hashinfo = &tcp_hashinfo, + .h.hashinfo = NULL, .no_autobind = true, .diag_destroy = tcp_abort, }; @@ -3091,19 +3105,43 @@ EXPORT_SYMBOL(tcp_prot); static void __net_exit tcp_sk_exit(struct net *net) { - struct inet_timewait_death_row *tcp_death_row = net->ipv4.tcp_death_row; - if (net->ipv4.tcp_congestion_control) bpf_module_put(net->ipv4.tcp_congestion_control, net->ipv4.tcp_congestion_control->owner); - if (refcount_dec_and_test(&tcp_death_row->tw_refcount)) - kfree(tcp_death_row); } -static int __net_init tcp_sk_init(struct net *net) +static void __net_init tcp_set_hashinfo(struct net *net) { - int cnt; + struct inet_hashinfo *hinfo; + unsigned int ehash_entries; + struct net *old_net; + if (net_eq(net, &init_net)) + goto fallback; + + old_net = current->nsproxy->net_ns; + ehash_entries = READ_ONCE(old_net->ipv4.sysctl_tcp_child_ehash_entries); + if (!ehash_entries) + goto fallback; + + ehash_entries = roundup_pow_of_two(ehash_entries); + hinfo = inet_pernet_hashinfo_alloc(&tcp_hashinfo, ehash_entries); + if (!hinfo) { + pr_warn("Failed to allocate TCP ehash (entries: %u) " + "for a netns, fallback to the global one\n", + ehash_entries); +fallback: + hinfo = &tcp_hashinfo; + ehash_entries = tcp_hashinfo.ehash_mask + 1; + } + + net->ipv4.tcp_death_row.hashinfo = hinfo; + net->ipv4.tcp_death_row.sysctl_max_tw_buckets = ehash_entries / 2; + net->ipv4.sysctl_max_syn_backlog = max(128U, ehash_entries / 128); +} + +static int __net_init tcp_sk_init(struct net *net) +{ net->ipv4.sysctl_tcp_ecn = 2; net->ipv4.sysctl_tcp_ecn_fallback = 1; @@ -3129,15 +3167,9 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_tw_reuse = 2; net->ipv4.sysctl_tcp_no_ssthresh_metrics_save = 1; - net->ipv4.tcp_death_row = kzalloc(sizeof(struct inet_timewait_death_row), GFP_KERNEL); - if (!net->ipv4.tcp_death_row) - return -ENOMEM; - refcount_set(&net->ipv4.tcp_death_row->tw_refcount, 1); - cnt = tcp_hashinfo.ehash_mask + 1; - net->ipv4.tcp_death_row->sysctl_max_tw_buckets = cnt / 2; - net->ipv4.tcp_death_row->hashinfo = &tcp_hashinfo; + refcount_set(&net->ipv4.tcp_death_row.tw_refcount, 1); + tcp_set_hashinfo(net); - net->ipv4.sysctl_max_syn_backlog = max(128, cnt / 128); net->ipv4.sysctl_tcp_sack = 1; net->ipv4.sysctl_tcp_window_scaling = 1; net->ipv4.sysctl_tcp_timestamps = 1; @@ -3199,10 +3231,13 @@ static void __net_exit tcp_sk_exit_batch(struct list_head 
*net_exit_list) { struct net *net; - inet_twsk_purge(&tcp_hashinfo, AF_INET); + tcp_twsk_purge(net_exit_list, AF_INET); - list_for_each_entry(net, net_exit_list, exit_list) + list_for_each_entry(net, net_exit_list, exit_list) { + inet_pernet_hashinfo_free(net->ipv4.tcp_death_row.hashinfo); + WARN_ON_ONCE(!refcount_dec_and_test(&net->ipv4.tcp_death_row.tw_refcount)); tcp_fastopen_ctx_destroy(net); + } } static struct pernet_operations __net_initdata tcp_sk_ops = { diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index ddcdc2bc4c04..79f30f026d89 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -247,10 +247,10 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) { const struct inet_connection_sock *icsk = inet_csk(sk); const struct tcp_sock *tp = tcp_sk(sk); + struct net *net = sock_net(sk); struct inet_timewait_sock *tw; - struct inet_timewait_death_row *tcp_death_row = sock_net(sk)->ipv4.tcp_death_row; - tw = inet_twsk_alloc(sk, tcp_death_row, state); + tw = inet_twsk_alloc(sk, &net->ipv4.tcp_death_row, state); if (tw) { struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); @@ -319,14 +319,14 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) /* Linkage updates. * Note that access to tw after this point is illegal. */ - inet_twsk_hashdance(tw, sk, &tcp_hashinfo); + inet_twsk_hashdance(tw, sk, net->ipv4.tcp_death_row.hashinfo); local_bh_enable(); } else { /* Sorry, if we're out of memory, just CLOSE this * socket up. We've got bigger problems than * non-graceful socket closings. */ - NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEWAITOVERFLOW); + NET_INC_STATS(net, LINUX_MIB_TCPTIMEWAITOVERFLOW); } tcp_update_metrics(sk); @@ -347,6 +347,26 @@ void tcp_twsk_destructor(struct sock *sk) } EXPORT_SYMBOL_GPL(tcp_twsk_destructor); +void tcp_twsk_purge(struct list_head *net_exit_list, int family) +{ + bool purged_once = false; + struct net *net; + + list_for_each_entry(net, net_exit_list, exit_list) { + /* The last refcount is decremented in tcp_sk_exit_batch() */ + if (refcount_read(&net->ipv4.tcp_death_row.tw_refcount) == 1) + continue; + + if (net->ipv4.tcp_death_row.hashinfo->pernet) { + inet_twsk_purge(net->ipv4.tcp_death_row.hashinfo, family); + } else if (!purged_once) { + inet_twsk_purge(&tcp_hashinfo, family); + purged_once = true; + } + } +} +EXPORT_SYMBOL_GPL(tcp_twsk_purge); + /* Warning : This function is called without sk_listener being locked. * Be sure to read socket fields once, as their value could change under us. */ diff --git a/net/ipv4/tcp_offload.c b/net/ipv4/tcp_offload.c index a844a0d38482..45dda7889387 100644 --- a/net/ipv4/tcp_offload.c +++ b/net/ipv4/tcp_offload.c @@ -255,7 +255,15 @@ found: mss = skb_shinfo(p)->gso_size; - flush |= (len - 1) >= mss; + /* If skb is a GRO packet, make sure its gso_size matches prior packet mss. + * If it is a single frame, do not aggregate it if its length + * is bigger than our mss. + */ + if (unlikely(skb_is_gso(skb))) + flush |= (mss != skb_shinfo(skb)->gso_size); + else + flush |= (len - 1) >= mss; + flush |= (ntohl(th2->seq) + skb_gro_len(p)) ^ ntohl(th->seq); #ifdef CONFIG_TLS_DEVICE flush |= p->decrypted ^ skb->decrypted; @@ -269,7 +277,12 @@ found: tcp_flag_word(th2) |= flags & (TCP_FLAG_FIN | TCP_FLAG_PSH); out_check_final: - flush = len < mss; + /* Force a flush if last segment is smaller than mss. 
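These two tcp_offload.c hunks tighten GRO when the incoming skb is itself already GSO: merging is refused unless its gso_size equals the prior packet's mss, and the final check forces a flush unless the total length is an exact multiple of gso_size. Restated as self-contained predicates over plain integers (illustrative only):

#include <stdbool.h>

/* merge-time flush test: GSO input must match the prior mss exactly */
static bool flush_merge(bool is_gso, unsigned int len,
			unsigned int mss, unsigned int gso_size)
{
	return is_gso ? (gso_size != mss) : (len - 1 >= mss);
}

/* out_check_final: GSO input must end on a whole segment boundary */
static bool flush_final(bool is_gso, unsigned int len, unsigned int mss,
			unsigned int gso_size, unsigned int segs)
{
	return is_gso ? (len != segs * gso_size) : (len < mss);
}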
*/ + if (unlikely(skb_is_gso(skb))) + flush = len != NAPI_GRO_CB(skb)->count * skb_shinfo(skb)->gso_size; + else + flush = len < mss; + flush |= (__force int)(flags & (TCP_FLAG_URG | TCP_FLAG_PSH | TCP_FLAG_RST | TCP_FLAG_SYN | TCP_FLAG_FIN)); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 34eda973bbf1..d63118ce5900 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -783,6 +783,8 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) */ if (tunnel) { /* ...not for tunnels though: we don't have a sending socket */ + if (udp_sk(sk)->encap_err_rcv) + udp_sk(sk)->encap_err_rcv(sk, skb, iph->ihl << 2); goto out; } if (!inet->recverr) { @@ -1799,41 +1801,29 @@ EXPORT_SYMBOL(__skb_recv_udp); int udp_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { - int copied = 0; - - while (1) { - struct sk_buff *skb; - int err, used; - - skb = skb_recv_udp(sk, MSG_DONTWAIT, &err); - if (!skb) - return err; + struct sk_buff *skb; + int err, copied; - if (udp_lib_checksum_complete(skb)) { - __UDP_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, - IS_UDPLITE(sk)); - __UDP_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, - IS_UDPLITE(sk)); - atomic_inc(&sk->sk_drops); - kfree_skb(skb); - continue; - } +try_again: + skb = skb_recv_udp(sk, MSG_DONTWAIT, &err); + if (!skb) + return err; - WARN_ON(!skb_set_owner_sk_safe(skb, sk)); - used = recv_actor(sk, skb); - if (used <= 0) { - if (!copied) - copied = used; - kfree_skb(skb); - break; - } else if (used <= skb->len) { - copied += used; - } + if (udp_lib_checksum_complete(skb)) { + int is_udplite = IS_UDPLITE(sk); + struct net *net = sock_net(sk); + __UDP_INC_STATS(net, UDP_MIB_CSUMERRORS, is_udplite); + __UDP_INC_STATS(net, UDP_MIB_INERRORS, is_udplite); + atomic_inc(&sk->sk_drops); kfree_skb(skb); - break; + goto try_again; } + WARN_ON_ONCE(!skb_set_owner_sk_safe(skb, sk)); + copied = recv_actor(sk, skb); + kfree_skb(skb); + return copied; } EXPORT_SYMBOL(udp_read_skb); diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 8efaf8c3fe2a..8242c8947340 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -72,6 +72,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, udp_sk(sk)->encap_type = cfg->encap_type; udp_sk(sk)->encap_rcv = cfg->encap_rcv; + udp_sk(sk)->encap_err_rcv = cfg->encap_err_rcv; udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup; udp_sk(sk)->encap_destroy = cfg->encap_destroy; udp_sk(sk)->gro_receive = cfg->gro_receive; diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index 9d4f418f1bf8..8489fa106583 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -22,13 +22,17 @@ static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb) return ip_hdr(skb)->protocol; } -static int ipip_init_state(struct xfrm_state *x) +static int ipip_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { - if (x->props.mode != XFRM_MODE_TUNNEL) + if (x->props.mode != XFRM_MODE_TUNNEL) { + NL_SET_ERR_MSG(extack, "IPv4 tunnel can only be used with tunnel mode"); return -EINVAL; + } - if (x->encap) + if (x->encap) { + NL_SET_ERR_MSG(extack, "IPv4 tunnel is not compatible with encapsulation"); return -EINVAL; + } x->props.header_len = sizeof(struct iphdr); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e15f64f22fa8..10ce86bf228e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3557,11 +3557,15 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, fallthrough; case NETDEV_UP: case 
NETDEV_CHANGE: - if (dev->flags & IFF_SLAVE) + if (idev && idev->cnf.disable_ipv6) break; - if (idev && idev->cnf.disable_ipv6) + if (dev->flags & IFF_SLAVE) { + if (event == NETDEV_UP && !IS_ERR_OR_NULL(idev) && + dev->flags & IFF_UP && dev->flags & IFF_MULTICAST) + ipv6_mc_up(idev); break; + } if (event == NETDEV_UP) { /* restore routes for permanent addresses */ diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 19732b5dce23..d40b7d60e00e 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -1072,13 +1072,13 @@ static int __init inet6_init(void) for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) INIT_LIST_HEAD(r); + raw_hashinfo_init(&raw_v6_hashinfo); + if (disable_ipv6_mod) { pr_info("Loaded, but administratively disabled, reboot required to enable\n"); goto out; } - raw_hashinfo_init(&raw_v6_hashinfo); - err = proto_register(&tcpv6_prot, 1); if (err) goto out; diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index b5995c1f4d7a..5228d2716289 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -666,30 +666,38 @@ static int ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } -static int ah6_init_state(struct xfrm_state *x) +static int ah6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { struct ah_data *ahp = NULL; struct xfrm_algo_desc *aalg_desc; struct crypto_ahash *ahash; - if (!x->aalg) + if (!x->aalg) { + NL_SET_ERR_MSG(extack, "AH requires a state with an AUTH algorithm"); goto error; + } - if (x->encap) + if (x->encap) { + NL_SET_ERR_MSG(extack, "AH is not compatible with encapsulation"); goto error; + } ahp = kzalloc(sizeof(*ahp), GFP_KERNEL); if (!ahp) return -ENOMEM; ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0); - if (IS_ERR(ahash)) + if (IS_ERR(ahash)) { + NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto error; + } ahp->ahash = ahash; if (crypto_ahash_setkey(ahash, x->aalg->alg_key, - (x->aalg->alg_key_len + 7) / 8)) + (x->aalg->alg_key_len + 7) / 8)) { + NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto error; + } /* * Lookup the algorithm description maintained by xfrm_algo, @@ -702,9 +710,7 @@ static int ah6_init_state(struct xfrm_state *x) if (aalg_desc->uinfo.auth.icv_fullbits/8 != crypto_ahash_digestsize(ahash)) { - pr_info("AH: %s digestsize %u != %u\n", - x->aalg->alg_name, crypto_ahash_digestsize(ahash), - aalg_desc->uinfo.auth.icv_fullbits/8); + NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto error; } @@ -721,6 +727,7 @@ static int ah6_init_state(struct xfrm_state *x) x->props.header_len += sizeof(struct ipv6hdr); break; default: + NL_SET_ERR_MSG(extack, "Invalid mode requested for AH, must be one of TRANSPORT, TUNNEL, BEET"); goto error; } x->data = ahp; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 8220923a12f7..14ed868680c6 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -151,6 +151,7 @@ static void esp_free_tcp_sk(struct rcu_head *head) static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) { struct xfrm_encap_tmpl *encap = x->encap; + struct net *net = xs_net(x); struct esp_tcp_sk *esk; __be16 sport, dport; struct sock *nsk; @@ -177,7 +178,7 @@ static struct sock *esp6_find_tcp_sk(struct xfrm_state *x) } spin_unlock_bh(&x->lock); - sk = __inet6_lookup_established(xs_net(x), &tcp_hashinfo, &x->id.daddr.in6, + sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &x->id.daddr.in6, dport, &x->props.saddr.in6, ntohs(sport), 0, 0); if (!sk) return 
ERR_PTR(-ENOENT); @@ -1050,16 +1051,17 @@ static void esp6_destroy(struct xfrm_state *x) crypto_free_aead(aead); } -static int esp_init_aead(struct xfrm_state *x) +static int esp_init_aead(struct xfrm_state *x, struct netlink_ext_ack *extack) { char aead_name[CRYPTO_MAX_ALG_NAME]; struct crypto_aead *aead; int err; - err = -ENAMETOOLONG; if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", - x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) - goto error; + x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) { + NL_SET_ERR_MSG(extack, "Algorithm name is too long"); + return -ENAMETOOLONG; + } aead = crypto_alloc_aead(aead_name, 0, 0); err = PTR_ERR(aead); @@ -1077,11 +1079,15 @@ static int esp_init_aead(struct xfrm_state *x) if (err) goto error; + return 0; + error: + NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); return err; } -static int esp_init_authenc(struct xfrm_state *x) +static int esp_init_authenc(struct xfrm_state *x, + struct netlink_ext_ack *extack) { struct crypto_aead *aead; struct crypto_authenc_key_param *param; @@ -1092,10 +1098,6 @@ static int esp_init_authenc(struct xfrm_state *x) unsigned int keylen; int err; - err = -EINVAL; - if (!x->ealg) - goto error; - err = -ENAMETOOLONG; if ((x->props.flags & XFRM_STATE_ESN)) { @@ -1104,22 +1106,28 @@ static int esp_init_authenc(struct xfrm_state *x) x->geniv ?: "", x->geniv ? "(" : "", x->aalg ? x->aalg->alg_name : "digest_null", x->ealg->alg_name, - x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) + x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) { + NL_SET_ERR_MSG(extack, "Algorithm name is too long"); goto error; + } } else { if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, "%s%sauthenc(%s,%s)%s", x->geniv ?: "", x->geniv ? "(" : "", x->aalg ? x->aalg->alg_name : "digest_null", x->ealg->alg_name, - x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) + x->geniv ? 
")" : "") >= CRYPTO_MAX_ALG_NAME) { + NL_SET_ERR_MSG(extack, "Algorithm name is too long"); goto error; + } } aead = crypto_alloc_aead(authenc_name, 0, 0); err = PTR_ERR(aead); - if (IS_ERR(aead)) + if (IS_ERR(aead)) { + NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto error; + } x->data = aead; @@ -1149,17 +1157,16 @@ static int esp_init_authenc(struct xfrm_state *x) err = -EINVAL; if (aalg_desc->uinfo.auth.icv_fullbits / 8 != crypto_aead_authsize(aead)) { - pr_info("ESP: %s digestsize %u != %u\n", - x->aalg->alg_name, - crypto_aead_authsize(aead), - aalg_desc->uinfo.auth.icv_fullbits / 8); + NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto free_key; } err = crypto_aead_setauthsize( aead, x->aalg->alg_trunc_len / 8); - if (err) + if (err) { + NL_SET_ERR_MSG(extack, "Kernel was unable to initialize cryptographic operations"); goto free_key; + } } param->enckeylen = cpu_to_be32((x->ealg->alg_key_len + 7) / 8); @@ -1174,7 +1181,7 @@ error: return err; } -static int esp6_init_state(struct xfrm_state *x) +static int esp6_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { struct crypto_aead *aead; u32 align; @@ -1182,10 +1189,14 @@ static int esp6_init_state(struct xfrm_state *x) x->data = NULL; - if (x->aead) - err = esp_init_aead(x); - else - err = esp_init_authenc(x); + if (x->aead) { + err = esp_init_aead(x, extack); + } else if (x->ealg) { + err = esp_init_authenc(x, extack); + } else { + NL_SET_ERR_MSG(extack, "ESP: AEAD or CRYPT must be provided"); + err = -EINVAL; + } if (err) goto error; @@ -1213,6 +1224,7 @@ static int esp6_init_state(struct xfrm_state *x) switch (encap->encap_type) { default: + NL_SET_ERR_MSG(extack, "Unsupported encapsulation type for ESP"); err = -EINVAL; goto error; case UDP_ENCAP_ESPINUDP: diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 7d53d62783b1..b64b49012655 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -21,8 +21,6 @@ #include <net/ip.h> #include <net/sock_reuseport.h> -extern struct inet_hashinfo tcp_hashinfo; - u32 inet6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, const struct in6_addr *faddr, const __be16 fport) @@ -169,7 +167,7 @@ static inline struct sock *inet6_lookup_run_bpf(struct net *net, struct sock *sk, *reuse_sk; bool no_reuseport; - if (hashinfo != &tcp_hashinfo) + if (hashinfo != net->ipv4.tcp_death_row.hashinfo) return NULL; /* only TCP is supported */ no_reuseport = bpf_sk_lookup_run_v6(net, IPPROTO_TCP, saddr, sport, diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index d37a8c97e6de..3ee345672849 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -219,7 +219,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, off = skb_gro_offset(skb); hlen = off + sizeof(*iph); - iph = skb_gro_header_slow(skb, hlen, off); + iph = skb_gro_header(skb, hlen, off); if (unlikely(!iph)) goto out; @@ -232,7 +232,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *ipv6_gro_receive(struct list_head *head, proto = iph->nexthdr; ops = rcu_dereference(inet6_offloads[proto]); if (!ops || !ops->callbacks.gro_receive) { - __pskb_pull(skb, skb_gro_offset(skb)); + pskb_pull(skb, skb_gro_offset(skb)); skb_gro_frag0_invalidate(skb); proto = ipv6_gso_pull_exthdrs(skb, proto); skb_gro_pull(skb, -skb_transport_offset(skb)); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a60176e913a8..e19507614f64 100644 --- 
a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1567,7 +1567,7 @@ emsgsize: paged = true; zc = true; } else { - uarg->zerocopy = 0; + uarg_to_msgzc(uarg)->zerocopy = 0; skb_zcopy_set(skb, uarg, &extra_uref); } } diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9e97f3b4c7e8..cc5d5e75b658 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1988,39 +1988,6 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], parms->fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); } -static bool ip6_tnl_netlink_encap_parms(struct nlattr *data[], - struct ip_tunnel_encap *ipencap) -{ - bool ret = false; - - memset(ipencap, 0, sizeof(*ipencap)); - - if (!data) - return ret; - - if (data[IFLA_IPTUN_ENCAP_TYPE]) { - ret = true; - ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]); - } - - if (data[IFLA_IPTUN_ENCAP_FLAGS]) { - ret = true; - ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]); - } - - if (data[IFLA_IPTUN_ENCAP_SPORT]) { - ret = true; - ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); - } - - if (data[IFLA_IPTUN_ENCAP_DPORT]) { - ret = true; - ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); - } - - return ret; -} - static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[], struct netlink_ext_ack *extack) @@ -2033,7 +2000,7 @@ static int ip6_tnl_newlink(struct net *src_net, struct net_device *dev, nt = netdev_priv(dev); - if (ip6_tnl_netlink_encap_parms(data, &ipencap)) { + if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { err = ip6_tnl_encap_setup(nt, &ipencap); if (err < 0) return err; @@ -2070,7 +2037,7 @@ static int ip6_tnl_changelink(struct net_device *dev, struct nlattr *tb[], if (dev == ip6n->fb_tnl_dev) return -EINVAL; - if (ip6_tnl_netlink_encap_parms(data, &ipencap)) { + if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { int err = ip6_tnl_encap_setup(t, &ipencap); if (err < 0) diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 1ddd60d06830..151337d7f67b 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -154,7 +154,7 @@ vti6_tnl_link(struct vti6_net *ip6n, struct ip6_tnl *t) { struct ip6_tnl __rcu **tp = vti6_tnl_bucket(ip6n, &t->parms); - rcu_assign_pointer(t->next , rtnl_dereference(*tp)); + rcu_assign_pointer(t->next, rtnl_dereference(*tp)); rcu_assign_pointer(*tp, t); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 516e83b52f26..facdc78a43e5 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1028,8 +1028,11 @@ static void ip6mr_cache_resolve(struct net *net, struct mr_table *mrt, ((struct nlmsgerr *)nlmsg_data(nlh))->error = -EMSGSIZE; } rtnl_unicast(skb, net, NETLINK_CB(skb).portid); - } else + } else { + rcu_read_lock(); ip6_mr_forward(net, mrt, skb->dev, skb, c); + rcu_read_unlock(); + } } } diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 15f984be3570..72d4858dec18 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -136,7 +136,8 @@ out: return err; } -static int ipcomp6_init_state(struct xfrm_state *x) +static int ipcomp6_init_state(struct xfrm_state *x, + struct netlink_ext_ack *extack) { int err = -EINVAL; @@ -148,17 +149,20 @@ static int ipcomp6_init_state(struct xfrm_state *x) x->props.header_len += sizeof(struct ipv6hdr); break; default: + NL_SET_ERR_MSG(extack, "Unsupported XFRM mode for IPcomp"); goto out; } - err = ipcomp_init_state(x); + err = ipcomp_init_state(x, extack); if (err) goto out; if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); - if (err) + if 
(err) { + NL_SET_ERR_MSG(extack, "Kernel error: failed to initialize the associated state"); goto out; + } } err = 0; diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index aeb35d26e474..83d2a8be263f 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -247,15 +247,14 @@ static int mip6_destopt_reject(struct xfrm_state *x, struct sk_buff *skb, return err; } -static int mip6_destopt_init_state(struct xfrm_state *x) +static int mip6_destopt_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { if (x->id.spi) { - pr_info("%s: spi is not 0: %u\n", __func__, x->id.spi); + NL_SET_ERR_MSG(extack, "SPI must be 0"); return -EINVAL; } if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { - pr_info("%s: state's mode is not %u: %u\n", - __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); + NL_SET_ERR_MSG(extack, "XFRM mode must be XFRM_MODE_ROUTEOPTIMIZATION"); return -EINVAL; } @@ -333,15 +332,14 @@ static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb) return 0; } -static int mip6_rthdr_init_state(struct xfrm_state *x) +static int mip6_rthdr_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { if (x->id.spi) { - pr_info("%s: spi is not 0: %u\n", __func__, x->id.spi); + NL_SET_ERR_MSG(extack, "SPI must be 0"); return -EINVAL; } if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { - pr_info("%s: state's mode is not %u: %u\n", - __func__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); + NL_SET_ERR_MSG(extack, "XFRM mode must be XFRM_MODE_ROUTEOPTIMIZATION"); return -EINVAL; } diff --git a/net/ipv6/netfilter/nf_socket_ipv6.c b/net/ipv6/netfilter/nf_socket_ipv6.c index aa5bb8789ba0..a7690ec62325 100644 --- a/net/ipv6/netfilter/nf_socket_ipv6.c +++ b/net/ipv6/netfilter/nf_socket_ipv6.c @@ -83,8 +83,8 @@ nf_socket_get_sock_v6(struct net *net, struct sk_buff *skb, int doff, { switch (protocol) { case IPPROTO_TCP: - return inet6_lookup(net, &tcp_hashinfo, skb, doff, - saddr, sport, daddr, dport, + return inet6_lookup(net, net->ipv4.tcp_death_row.hashinfo, + skb, doff, saddr, sport, daddr, dport, in->ifindex); case IPPROTO_UDP: return udp6_lib_lookup(net, saddr, sport, daddr, dport, diff --git a/net/ipv6/netfilter/nf_tproxy_ipv6.c b/net/ipv6/netfilter/nf_tproxy_ipv6.c index 6bac68fb27a3..929502e51203 100644 --- a/net/ipv6/netfilter/nf_tproxy_ipv6.c +++ b/net/ipv6/netfilter/nf_tproxy_ipv6.c @@ -80,6 +80,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, const struct net_device *in, const enum nf_tproxy_lookup_t lookup_type) { + struct inet_hashinfo *hinfo = net->ipv4.tcp_death_row.hashinfo; struct sock *sk; switch (protocol) { @@ -93,7 +94,7 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, switch (lookup_type) { case NF_TPROXY_LOOKUP_LISTENER: - sk = inet6_lookup_listener(net, &tcp_hashinfo, skb, + sk = inet6_lookup_listener(net, hinfo, skb, thoff + __tcp_hdrlen(hp), saddr, sport, daddr, ntohs(dport), @@ -108,9 +109,8 @@ nf_tproxy_get_sock_v6(struct net *net, struct sk_buff *skb, int thoff, */ break; case NF_TPROXY_LOOKUP_ESTABLISHED: - sk = __inet6_lookup_established(net, &tcp_hashinfo, - saddr, sport, daddr, ntohs(dport), - in->ifindex, 0); + sk = __inet6_lookup_established(net, hinfo, saddr, sport, daddr, + ntohs(dport), in->ifindex, 0); break; default: BUG(); diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 8970d0b4faeb..1d7e520d9966 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -41,6 +41,9 @@ static int nft_fib6_flowi_init(struct 
flowi6 *fl6, const struct nft_fib *priv, if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) { lookup_flags |= RT6_LOOKUP_F_IFACE; fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev); + } else if ((priv->flags & NFTA_FIB_F_IIF) && + (netif_is_l3_master(dev) || netif_is_l3_slave(dev))) { + fl6->flowi6_oif = dev->ifindex; } if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST) @@ -197,7 +200,8 @@ void nft_fib6_eval(const struct nft_expr *expr, struct nft_regs *regs, if (rt->rt6i_flags & (RTF_REJECT | RTF_ANYCAST | RTF_LOCAL)) goto put_rt_err; - if (oif && oif != rt->rt6i_idev->dev) + if (oif && oif != rt->rt6i_idev->dev && + l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) != oif->ifindex) goto put_rt_err; nft_fib_store_result(dest, priv, rt->rt6i_idev->dev); diff --git a/net/ipv6/seg6.c b/net/ipv6/seg6.c index 5421cc7c935f..29346a6eec9f 100644 --- a/net/ipv6/seg6.c +++ b/net/ipv6/seg6.c @@ -191,6 +191,11 @@ static int seg6_genl_sethmac(struct sk_buff *skb, struct genl_info *info) goto out_unlock; } + if (slen > nla_len(info->attrs[SEG6_ATTR_SECRET])) { + err = -EINVAL; + goto out_unlock; + } + if (hinfo) { err = seg6_hmac_info_del(net, hmackeyid); if (err) diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c index b7de5e46fdd8..8370726ae7bf 100644 --- a/net/ipv6/seg6_local.c +++ b/net/ipv6/seg6_local.c @@ -73,6 +73,55 @@ struct bpf_lwt_prog { char *name; }; +/* default length values (expressed in bits) for both Locator-Block and + * Locator-Node Function. + * + * Both SEG6_LOCAL_LCBLOCK_DBITS and SEG6_LOCAL_LCNODE_FN_DBITS *must* be: + * i) greater than 0; + * ii) evenly divisible by 8. In other terms, the lengths of the + * Locator-Block and Locator-Node Function must be byte-aligned (we can + * relax this constraint in the future if really needed). + * + * Moreover, a third condition must hold: + * iii) SEG6_LOCAL_LCBLOCK_DBITS + SEG6_LOCAL_LCNODE_FN_DBITS <= 128. + * + * The correctness of SEG6_LOCAL_LCBLOCK_DBITS and SEG6_LOCAL_LCNODE_FN_DBITS + * values are checked during the kernel compilation. If the compilation stops, + * check the value of these parameters to see if they meet conditions (i), (ii) + * and (iii). + */ +#define SEG6_LOCAL_LCBLOCK_DBITS 32 +#define SEG6_LOCAL_LCNODE_FN_DBITS 16 + +/* The following next_csid_chk_{cntr,lcblock,lcblock_fn}_bits macros can be + * used directly to check whether the lengths (in bits) of Locator-Block and + * Locator-Node Function are valid according to (i), (ii), (iii). 
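 * For instance, the default split of a 32-bit Locator-Block and a 16-bit
 * Locator-Node Function passes all three checks: both lengths are
 * non-zero multiples of 8, neither exceeds 120 bits, and their sum (48)
 * leaves an 80-bit DA.Argument within the 128-bit C-SID container.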
+ */ +#define next_csid_chk_cntr_bits(blen, flen) \ + ((blen) + (flen) > 128) + +#define next_csid_chk_lcblock_bits(blen) \ +({ \ + typeof(blen) __tmp = blen; \ + (!__tmp || __tmp > 120 || (__tmp & 0x07)); \ +}) + +#define next_csid_chk_lcnode_fn_bits(flen) \ + next_csid_chk_lcblock_bits(flen) + +/* Supported Flavor operations are reported in this bitmask */ +#define SEG6_LOCAL_FLV_SUPP_OPS (BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID)) + +struct seg6_flavors_info { + /* Flavor operations */ + __u32 flv_ops; + + /* Locator-Block length, expressed in bits */ + __u8 lcblock_bits; + /* Locator-Node Function length, expressed in bits*/ + __u8 lcnode_func_bits; +}; + enum seg6_end_dt_mode { DT_INVALID_MODE = -EINVAL, DT_LEGACY_MODE = 0, @@ -136,6 +185,8 @@ struct seg6_local_lwt { #ifdef CONFIG_NET_L3_MASTER_DEV struct seg6_end_dt_info dt_info; #endif + struct seg6_flavors_info flv_info; + struct pcpu_seg6_local_counters __percpu *pcpu_counters; int headroom; @@ -271,8 +322,50 @@ int seg6_lookup_nexthop(struct sk_buff *skb, return seg6_lookup_any_nexthop(skb, nhaddr, tbl_id, false); } -/* regular endpoint function */ -static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt) +static __u8 seg6_flv_lcblock_octects(const struct seg6_flavors_info *finfo) +{ + return finfo->lcblock_bits >> 3; +} + +static __u8 seg6_flv_lcnode_func_octects(const struct seg6_flavors_info *finfo) +{ + return finfo->lcnode_func_bits >> 3; +} + +static bool seg6_next_csid_is_arg_zero(const struct in6_addr *addr, + const struct seg6_flavors_info *finfo) +{ + __u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo); + __u8 blk_octects = seg6_flv_lcblock_octects(finfo); + __u8 arg_octects; + int i; + + arg_octects = 16 - blk_octects - fnc_octects; + for (i = 0; i < arg_octects; ++i) { + if (addr->s6_addr[blk_octects + fnc_octects + i] != 0x00) + return false; + } + + return true; +} + +/* assume that DA.Argument length > 0 */ +static void seg6_next_csid_advance_arg(struct in6_addr *addr, + const struct seg6_flavors_info *finfo) +{ + __u8 fnc_octects = seg6_flv_lcnode_func_octects(finfo); + __u8 blk_octects = seg6_flv_lcblock_octects(finfo); + + /* advance DA.Argument */ + memmove(&addr->s6_addr[blk_octects], + &addr->s6_addr[blk_octects + fnc_octects], + 16 - blk_octects - fnc_octects); + + memset(&addr->s6_addr[16 - fnc_octects], 0x00, fnc_octects); +} + +static int input_action_end_core(struct sk_buff *skb, + struct seg6_local_lwt *slwt) { struct ipv6_sr_hdr *srh; @@ -291,6 +384,38 @@ drop: return -EINVAL; } +static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + const struct seg6_flavors_info *finfo = &slwt->flv_info; + struct in6_addr *daddr = &ipv6_hdr(skb)->daddr; + + if (seg6_next_csid_is_arg_zero(daddr, finfo)) + return input_action_end_core(skb, slwt); + + /* update DA */ + seg6_next_csid_advance_arg(daddr, finfo); + + seg6_lookup_nexthop(skb, NULL, 0); + + return dst_input(skb); +} + +static bool seg6_next_csid_enabled(__u32 fops) +{ + return fops & BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID); +} + +/* regular endpoint function */ +static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + const struct seg6_flavors_info *finfo = &slwt->flv_info; + + if (seg6_next_csid_enabled(finfo->flv_ops)) + return end_next_csid_core(skb, slwt); + + return input_action_end_core(skb, slwt); +} + /* regular endpoint, and forward to specified nexthop */ static int input_action_end_x(struct sk_buff *skb, struct seg6_local_lwt *slwt) { @@ -951,7 +1076,8 @@ static struct 
seg6_action_desc seg6_action_table[] = { { .action = SEG6_LOCAL_ACTION_END, .attrs = 0, - .optattrs = SEG6_F_LOCAL_COUNTERS, + .optattrs = SEG6_F_LOCAL_COUNTERS | + SEG6_F_ATTR(SEG6_LOCAL_FLAVORS), .input = input_action_end, }, { @@ -1132,9 +1258,11 @@ static const struct nla_policy seg6_local_policy[SEG6_LOCAL_MAX + 1] = { [SEG6_LOCAL_OIF] = { .type = NLA_U32 }, [SEG6_LOCAL_BPF] = { .type = NLA_NESTED }, [SEG6_LOCAL_COUNTERS] = { .type = NLA_NESTED }, + [SEG6_LOCAL_FLAVORS] = { .type = NLA_NESTED }, }; -static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt) +static int parse_nla_srh(struct nlattr **attrs, struct seg6_local_lwt *slwt, + struct netlink_ext_ack *extack) { struct ipv6_sr_hdr *srh; int len; @@ -1191,7 +1319,8 @@ static void destroy_attr_srh(struct seg6_local_lwt *slwt) kfree(slwt->srh); } -static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt) +static int parse_nla_table(struct nlattr **attrs, struct seg6_local_lwt *slwt, + struct netlink_ext_ack *extack) { slwt->table = nla_get_u32(attrs[SEG6_LOCAL_TABLE]); @@ -1225,7 +1354,8 @@ seg6_end_dt_info *seg6_possible_end_dt_info(struct seg6_local_lwt *slwt) } static int parse_nla_vrftable(struct nlattr **attrs, - struct seg6_local_lwt *slwt) + struct seg6_local_lwt *slwt, + struct netlink_ext_ack *extack) { struct seg6_end_dt_info *info = seg6_possible_end_dt_info(slwt); @@ -1261,7 +1391,8 @@ static int cmp_nla_vrftable(struct seg6_local_lwt *a, struct seg6_local_lwt *b) return 0; } -static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt) +static int parse_nla_nh4(struct nlattr **attrs, struct seg6_local_lwt *slwt, + struct netlink_ext_ack *extack) { memcpy(&slwt->nh4, nla_data(attrs[SEG6_LOCAL_NH4]), sizeof(struct in_addr)); @@ -1287,7 +1418,8 @@ static int cmp_nla_nh4(struct seg6_local_lwt *a, struct seg6_local_lwt *b) return memcmp(&a->nh4, &b->nh4, sizeof(struct in_addr)); } -static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt) +static int parse_nla_nh6(struct nlattr **attrs, struct seg6_local_lwt *slwt, + struct netlink_ext_ack *extack) { memcpy(&slwt->nh6, nla_data(attrs[SEG6_LOCAL_NH6]), sizeof(struct in6_addr)); @@ -1313,7 +1445,8 @@ static int cmp_nla_nh6(struct seg6_local_lwt *a, struct seg6_local_lwt *b) return memcmp(&a->nh6, &b->nh6, sizeof(struct in6_addr)); } -static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt) +static int parse_nla_iif(struct nlattr **attrs, struct seg6_local_lwt *slwt, + struct netlink_ext_ack *extack) { slwt->iif = nla_get_u32(attrs[SEG6_LOCAL_IIF]); @@ -1336,7 +1469,8 @@ static int cmp_nla_iif(struct seg6_local_lwt *a, struct seg6_local_lwt *b) return 0; } -static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt) +static int parse_nla_oif(struct nlattr **attrs, struct seg6_local_lwt *slwt, + struct netlink_ext_ack *extack) { slwt->oif = nla_get_u32(attrs[SEG6_LOCAL_OIF]); @@ -1366,7 +1500,8 @@ static const struct nla_policy bpf_prog_policy[SEG6_LOCAL_BPF_PROG_MAX + 1] = { .len = MAX_PROG_NAME }, }; -static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt) +static int parse_nla_bpf(struct nlattr **attrs, struct seg6_local_lwt *slwt, + struct netlink_ext_ack *extack) { struct nlattr *tb[SEG6_LOCAL_BPF_PROG_MAX + 1]; struct bpf_prog *p; @@ -1444,7 +1579,8 @@ nla_policy seg6_local_counters_policy[SEG6_LOCAL_CNT_MAX + 1] = { }; static int parse_nla_counters(struct nlattr **attrs, - struct seg6_local_lwt *slwt) + struct seg6_local_lwt *slwt, + struct 
netlink_ext_ack *extack) { struct pcpu_seg6_local_counters __percpu *pcounters; struct nlattr *tb[SEG6_LOCAL_CNT_MAX + 1]; @@ -1542,8 +1678,195 @@ static void destroy_attr_counters(struct seg6_local_lwt *slwt) free_percpu(slwt->pcpu_counters); } +static const +struct nla_policy seg6_local_flavors_policy[SEG6_LOCAL_FLV_MAX + 1] = { + [SEG6_LOCAL_FLV_OPERATION] = { .type = NLA_U32 }, + [SEG6_LOCAL_FLV_LCBLOCK_BITS] = { .type = NLA_U8 }, + [SEG6_LOCAL_FLV_LCNODE_FN_BITS] = { .type = NLA_U8 }, +}; + +/* check whether the lengths of the Locator-Block and Locator-Node Function + * are compatible with the dimension of a C-SID container. + */ +static int seg6_chk_next_csid_cfg(__u8 block_len, __u8 func_len) +{ + /* Locator-Block and Locator-Node Function cannot exceed 128 bits + * (i.e. C-SID container lenghts). + */ + if (next_csid_chk_cntr_bits(block_len, func_len)) + return -EINVAL; + + /* Locator-Block length must be greater than zero and evenly divisible + * by 8. There must be room for a Locator-Node Function, at least. + */ + if (next_csid_chk_lcblock_bits(block_len)) + return -EINVAL; + + /* Locator-Node Function length must be greater than zero and evenly + * divisible by 8. There must be room for the Locator-Block. + */ + if (next_csid_chk_lcnode_fn_bits(func_len)) + return -EINVAL; + + return 0; +} + +static int seg6_parse_nla_next_csid_cfg(struct nlattr **tb, + struct seg6_flavors_info *finfo, + struct netlink_ext_ack *extack) +{ + __u8 func_len = SEG6_LOCAL_LCNODE_FN_DBITS; + __u8 block_len = SEG6_LOCAL_LCBLOCK_DBITS; + int rc; + + if (tb[SEG6_LOCAL_FLV_LCBLOCK_BITS]) + block_len = nla_get_u8(tb[SEG6_LOCAL_FLV_LCBLOCK_BITS]); + + if (tb[SEG6_LOCAL_FLV_LCNODE_FN_BITS]) + func_len = nla_get_u8(tb[SEG6_LOCAL_FLV_LCNODE_FN_BITS]); + + rc = seg6_chk_next_csid_cfg(block_len, func_len); + if (rc < 0) { + NL_SET_ERR_MSG(extack, + "Invalid Locator Block/Node Function lengths"); + return rc; + } + + finfo->lcblock_bits = block_len; + finfo->lcnode_func_bits = func_len; + + return 0; +} + +static int parse_nla_flavors(struct nlattr **attrs, struct seg6_local_lwt *slwt, + struct netlink_ext_ack *extack) +{ + struct seg6_flavors_info *finfo = &slwt->flv_info; + struct nlattr *tb[SEG6_LOCAL_FLV_MAX + 1]; + unsigned long fops; + int rc; + + rc = nla_parse_nested_deprecated(tb, SEG6_LOCAL_FLV_MAX, + attrs[SEG6_LOCAL_FLAVORS], + seg6_local_flavors_policy, NULL); + if (rc < 0) + return rc; + + /* this attribute MUST always be present since it represents the Flavor + * operation(s) to be carried out. + */ + if (!tb[SEG6_LOCAL_FLV_OPERATION]) + return -EINVAL; + + fops = nla_get_u32(tb[SEG6_LOCAL_FLV_OPERATION]); + if (fops & ~SEG6_LOCAL_FLV_SUPP_OPS) { + NL_SET_ERR_MSG(extack, "Unsupported Flavor operation(s)"); + return -EOPNOTSUPP; + } + + finfo->flv_ops = fops; + + if (seg6_next_csid_enabled(fops)) { + /* Locator-Block and Locator-Node Function lengths can be + * provided by the user space. Otherwise, default values are + * applied. 
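 * The defaults are SEG6_LOCAL_LCBLOCK_DBITS (32 bits) and
 * SEG6_LOCAL_LCNODE_FN_DBITS (16 bits), defined at the top of this
 * file.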
+ */ + rc = seg6_parse_nla_next_csid_cfg(tb, finfo, extack); + if (rc < 0) + return rc; + } + + return 0; +} + +static int seg6_fill_nla_next_csid_cfg(struct sk_buff *skb, + struct seg6_flavors_info *finfo) +{ + if (nla_put_u8(skb, SEG6_LOCAL_FLV_LCBLOCK_BITS, finfo->lcblock_bits)) + return -EMSGSIZE; + + if (nla_put_u8(skb, SEG6_LOCAL_FLV_LCNODE_FN_BITS, + finfo->lcnode_func_bits)) + return -EMSGSIZE; + + return 0; +} + +static int put_nla_flavors(struct sk_buff *skb, struct seg6_local_lwt *slwt) +{ + struct seg6_flavors_info *finfo = &slwt->flv_info; + __u32 fops = finfo->flv_ops; + struct nlattr *nest; + int rc; + + nest = nla_nest_start(skb, SEG6_LOCAL_FLAVORS); + if (!nest) + return -EMSGSIZE; + + if (nla_put_u32(skb, SEG6_LOCAL_FLV_OPERATION, fops)) { + rc = -EMSGSIZE; + goto err; + } + + if (seg6_next_csid_enabled(fops)) { + rc = seg6_fill_nla_next_csid_cfg(skb, finfo); + if (rc < 0) + goto err; + } + + return nla_nest_end(skb, nest); + +err: + nla_nest_cancel(skb, nest); + return rc; +} + +static int seg6_cmp_nla_next_csid_cfg(struct seg6_flavors_info *finfo_a, + struct seg6_flavors_info *finfo_b) +{ + if (finfo_a->lcblock_bits != finfo_b->lcblock_bits) + return 1; + + if (finfo_a->lcnode_func_bits != finfo_b->lcnode_func_bits) + return 1; + + return 0; +} + +static int cmp_nla_flavors(struct seg6_local_lwt *a, struct seg6_local_lwt *b) +{ + struct seg6_flavors_info *finfo_a = &a->flv_info; + struct seg6_flavors_info *finfo_b = &b->flv_info; + + if (finfo_a->flv_ops != finfo_b->flv_ops) + return 1; + + if (seg6_next_csid_enabled(finfo_a->flv_ops)) { + if (seg6_cmp_nla_next_csid_cfg(finfo_a, finfo_b)) + return 1; + } + + return 0; +} + +static int encap_size_flavors(struct seg6_local_lwt *slwt) +{ + struct seg6_flavors_info *finfo = &slwt->flv_info; + int nlsize; + + nlsize = nla_total_size(0) + /* nest SEG6_LOCAL_FLAVORS */ + nla_total_size(4); /* SEG6_LOCAL_FLV_OPERATION */ + + if (seg6_next_csid_enabled(finfo->flv_ops)) + nlsize += nla_total_size(1) + /* SEG6_LOCAL_FLV_LCBLOCK_BITS */ + nla_total_size(1); /* SEG6_LOCAL_FLV_LCNODE_FN_BITS */ + + return nlsize; +} + struct seg6_action_param { - int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt); + int (*parse)(struct nlattr **attrs, struct seg6_local_lwt *slwt, + struct netlink_ext_ack *extack); int (*put)(struct sk_buff *skb, struct seg6_local_lwt *slwt); int (*cmp)(struct seg6_local_lwt *a, struct seg6_local_lwt *b); @@ -1593,6 +1916,10 @@ static struct seg6_action_param seg6_action_params[SEG6_LOCAL_MAX + 1] = { .put = put_nla_counters, .cmp = cmp_nla_counters, .destroy = destroy_attr_counters }, + + [SEG6_LOCAL_FLAVORS] = { .parse = parse_nla_flavors, + .put = put_nla_flavors, + .cmp = cmp_nla_flavors }, }; /* call the destroy() callback (if available) for each set attribute in @@ -1636,7 +1963,8 @@ static void destroy_attrs(struct seg6_local_lwt *slwt) } static int parse_nla_optional_attrs(struct nlattr **attrs, - struct seg6_local_lwt *slwt) + struct seg6_local_lwt *slwt, + struct netlink_ext_ack *extack) { struct seg6_action_desc *desc = slwt->desc; unsigned long parsed_optattrs = 0; @@ -1652,7 +1980,7 @@ static int parse_nla_optional_attrs(struct nlattr **attrs, */ param = &seg6_action_params[i]; - err = param->parse(attrs, slwt); + err = param->parse(attrs, slwt, extack); if (err < 0) goto parse_optattrs_err; @@ -1705,7 +2033,8 @@ static void seg6_local_lwtunnel_destroy_state(struct seg6_local_lwt *slwt) ops->destroy_state(slwt); } -static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt 
*slwt) +static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt, + struct netlink_ext_ack *extack) { struct seg6_action_param *param; struct seg6_action_desc *desc; @@ -1749,14 +2078,14 @@ static int parse_nla_action(struct nlattr **attrs, struct seg6_local_lwt *slwt) param = &seg6_action_params[i]; - err = param->parse(attrs, slwt); + err = param->parse(attrs, slwt, extack); if (err < 0) goto parse_attrs_err; } } /* parse the optional attributes, if any */ - err = parse_nla_optional_attrs(attrs, slwt); + err = parse_nla_optional_attrs(attrs, slwt, extack); if (err < 0) goto parse_attrs_err; @@ -1800,7 +2129,7 @@ static int seg6_local_build_state(struct net *net, struct nlattr *nla, slwt = seg6_local_lwtunnel(newts); slwt->action = nla_get_u32(tb[SEG6_LOCAL_ACTION]); - err = parse_nla_action(tb, slwt); + err = parse_nla_action(tb, slwt, extack); if (err < 0) goto out_free; @@ -1904,6 +2233,9 @@ static int seg6_local_get_encap_size(struct lwtunnel_state *lwt) /* SEG6_LOCAL_CNT_ERRORS */ nla_total_size_64bit(sizeof(__u64)); + if (attrs & SEG6_F_ATTR(SEG6_LOCAL_FLAVORS)) + nlsize += encap_size_flavors(slwt); + return nlsize; } @@ -1959,6 +2291,15 @@ int __init seg6_local_init(void) */ BUILD_BUG_ON(SEG6_LOCAL_MAX + 1 > BITS_PER_TYPE(unsigned long)); + /* If the default NEXT-C-SID Locator-Block/Node Function lengths (in + * bits) have been changed with invalid values, kernel build stops + * here. + */ + BUILD_BUG_ON(next_csid_chk_cntr_bits(SEG6_LOCAL_LCBLOCK_DBITS, + SEG6_LOCAL_LCNODE_FN_DBITS)); + BUILD_BUG_ON(next_csid_chk_lcblock_bits(SEG6_LOCAL_LCBLOCK_DBITS)); + BUILD_BUG_ON(next_csid_chk_lcnode_fn_bits(SEG6_LOCAL_LCNODE_FN_DBITS)); + return lwtunnel_encap_add_ops(&seg6_local_ops, LWTUNNEL_ENCAP_SEG6_LOCAL); } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 98f1cf40746f..d27683e3fc97 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1503,71 +1503,12 @@ static void ipip6_netlink_parms(struct nlattr *data[], if (!data) return; - if (data[IFLA_IPTUN_LINK]) - parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); - - if (data[IFLA_IPTUN_LOCAL]) - parms->iph.saddr = nla_get_be32(data[IFLA_IPTUN_LOCAL]); - - if (data[IFLA_IPTUN_REMOTE]) - parms->iph.daddr = nla_get_be32(data[IFLA_IPTUN_REMOTE]); - - if (data[IFLA_IPTUN_TTL]) { - parms->iph.ttl = nla_get_u8(data[IFLA_IPTUN_TTL]); - if (parms->iph.ttl) - parms->iph.frag_off = htons(IP_DF); - } - - if (data[IFLA_IPTUN_TOS]) - parms->iph.tos = nla_get_u8(data[IFLA_IPTUN_TOS]); - - if (!data[IFLA_IPTUN_PMTUDISC] || nla_get_u8(data[IFLA_IPTUN_PMTUDISC])) - parms->iph.frag_off = htons(IP_DF); - - if (data[IFLA_IPTUN_FLAGS]) - parms->i_flags = nla_get_be16(data[IFLA_IPTUN_FLAGS]); - - if (data[IFLA_IPTUN_PROTO]) - parms->iph.protocol = nla_get_u8(data[IFLA_IPTUN_PROTO]); + ip_tunnel_netlink_parms(data, parms); if (data[IFLA_IPTUN_FWMARK]) *fwmark = nla_get_u32(data[IFLA_IPTUN_FWMARK]); } -/* This function returns true when ENCAP attributes are present in the nl msg */ -static bool ipip6_netlink_encap_parms(struct nlattr *data[], - struct ip_tunnel_encap *ipencap) -{ - bool ret = false; - - memset(ipencap, 0, sizeof(*ipencap)); - - if (!data) - return ret; - - if (data[IFLA_IPTUN_ENCAP_TYPE]) { - ret = true; - ipencap->type = nla_get_u16(data[IFLA_IPTUN_ENCAP_TYPE]); - } - - if (data[IFLA_IPTUN_ENCAP_FLAGS]) { - ret = true; - ipencap->flags = nla_get_u16(data[IFLA_IPTUN_ENCAP_FLAGS]); - } - - if (data[IFLA_IPTUN_ENCAP_SPORT]) { - ret = true; - ipencap->sport = nla_get_be16(data[IFLA_IPTUN_ENCAP_SPORT]); - } - - if 
(data[IFLA_IPTUN_ENCAP_DPORT]) { - ret = true; - ipencap->dport = nla_get_be16(data[IFLA_IPTUN_ENCAP_DPORT]); - } - - return ret; -} - #ifdef CONFIG_IPV6_SIT_6RD /* This function returns true when 6RD attributes are present in the nl msg */ static bool ipip6_netlink_6rd_parms(struct nlattr *data[], @@ -1619,7 +1560,7 @@ static int ipip6_newlink(struct net *src_net, struct net_device *dev, nt = netdev_priv(dev); - if (ipip6_netlink_encap_parms(data, &ipencap)) { + if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { err = ip_tunnel_encap_setup(nt, &ipencap); if (err < 0) return err; @@ -1671,7 +1612,7 @@ static int ipip6_changelink(struct net_device *dev, struct nlattr *tb[], if (dev == sitn->fb_tunnel_dev) return -EINVAL; - if (ipip6_netlink_encap_parms(data, &ipencap)) { + if (ip_tunnel_netlink_encap_parms(data, &ipencap)) { err = ip_tunnel_encap_setup(t, &ipencap); if (err < 0) return err; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 35013497e407..a8adda623da1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -146,15 +146,16 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; - struct inet_sock *inet = inet_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct in6_addr *saddr = NULL, *final_p, final; struct inet_timewait_death_row *tcp_death_row; struct ipv6_pinfo *np = tcp_inet6_sk(sk); + struct inet_sock *inet = inet_sk(sk); struct tcp_sock *tp = tcp_sk(sk); - struct in6_addr *saddr = NULL, *final_p, final; + struct net *net = sock_net(sk); struct ipv6_txoptions *opt; - struct flowi6 fl6; struct dst_entry *dst; + struct flowi6 fl6; int addr_type; int err; @@ -280,20 +281,21 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, security_sk_classify_flow(sk, flowi6_to_flowi_common(&fl6)); - dst = ip6_dst_lookup_flow(sock_net(sk), sk, &fl6, final_p); + dst = ip6_dst_lookup_flow(net, sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); goto failure; } + tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row; + if (!saddr) { struct inet_bind_hashbucket *prev_addr_hashbucket = NULL; struct in6_addr prev_v6_rcv_saddr; if (icsk->icsk_bind2_hash) { - prev_addr_hashbucket = inet_bhashfn_portaddr(&tcp_hashinfo, - sk, sock_net(sk), - inet->inet_num); + prev_addr_hashbucket = inet_bhashfn_portaddr(tcp_death_row->hashinfo, + sk, net, inet->inet_num); prev_v6_rcv_saddr = sk->sk_v6_rcv_saddr; } saddr = &fl6.saddr; @@ -325,7 +327,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->inet_dport = usin->sin6_port; tcp_set_state(sk, TCP_SYN_SENT); - tcp_death_row = sock_net(sk)->ipv4.tcp_death_row; err = inet6_hash_connect(tcp_death_row, sk); if (err) goto late_failure; @@ -339,8 +340,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_v6_daddr.s6_addr32, inet->inet_sport, inet->inet_dport)); - tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk), - np->saddr.s6_addr32, + tp->tsoffset = secure_tcpv6_ts_off(net, np->saddr.s6_addr32, sk->sk_v6_daddr.s6_addr32); } @@ -403,7 +403,7 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, bool fatal; int err; - sk = __inet6_lookup_established(net, &tcp_hashinfo, + sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &hdr->daddr, th->dest, &hdr->saddr, ntohs(th->source), skb->dev->ifindex, inet6_sdif(skb)); @@ -1001,6 +1001,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) __be32 label = 0; u32 priority = 
0; struct net *net; + u32 txhash = 0; int oif = 0; if (th->rst) @@ -1036,11 +1037,10 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) * Incoming packet is checked with md5 hash with finding key, * no RST generated if md5 hash doesn't match. */ - sk1 = inet6_lookup_listener(net, - &tcp_hashinfo, NULL, 0, - &ipv6h->saddr, - th->source, &ipv6h->daddr, - ntohs(th->source), dif, sdif); + sk1 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, + NULL, 0, &ipv6h->saddr, th->source, + &ipv6h->daddr, ntohs(th->source), + dif, sdif); if (!sk1) goto out; @@ -1074,10 +1074,12 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) if (np->repflow) label = ip6_flowlabel(ipv6h); priority = sk->sk_priority; + txhash = sk->sk_hash; } if (sk->sk_state == TCP_TIME_WAIT) { label = cpu_to_be32(inet_twsk(sk)->tw_flowlabel); priority = inet_twsk(sk)->tw_priority; + txhash = inet_twsk(sk)->tw_txhash; } } else { if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_TCP_RESET) @@ -1085,7 +1087,7 @@ static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb) } tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, - ipv6_get_dsfield(ipv6h), label, priority, 0); + ipv6_get_dsfield(ipv6h), label, priority, txhash); #ifdef CONFIG_TCP_MD5SIG out: @@ -1638,7 +1640,7 @@ INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb) hdr = ipv6_hdr(skb); lookup: - sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th), + sk = __inet6_lookup_skb(net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th), th->source, th->dest, inet6_iif(skb), sdif, &refcounted); if (!sk) @@ -1813,7 +1815,7 @@ do_time_wait: { struct sock *sk2; - sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo, + sk2 = inet6_lookup_listener(net, net->ipv4.tcp_death_row.hashinfo, skb, __tcp_hdrlen(th), &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, @@ -1846,6 +1848,7 @@ do_time_wait: void tcp_v6_early_demux(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); const struct ipv6hdr *hdr; const struct tcphdr *th; struct sock *sk; @@ -1863,7 +1866,7 @@ void tcp_v6_early_demux(struct sk_buff *skb) return; /* Note : We use inet6_iif() here, not tcp_v6_iif() */ - sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, + sk = __inet6_lookup_established(net, net->ipv4.tcp_death_row.hashinfo, &hdr->saddr, th->source, &hdr->daddr, ntohs(th->dest), inet6_iif(skb), inet6_sdif(skb)); @@ -2195,7 +2198,7 @@ struct proto tcpv6_prot = { .slab_flags = SLAB_TYPESAFE_BY_RCU, .twsk_prot = &tcp6_timewait_sock_ops, .rsk_prot = &tcp6_request_sock_ops, - .h.hashinfo = &tcp_hashinfo, + .h.hashinfo = NULL, .no_autobind = true, .diag_destroy = tcp_abort, }; @@ -2229,7 +2232,7 @@ static void __net_exit tcpv6_net_exit(struct net *net) static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list) { - inet_twsk_purge(&tcp_hashinfo, AF_INET6); + tcp_twsk_purge(net_exit_list, AF_INET6); } static struct pernet_operations tcpv6_net_ops = { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 16c176e7c69a..91e795bb9ade 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -616,8 +616,11 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } /* Tunnels don't have an application socket: don't pass errors back */ - if (tunnel) + if (tunnel) { + if (udp_sk(sk)->encap_err_rcv) + udp_sk(sk)->encap_err_rcv(sk, skb, offset); goto out; + } if (!np->recverr) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) @@ -647,16 +650,20 @@ static int 
__udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) rc = __udp_enqueue_schedule_skb(sk, skb); if (rc < 0) { int is_udplite = IS_UDPLITE(sk); + enum skb_drop_reason drop_reason; /* Note that an ENOMEM error is charged twice */ - if (rc == -ENOMEM) + if (rc == -ENOMEM) { UDP6_INC_STATS(sock_net(sk), UDP_MIB_RCVBUFERRORS, is_udplite); - else + drop_reason = SKB_DROP_REASON_SOCKET_RCVBUFF; + } else { UDP6_INC_STATS(sock_net(sk), UDP_MIB_MEMERRORS, is_udplite); + drop_reason = SKB_DROP_REASON_PROTO_MEM; + } UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return -1; } @@ -672,11 +679,14 @@ static __inline__ int udpv6_err(struct sk_buff *skb, static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) { + enum skb_drop_reason drop_reason = SKB_DROP_REASON_NOT_SPECIFIED; struct udp_sock *up = udp_sk(sk); int is_udplite = IS_UDPLITE(sk); - if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) + if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) { + drop_reason = SKB_DROP_REASON_XFRM_POLICY; goto drop; + } if (static_branch_unlikely(&udpv6_encap_needed_key) && up->encap_type) { int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); @@ -735,8 +745,10 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) + if (sk_filter_trim_cap(sk, skb, sizeof(struct udphdr))) { + drop_reason = SKB_DROP_REASON_SOCKET_FILTER; goto drop; + } udp_csum_pull_header(skb); @@ -745,11 +757,12 @@ static int udpv6_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb) return __udpv6_queue_rcv_skb(sk, skb); csum_error: + drop_reason = SKB_DROP_REASON_UDP_CSUM; __UDP6_INC_STATS(sock_net(sk), UDP_MIB_CSUMERRORS, is_udplite); drop: __UDP6_INC_STATS(sock_net(sk), UDP_MIB_INERRORS, is_udplite); atomic_inc(&sk->sk_drops); - kfree_skb(skb); + kfree_skb_reason(skb, drop_reason); return -1; } diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 2b31112c0856..1323f2f6928e 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -270,13 +270,17 @@ static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, return 0; } -static int xfrm6_tunnel_init_state(struct xfrm_state *x) +static int xfrm6_tunnel_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { - if (x->props.mode != XFRM_MODE_TUNNEL) + if (x->props.mode != XFRM_MODE_TUNNEL) { + NL_SET_ERR_MSG(extack, "IPv6 tunnel can only be used with tunnel mode"); return -EINVAL; + } - if (x->encap) + if (x->encap) { + NL_SET_ERR_MSG(extack, "IPv6 tunnel is not compatible with encapsulation"); return -EINVAL; + } x->props.header_len = sizeof(struct ipv6hdr); diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index f247daa41563..e72cf0749d49 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -1799,6 +1799,12 @@ int ieee80211_link_use_channel(struct ieee80211_link_data *link, lockdep_assert_held(&local->mtx); + if (sdata->vif.active_links && + !(sdata->vif.active_links & BIT(link->link_id))) { + ieee80211_link_update_chandef(link, chandef); + return 0; + } + mutex_lock(&local->chanctx_mtx); ret = cfg80211_chandef_dfs_required(local->hw.wiphy, diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 1e5b041a5cea..5b014786fd2d 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -570,6 +570,30 @@ static ssize_t ieee80211_if_parse_tsf( } IEEE80211_IF_FILE_RW(tsf); 
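/* Hypothetical usage sketch (example_activate_link1 is not part of the
 * patch): for an MLO station interface whose valid_links is 0x3, the
 * call below would switch the active set to link 1 only, mirroring
 * "echo 0x2 > active_links" through the debugfs file added below.
 */
static int example_activate_link1(struct ieee80211_sub_if_data *sdata)
{
	/* the mask is validated against vif.valid_links; returns 0 on success */
	return ieee80211_set_active_links(&sdata->vif, BIT(1));
}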
+static ssize_t ieee80211_if_fmt_valid_links(const struct ieee80211_sub_if_data *sdata, + char *buf, int buflen) +{ + return snprintf(buf, buflen, "0x%x\n", sdata->vif.valid_links); +} +IEEE80211_IF_FILE_R(valid_links); + +static ssize_t ieee80211_if_fmt_active_links(const struct ieee80211_sub_if_data *sdata, + char *buf, int buflen) +{ + return snprintf(buf, buflen, "0x%x\n", sdata->vif.active_links); +} + +static ssize_t ieee80211_if_parse_active_links(struct ieee80211_sub_if_data *sdata, + const char *buf, int buflen) +{ + u16 active_links; + + if (kstrtou16(buf, 0, &active_links)) + return -EINVAL; + + return ieee80211_set_active_links(&sdata->vif, active_links) ?: buflen; +} +IEEE80211_IF_FILE_RW(active_links); #ifdef CONFIG_MAC80211_MESH IEEE80211_IF_FILE(estab_plinks, u.mesh.estab_plinks, ATOMIC); @@ -670,6 +694,8 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) DEBUGFS_ADD_MODE(uapsd_queues, 0600); DEBUGFS_ADD_MODE(uapsd_max_sp_len, 0600); DEBUGFS_ADD_MODE(tdls_wider_bw, 0600); + DEBUGFS_ADD_MODE(valid_links, 0200); + DEBUGFS_ADD_MODE(active_links, 0600); } static void add_ap_files(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c index 9b61dc7889c2..5392ffa18270 100644 --- a/net/mac80211/driver-ops.c +++ b/net/mac80211/driver-ops.c @@ -192,6 +192,10 @@ int drv_conf_tx(struct ieee80211_local *local, if (!check_sdata_in_driver(sdata)) return -EIO; + if (sdata->vif.active_links && + !(sdata->vif.active_links & BIT(link->link_id))) + return 0; + if (params->cw_min == 0 || params->cw_min > params->cw_max) { /* * If we can't configure hardware anyway, don't warn. We may @@ -272,6 +276,60 @@ void drv_reset_tsf(struct ieee80211_local *local, trace_drv_return_void(local); } +int drv_assign_vif_chanctx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *link_conf, + struct ieee80211_chanctx *ctx) +{ + int ret = 0; + + drv_verify_link_exists(sdata, link_conf); + if (!check_sdata_in_driver(sdata)) + return -EIO; + + if (sdata->vif.active_links && + !(sdata->vif.active_links & BIT(link_conf->link_id))) + return 0; + + trace_drv_assign_vif_chanctx(local, sdata, link_conf, ctx); + if (local->ops->assign_vif_chanctx) { + WARN_ON_ONCE(!ctx->driver_present); + ret = local->ops->assign_vif_chanctx(&local->hw, + &sdata->vif, + link_conf, + &ctx->conf); + } + trace_drv_return_int(local, ret); + + return ret; +} + +void drv_unassign_vif_chanctx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *link_conf, + struct ieee80211_chanctx *ctx) +{ + might_sleep(); + + drv_verify_link_exists(sdata, link_conf); + if (!check_sdata_in_driver(sdata)) + return; + + if (sdata->vif.active_links && + !(sdata->vif.active_links & BIT(link_conf->link_id))) + return; + + trace_drv_unassign_vif_chanctx(local, sdata, link_conf, ctx); + if (local->ops->unassign_vif_chanctx) { + WARN_ON_ONCE(!ctx->driver_present); + local->ops->unassign_vif_chanctx(&local->hw, + &sdata->vif, + link_conf, + &ctx->conf); + } + trace_drv_return_void(local); +} + int drv_switch_vif_chanctx(struct ieee80211_local *local, struct ieee80211_vif_chanctx_switch *vifs, int n_vifs, enum ieee80211_chanctx_switch_mode mode) @@ -346,3 +404,117 @@ int drv_ampdu_action(struct ieee80211_local *local, return ret; } + +void drv_link_info_changed(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *info, + int link_id, u64 changed) +{ + might_sleep(); + + 
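/* Like drv_conf_tx() and the chanctx assign/unassign wrappers in this
 * file, this helper now gates on the vif's active_links bitmap: when
 * active_links is non-zero, calls aimed at an inactive link are skipped,
 * and drv_set_key() rejects keys for inactive links with -ENOLINK.
 */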
if (WARN_ON_ONCE(changed & (BSS_CHANGED_BEACON | + BSS_CHANGED_BEACON_ENABLED) && + sdata->vif.type != NL80211_IFTYPE_AP && + sdata->vif.type != NL80211_IFTYPE_ADHOC && + sdata->vif.type != NL80211_IFTYPE_MESH_POINT && + sdata->vif.type != NL80211_IFTYPE_OCB)) + return; + + if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || + sdata->vif.type == NL80211_IFTYPE_NAN || + (sdata->vif.type == NL80211_IFTYPE_MONITOR && + !sdata->vif.bss_conf.mu_mimo_owner && + !(changed & BSS_CHANGED_TXPOWER)))) + return; + + if (!check_sdata_in_driver(sdata)) + return; + + if (sdata->vif.active_links && + !(sdata->vif.active_links & BIT(link_id))) + return; + + trace_drv_link_info_changed(local, sdata, info, changed); + if (local->ops->link_info_changed) + local->ops->link_info_changed(&local->hw, &sdata->vif, + info, changed); + else if (local->ops->bss_info_changed) + local->ops->bss_info_changed(&local->hw, &sdata->vif, + info, changed); + trace_drv_return_void(local); +} + +int drv_set_key(struct ieee80211_local *local, + enum set_key_cmd cmd, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key) +{ + int ret; + + might_sleep(); + + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return -EIO; + + if (WARN_ON(key->link_id >= 0 && sdata->vif.active_links && + !(sdata->vif.active_links & BIT(key->link_id)))) + return -ENOLINK; + + trace_drv_set_key(local, cmd, sdata, sta, key); + ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); + trace_drv_return_int(local, ret); + return ret; +} + +int drv_change_vif_links(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u16 old_links, u16 new_links, + struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]) +{ + int ret = -EOPNOTSUPP; + + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return -EIO; + + if (old_links == new_links) + return 0; + + trace_drv_change_vif_links(local, sdata, old_links, new_links); + if (local->ops->change_vif_links) + ret = local->ops->change_vif_links(&local->hw, &sdata->vif, + old_links, new_links, old); + trace_drv_return_int(local, ret); + + return ret; +} + +int drv_change_sta_links(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + u16 old_links, u16 new_links) +{ + int ret = -EOPNOTSUPP; + + might_sleep(); + + if (!check_sdata_in_driver(sdata)) + return -EIO; + + old_links &= sdata->vif.active_links; + new_links &= sdata->vif.active_links; + + if (old_links == new_links) + return 0; + + trace_drv_change_sta_links(local, sdata, sta, old_links, new_links); + if (local->ops->change_sta_links) + ret = local->ops->change_sta_links(&local->hw, &sdata->vif, sta, + old_links, new_links); + trace_drv_return_int(local, ret); + + return ret; +} diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 482f5c97a72b..81e40b0a3b16 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -165,40 +165,10 @@ static inline void drv_vif_cfg_changed(struct ieee80211_local *local, trace_drv_return_void(local); } -static inline void drv_link_info_changed(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_bss_conf *info, - int link_id, u64 changed) -{ - might_sleep(); - - if (WARN_ON_ONCE(changed & (BSS_CHANGED_BEACON | - BSS_CHANGED_BEACON_ENABLED) && - sdata->vif.type != NL80211_IFTYPE_AP && - sdata->vif.type != NL80211_IFTYPE_ADHOC && - sdata->vif.type != NL80211_IFTYPE_MESH_POINT && - sdata->vif.type != 
NL80211_IFTYPE_OCB)) - return; - - if (WARN_ON_ONCE(sdata->vif.type == NL80211_IFTYPE_P2P_DEVICE || - sdata->vif.type == NL80211_IFTYPE_NAN || - (sdata->vif.type == NL80211_IFTYPE_MONITOR && - !sdata->vif.bss_conf.mu_mimo_owner && - !(changed & BSS_CHANGED_TXPOWER)))) - return; - - if (!check_sdata_in_driver(sdata)) - return; - - trace_drv_link_info_changed(local, sdata, info, changed); - if (local->ops->link_info_changed) - local->ops->link_info_changed(&local->hw, &sdata->vif, - info, changed); - else if (local->ops->bss_info_changed) - local->ops->bss_info_changed(&local->hw, &sdata->vif, - info, changed); - trace_drv_return_void(local); -} +void drv_link_info_changed(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *info, + int link_id, u64 changed); static inline u64 drv_prepare_multicast(struct ieee80211_local *local, struct netdev_hw_addr_list *mc_list) @@ -256,25 +226,11 @@ static inline int drv_set_tim(struct ieee80211_local *local, return ret; } -static inline int drv_set_key(struct ieee80211_local *local, - enum set_key_cmd cmd, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, - struct ieee80211_key_conf *key) -{ - int ret; - - might_sleep(); - - sdata = get_bss_sdata(sdata); - if (!check_sdata_in_driver(sdata)) - return -EIO; - - trace_drv_set_key(local, cmd, sdata, sta, key); - ret = local->ops->set_key(&local->hw, cmd, &sdata->vif, sta, key); - trace_drv_return_int(local, ret); - return ret; -} +int drv_set_key(struct ieee80211_local *local, + enum set_key_cmd cmd, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + struct ieee80211_key_conf *key); static inline void drv_update_tkip_key(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -945,52 +901,14 @@ static inline void drv_verify_link_exists(struct ieee80211_sub_if_data *sdata, sdata_assert_lock(sdata); } -static inline int drv_assign_vif_chanctx(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_bss_conf *link_conf, - struct ieee80211_chanctx *ctx) -{ - int ret = 0; - - drv_verify_link_exists(sdata, link_conf); - if (!check_sdata_in_driver(sdata)) - return -EIO; - - trace_drv_assign_vif_chanctx(local, sdata, link_conf, ctx); - if (local->ops->assign_vif_chanctx) { - WARN_ON_ONCE(!ctx->driver_present); - ret = local->ops->assign_vif_chanctx(&local->hw, - &sdata->vif, - link_conf, - &ctx->conf); - } - trace_drv_return_int(local, ret); - - return ret; -} - -static inline void drv_unassign_vif_chanctx(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_bss_conf *link_conf, - struct ieee80211_chanctx *ctx) -{ - might_sleep(); - - drv_verify_link_exists(sdata, link_conf); - if (!check_sdata_in_driver(sdata)) - return; - - trace_drv_unassign_vif_chanctx(local, sdata, link_conf, ctx); - if (local->ops->unassign_vif_chanctx) { - WARN_ON_ONCE(!ctx->driver_present); - local->ops->unassign_vif_chanctx(&local->hw, - &sdata->vif, - link_conf, - &ctx->conf); - } - trace_drv_return_void(local); -} - +int drv_assign_vif_chanctx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *link_conf, + struct ieee80211_chanctx *ctx); +void drv_unassign_vif_chanctx(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_bss_conf *link_conf, + struct ieee80211_chanctx *ctx); int drv_switch_vif_chanctx(struct ieee80211_local *local, struct ieee80211_vif_chanctx_switch *vifs, int n_vifs, enum 
ieee80211_chanctx_switch_mode mode); @@ -1552,46 +1470,13 @@ static inline int drv_net_fill_forward_path(struct ieee80211_local *local, return ret; } -static inline int drv_change_vif_links(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - u16 old_links, u16 new_links, - struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]) -{ - int ret = -EOPNOTSUPP; - - might_sleep(); - - if (!check_sdata_in_driver(sdata)) - return -EIO; - - trace_drv_change_vif_links(local, sdata, old_links, new_links); - if (local->ops->change_vif_links) - ret = local->ops->change_vif_links(&local->hw, &sdata->vif, - old_links, new_links, old); - trace_drv_return_int(local, ret); - - return ret; -} - -static inline int drv_change_sta_links(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, - u16 old_links, u16 new_links) -{ - int ret = -EOPNOTSUPP; - - might_sleep(); - - if (!check_sdata_in_driver(sdata)) - return -EIO; - - trace_drv_change_sta_links(local, sdata, sta, old_links, new_links); - if (local->ops->change_sta_links) - ret = local->ops->change_sta_links(&local->hw, &sdata->vif, sta, - old_links, new_links); - trace_drv_return_int(local, ret); - - return ret; -} +int drv_change_vif_links(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + u16 old_links, u16 new_links, + struct ieee80211_bss_conf *old[IEEE80211_MLD_MAX_NUM_LINKS]); +int drv_change_sta_links(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + u16 old_links, u16 new_links); #endif /* __MAC80211_DRIVER_OPS */ diff --git a/net/mac80211/he.c b/net/mac80211/he.c index d9228fd3f77a..729f261520c7 100644 --- a/net/mac80211/he.c +++ b/net/mac80211/he.c @@ -31,25 +31,27 @@ ieee80211_update_from_he_6ghz_capa(const struct ieee80211_he_6ghz_capa *he_6ghz_ break; } - sta->sta.smps_mode = smps_mode; + link_sta->pub->smps_mode = smps_mode; } else { - sta->sta.smps_mode = IEEE80211_SMPS_OFF; + link_sta->pub->smps_mode = IEEE80211_SMPS_OFF; } switch (le16_get_bits(he_6ghz_capa->capa, IEEE80211_HE_6GHZ_CAP_MAX_MPDU_LEN)) { case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: - sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454; + link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454; break; case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991: - sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991; + link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991; break; case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895: default: - sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895; + link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895; break; } + ieee80211_sta_recalc_aggregates(&sta->sta); + link_sta->pub->he_6ghz_capa = *he_6ghz_capa; } diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 8c24817cd497..83bc41346ae7 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -241,9 +241,11 @@ bool ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, ht_cap.mcs.rx_highest = ht_cap_ie->mcs.rx_highest; if (ht_cap.cap & IEEE80211_HT_CAP_MAX_AMSDU) - sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_7935; + link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_7935; else - sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_3839; + link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_3839; + + ieee80211_sta_recalc_aggregates(&sta->sta); apply: changed = memcmp(&link_sta->pub->ht_cap, &ht_cap, sizeof(ht_cap)); @@ -299,12 +301,13 @@ bool 
ieee80211_ht_cap_ie_to_sta_ht_cap(struct ieee80211_sub_if_data *sdata, break; } - if (smps_mode != sta->sta.smps_mode) + if (smps_mode != link_sta->pub->smps_mode) changed = true; - sta->sta.smps_mode = smps_mode; + link_sta->pub->smps_mode = smps_mode; } else { - sta->sta.smps_mode = IEEE80211_SMPS_OFF; + link_sta->pub->smps_mode = IEEE80211_SMPS_OFF; } + return changed; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 977aea4467e0..4e1d4c339f2d 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1081,6 +1081,10 @@ struct ieee80211_sub_if_data { struct ieee80211_link_data deflink; struct ieee80211_link_data __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; + /* for ieee80211_set_active_links_async() */ + struct work_struct activate_links_work; + u16 desired_active_links; + #ifdef CONFIG_MAC80211_DEBUGFS struct { struct dentry *subdir_stations; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index f99685e2d633..572254366a0f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -754,6 +754,8 @@ static int ieee80211_stop(struct net_device *dev) ieee80211_stop_mbssid(sdata); } + cancel_work_sync(&sdata->activate_links_work); + wiphy_lock(sdata->local->hw.wiphy); ieee80211_do_stop(sdata, true); wiphy_unlock(sdata->local->hw.wiphy); @@ -1724,6 +1726,15 @@ static void ieee80211_recalc_smps_work(struct work_struct *work) ieee80211_recalc_smps(sdata, &sdata->deflink); } +static void ieee80211_activate_links_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + activate_links_work); + + ieee80211_set_active_links(&sdata->vif, sdata->desired_active_links); +} + /* * Helper function to initialise an interface to a specific type. */ @@ -1761,6 +1772,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, skb_queue_head_init(&sdata->status_queue); INIT_WORK(&sdata->work, ieee80211_iface_work); INIT_WORK(&sdata->recalc_smps, ieee80211_recalc_smps_work); + INIT_WORK(&sdata->activate_links_work, ieee80211_activate_links_work); switch (type) { case NL80211_IFTYPE_P2P_GO: diff --git a/net/mac80211/key.c b/net/mac80211/key.c index d89ec93b243b..e8f6c1e5eabf 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -177,6 +177,10 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) } } + if (key->conf.link_id >= 0 && sdata->vif.active_links && + !(sdata->vif.active_links & BIT(key->conf.link_id))) + return 0; + ret = drv_set_key(key->local, SET_KEY, sdata, sta ? 
&sta->sta : NULL, &key->conf); @@ -246,6 +250,10 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) sta = key->sta; sdata = key->sdata; + if (key->conf.link_id >= 0 && sdata->vif.active_links && + !(sdata->vif.active_links & BIT(key->conf.link_id))) + return; + if (!(key->conf.flags & (IEEE80211_KEY_FLAG_GENERATE_MMIC | IEEE80211_KEY_FLAG_PUT_MIC_SPACE | IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) @@ -1437,3 +1445,37 @@ void ieee80211_key_replay(struct ieee80211_key_conf *keyconf) } } EXPORT_SYMBOL_GPL(ieee80211_key_replay); + +int ieee80211_key_switch_links(struct ieee80211_sub_if_data *sdata, + unsigned long del_links_mask, + unsigned long add_links_mask) +{ + struct ieee80211_key *key; + int ret; + + list_for_each_entry(key, &sdata->key_list, list) { + if (key->conf.link_id < 0 || + !(del_links_mask & BIT(key->conf.link_id))) + continue; + + /* shouldn't happen for per-link keys */ + WARN_ON(key->sta); + + ieee80211_key_disable_hw_accel(key); + } + + list_for_each_entry(key, &sdata->key_list, list) { + if (key->conf.link_id < 0 || + !(add_links_mask & BIT(key->conf.link_id))) + continue; + + /* shouldn't happen for per-link keys */ + WARN_ON(key->sta); + + ret = ieee80211_key_enable_hw_accel(key); + if (ret) + return ret; + } + + return 0; +} diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 518af24aab56..f3df97df4b72 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -165,6 +165,9 @@ void ieee80211_free_keys(struct ieee80211_sub_if_data *sdata, void ieee80211_free_sta_keys(struct ieee80211_local *local, struct sta_info *sta); void ieee80211_reenable_keys(struct ieee80211_sub_if_data *sdata); +int ieee80211_key_switch_links(struct ieee80211_sub_if_data *sdata, + unsigned long del_links_mask, + unsigned long add_links_mask); #define key_mtx_dereference(local, ref) \ rcu_dereference_protected(ref, lockdep_is_held(&((local)->key_mtx))) diff --git a/net/mac80211/link.c b/net/mac80211/link.c index 096f313c2a6e..e309708abae8 100644 --- a/net/mac80211/link.c +++ b/net/mac80211/link.c @@ -9,6 +9,7 @@ #include <net/mac80211.h> #include "ieee80211_i.h" #include "driver-ops.h" +#include "key.h" void ieee80211_link_setup(struct ieee80211_link_data *link) { @@ -73,28 +74,37 @@ struct link_container { struct ieee80211_bss_conf conf; }; -static void ieee80211_free_links(struct ieee80211_sub_if_data *sdata, - struct link_container **links) +static void ieee80211_tear_down_links(struct ieee80211_sub_if_data *sdata, + struct link_container **links, u16 mask) { + struct ieee80211_link_data *link; LIST_HEAD(keys); unsigned int link_id; for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { - if (!links[link_id]) + if (!(mask & BIT(link_id))) + continue; + link = &links[link_id]->data; + if (link_id == 0 && !link) + link = &sdata->deflink; + if (WARN_ON(!link)) continue; - ieee80211_remove_link_keys(&links[link_id]->data, &keys); + ieee80211_remove_link_keys(link, &keys); + ieee80211_link_stop(link); } synchronize_rcu(); ieee80211_free_key_list(sdata->local, &keys); +} - for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) { - if (!links[link_id]) - continue; - ieee80211_link_stop(&links[link_id]->data); +static void ieee80211_free_links(struct ieee80211_sub_if_data *sdata, + struct link_container **links) +{ + unsigned int link_id; + + for (link_id = 0; link_id < IEEE80211_MLD_MAX_NUM_LINKS; link_id++) kfree(links[link_id]); - } } static int ieee80211_check_dup_link_addrs(struct ieee80211_sub_if_data *sdata) @@ -123,11 +133,38 @@ static int 
ieee80211_check_dup_link_addrs(struct ieee80211_sub_if_data *sdata) return 0; } +static void ieee80211_set_vif_links_bitmaps(struct ieee80211_sub_if_data *sdata, + u16 links) +{ + sdata->vif.valid_links = links; + + if (!links) { + sdata->vif.active_links = 0; + return; + } + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + /* in an AP all links are always active */ + sdata->vif.active_links = links; + break; + case NL80211_IFTYPE_STATION: + if (sdata->vif.active_links) + break; + WARN_ON(hweight16(links) > 1); + sdata->vif.active_links = links; + break; + default: + WARN_ON(1); + } +} + static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, struct link_container **to_free, u16 new_links) { u16 old_links = sdata->vif.valid_links; + u16 old_active = sdata->vif.active_links; unsigned long add = new_links & ~old_links; unsigned long rem = old_links & ~new_links; unsigned int link_id; @@ -195,13 +232,17 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, ieee80211_link_init(sdata, -1, &sdata->deflink, &sdata->vif.bss_conf); - sdata->vif.valid_links = new_links; - ret = ieee80211_check_dup_link_addrs(sdata); if (!ret) { + /* for keys we will not be able to undo this */ + ieee80211_tear_down_links(sdata, to_free, rem); + + ieee80211_set_vif_links_bitmaps(sdata, new_links); + /* tell the driver */ ret = drv_change_vif_links(sdata->local, sdata, - old_links, new_links, + old_links & old_active, + new_links & sdata->vif.active_links, old); } @@ -209,7 +250,7 @@ static int ieee80211_vif_update_links(struct ieee80211_sub_if_data *sdata, /* restore config */ memcpy(sdata->link, old_data, sizeof(old_data)); memcpy(sdata->vif.link_conf, old, sizeof(old)); - sdata->vif.valid_links = old_links; + ieee80211_set_vif_links_bitmaps(sdata, old_links); /* and free (only) the newly allocated links */ memset(to_free, 0, sizeof(links)); goto free; @@ -260,3 +301,173 @@ void ieee80211_vif_clear_links(struct ieee80211_sub_if_data *sdata) ieee80211_free_links(sdata, links); } + +static int _ieee80211_set_active_links(struct ieee80211_sub_if_data *sdata, + u16 active_links) +{ + struct ieee80211_bss_conf *link_confs[IEEE80211_MLD_MAX_NUM_LINKS]; + struct ieee80211_local *local = sdata->local; + u16 old_active = sdata->vif.active_links; + unsigned long rem = old_active & ~active_links; + unsigned long add = active_links & ~old_active; + struct sta_info *sta; + unsigned int link_id; + int ret, i; + + if (!ieee80211_sdata_running(sdata)) + return -ENETDOWN; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return -EINVAL; + + /* cannot activate links that don't exist */ + if (active_links & ~sdata->vif.valid_links) + return -EINVAL; + + /* nothing to do */ + if (old_active == active_links) + return 0; + + for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) + link_confs[i] = sdata_dereference(sdata->vif.link_conf[i], + sdata); + + if (add) { + sdata->vif.active_links |= active_links; + ret = drv_change_vif_links(local, sdata, + old_active, + sdata->vif.active_links, + link_confs); + if (ret) { + sdata->vif.active_links = old_active; + return ret; + } + } + + for_each_set_bit(link_id, &rem, IEEE80211_MLD_MAX_NUM_LINKS) { + struct ieee80211_link_data *link; + + link = sdata_dereference(sdata->link[link_id], sdata); + + /* FIXME: kill TDLS connections on the link */ + + ieee80211_link_release_channel(link); + } + + list_for_each_entry(sta, &local->sta_list, list) { + if (sdata != sta->sdata) + continue; + ret = drv_change_sta_links(local, sdata, &sta->sta, + old_active, 
+ old_active | active_links); + WARN_ON_ONCE(ret); + } + + ret = ieee80211_key_switch_links(sdata, rem, add); + WARN_ON_ONCE(ret); + + list_for_each_entry(sta, &local->sta_list, list) { + if (sdata != sta->sdata) + continue; + ret = drv_change_sta_links(local, sdata, &sta->sta, + old_active | active_links, + active_links); + WARN_ON_ONCE(ret); + } + + for_each_set_bit(link_id, &add, IEEE80211_MLD_MAX_NUM_LINKS) { + struct ieee80211_link_data *link; + + link = sdata_dereference(sdata->link[link_id], sdata); + + ret = ieee80211_link_use_channel(link, &link->conf->chandef, + IEEE80211_CHANCTX_SHARED); + WARN_ON_ONCE(ret); + + ieee80211_link_info_change_notify(sdata, link, + BSS_CHANGED_ERP_CTS_PROT | + BSS_CHANGED_ERP_PREAMBLE | + BSS_CHANGED_ERP_SLOT | + BSS_CHANGED_HT | + BSS_CHANGED_BASIC_RATES | + BSS_CHANGED_BSSID | + BSS_CHANGED_CQM | + BSS_CHANGED_QOS | + BSS_CHANGED_TXPOWER | + BSS_CHANGED_BANDWIDTH | + BSS_CHANGED_TWT | + BSS_CHANGED_HE_OBSS_PD | + BSS_CHANGED_HE_BSS_COLOR); + ieee80211_mgd_set_link_qos_params(link); + } + + old_active = sdata->vif.active_links; + sdata->vif.active_links = active_links; + + if (rem) { + ret = drv_change_vif_links(local, sdata, old_active, + active_links, link_confs); + WARN_ON_ONCE(ret); + } + + return 0; +} + +int ieee80211_set_active_links(struct ieee80211_vif *vif, u16 active_links) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + u16 old_active; + int ret; + + sdata_lock(sdata); + mutex_lock(&local->sta_mtx); + mutex_lock(&local->mtx); + mutex_lock(&local->key_mtx); + old_active = sdata->vif.active_links; + if (old_active & active_links) { + /* + * if there's at least one link that stays active across + * the change then switch to it (to those) first, and + * then enable the additional links + */ + ret = _ieee80211_set_active_links(sdata, + old_active & active_links); + if (!ret) + ret = _ieee80211_set_active_links(sdata, active_links); + } else { + /* otherwise switch directly */ + ret = _ieee80211_set_active_links(sdata, active_links); + } + mutex_unlock(&local->key_mtx); + mutex_unlock(&local->mtx); + mutex_unlock(&local->sta_mtx); + sdata_unlock(sdata); + + return ret; +} +EXPORT_SYMBOL_GPL(ieee80211_set_active_links); + +void ieee80211_set_active_links_async(struct ieee80211_vif *vif, + u16 active_links) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + if (!ieee80211_sdata_running(sdata)) + return; + + if (sdata->vif.type != NL80211_IFTYPE_STATION) + return; + + /* cannot activate links that don't exist */ + if (active_links & ~sdata->vif.valid_links) + return; + + /* nothing to do */ + if (sdata->vif.active_links == active_links) + return; + + sdata->desired_active_links = active_links; + schedule_work(&sdata->activate_links_work); +} +EXPORT_SYMBOL_GPL(ieee80211_set_active_links_async); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 84a3e08a7e84..54b8d5065bbd 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1546,8 +1546,9 @@ void ieee80211_send_nullfunc(struct ieee80211_local *local, struct ieee80211_hdr_3addr *nullfunc; struct ieee80211_if_managed *ifmgd = &sdata->u.mgd; - skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, - !ieee80211_hw_check(&local->hw, DOESNT_SUPPORT_QOS_NDP)); + skb = ieee80211_nullfunc_get(&local->hw, &sdata->vif, -1, + !ieee80211_hw_check(&local->hw, + DOESNT_SUPPORT_QOS_NDP)); if (!skb) return; @@ -3443,11 +3444,11 @@ static void ieee80211_destroy_auth_data(struct ieee80211_sub_if_data *sdata, 
ieee80211_link_info_change_notify(sdata, &sdata->deflink, BSS_CHANGED_BSSID); sdata->u.mgd.flags = 0; + mutex_lock(&sdata->local->mtx); ieee80211_link_release_channel(&sdata->deflink); - mutex_unlock(&sdata->local->mtx); - ieee80211_vif_set_links(sdata, 0); + mutex_unlock(&sdata->local->mtx); } cfg80211_put_bss(sdata->local->hw.wiphy, auth_data->bss); @@ -3485,10 +3486,6 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, sdata->u.mgd.flags = 0; sdata->vif.bss_conf.mu_mimo_owner = false; - mutex_lock(&sdata->local->mtx); - ieee80211_link_release_channel(&sdata->deflink); - mutex_unlock(&sdata->local->mtx); - if (status != ASSOC_REJECTED) { struct cfg80211_assoc_failure data = { .timeout = status == ASSOC_TIMEOUT, @@ -3507,7 +3504,10 @@ static void ieee80211_destroy_assoc_data(struct ieee80211_sub_if_data *sdata, cfg80211_assoc_failure(sdata->dev, &data); } + mutex_lock(&sdata->local->mtx); + ieee80211_link_release_channel(&sdata->deflink); ieee80211_vif_set_links(sdata, 0); + mutex_unlock(&sdata->local->mtx); } kfree(assoc_data); @@ -4057,15 +4057,14 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link, goto out; } - sband = ieee80211_get_link_sband(link); - if (!sband) { + if (WARN_ON(!link->conf->chandef.chan)) { ret = false; goto out; } + sband = local->hw.wiphy->bands[link->conf->chandef.chan->band]; if (!(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_HE) && (!elems->he_cap || !elems->he_operation)) { - mutex_unlock(&sdata->local->sta_mtx); sdata_info(sdata, "HE AP is missing HE capability/operation\n"); ret = false; @@ -4819,6 +4818,40 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata, return ret; } +static bool ieee80211_get_dtim(const struct cfg80211_bss_ies *ies, + u8 *dtim_count, u8 *dtim_period) +{ + const u8 *tim_ie = cfg80211_find_ie(WLAN_EID_TIM, ies->data, ies->len); + const u8 *idx_ie = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX, ies->data, + ies->len); + const struct ieee80211_tim_ie *tim = NULL; + const struct ieee80211_bssid_index *idx; + bool valid = tim_ie && tim_ie[1] >= 2; + + if (valid) + tim = (void *)(tim_ie + 2); + + if (dtim_count) + *dtim_count = valid ? tim->dtim_count : 0; + + if (dtim_period) + *dtim_period = valid ? 
tim->dtim_period : 0; + + /* Check if value is overridden by non-transmitted profile */ + if (!idx_ie || idx_ie[1] < 3) + return valid; + + idx = (void *)(idx_ie + 2); + + if (dtim_count) + *dtim_count = idx->dtim_count; + + if (dtim_period) + *dtim_period = idx->dtim_period; + + return true; +} + static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, struct ieee80211_mgmt *mgmt, struct ieee802_11_elems *elems, @@ -4882,11 +4915,24 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata, goto out_err; if (link_id != assoc_data->assoc_link_id) { - err = ieee80211_prep_channel(sdata, link, - assoc_data->link[link_id].bss, + struct cfg80211_bss *cbss = assoc_data->link[link_id].bss; + const struct cfg80211_bss_ies *ies; + + rcu_read_lock(); + ies = rcu_dereference(cbss->ies); + ieee80211_get_dtim(ies, + &link->conf->sync_dtim_count, + &link->u.mgd.dtim_period); + link->conf->dtim_period = link->u.mgd.dtim_period ?: 1; + link->conf->beacon_int = cbss->beacon_interval; + rcu_read_unlock(); + + err = ieee80211_prep_channel(sdata, link, cbss, &link->u.mgd.conn_flags); - if (err) + if (err) { + link_info(link, "prep_channel failed\n"); goto out_err; + } } err = ieee80211_mgd_setup_link_sta(link, sta, link_sta, @@ -5636,12 +5682,16 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link, mutex_lock(&local->sta_mtx); sta = sta_info_get(sdata, sdata->vif.cfg.ap_addr); - if (WARN_ON(!sta)) + if (WARN_ON(!sta)) { + mutex_unlock(&local->sta_mtx); goto free; + } link_sta = rcu_dereference_protected(sta->link[link->link_id], lockdep_is_held(&local->sta_mtx)); - if (WARN_ON(!link_sta)) + if (WARN_ON(!link_sta)) { + mutex_unlock(&local->sta_mtx); goto free; + } changed |= ieee80211_recalc_twt_req(link, link_sta, elems); @@ -6354,40 +6404,6 @@ void ieee80211_mlme_notify_scan_completed(struct ieee80211_local *local) rcu_read_unlock(); } -static bool ieee80211_get_dtim(const struct cfg80211_bss_ies *ies, - u8 *dtim_count, u8 *dtim_period) -{ - const u8 *tim_ie = cfg80211_find_ie(WLAN_EID_TIM, ies->data, ies->len); - const u8 *idx_ie = cfg80211_find_ie(WLAN_EID_MULTI_BSSID_IDX, ies->data, - ies->len); - const struct ieee80211_tim_ie *tim = NULL; - const struct ieee80211_bssid_index *idx; - bool valid = tim_ie && tim_ie[1] >= 2; - - if (valid) - tim = (void *)(tim_ie + 2); - - if (dtim_count) - *dtim_count = valid ? tim->dtim_count : 0; - - if (dtim_period) - *dtim_period = valid ? 
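For context on ieee80211_get_dtim(), now moved above its new per-link callers: the TIM element carries the DTIM bookkeeping in its first two octets, and a nontransmitted-BSSID profile may override both values. A simplified view of the layout being parsed (abbreviated names, not the kernel structs):

	/* TIM element body (IEEE Std 802.11, 9.4.2.5), first two octets: */
	struct tim_body {
		u8 dtim_count;	/* beacons remaining until the next DTIM */
		u8 dtim_period;	/* beacon intervals between DTIMs */
		u8 bitmap_ctrl;
		u8 virtual_map[];
	};

The length checks mirror this layout: tim_ie[1] >= 2 guarantees both octets are present, and idx_ie[1] >= 3 guarantees the Multiple BSSID-Index element is long enough to carry the overriding dtim_period and dtim_count.
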
tim->dtim_period : 0; - - /* Check if value is overridden by non-transmitted profile */ - if (!idx_ie || idx_ie[1] < 3) - return valid; - - idx = (void *)(idx_ie + 2); - - if (dtim_count) - *dtim_count = idx->dtim_count; - - if (dtim_period) - *dtim_period = idx->dtim_period; - - return true; -} - static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, struct cfg80211_bss *cbss, s8 link_id, const u8 *ap_mld_addr, bool assoc, @@ -6562,6 +6578,7 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata, return 0; out_err: + ieee80211_link_release_channel(&sdata->deflink); ieee80211_vif_set_links(sdata, 0); return err; } @@ -6889,23 +6906,6 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) size += req->links[i].elems_len; - if (req->ap_mld_addr) { - for (i = 0; i < IEEE80211_MLD_MAX_NUM_LINKS; i++) { - if (!req->links[i].bss) - continue; - if (i == assoc_link_id) - continue; - /* - * For now, support only a single link in MLO, we - * don't have the necessary parsing of the multi- - * link element in the association response, etc. - */ - sdata_info(sdata, - "refusing MLO association with >1 links\n"); - return -EINVAL; - } - } - /* FIXME: no support for 4-addr MLO yet */ if (sdata->u.mgd.use_4addr && req->link_id >= 0) return -EOPNOTSUPP; diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c index 5f27e6746762..7f3f5f51081d 100644 --- a/net/mac80211/rc80211_minstrel_ht.c +++ b/net/mac80211/rc80211_minstrel_ht.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0-only /* * Copyright (C) 2010-2013 Felix Fietkau <nbd@openwrt.org> - * Copyright (C) 2019-2021 Intel Corporation + * Copyright (C) 2019-2022 Intel Corporation */ #include <linux/netdevice.h> #include <linux/types.h> @@ -10,6 +10,7 @@ #include <linux/random.h> #include <linux/moduleparam.h> #include <linux/ieee80211.h> +#include <linux/minmax.h> #include <net/mac80211.h> #include "rate.h" #include "sta_info.h" @@ -1478,7 +1479,7 @@ minstrel_ht_set_rate(struct minstrel_priv *mp, struct minstrel_ht_sta *mi, * - for fallback rates, to increase chances of getting through */ if (offset > 0 || - (mi->sta->smps_mode == IEEE80211_SMPS_DYNAMIC && + (mi->sta->deflink.smps_mode == IEEE80211_SMPS_DYNAMIC && group->streams > 1)) { ratetbl->rate[offset].count = ratetbl->rate[offset].count_rts; flags |= IEEE80211_TX_RC_USE_RTS_CTS; @@ -1550,6 +1551,7 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) { struct ieee80211_sta_rates *rates; int i = 0; + int max_rates = min_t(int, mp->hw->max_rates, IEEE80211_TX_RATE_TABLE_SIZE); rates = kzalloc(sizeof(*rates), GFP_ATOMIC); if (!rates) @@ -1559,16 +1561,17 @@ minstrel_ht_update_rates(struct minstrel_priv *mp, struct minstrel_ht_sta *mi) minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_tp_rate[0]); /* Fill up remaining, keep one entry for max_probe_rate */ - for (; i < (mp->hw->max_rates - 1); i++) + for (; i < (max_rates - 1); i++) minstrel_ht_set_rate(mp, mi, rates, i, mi->max_tp_rate[i]); - if (i < mp->hw->max_rates) + if (i < max_rates) minstrel_ht_set_rate(mp, mi, rates, i++, mi->max_prob_rate); if (i < IEEE80211_TX_RATE_TABLE_SIZE) rates->rate[i].idx = -1; - mi->sta->max_rc_amsdu_len = minstrel_ht_get_max_amsdu_len(mi); + mi->sta->deflink.agg.max_rc_amsdu_len = minstrel_ht_get_max_amsdu_len(mi); + ieee80211_sta_recalc_aggregates(mi->sta); rate_control_set_rates(mp->hw, mi->sta, rates); } @@ -1779,7 +1782,7 @@ minstrel_ht_update_caps(void 
*priv, struct ieee80211_supported_band *sband, nss = minstrel_mcs_groups[i].streams; /* Mark MCS > 7 as unsupported if STA is in static SMPS mode */ - if (sta->smps_mode == IEEE80211_SMPS_STATIC && nss > 1) + if (sta->deflink.smps_mode == IEEE80211_SMPS_STATIC && nss > 1) continue; /* HT rate */ diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index cc139fe5fb78..bd215fe3c796 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -49,7 +49,7 @@ static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb, if (present_fcs_len) __pskb_trim(skb, skb->len - present_fcs_len); - __pskb_pull(skb, rtap_space); + pskb_pull(skb, rtap_space); hdr = (void *)skb->data; fc = hdr->frame_control; @@ -74,7 +74,7 @@ static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb, memmove(skb->data + IEEE80211_HT_CTL_LEN, skb->data, hdrlen - IEEE80211_HT_CTL_LEN); - __pskb_pull(skb, IEEE80211_HT_CTL_LEN); + pskb_pull(skb, IEEE80211_HT_CTL_LEN); return skb; } @@ -1452,7 +1452,7 @@ ieee80211_rx_h_check_dup(struct ieee80211_rx_data *rx) if (unlikely(ieee80211_has_retry(hdr->frame_control) && rx->sta->last_seq_ctrl[rx->seqno_idx] == hdr->seq_ctrl)) { I802_DEBUG_INC(rx->local->dot11FrameDuplicateCount); - rx->sta->deflink.rx_stats.num_duplicates++; + rx->link_sta->rx_stats.num_duplicates++; return RX_DROP_UNUSABLE; } else if (!(status->flag & RX_FLAG_AMSDU_MORE)) { rx->sta->last_seq_ctrl[rx->seqno_idx] = hdr->seq_ctrl; @@ -1731,12 +1731,13 @@ static ieee80211_rx_result debug_noinline ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) { struct sta_info *sta = rx->sta; + struct link_sta_info *link_sta = rx->link_sta; struct sk_buff *skb = rx->skb; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; int i; - if (!sta) + if (!sta || !link_sta) return RX_CONTINUE; /* @@ -1752,47 +1753,47 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) NL80211_IFTYPE_ADHOC); if (ether_addr_equal(bssid, rx->sdata->u.ibss.bssid) && test_sta_flag(sta, WLAN_STA_AUTHORIZED)) { - sta->deflink.rx_stats.last_rx = jiffies; + link_sta->rx_stats.last_rx = jiffies; if (ieee80211_is_data(hdr->frame_control) && !is_multicast_ether_addr(hdr->addr1)) - sta->deflink.rx_stats.last_rate = + link_sta->rx_stats.last_rate = sta_stats_encode_rate(status); } } else if (rx->sdata->vif.type == NL80211_IFTYPE_OCB) { - sta->deflink.rx_stats.last_rx = jiffies; + link_sta->rx_stats.last_rx = jiffies; } else if (!ieee80211_is_s1g_beacon(hdr->frame_control) && !is_multicast_ether_addr(hdr->addr1)) { /* * Mesh beacons will update last_rx when if they are found to * match the current local configuration when processed. 
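A note on the __pskb_pull() → pskb_pull() substitution at the top of this rx.c hunk: the underscored variant assumes the pulled bytes already sit in the linear head and misbehaves on non-linear skbs, while pskb_pull() linearizes on demand. Roughly, as a simplified sketch of the skbuff.h semantics:

	static inline void *pskb_pull_sketch(struct sk_buff *skb, unsigned int len)
	{
		if (unlikely(len > skb->len))
			return NULL;		/* not that much data at all */
		if (len > skb_headlen(skb) &&
		    !__pskb_pull_tail(skb, len - skb_headlen(skb)))
			return NULL;		/* could not pull in fragment data */
		skb->len -= len;
		return skb->data += len;
	}
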
*/ - sta->deflink.rx_stats.last_rx = jiffies; + link_sta->rx_stats.last_rx = jiffies; if (ieee80211_is_data(hdr->frame_control)) - sta->deflink.rx_stats.last_rate = sta_stats_encode_rate(status); + link_sta->rx_stats.last_rate = sta_stats_encode_rate(status); } - sta->deflink.rx_stats.fragments++; + link_sta->rx_stats.fragments++; - u64_stats_update_begin(&rx->sta->deflink.rx_stats.syncp); - sta->deflink.rx_stats.bytes += rx->skb->len; - u64_stats_update_end(&rx->sta->deflink.rx_stats.syncp); + u64_stats_update_begin(&link_sta->rx_stats.syncp); + link_sta->rx_stats.bytes += rx->skb->len; + u64_stats_update_end(&link_sta->rx_stats.syncp); if (!(status->flag & RX_FLAG_NO_SIGNAL_VAL)) { - sta->deflink.rx_stats.last_signal = status->signal; - ewma_signal_add(&sta->deflink.rx_stats_avg.signal, + link_sta->rx_stats.last_signal = status->signal; + ewma_signal_add(&link_sta->rx_stats_avg.signal, -status->signal); } if (status->chains) { - sta->deflink.rx_stats.chains = status->chains; + link_sta->rx_stats.chains = status->chains; for (i = 0; i < ARRAY_SIZE(status->chain_signal); i++) { int signal = status->chain_signal[i]; if (!(status->chains & BIT(i))) continue; - sta->deflink.rx_stats.chain_signal_last[i] = signal; - ewma_signal_add(&sta->deflink.rx_stats_avg.chain_signal[i], + link_sta->rx_stats.chain_signal_last[i] = signal; + ewma_signal_add(&link_sta->rx_stats_avg.chain_signal[i], -signal); } } @@ -1853,7 +1854,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) * Update counter and free packet here to avoid * counting this as a dropped packed. */ - sta->deflink.rx_stats.packets++; + link_sta->rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; } @@ -2389,7 +2390,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) out: ieee80211_led_rx(rx->local); if (rx->sta) - rx->sta->deflink.rx_stats.packets++; + rx->link_sta->rx_stats.packets++; return RX_CONTINUE; } @@ -2665,9 +2666,9 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) * for non-QoS-data frames. Here we know it's a data * frame, so count MSDUs. */ - u64_stats_update_begin(&rx->sta->deflink.rx_stats.syncp); - rx->sta->deflink.rx_stats.msdu[rx->seqno_idx]++; - u64_stats_update_end(&rx->sta->deflink.rx_stats.syncp); + u64_stats_update_begin(&rx->link_sta->rx_stats.syncp); + rx->link_sta->rx_stats.msdu[rx->seqno_idx]++; + u64_stats_update_end(&rx->link_sta->rx_stats.syncp); } if ((sdata->vif.type == NL80211_IFTYPE_AP || @@ -3364,7 +3365,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) switch (mgmt->u.action.category) { case WLAN_CATEGORY_HT: /* reject HT action frames from stations not supporting HT */ - if (!rx->sta->sta.deflink.ht_cap.ht_supported) + if (!rx->link_sta->pub->ht_cap.ht_supported) goto invalid; if (sdata->vif.type != NL80211_IFTYPE_STATION && @@ -3404,9 +3405,9 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) } /* if no change do nothing */ - if (rx->sta->sta.smps_mode == smps_mode) + if (rx->link_sta->pub->smps_mode == smps_mode) goto handled; - rx->sta->sta.smps_mode = smps_mode; + rx->link_sta->pub->smps_mode = smps_mode; sta_opmode.smps_mode = ieee80211_smps_mode_to_smps_mode(smps_mode); sta_opmode.changed = STA_OPMODE_SMPS_MODE_CHANGED; @@ -3428,26 +3429,26 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) struct sta_opmode_info sta_opmode = {}; /* If it doesn't support 40 MHz it can't change ... 
*/ - if (!(rx->sta->sta.deflink.ht_cap.cap & + if (!(rx->link_sta->pub->ht_cap.cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40)) goto handled; if (chanwidth == IEEE80211_HT_CHANWIDTH_20MHZ) max_bw = IEEE80211_STA_RX_BW_20; else - max_bw = ieee80211_sta_cap_rx_bw(&rx->sta->deflink); + max_bw = ieee80211_sta_cap_rx_bw(rx->link_sta); /* set cur_max_bandwidth and recalc sta bw */ - rx->sta->deflink.cur_max_bandwidth = max_bw; - new_bw = ieee80211_sta_cur_vht_bw(&rx->sta->deflink); + rx->link_sta->cur_max_bandwidth = max_bw; + new_bw = ieee80211_sta_cur_vht_bw(rx->link_sta); - if (rx->sta->sta.deflink.bandwidth == new_bw) + if (rx->link_sta->pub->bandwidth == new_bw) goto handled; - rx->sta->sta.deflink.bandwidth = new_bw; + rx->link_sta->pub->bandwidth = new_bw; sband = rx->local->hw.wiphy->bands[status->band]; sta_opmode.bw = - ieee80211_sta_rx_bw_to_chan_width(&rx->sta->deflink); + ieee80211_sta_rx_bw_to_chan_width(rx->link_sta); sta_opmode.changed = STA_OPMODE_MAX_BW_CHANGED; rate_control_rate_update(local, sband, rx->sta, 0, @@ -3641,7 +3642,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) handled: if (rx->sta) - rx->sta->deflink.rx_stats.packets++; + rx->link_sta->rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; @@ -3685,7 +3686,7 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) if (cfg80211_rx_mgmt_ext(&rx->sdata->wdev, &info)) { if (rx->sta) - rx->sta->deflink.rx_stats.packets++; + rx->link_sta->rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; } @@ -3723,7 +3724,7 @@ ieee80211_rx_h_action_post_userspace(struct ieee80211_rx_data *rx) handled: if (rx->sta) - rx->sta->deflink.rx_stats.packets++; + rx->link_sta->rx_stats.packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; } @@ -3943,7 +3944,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, case RX_DROP_MONITOR: I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop); if (rx->sta) - rx->sta->deflink.rx_stats.dropped++; + rx->link_sta->rx_stats.dropped++; fallthrough; case RX_CONTINUE: { struct ieee80211_rate *rate = NULL; @@ -3962,7 +3963,7 @@ static void ieee80211_rx_handlers_result(struct ieee80211_rx_data *rx, case RX_DROP_UNUSABLE: I802_DEBUG_INC(rx->sdata->local->rx_handlers_drop); if (rx->sta) - rx->sta->deflink.rx_stats.dropped++; + rx->link_sta->rx_stats.dropped++; dev_kfree_skb(rx->skb); break; case RX_QUEUED: @@ -4084,6 +4085,7 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) .link_id = -1, }; struct tid_ampdu_rx *tid_agg_rx; + u8 link_id; tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); if (!tid_agg_rx) @@ -4103,6 +4105,10 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) }; drv_event_callback(rx.local, rx.sdata, &event); } + /* FIXME: statistics won't be right with this */ + link_id = sta->sta.valid_links ? 
ffs(sta->sta.valid_links) - 1 : 0; + rx.link = rcu_dereference(sta->sdata->link[link_id]); + rx.link_sta = rcu_dereference(sta->link[link_id]); ieee80211_rx_handlers(&rx, &frames); } diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index fe8702d92892..cebfd148bb40 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -274,6 +274,43 @@ link_sta_info_get_bss(struct ieee80211_sub_if_data *sdata, const u8 *addr) return NULL; } +struct ieee80211_sta * +ieee80211_find_sta_by_link_addrs(struct ieee80211_hw *hw, + const u8 *addr, + const u8 *localaddr, + unsigned int *link_id) +{ + struct ieee80211_local *local = hw_to_local(hw); + struct link_sta_info *link_sta; + struct rhlist_head *tmp; + + for_each_link_sta_info(local, addr, link_sta, tmp) { + struct sta_info *sta = link_sta->sta; + struct ieee80211_link_data *link; + u8 _link_id = link_sta->link_id; + + if (!localaddr) { + if (link_id) + *link_id = _link_id; + return &sta->sta; + } + + link = rcu_dereference(sta->sdata->link[_link_id]); + if (!link) + continue; + + if (memcmp(link->conf->addr, localaddr, ETH_ALEN)) + continue; + + if (link_id) + *link_id = _link_id; + return &sta->sta; + } + + return NULL; +} +EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_link_addrs); + struct sta_info *sta_info_get_by_addrs(struct ieee80211_local *local, const u8 *sta_addr, const u8 *vif_addr) { @@ -339,6 +376,8 @@ static void sta_remove_link(struct sta_info *sta, unsigned int link_id, sta_info_free_link(&alloc->info); kfree_rcu(alloc, rcu_head); } + + ieee80211_sta_recalc_aggregates(&sta->sta); } /** @@ -475,6 +514,9 @@ static void sta_info_add_link(struct sta_info *sta, link_sta->link_id = link_id; rcu_assign_pointer(sta->link[link_id], link_info); rcu_assign_pointer(sta->sta.link[link_id], link_sta); + + link_sta->smps_mode = IEEE80211_SMPS_OFF; + link_sta->agg.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA; } static struct sta_info * @@ -505,6 +547,8 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, sta_info_add_link(sta, 0, &sta->deflink, &sta->sta.deflink); } + sta->sta.cur = &sta->sta.deflink.agg; + spin_lock_init(&sta->lock); spin_lock_init(&sta->ps_lock); INIT_WORK(&sta->drv_deliver_wk, sta_deliver_ps_frames); @@ -628,9 +672,6 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata, } } - sta->sta.smps_mode = IEEE80211_SMPS_OFF; - sta->sta.max_rc_amsdu_len = IEEE80211_MAX_MPDU_LEN_HT_BA; - sta->cparams.ce_threshold = CODEL_DISABLED_THRESHOLD; sta->cparams.target = MS2TIME(20); sta->cparams.interval = MS2TIME(100); @@ -2086,6 +2127,44 @@ void ieee80211_sta_register_airtime(struct ieee80211_sta *pubsta, u8 tid, } EXPORT_SYMBOL(ieee80211_sta_register_airtime); +void ieee80211_sta_recalc_aggregates(struct ieee80211_sta *pubsta) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + struct ieee80211_link_sta *link_sta; + int link_id, i; + bool first = true; + + if (!pubsta->valid_links || !pubsta->mlo) { + pubsta->cur = &pubsta->deflink.agg; + return; + } + + rcu_read_lock(); + for_each_sta_active_link(&sta->sdata->vif, pubsta, link_sta, link_id) { + if (first) { + sta->cur = pubsta->deflink.agg; + first = false; + continue; + } + + sta->cur.max_amsdu_len = + min(sta->cur.max_amsdu_len, + link_sta->agg.max_amsdu_len); + sta->cur.max_rc_amsdu_len = + min(sta->cur.max_rc_amsdu_len, + link_sta->agg.max_rc_amsdu_len); + + for (i = 0; i < ARRAY_SIZE(sta->cur.max_tid_amsdu_len); i++) + sta->cur.max_tid_amsdu_len[i] = + min(sta->cur.max_tid_amsdu_len[i], + link_sta->agg.max_tid_amsdu_len[i]); + } + 
rcu_read_unlock(); + + pubsta->cur = &sta->cur; +} +EXPORT_SYMBOL(ieee80211_sta_recalc_aggregates); + void ieee80211_sta_update_pending_airtime(struct ieee80211_local *local, struct sta_info *sta, u8 ac, u16 tx_airtime, bool tx_completed) @@ -2781,6 +2860,11 @@ int ieee80211_sta_activate_link(struct sta_info *sta, unsigned int link_id) if (!test_sta_flag(sta, WLAN_STA_INSERTED)) goto hash; + /* Ensure the values are updated for the driver, + * redone by sta_remove_link on failure. + */ + ieee80211_sta_recalc_aggregates(&sta->sta); + ret = drv_change_sta_links(sdata->local, sdata, &sta->sta, old_links, new_links); if (ret) { @@ -2833,3 +2917,13 @@ void ieee80211_sta_set_max_amsdu_subframes(struct sta_info *sta, if (val) sta->sta.max_amsdu_subframes = 4 << val; } + +#ifdef CONFIG_LOCKDEP +bool lockdep_sta_mutex_held(struct ieee80211_sta *pubsta) +{ + struct sta_info *sta = container_of(pubsta, struct sta_info, sta); + + return lockdep_is_held(&sta->local->sta_mtx); +} +EXPORT_SYMBOL(lockdep_sta_mutex_held); +#endif diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index 2eb3a9452e07..2517ea714dc4 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -622,6 +622,8 @@ struct link_sta_info { * @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to * the BSS one. * @frags: fragment cache + * @cur: storage for aggregation data + * &struct ieee80211_sta points either here or to deflink.agg. * @deflink: This is the default link STA information, for non MLO STA all link * specific STA information is accessed through @deflink or through * link[0] which points to address of @deflink. For MLO Link STA @@ -705,6 +707,7 @@ struct sta_info { struct ieee80211_fragment_cache frags; + struct ieee80211_sta_aggregates cur; struct link_sta_info deflink; struct link_sta_info __rcu *link[IEEE80211_MLD_MAX_NUM_LINKS]; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 8e77fd2e9fdf..3f9ddd7f04b6 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -729,7 +729,7 @@ static void ieee80211_report_used_skb(struct ieee80211_local *local, if (!sdata) { skb->dev = NULL; - } else { + } else if (!dropped) { unsigned int hdr_size = ieee80211_hdrlen(hdr->frame_control); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 1be8c9d83d6a..27c964be102e 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -3387,7 +3387,7 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, int subframe_len = skb->len - ETH_ALEN; u8 max_subframes = sta->sta.max_amsdu_subframes; int max_frags = local->hw.max_tx_fragments; - int max_amsdu_len = sta->sta.max_amsdu_len; + int max_amsdu_len = sta->sta.cur->max_amsdu_len; int orig_truesize; u32 flow_idx; __be16 len; @@ -3413,13 +3413,13 @@ static bool ieee80211_amsdu_aggregate(struct ieee80211_sub_if_data *sdata, if (test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags)) return false; - if (sta->sta.max_rc_amsdu_len) + if (sta->sta.cur->max_rc_amsdu_len) max_amsdu_len = min_t(int, max_amsdu_len, - sta->sta.max_rc_amsdu_len); + sta->sta.cur->max_rc_amsdu_len); - if (sta->sta.max_tid_amsdu_len[tid]) + if (sta->sta.cur->max_tid_amsdu_len[tid]) max_amsdu_len = min_t(int, max_amsdu_len, - sta->sta.max_tid_amsdu_len[tid]); + sta->sta.cur->max_tid_amsdu_len[tid]); flow_idx = fq_flow_idx(fq, skb); @@ -5469,33 +5469,39 @@ EXPORT_SYMBOL(ieee80211_pspoll_get); struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - bool qos_ok) + int link_id, bool qos_ok) { + struct 
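The new ieee80211_sta_recalc_aggregates() just above folds per-link aggregation limits into the single set the TX path reads through sta->sta.cur; the core operation is a component-wise minimum across active links. A sketch with a hypothetical trimmed-down limits struct:

	struct agg_limits {
		u16 max_amsdu_len;
		u16 max_rc_amsdu_len;
	};

	/* The first link seeds the result; each further link can only tighten it. */
	static void min_agg_limits(struct agg_limits *cur,
				   const struct agg_limits *links, int n)
	{
		int i;

		*cur = links[0];
		for (i = 1; i < n; i++) {
			cur->max_amsdu_len = min(cur->max_amsdu_len,
						 links[i].max_amsdu_len);
			cur->max_rc_amsdu_len = min(cur->max_rc_amsdu_len,
						    links[i].max_rc_amsdu_len);
		}
	}
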
ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + struct ieee80211_local *local = sdata->local; + struct ieee80211_link_data *link = NULL; struct ieee80211_hdr_3addr *nullfunc; - struct ieee80211_sub_if_data *sdata; - struct ieee80211_local *local; struct sk_buff *skb; bool qos = false; if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) return NULL; - sdata = vif_to_sdata(vif); - local = sdata->local; + skb = dev_alloc_skb(local->hw.extra_tx_headroom + + sizeof(*nullfunc) + 2); + if (!skb) + return NULL; + rcu_read_lock(); if (qos_ok) { struct sta_info *sta; - rcu_read_lock(); - sta = sta_info_get(sdata, sdata->deflink.u.mgd.bssid); + sta = sta_info_get(sdata, vif->cfg.ap_addr); qos = sta && sta->sta.wme; - rcu_read_unlock(); } - skb = dev_alloc_skb(local->hw.extra_tx_headroom + - sizeof(*nullfunc) + 2); - if (!skb) - return NULL; + if (link_id >= 0) { + link = rcu_dereference(sdata->link[link_id]); + if (WARN_ON_ONCE(!link)) { + rcu_read_unlock(); + kfree_skb(skb); + return NULL; + } + } skb_reserve(skb, local->hw.extra_tx_headroom); @@ -5516,9 +5522,16 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, skb_put_data(skb, &qoshdr, sizeof(qoshdr)); } - memcpy(nullfunc->addr1, sdata->deflink.u.mgd.bssid, ETH_ALEN); - memcpy(nullfunc->addr2, vif->addr, ETH_ALEN); - memcpy(nullfunc->addr3, sdata->deflink.u.mgd.bssid, ETH_ALEN); + if (link) { + memcpy(nullfunc->addr1, link->conf->bssid, ETH_ALEN); + memcpy(nullfunc->addr2, link->conf->addr, ETH_ALEN); + memcpy(nullfunc->addr3, link->conf->bssid, ETH_ALEN); + } else { + memcpy(nullfunc->addr1, vif->cfg.ap_addr, ETH_ALEN); + memcpy(nullfunc->addr2, vif->addr, ETH_ALEN); + memcpy(nullfunc->addr3, vif->cfg.ap_addr, ETH_ALEN); + } + rcu_read_unlock(); return skb; } @@ -5917,6 +5930,9 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, skb_reset_network_header(skb); skb_reset_mac_header(skb); + if (local->hw.queues < IEEE80211_NUM_ACS) + goto start_xmit; + /* update QoS header to prioritize control port frames if possible, * priorization also happens for control port frames send over * AF_PACKET @@ -5944,6 +5960,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev, } rcu_read_unlock(); +start_xmit: /* mutex lock is only needed for incrementing the cookie counter */ mutex_lock(&local->mtx); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 3359ab332d7d..bf7461c41bef 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -301,14 +301,14 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac) local_bh_disable(); spin_lock(&fq->lock); + sdata->vif.txqs_stopped[ac] = false; + if (!test_bit(SDATA_STATE_RUNNING, &sdata->state)) goto out; if (sdata->vif.type == NL80211_IFTYPE_AP) ps = &sdata->bss->ps; - sdata->vif.txqs_stopped[ac] = false; - list_for_each_entry_rcu(sta, &local->sta_list, list) { if (sdata != sta->sdata) continue; @@ -2900,7 +2900,7 @@ void ieee80211_recalc_min_chandef(struct ieee80211_sub_if_data *sdata, */ rcu_read_unlock(); - if (WARN_ON_ONCE(!chanctx_conf)) + if (!chanctx_conf) goto unlock; chanctx = container_of(chanctx_conf, struct ieee80211_chanctx, diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c index b2b09d421e8b..803de5881485 100644 --- a/net/mac80211/vht.c +++ b/net/mac80211/vht.c @@ -323,16 +323,18 @@ ieee80211_vht_cap_ie_to_sta_vht_cap(struct ieee80211_sub_if_data *sdata, */ switch (vht_cap->cap & IEEE80211_VHT_CAP_MAX_MPDU_MASK) { case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454: - link_sta->sta->sta.max_amsdu_len = 
IEEE80211_MAX_MPDU_LEN_VHT_11454; + link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454; break; case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991: - link_sta->sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991; + link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991; break; case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895: default: - link_sta->sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895; + link_sta->pub->agg.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895; break; } + + ieee80211_sta_recalc_aggregates(&link_sta->sta->sta); } /* FIXME: move this to some better location - parses HE/EHT now */ diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 93ec2f349748..20f742b5503b 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -351,7 +351,7 @@ static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad) * FC | A1 | A2 | A3 | SC | [A4] | [QC] */ put_unaligned_be16(len_a, &aad[0]); put_unaligned(mask_fc, (__le16 *)&aad[2]); - memcpy(&aad[4], &hdr->addr1, 3 * ETH_ALEN); + memcpy(&aad[4], &hdr->addrs, 3 * ETH_ALEN); /* Mask Seq#, leave Frag# */ aad[22] = *((u8 *) &hdr->seq_ctrl) & 0x0f; @@ -792,7 +792,7 @@ static void bip_aad(struct sk_buff *skb, u8 *aad) IEEE80211_FCTL_MOREDATA); put_unaligned(mask_fc, (__le16 *) &aad[0]); /* A1 || A2 || A3 */ - memcpy(aad + 2, &hdr->addr1, 3 * ETH_ALEN); + memcpy(aad + 2, &hdr->addrs, 3 * ETH_ALEN); } diff --git a/net/mptcp/mptcp_diag.c b/net/mptcp/mptcp_diag.c index 7f9a71780437..8df1bdb647e2 100644 --- a/net/mptcp/mptcp_diag.c +++ b/net/mptcp/mptcp_diag.c @@ -81,15 +81,18 @@ static void mptcp_diag_dump_listeners(struct sk_buff *skb, struct netlink_callba struct mptcp_diag_ctx *diag_ctx = (void *)cb->ctx; struct nlattr *bc = cb_data->inet_diag_nla_bc; struct net *net = sock_net(skb->sk); + struct inet_hashinfo *hinfo; int i; - for (i = diag_ctx->l_slot; i <= tcp_hashinfo.lhash2_mask; i++) { + hinfo = net->ipv4.tcp_death_row.hashinfo; + + for (i = diag_ctx->l_slot; i <= hinfo->lhash2_mask; i++) { struct inet_listen_hashbucket *ilb; struct hlist_nulls_node *node; struct sock *sk; int num = 0; - ilb = &tcp_hashinfo.lhash2[i]; + ilb = &hinfo->lhash2[i]; rcu_read_lock(); spin_lock(&ilb->lock); diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c index a3e4ee7af0ee..9813ed0fde9b 100644 --- a/net/mptcp/pm_netlink.c +++ b/net/mptcp/pm_netlink.c @@ -796,7 +796,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk, u8 rm_id = rm_list->ids[i]; bool removed = false; - list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { + mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); int how = RCV_SHUTDOWN | SEND_SHUTDOWN; u8 id = subflow->local_id; @@ -1327,7 +1327,7 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info) return -EINVAL; } - entry = kmalloc(sizeof(*entry), GFP_KERNEL); + entry = kmalloc(sizeof(*entry), GFP_KERNEL_ACCOUNT); if (!entry) { GENL_SET_ERR_MSG(info, "can't allocate addr"); return -ENOMEM; @@ -2218,17 +2218,17 @@ static const struct genl_small_ops mptcp_pm_ops[] = { { .cmd = MPTCP_PM_CMD_ADD_ADDR, .doit = mptcp_nl_cmd_add_addr, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MPTCP_PM_CMD_DEL_ADDR, .doit = mptcp_nl_cmd_del_addr, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MPTCP_PM_CMD_FLUSH_ADDRS, .doit = mptcp_nl_cmd_flush_addrs, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MPTCP_PM_CMD_GET_ADDR, @@ -2238,7 +2238,7 @@ static const 
struct genl_small_ops mptcp_pm_ops[] = { { .cmd = MPTCP_PM_CMD_SET_LIMITS, .doit = mptcp_nl_cmd_set_limits, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MPTCP_PM_CMD_GET_LIMITS, @@ -2247,27 +2247,27 @@ static const struct genl_small_ops mptcp_pm_ops[] = { { .cmd = MPTCP_PM_CMD_SET_FLAGS, .doit = mptcp_nl_cmd_set_flags, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MPTCP_PM_CMD_ANNOUNCE, .doit = mptcp_nl_cmd_announce, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MPTCP_PM_CMD_REMOVE, .doit = mptcp_nl_cmd_remove, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MPTCP_PM_CMD_SUBFLOW_CREATE, .doit = mptcp_nl_cmd_sf_create, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, { .cmd = MPTCP_PM_CMD_SUBFLOW_DESTROY, .doit = mptcp_nl_cmd_sf_destroy, - .flags = GENL_ADMIN_PERM, + .flags = GENL_UNS_ADMIN_PERM, }, }; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index d398f3810662..f599ad44ed24 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -150,9 +150,15 @@ static bool mptcp_try_coalesce(struct sock *sk, struct sk_buff *to, MPTCP_SKB_CB(from)->map_seq, MPTCP_SKB_CB(to)->map_seq, to->len, MPTCP_SKB_CB(from)->end_seq); MPTCP_SKB_CB(to)->end_seq = MPTCP_SKB_CB(from)->end_seq; - kfree_skb_partial(from, fragstolen); + + /* note the fwd memory can reach a negative value after accounting + * for the delta, but the later skb free will restore a non + * negative one + */ atomic_add(delta, &sk->sk_rmem_alloc); mptcp_rmem_charge(sk, delta); + kfree_skb_partial(from, fragstolen); + return true; } @@ -656,9 +662,9 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, skb = skb_peek(&ssk->sk_receive_queue); if (!skb) { - /* if no data is found, a racing workqueue/recvmsg - * already processed the new data, stop here or we - * can enter an infinite loop + /* With racing move_skbs_to_msk() and __mptcp_move_skbs(), + * a different CPU can have already processed the pending + * data, stop here or we can enter an infinite loop */ if (!moved) done = true; @@ -666,9 +672,9 @@ static bool __mptcp_move_skbs_from_subflow(struct mptcp_sock *msk, } if (__mptcp_check_fallback(msk)) { - /* if we are running under the workqueue, TCP could have - * collapsed skbs between dummy map creation and now - * be sure to adjust the size + /* Under fallback skbs have no MPTCP extension and TCP could + * collapse them between the dummy map creation and the + * current dequeue. Be sure to adjust the map size. 
*/ map_remaining = skb->len; subflow->map_data_len = skb->len; @@ -1538,8 +1544,9 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) struct mptcp_sendmsg_info info = { .flags = flags, }; + bool do_check_data_fin = false; struct mptcp_data_frag *dfrag; - int len, copied = 0; + int len; while ((dfrag = mptcp_send_head(sk))) { info.sent = dfrag->already_sent; @@ -1574,8 +1581,8 @@ void __mptcp_push_pending(struct sock *sk, unsigned int flags) goto out; } + do_check_data_fin = true; info.sent += ret; - copied += ret; len -= ret; mptcp_update_post_push(msk, dfrag, ret); @@ -1591,7 +1598,7 @@ out: /* ensure the rtx timer is running */ if (!mptcp_timer_pending(sk)) mptcp_reset_timer(sk); - if (copied) + if (do_check_data_fin) __mptcp_check_send_data_fin(sk); } @@ -1670,6 +1677,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct mptcp_sock *msk = mptcp_sk(sk); struct page_frag *pfrag; + struct socket *ssock; size_t copied = 0; int ret = 0; long timeo; @@ -1683,14 +1691,39 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) lock_sock(sk); + ssock = __mptcp_nmpc_socket(msk); + if (unlikely(ssock && inet_sk(ssock->sk)->defer_connect)) { + struct sock *ssk = ssock->sk; + int copied_syn = 0; + + lock_sock(ssk); + + ret = tcp_sendmsg_fastopen(ssk, msg, &copied_syn, len, NULL); + copied += copied_syn; + if (ret == -EINPROGRESS && copied_syn > 0) { + /* reflect the new state on the MPTCP socket */ + inet_sk_state_store(sk, inet_sk_state_load(ssk)); + release_sock(ssk); + goto out; + } else if (ret) { + release_sock(ssk); + goto do_error; + } + release_sock(ssk); + } + timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { ret = sk_stream_wait_connect(sk, &timeo); if (ret) - goto out; + goto do_error; } + ret = -EPIPE; + if (unlikely(sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN))) + goto do_error; + pfrag = sk_page_frag(sk); while (msg_data_left(msg)) { @@ -1699,11 +1732,6 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) bool dfrag_collapsed; size_t psize, offset; - if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) { - ret = -EPIPE; - goto out; - } - /* reuse tail pfrag, if possible, or carve a new one from the * page allocator */ @@ -1735,7 +1763,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) if (copy_page_from_iter(dfrag->page, offset, psize, &msg->msg_iter) != psize) { ret = -EFAULT; - goto out; + goto do_error; } /* data successfully copied into the write queue */ @@ -1767,7 +1795,7 @@ wait_for_memory: __mptcp_push_pending(sk, msg->msg_flags); ret = sk_stream_wait_memory(sk, &timeo); if (ret) - goto out; + goto do_error; } if (copied) @@ -1775,7 +1803,14 @@ wait_for_memory: out: release_sock(sk); - return copied ? 
: ret; + return copied; + +do_error: + if (copied) + goto out; + + copied = sk_stream_error(sk, msg->msg_flags, ret); + goto out; } static int __mptcp_recvmsg_mskq(struct mptcp_sock *msk, @@ -2278,8 +2313,14 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); - if (flags & MPTCP_CF_FASTCLOSE) + if (flags & MPTCP_CF_FASTCLOSE) { + /* be sure to force the tcp_disconnect() path, + * to generate the egress reset + */ + ssk->sk_lingertime = 0; + sock_set_flag(ssk, SOCK_LINGER); subflow->send_fastclose = 1; + } need_push = (flags & MPTCP_CF_PUSH) && __mptcp_retransmit_pending_data(sk); if (!dispose_it) { @@ -2357,7 +2398,7 @@ static void __mptcp_close_subflow(struct mptcp_sock *msk) might_sleep(); - list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { + mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *ssk = mptcp_subflow_tcp_sock(subflow); if (inet_sk_state_load(ssk) != TCP_CLOSE) @@ -2400,7 +2441,7 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) mptcp_token_destroy(msk); - list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) { + mptcp_for_each_subflow_safe(msk, subflow, tmp) { struct sock *tcp_sk = mptcp_subflow_tcp_sock(subflow); bool slow; @@ -2412,12 +2453,31 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk) unlock_sock_fast(tcp_sk, slow); } + /* Mirror the tcp_reset() error propagation */ + switch (sk->sk_state) { + case TCP_SYN_SENT: + sk->sk_err = ECONNREFUSED; + break; + case TCP_CLOSE_WAIT: + sk->sk_err = EPIPE; + break; + case TCP_CLOSE: + return; + default: + sk->sk_err = ECONNRESET; + } + inet_sk_state_store(sk, TCP_CLOSE); sk->sk_shutdown = SHUTDOWN_MASK; smp_mb__before_atomic(); /* SHUTDOWN must be visible first */ set_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags); - mptcp_close_wake_up(sk); + /* the calling mptcp_worker will properly destroy the socket */ + if (sock_flag(sk, SOCK_DEAD)) + return; + + sk->sk_state_change(sk); + sk_error_report(sk); } static void __mptcp_retrans(struct sock *sk) @@ -2523,6 +2583,16 @@ static void mptcp_mp_fail_no_response(struct mptcp_sock *msk) mptcp_reset_timeout(msk, 0); } +static void mptcp_do_fastclose(struct sock *sk) +{ + struct mptcp_subflow_context *subflow, *tmp; + struct mptcp_sock *msk = mptcp_sk(sk); + + mptcp_for_each_subflow_safe(msk, subflow, tmp) + __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), + subflow, MPTCP_CF_FASTCLOSE); +} + static void mptcp_worker(struct work_struct *work) { struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work); @@ -2551,11 +2621,15 @@ static void mptcp_worker(struct work_struct *work) * closed, but we need the msk around to reply to incoming DATA_FIN, * even if it is orphaned and in FIN_WAIT2 state */ - if (sock_flag(sk, SOCK_DEAD) && - (mptcp_check_close_timeout(sk) || sk->sk_state == TCP_CLOSE)) { - inet_sk_state_store(sk, TCP_CLOSE); - __mptcp_destroy_sock(sk); - goto unlock; + if (sock_flag(sk, SOCK_DEAD)) { + if (mptcp_check_close_timeout(sk)) { + inet_sk_state_store(sk, TCP_CLOSE); + mptcp_do_fastclose(sk); + } + if (sk->sk_state == TCP_CLOSE) { + __mptcp_destroy_sock(sk); + goto unlock; + } } if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags)) @@ -2656,7 +2730,7 @@ static void __mptcp_clear_xmit(struct sock *sk) dfrag_clear(sk, dfrag); } -static void mptcp_cancel_work(struct sock *sk) +void mptcp_cancel_work(struct sock *sk) { struct mptcp_sock *msk = mptcp_sk(sk); @@ -2796,13 +2870,24 @@ static void __mptcp_destroy_sock(struct sock *sk) sock_put(sk); } 
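The mptcp_check_fastclose() switch above mirrors what tcp_reset() does on a plain TCP socket; as a standalone sketch of that mapping (not the kernel function itself):

	/* errno for an incoming reset, depending on socket state: a refused
	 * connect, a peer that already half-closed, or a reset of an
	 * established connection; an already-closed socket reports nothing. */
	static int reset_errno(int sk_state)
	{
		switch (sk_state) {
		case TCP_SYN_SENT:
			return ECONNREFUSED;
		case TCP_CLOSE_WAIT:
			return EPIPE;
		case TCP_CLOSE:
			return 0;
		default:
			return ECONNRESET;
		}
	}
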
-static void mptcp_close(struct sock *sk, long timeout) +static __poll_t mptcp_check_readable(struct mptcp_sock *msk) +{ + /* Concurrent splices from sk_receive_queue into receive_queue will + * always show at least one non-empty queue when checked in this order. + */ + if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) && + skb_queue_empty_lockless(&msk->receive_queue)) + return 0; + + return EPOLLIN | EPOLLRDNORM; +} + +bool __mptcp_close(struct sock *sk, long timeout) { struct mptcp_subflow_context *subflow; struct mptcp_sock *msk = mptcp_sk(sk); bool do_cancel_work = false; - lock_sock(sk); sk->sk_shutdown = SHUTDOWN_MASK; if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) { @@ -2810,8 +2895,13 @@ static void mptcp_close(struct sock *sk, long timeout) goto cleanup; } - if (mptcp_close_state(sk)) + if (mptcp_check_readable(msk)) { + /* the msk has read data, do the MPTCP equivalent of TCP reset */ + inet_sk_state_store(sk, TCP_CLOSE); + mptcp_do_fastclose(sk); + } else if (mptcp_close_state(sk)) { __mptcp_wr_shutdown(sk); + } sk_stream_wait_close(sk, timeout); @@ -2844,6 +2934,17 @@ cleanup: } else { mptcp_reset_timeout(msk, 0); } + + return do_cancel_work; +} + +static void mptcp_close(struct sock *sk, long timeout) +{ + bool do_cancel_work; + + lock_sock(sk); + + do_cancel_work = __mptcp_close(sk, timeout); release_sock(sk); if (do_cancel_work) mptcp_cancel_work(sk); @@ -3047,7 +3148,7 @@ void mptcp_destroy_common(struct mptcp_sock *msk, unsigned int flags) __mptcp_clear_xmit(sk); /* join list will be eventually flushed (with rst) at sock lock release time */ - list_for_each_entry_safe(subflow, tmp, &msk->conn_list, node) + mptcp_for_each_subflow_safe(msk, subflow, tmp) __mptcp_close_ssk(sk, mptcp_subflow_tcp_sock(subflow), subflow, flags); /* move to sk_receive_queue, sk_stream_kill_queues will purge it */ @@ -3519,6 +3620,7 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr, do_connect: err = ssock->ops->connect(ssock, uaddr, addr_len, flags); + inet_sk(sock->sk)->defer_connect = inet_sk(ssock->sk)->defer_connect; sock->state = ssock->state; /* on successful connect, the msk state will be moved to established by @@ -3616,18 +3718,6 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock, return err; } -static __poll_t mptcp_check_readable(struct mptcp_sock *msk) -{ - /* Concurrent splices from sk_receive_queue into receive_queue will - * always show at least one non-empty queue when checked in this order. 
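mptcp_check_readable(), hoisted here so __mptcp_close() can use it, is lockless by design; my reading of the ordering argument in its comment: skbs travel from the msk's sk_receive_queue into msk->receive_queue, and the splice populates the destination before the source is re-initialized, so a reader probing the source first can never observe both queues empty while data is in transit between them. The check itself is just:

	/* Lockless: safe only because of the source-then-destination order. */
	static bool both_queues_empty(const struct sk_buff_head *src,
				      const struct sk_buff_head *dst)
	{
		return skb_queue_empty_lockless(src) &&
		       skb_queue_empty_lockless(dst);
	}
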
- */ - if (skb_queue_empty_lockless(&((struct sock *)msk)->sk_receive_queue) && - skb_queue_empty_lockless(&msk->receive_queue)) - return 0; - - return EPOLLIN | EPOLLRDNORM; -} - static __poll_t mptcp_check_writeable(struct mptcp_sock *msk) { struct sock *sk = (struct sock *)msk; @@ -3669,13 +3759,16 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, if (state != TCP_SYN_SENT && state != TCP_SYN_RECV) { mask |= mptcp_check_readable(msk); mask |= mptcp_check_writeable(msk); + } else if (state == TCP_SYN_SENT && inet_sk(sk)->defer_connect) { + /* cf tcp_poll() note about TFO */ + mask |= EPOLLOUT | EPOLLWRNORM; } if (sk->sk_shutdown == SHUTDOWN_MASK || state == TCP_CLOSE) mask |= EPOLLHUP; if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= EPOLLIN | EPOLLRDNORM | EPOLLRDHUP; - /* This barrier is coupled with smp_wmb() in tcp_reset() */ + /* This barrier is coupled with smp_wmb() in __mptcp_error_report() */ smp_rmb(); if (sk->sk_err) mask |= EPOLLERR; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 132d50833df1..c0b5b4628f65 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -314,6 +314,8 @@ struct mptcp_sock { #define mptcp_for_each_subflow(__msk, __subflow) \ list_for_each_entry(__subflow, &((__msk)->conn_list), node) +#define mptcp_for_each_subflow_safe(__msk, __subflow, __tmp) \ + list_for_each_entry_safe(__subflow, __tmp, &((__msk)->conn_list), node) static inline void msk_owned_by_me(const struct mptcp_sock *msk) { @@ -612,6 +614,8 @@ void mptcp_subflow_reset(struct sock *ssk); void mptcp_subflow_queue_clean(struct sock *ssk); void mptcp_sock_graft(struct sock *sk, struct socket *parent); struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk); +bool __mptcp_close(struct sock *sk, long timeout); +void mptcp_cancel_work(struct sock *sk); bool mptcp_addresses_equal(const struct mptcp_addr_info *a, const struct mptcp_addr_info *b, bool use_port); diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c index 423d3826ca1e..c7cb68c725b2 100644 --- a/net/mptcp/sockopt.c +++ b/net/mptcp/sockopt.c @@ -559,6 +559,7 @@ static bool mptcp_supported_sockopt(int level, int optname) case TCP_NOTSENT_LOWAT: case TCP_TX_DELAY: case TCP_INQ: + case TCP_FASTOPEN_CONNECT: return true; } @@ -567,7 +568,7 @@ static bool mptcp_supported_sockopt(int level, int optname) /* TCP_REPAIR, TCP_REPAIR_QUEUE, TCP_QUEUE_SEQ, TCP_REPAIR_OPTIONS, * TCP_REPAIR_WINDOW are not supported, better avoid this mess */ - /* TCP_FASTOPEN_KEY, TCP_FASTOPEN TCP_FASTOPEN_CONNECT, TCP_FASTOPEN_NO_COOKIE, + /* TCP_FASTOPEN_KEY, TCP_FASTOPEN, TCP_FASTOPEN_NO_COOKIE, * are not supported fastopen is currently unsupported */ } @@ -768,6 +769,19 @@ static int mptcp_setsockopt_sol_tcp_defer(struct mptcp_sock *msk, sockptr_t optv return tcp_setsockopt(listener->sk, SOL_TCP, TCP_DEFER_ACCEPT, optval, optlen); } +static int mptcp_setsockopt_sol_tcp_fastopen_connect(struct mptcp_sock *msk, sockptr_t optval, + unsigned int optlen) +{ + struct socket *sock; + + /* Limit to first subflow */ + sock = __mptcp_nmpc_socket(msk); + if (!sock) + return -EINVAL; + + return tcp_setsockopt(sock->sk, SOL_TCP, TCP_FASTOPEN_CONNECT, optval, optlen); +} + static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, sockptr_t optval, unsigned int optlen) { @@ -796,6 +810,8 @@ static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname, return mptcp_setsockopt_sol_tcp_nodelay(msk, optval, optlen); case TCP_DEFER_ACCEPT: return mptcp_setsockopt_sol_tcp_defer(msk, optval, optlen); + case 
TCP_FASTOPEN_CONNECT: + return mptcp_setsockopt_sol_tcp_fastopen_connect(msk, optval, optlen); } return -EOPNOTSUPP; @@ -1157,6 +1173,7 @@ static int mptcp_getsockopt_sol_tcp(struct mptcp_sock *msk, int optname, case TCP_INFO: case TCP_CC_INFO: case TCP_DEFER_ACCEPT: + case TCP_FASTOPEN_CONNECT: return mptcp_getsockopt_first_sf_only(msk, SOL_TCP, optname, optval, optlen); case TCP_INQ: diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c index c7d49fb6e7bd..07dd23d0fe04 100644 --- a/net/mptcp/subflow.c +++ b/net/mptcp/subflow.c @@ -602,30 +602,6 @@ static bool subflow_hmac_valid(const struct request_sock *req, return !crypto_memneq(hmac, mp_opt->hmac, MPTCPOPT_HMAC_LEN); } -static void mptcp_sock_destruct(struct sock *sk) -{ - /* if new mptcp socket isn't accepted, it is free'd - * from the tcp listener sockets request queue, linked - * from req->sk. The tcp socket is released. - * This calls the ULP release function which will - * also remove the mptcp socket, via - * sock_put(ctx->conn). - * - * Problem is that the mptcp socket will be in - * ESTABLISHED state and will not have the SOCK_DEAD flag. - * Both result in warnings from inet_sock_destruct. - */ - if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) { - sk->sk_state = TCP_CLOSE; - WARN_ON_ONCE(sk->sk_socket); - sock_orphan(sk); - } - - /* We don't need to clear msk->subflow, as it's still NULL at this point */ - mptcp_destroy_common(mptcp_sk(sk), 0); - inet_sock_destruct(sk); -} - static void mptcp_force_close(struct sock *sk) { /* the msk is not yet exposed to user-space */ @@ -768,7 +744,6 @@ create_child: /* new mpc subflow takes ownership of the newly * created mptcp socket */ - new_msk->sk_destruct = mptcp_sock_destruct; mptcp_sk(new_msk)->setsockopt_seq = ctx->setsockopt_seq; mptcp_pm_new_connection(mptcp_sk(new_msk), child, 1); mptcp_token_accept(subflow_req, mptcp_sk(new_msk)); @@ -1763,13 +1738,19 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk) for (msk = head; msk; msk = next) { struct sock *sk = (struct sock *)msk; - bool slow; + bool slow, do_cancel_work; + sock_hold(sk); slow = lock_sock_fast_nested(sk); next = msk->dl_next; msk->first = NULL; msk->dl_next = NULL; + + do_cancel_work = __mptcp_close(sk, 0); unlock_sock_fast(sk, slow); + if (do_cancel_work) + mptcp_cancel_work(sk); + sock_put(sk); } /* we are still under the listener msk socket lock */ diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 6b31746f9be3..e7ba5b6dd2b7 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -353,7 +353,7 @@ ip_set_init_comment(struct ip_set *set, struct ip_set_comment *comment, c = kmalloc(sizeof(*c) + len + 1, GFP_ATOMIC); if (unlikely(!c)) return; - strlcpy(c->str, ext->comment, len + 1); + strscpy(c->str, ext->comment, len + 1); set->ext_size += sizeof(*c) + strlen(c->str) + 1; rcu_assign_pointer(comment->c, c); } @@ -1072,7 +1072,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info, if (!set) return -ENOMEM; spin_lock_init(&set->lock); - strlcpy(set->name, name, IPSET_MAXNAMELEN); + strscpy(set->name, name, IPSET_MAXNAMELEN); set->family = family; set->revision = revision; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 818b0b058b10..988222fff9f0 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2611,7 +2611,7 @@ ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src) dst->addr = src->addr.ip; dst->port = 
src->port; dst->fwmark = src->fwmark; - strlcpy(dst->sched_name, sched_name, sizeof(dst->sched_name)); + strscpy(dst->sched_name, sched_name, sizeof(dst->sched_name)); dst->flags = src->flags; dst->timeout = src->timeout / HZ; dst->netmask = src->netmask; @@ -2805,13 +2805,13 @@ do_ip_vs_get_ctl(struct sock *sk, int cmd, void __user *user, int *len) mutex_lock(&ipvs->sync_mutex); if (ipvs->sync_state & IP_VS_STATE_MASTER) { d[0].state = IP_VS_STATE_MASTER; - strlcpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn, + strscpy(d[0].mcast_ifn, ipvs->mcfg.mcast_ifn, sizeof(d[0].mcast_ifn)); d[0].syncid = ipvs->mcfg.syncid; } if (ipvs->sync_state & IP_VS_STATE_BACKUP) { d[1].state = IP_VS_STATE_BACKUP; - strlcpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn, + strscpy(d[1].mcast_ifn, ipvs->bcfg.mcast_ifn, sizeof(d[1].mcast_ifn)); d[1].syncid = ipvs->bcfg.syncid; } @@ -3561,7 +3561,7 @@ static int ip_vs_genl_new_daemon(struct netns_ipvs *ipvs, struct nlattr **attrs) attrs[IPVS_DAEMON_ATTR_MCAST_IFN] && attrs[IPVS_DAEMON_ATTR_SYNC_ID])) return -EINVAL; - strlcpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), + strscpy(c.mcast_ifn, nla_data(attrs[IPVS_DAEMON_ATTR_MCAST_IFN]), sizeof(c.mcast_ifn)); c.syncid = nla_get_u32(attrs[IPVS_DAEMON_ATTR_SYNC_ID]); diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 0195f60fc43b..f97bda06d2a9 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -67,6 +67,7 @@ struct conntrack_gc_work { struct delayed_work dwork; u32 next_bucket; u32 avg_timeout; + u32 count; u32 start_time; bool exiting; bool early_drop; @@ -85,10 +86,12 @@ static DEFINE_MUTEX(nf_conntrack_mutex); /* clamp timeouts to this value (TCP unacked) */ #define GC_SCAN_INTERVAL_CLAMP (300ul * HZ) -/* large initial bias so that we don't scan often just because we have - * three entries with a 1s timeout. +/* Initial bias pretending we have 100 entries at the upper bound so we don't + * wakeup often just because we have three entries with a 1s timeout while still + * allowing non-idle machines to wakeup more often when needed. 
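The conntrack GC change in this hunk is worth unpacking: the old code halved next_run per scanned entry, while the new `expires = (expires - (long)next_run) / ++count; next_run += expires;` is the standard incremental-mean update, so next_run tracks the true average timeout of the entries seen so far. Seeding count at GC_SCAN_INITIAL_COUNT with the maximum interval acts as prior weight, as this sketch shows:

	/* Incremental mean: returns the average of the previous *count samples
	 * plus the new one; with *count seeded at 100 near the max interval,
	 * a few short-lived entries barely move the result. */
	static long avg_update(long avg, long sample, long *count)
	{
		return avg + (sample - avg) / ++(*count);
	}
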
*/ -#define GC_SCAN_INTERVAL_INIT INT_MAX +#define GC_SCAN_INITIAL_COUNT 100 +#define GC_SCAN_INTERVAL_INIT GC_SCAN_INTERVAL_MAX #define GC_SCAN_MAX_DURATION msecs_to_jiffies(10) #define GC_SCAN_EXPIRED_MAX (64000u / HZ) @@ -1466,6 +1469,7 @@ static void gc_worker(struct work_struct *work) unsigned int expired_count = 0; unsigned long next_run; s32 delta_time; + long count; gc_work = container_of(work, struct conntrack_gc_work, dwork.work); @@ -1475,10 +1479,12 @@ static void gc_worker(struct work_struct *work) if (i == 0) { gc_work->avg_timeout = GC_SCAN_INTERVAL_INIT; + gc_work->count = GC_SCAN_INITIAL_COUNT; gc_work->start_time = start_time; } next_run = gc_work->avg_timeout; + count = gc_work->count; end_time = start_time + GC_SCAN_MAX_DURATION; @@ -1498,8 +1504,8 @@ static void gc_worker(struct work_struct *work) hlist_nulls_for_each_entry_rcu(h, n, &ct_hash[i], hnnode) { struct nf_conntrack_net *cnet; - unsigned long expires; struct net *net; + long expires; tmp = nf_ct_tuplehash_to_ctrack(h); @@ -1513,6 +1519,7 @@ static void gc_worker(struct work_struct *work) gc_work->next_bucket = i; gc_work->avg_timeout = next_run; + gc_work->count = count; delta_time = nfct_time_stamp - gc_work->start_time; @@ -1528,8 +1535,8 @@ static void gc_worker(struct work_struct *work) } expires = clamp(nf_ct_expires(tmp), GC_SCAN_INTERVAL_MIN, GC_SCAN_INTERVAL_CLAMP); + expires = (expires - (long)next_run) / ++count; next_run += expires; - next_run /= 2u; if (nf_conntrack_max95 == 0 || gc_worker_skip_ct(tmp)) continue; @@ -1570,6 +1577,7 @@ static void gc_worker(struct work_struct *work) delta_time = nfct_time_stamp - end_time; if (delta_time > 0 && i < hashsz) { gc_work->avg_timeout = next_run; + gc_work->count = count; gc_work->next_bucket = i; next_run = 0; goto early_exit; @@ -1782,7 +1790,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl, } spin_unlock_bh(&nf_conntrack_expect_lock); } - if (!exp) + if (!exp && tmpl) __nf_ct_try_assign_helper(ct, tmpl, GFP_ATOMIC); /* Other CPU might have obtained a pointer to this object before it was @@ -2068,10 +2076,6 @@ void nf_conntrack_alter_reply(struct nf_conn *ct, ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply; if (ct->master || (help && !hlist_empty(&help->expectations))) return; - - rcu_read_lock(); - __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); - rcu_read_unlock(); } EXPORT_SYMBOL_GPL(nf_conntrack_alter_reply); @@ -2798,7 +2802,6 @@ int nf_conntrack_init_net(struct net *net) nf_conntrack_acct_pernet_init(net); nf_conntrack_tstamp_pernet_init(net); nf_conntrack_ecache_pernet_init(net); - nf_conntrack_helper_pernet_init(net); nf_conntrack_proto_pernet_init(net); return 0; diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 0d9332e9cf71..617f744a2e3a 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -33,6 +33,7 @@ MODULE_AUTHOR("Rusty Russell <rusty@rustcorp.com.au>"); MODULE_DESCRIPTION("ftp connection tracking helper"); MODULE_ALIAS("ip_conntrack_ftp"); MODULE_ALIAS_NFCT_HELPER(HELPER_NAME); +static DEFINE_SPINLOCK(nf_ftp_lock); #define MAX_PORTS 8 static u_int16_t ports[MAX_PORTS]; @@ -409,7 +410,8 @@ static int help(struct sk_buff *skb, } datalen = skb->len - dataoff; - spin_lock_bh(&ct->lock); + /* seqadj (nat) uses ct->lock internally, nf_nat_ftp would cause deadlock */ + spin_lock_bh(&nf_ftp_lock); fb_ptr = skb->data + dataoff; ends_in_nl = (fb_ptr[datalen - 1] == '\n'); @@ -538,7 +540,7 @@ out_update_nl: if (ends_in_nl) update_nl_seq(ct, seq, ct_ftp_info, dir, 
skb); out: - spin_unlock_bh(&ct->lock); + spin_unlock_bh(&nf_ftp_lock); return ret; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index e96b32221444..ff737a76052e 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -35,11 +35,6 @@ unsigned int nf_ct_helper_hsize __read_mostly; EXPORT_SYMBOL_GPL(nf_ct_helper_hsize); static unsigned int nf_ct_helper_count __read_mostly; -static bool nf_ct_auto_assign_helper __read_mostly = false; -module_param_named(nf_conntrack_helper, nf_ct_auto_assign_helper, bool, 0644); -MODULE_PARM_DESC(nf_conntrack_helper, - "Enable automatic conntrack helper assignment (default 0)"); - static DEFINE_MUTEX(nf_ct_nat_helpers_mutex); static struct list_head nf_ct_nat_helpers __read_mostly; @@ -51,24 +46,6 @@ static unsigned int helper_hash(const struct nf_conntrack_tuple *tuple) (__force __u16)tuple->src.u.all) % nf_ct_helper_hsize; } -static struct nf_conntrack_helper * -__nf_ct_helper_find(const struct nf_conntrack_tuple *tuple) -{ - struct nf_conntrack_helper *helper; - struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; - unsigned int h; - - if (!nf_ct_helper_count) - return NULL; - - h = helper_hash(tuple); - hlist_for_each_entry_rcu(helper, &nf_ct_helper_hash[h], hnode) { - if (nf_ct_tuple_src_mask_cmp(tuple, &helper->tuple, &mask)) - return helper; - } - return NULL; -} - struct nf_conntrack_helper * __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum) { @@ -209,33 +186,11 @@ nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp) } EXPORT_SYMBOL_GPL(nf_ct_helper_ext_add); -static struct nf_conntrack_helper * -nf_ct_lookup_helper(struct nf_conn *ct, struct net *net) -{ - struct nf_conntrack_net *cnet = nf_ct_pernet(net); - - if (!cnet->sysctl_auto_assign_helper) { - if (cnet->auto_assign_helper_warned) - return NULL; - if (!__nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple)) - return NULL; - pr_info("nf_conntrack: default automatic helper assignment " - "has been turned off for security reasons and CT-based " - "firewall rule not found. Use the iptables CT target " - "to attach helpers instead.\n"); - cnet->auto_assign_helper_warned = true; - return NULL; - } - - return __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); -} - int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, gfp_t flags) { struct nf_conntrack_helper *helper = NULL; struct nf_conn_help *help; - struct net *net = nf_ct_net(ct); /* We already got a helper explicitly attached. 
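With the automatic lookup gone per this helper.c hunk, assignment reduces to inheriting a helper from an explicit conntrack template (e.g. one attached by the iptables/nftables CT target). Schematically, as a sketch of the new contract rather than the kernel code:

	/* No template, no helper: the reply-tuple search is gone entirely. */
	static struct nf_conntrack_helper *helper_from_template(struct nf_conn *tmpl)
	{
		struct nf_conn_help *help = tmpl ? nfct_help(tmpl) : NULL;

		return help ? rcu_dereference(help->helper) : NULL;
	}
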
The function * nf_conntrack_alter_reply - in case NAT is in use - asks for looking @@ -246,23 +201,21 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl, if (test_bit(IPS_HELPER_BIT, &ct->status)) return 0; - if (tmpl != NULL) { - help = nfct_help(tmpl); - if (help != NULL) { - helper = rcu_dereference(help->helper); - set_bit(IPS_HELPER_BIT, &ct->status); - } + if (WARN_ON_ONCE(!tmpl)) + return 0; + + help = nfct_help(tmpl); + if (help != NULL) { + helper = rcu_dereference(help->helper); + set_bit(IPS_HELPER_BIT, &ct->status); } help = nfct_help(ct); if (helper == NULL) { - helper = nf_ct_lookup_helper(ct, net); - if (helper == NULL) { - if (help) - RCU_INIT_POINTER(help->helper, NULL); - return 0; - } + if (help) + RCU_INIT_POINTER(help->helper, NULL); + return 0; } if (help == NULL) { @@ -545,19 +498,6 @@ void nf_nat_helper_unregister(struct nf_conntrack_nat_helper *nat) } EXPORT_SYMBOL_GPL(nf_nat_helper_unregister); -void nf_ct_set_auto_assign_helper_warned(struct net *net) -{ - nf_ct_pernet(net)->auto_assign_helper_warned = true; -} -EXPORT_SYMBOL_GPL(nf_ct_set_auto_assign_helper_warned); - -void nf_conntrack_helper_pernet_init(struct net *net) -{ - struct nf_conntrack_net *cnet = nf_ct_pernet(net); - - cnet->sysctl_auto_assign_helper = nf_ct_auto_assign_helper; -} - int nf_conntrack_helper_init(void) { nf_ct_helper_hsize = 1; /* gets rounded up to use one page */ diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 1796c456ac98..5703846bea3b 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -157,15 +157,37 @@ static int help(struct sk_buff *skb, unsigned int protoff, data = ib_ptr; data_limit = ib_ptr + datalen; - /* strlen("\1DCC SENT t AAAAAAAA P\1\n")=24 - * 5+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=14 */ - while (data < data_limit - (19 + MINMATCHLEN)) { - if (memcmp(data, "\1DCC ", 5)) { + /* Skip any whitespace */ + while (data < data_limit - 10) { + if (*data == ' ' || *data == '\r' || *data == '\n') + data++; + else + break; + } + + /* strlen("PRIVMSG x ")=10 */ + if (data < data_limit - 10) { + if (strncasecmp("PRIVMSG ", data, 8)) + goto out; + data += 8; + } + + /* strlen(" :\1DCC SENT t AAAAAAAA P\1\n")=26 + * 7+MINMATCHLEN+strlen("t AAAAAAAA P\1\n")=26 + */ + while (data < data_limit - (21 + MINMATCHLEN)) { + /* Find first " :", the start of message */ + if (memcmp(data, " :", 2)) { data++; continue; } + data += 2; + + /* then check that place only for the DCC command */ + if (memcmp(data, "\1DCC ", 5)) + goto out; data += 5; - /* we have at least (19+MINMATCHLEN)-5 bytes valid data left */ + /* we have at least (21+MINMATCHLEN)-(2+5) bytes valid data left */ iph = ip_hdr(skb); pr_debug("DCC found in master %pI4:%u %pI4:%u\n", @@ -181,7 +203,7 @@ static int help(struct sk_buff *skb, unsigned int protoff, pr_debug("DCC %s detected\n", dccprotos[i]); /* we have at least - * (19+MINMATCHLEN)-5-dccprotos[i].matchlen bytes valid + * (21+MINMATCHLEN)-7-dccprotos[i].matchlen bytes valid * data left (== 14/13 bytes) */ if (parse_dcc(data, data_limit, &dcc_ip, &dcc_port, &addr_beg_p, &addr_end_p)) { @@ -194,8 +216,9 @@ static int help(struct sk_buff *skb, unsigned int protoff, /* dcc_ip can be the internal OR external (NAT'ed) IP */ tuple = &ct->tuplehash[dir].tuple; - if (tuple->src.u3.ip != dcc_ip && - tuple->dst.u3.ip != dcc_ip) { + if ((tuple->src.u3.ip != dcc_ip && + ct->tuplehash[!dir].tuple.dst.u3.ip != dcc_ip) || + dcc_port == 0) { net_warn_ratelimited("Forged DCC command from 
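The nf_conntrack_irc hunk above stops hunting for "\1DCC " anywhere in the payload: it now requires a PRIVMSG command, accepts the CTCP marker only at the " :" start of the message text, and rejects a zero DCC port. A standalone sketch of the tightened matching on a plain buffer (looks_like_dcc() and its length guards are simplified stand-ins for the helper's data_limit checks):

#define _GNU_SOURCE
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <stdbool.h>

/* True if buf looks like "PRIVMSG <target> :\1DCC ...", i.e. the CTCP DCC
 * marker sits at the very start of the message text, mirroring the stricter
 * parsing added to the irc helper.
 */
static bool looks_like_dcc(const char *buf, size_t len)
{
        const char *p = buf, *end = buf + len;

        while (p < end && (*p == ' ' || *p == '\r' || *p == '\n'))
                p++;
        if (end - p < 10 || strncasecmp(p, "PRIVMSG ", 8))
                return false;
        p += 8;
        p = memmem(p, end - p, " :", 2);        /* start of message text */
        if (!p || end - (p + 2) < 5)
                return false;
        return !memcmp(p + 2, "\1DCC ", 5);
}

int main(void)
{
        const char *ok  = "PRIVMSG bob :\1DCC SEND file 3232235777 6667 42\1\n";
        const char *bad = "PRIVMSG bob :hi, ignore this \1DCC SEND ...\1\n";

        printf("%d %d\n", looks_like_dcc(ok, strlen(ok)),
               looks_like_dcc(bad, strlen(bad)));        /* 1 0 */
        return 0;
}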
%pI4: %pI4:%u\n", &tuple->src.u3.ip, &dcc_ip, dcc_port); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 04169b54f2a2..7562b215b932 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2298,11 +2298,6 @@ ctnetlink_create_conntrack(struct net *net, ct->status |= IPS_HELPER; RCU_INIT_POINTER(help->helper, helper); } - } else { - /* try an implicit helper assignation */ - err = __nf_ct_try_assign_helper(ct, NULL, GFP_ATOMIC); - if (err < 0) - goto err2; } err = ctnetlink_setup_nat(ct, cda); diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c index a634c72b1ffc..656631083177 100644 --- a/net/netfilter/nf_conntrack_proto_tcp.c +++ b/net/netfilter/nf_conntrack_proto_tcp.c @@ -47,6 +47,12 @@ static const char *const tcp_conntrack_names[] = { "SYN_SENT2", }; +enum nf_ct_tcp_action { + NFCT_TCP_IGNORE, + NFCT_TCP_INVALID, + NFCT_TCP_ACCEPT, +}; + #define SECS * HZ #define MINS * 60 SECS #define HOURS * 60 MINS @@ -472,23 +478,45 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender, } } -static bool tcp_in_window(struct nf_conn *ct, - enum ip_conntrack_dir dir, - unsigned int index, - const struct sk_buff *skb, - unsigned int dataoff, - const struct tcphdr *tcph, - const struct nf_hook_state *hook_state) +__printf(6, 7) +static enum nf_ct_tcp_action nf_tcp_log_invalid(const struct sk_buff *skb, + const struct nf_conn *ct, + const struct nf_hook_state *state, + const struct ip_ct_tcp_state *sender, + enum nf_ct_tcp_action ret, + const char *fmt, ...) +{ + const struct nf_tcp_net *tn = nf_tcp_pernet(nf_ct_net(ct)); + struct va_format vaf; + va_list args; + bool be_liberal; + + be_liberal = sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || tn->tcp_be_liberal; + if (be_liberal) + return NFCT_TCP_ACCEPT; + + va_start(args, fmt); + vaf.fmt = fmt; + vaf.va = &args; + nf_ct_l4proto_log_invalid(skb, ct, state, "%pV", &vaf); + va_end(args); + + return ret; +} + +static enum nf_ct_tcp_action +tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir, + unsigned int index, const struct sk_buff *skb, + unsigned int dataoff, const struct tcphdr *tcph, + const struct nf_hook_state *hook_state) { struct ip_ct_tcp *state = &ct->proto.tcp; - struct net *net = nf_ct_net(ct); - struct nf_tcp_net *tn = nf_tcp_pernet(net); struct ip_ct_tcp_state *sender = &state->seen[dir]; struct ip_ct_tcp_state *receiver = &state->seen[!dir]; __u32 seq, ack, sack, end, win, swin; - u16 win_raw; + bool in_recv_win, seq_ok; s32 receiver_offset; - bool res, in_recv_win; + u16 win_raw; /* * Get the required data from the packet. @@ -517,7 +545,7 @@ static bool tcp_in_window(struct nf_conn *ct, end, win); if (!tcph->ack) /* Simultaneous open */ - return true; + return NFCT_TCP_ACCEPT; } else { /* * We are in the middle of a connection, @@ -560,7 +588,7 @@ static bool tcp_in_window(struct nf_conn *ct, end, win); if (dir == IP_CT_DIR_REPLY && !tcph->ack) - return true; + return NFCT_TCP_ACCEPT; } if (!(tcph->ack)) { @@ -584,122 +612,166 @@ static bool tcp_in_window(struct nf_conn *ct, */ seq = end = sender->td_end; - /* Is the ending sequence in the receive window (if available)? */ - in_recv_win = !receiver->td_maxwin || - after(end, sender->td_end - receiver->td_maxwin - 1); - - if (before(seq, sender->td_maxend + 1) && - in_recv_win && - before(sack, receiver->td_end + 1) && - after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) { - /* - * Take into account window scaling (RFC 1323). 
- */ - if (!tcph->syn) - win <<= sender->td_scale; - - /* - * Update sender data. - */ - swin = win + (sack - ack); - if (sender->td_maxwin < swin) - sender->td_maxwin = swin; - if (after(end, sender->td_end)) { + seq_ok = before(seq, sender->td_maxend + 1); + if (!seq_ok) { + u32 overshot = end - sender->td_maxend + 1; + bool ack_ok; + + ack_ok = after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1); + in_recv_win = receiver->td_maxwin && + after(end, sender->td_end - receiver->td_maxwin - 1); + + if (in_recv_win && + ack_ok && + overshot <= receiver->td_maxwin && + before(sack, receiver->td_end + 1)) { + /* Work around TCPs that send more bytes than allowed by + * the receive window. + * + * If the (marked as invalid) packet is allowed to pass by + * the ruleset and the peer acks this data, then its possible + * all future packets will trigger 'ACK is over upper bound' check. + * + * Thus if only the sequence check fails then do update td_end so + * possible ACK for this data can update internal state. + */ sender->td_end = end; sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; - } - if (tcph->ack) { - if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { - sender->td_maxack = ack; - sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; - } else if (after(ack, sender->td_maxack)) - sender->td_maxack = ack; + + return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE, + "%u bytes more than expected", overshot); } - /* - * Update receiver data. - */ - if (receiver->td_maxwin != 0 && after(end, sender->td_maxend)) - receiver->td_maxwin += end - sender->td_maxend; - if (after(sack + win, receiver->td_maxend - 1)) { - receiver->td_maxend = sack + win; - if (win == 0) - receiver->td_maxend++; + return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_INVALID, + "SEQ is over upper bound %u (over the window of the receiver)", + sender->td_maxend + 1); + } + + if (!before(sack, receiver->td_end + 1)) + return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_INVALID, + "ACK is over upper bound %u (ACKed data not seen yet)", + receiver->td_end + 1); + + /* Is the ending sequence in the receive window (if available)? */ + in_recv_win = !receiver->td_maxwin || + after(end, sender->td_end - receiver->td_maxwin - 1); + if (!in_recv_win) + return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE, + "SEQ is under lower bound %u (already ACKed data retransmitted)", + sender->td_end - receiver->td_maxwin - 1); + if (!after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1)) + return nf_tcp_log_invalid(skb, ct, hook_state, sender, NFCT_TCP_IGNORE, + "ignored ACK under lower bound %u (possible overly delayed)", + receiver->td_end - MAXACKWINDOW(sender) - 1); + + /* Take into account window scaling (RFC 1323). */ + if (!tcph->syn) + win <<= sender->td_scale; + + /* Update sender data. */ + swin = win + (sack - ack); + if (sender->td_maxwin < swin) + sender->td_maxwin = swin; + if (after(end, sender->td_end)) { + sender->td_end = end; + sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; + } + if (tcph->ack) { + if (!(sender->flags & IP_CT_TCP_FLAG_MAXACK_SET)) { + sender->td_maxack = ack; + sender->flags |= IP_CT_TCP_FLAG_MAXACK_SET; + } else if (after(ack, sender->td_maxack)) { + sender->td_maxack = ack; } - if (ack == receiver->td_end) - receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; + } - /* - * Check retransmissions. 
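nf_tcp_log_invalid() above folds three things into one variadic call: the be_liberal short-circuit, the invalid-packet log, and the verdict to hand back, so every bound check in tcp_in_window() becomes a single return statement. A userspace sketch of the same shape; the kernel version forwards the va_list through struct va_format and the "%pV" printk specifier instead of formatting directly:

#include <stdarg.h>
#include <stdio.h>
#include <stdbool.h>

enum verdict { V_IGNORE, V_INVALID, V_ACCEPT };

static enum verdict log_invalid(bool liberal, enum verdict ret,
                                const char *fmt, ...)
        __attribute__((format(printf, 3, 4)));

/* Log-and-return helper: callers write
 *     return log_invalid(liberal, V_INVALID, "SEQ over bound %u", lim);
 * and liberal mode short-circuits to ACCEPT without formatting anything.
 */
static enum verdict log_invalid(bool liberal, enum verdict ret,
                                const char *fmt, ...)
{
        va_list args;

        if (liberal)
                return V_ACCEPT;

        va_start(args, fmt);
        vfprintf(stderr, fmt, args);
        fputc('\n', stderr);
        va_end(args);
        return ret;
}

int main(void)
{
        enum verdict v = log_invalid(false, V_INVALID,
                                     "SEQ is over upper bound %u", 4242u);
        return v == V_INVALID ? 0 : 1;
}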
- */ - if (index == TCP_ACK_SET) { - if (state->last_dir == dir - && state->last_seq == seq - && state->last_ack == ack - && state->last_end == end - && state->last_win == win_raw) - state->retrans++; - else { - state->last_dir = dir; - state->last_seq = seq; - state->last_ack = ack; - state->last_end = end; - state->last_win = win_raw; - state->retrans = 0; - } + /* Update receiver data. */ + if (receiver->td_maxwin != 0 && after(end, sender->td_maxend)) + receiver->td_maxwin += end - sender->td_maxend; + if (after(sack + win, receiver->td_maxend - 1)) { + receiver->td_maxend = sack + win; + if (win == 0) + receiver->td_maxend++; + } + if (ack == receiver->td_end) + receiver->flags &= ~IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; + + /* Check retransmissions. */ + if (index == TCP_ACK_SET) { + if (state->last_dir == dir && + state->last_seq == seq && + state->last_ack == ack && + state->last_end == end && + state->last_win == win_raw) { + state->retrans++; + } else { + state->last_dir = dir; + state->last_seq = seq; + state->last_ack = ack; + state->last_end = end; + state->last_win = win_raw; + state->retrans = 0; } - res = true; - } else { - res = false; - if (sender->flags & IP_CT_TCP_FLAG_BE_LIBERAL || - tn->tcp_be_liberal) - res = true; - if (!res) { - bool seq_ok = before(seq, sender->td_maxend + 1); - - if (!seq_ok) { - u32 overshot = end - sender->td_maxend + 1; - bool ack_ok; - - ack_ok = after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1); - - if (in_recv_win && - ack_ok && - overshot <= receiver->td_maxwin && - before(sack, receiver->td_end + 1)) { - /* Work around TCPs that send more bytes than allowed by - * the receive window. - * - * If the (marked as invalid) packet is allowed to pass by - * the ruleset and the peer acks this data, then its possible - * all future packets will trigger 'ACK is over upper bound' check. - * - * Thus if only the sequence check fails then do update td_end so - * possible ACK for this data can update internal state. - */ - sender->td_end = end; - sender->flags |= IP_CT_TCP_FLAG_DATA_UNACKNOWLEDGED; - - nf_ct_l4proto_log_invalid(skb, ct, hook_state, - "%u bytes more than expected", overshot); - return res; - } - } + } + + return NFCT_TCP_ACCEPT; +} + +static void __cold nf_tcp_handle_invalid(struct nf_conn *ct, + enum ip_conntrack_dir dir, + int index, + const struct sk_buff *skb, + const struct nf_hook_state *hook_state) +{ + const unsigned int *timeouts; + const struct nf_tcp_net *tn; + unsigned int timeout; + u32 expires; + + if (!test_bit(IPS_ASSURED_BIT, &ct->status) || + test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) + return; + + /* We don't want to have connections hanging around in ESTABLISHED + * state for long time 'just because' conntrack deemed a FIN/RST + * out-of-window. + * + * Shrink the timeout just like when there is unacked data. + * This speeds up eviction of 'dead' connections where the + * connection and conntracks internal state are out of sync. 
+ */ + switch (index) { + case TCP_RST_SET: + case TCP_FIN_SET: + break; + default: + return; + } + + if (ct->proto.tcp.last_dir != dir && + (ct->proto.tcp.last_index == TCP_FIN_SET || + ct->proto.tcp.last_index == TCP_RST_SET)) { + expires = nf_ct_expires(ct); + if (expires < 120 * HZ) + return; + + tn = nf_tcp_pernet(nf_ct_net(ct)); + timeouts = nf_ct_timeout_lookup(ct); + if (!timeouts) + timeouts = tn->timeouts; + timeout = READ_ONCE(timeouts[TCP_CONNTRACK_UNACK]); + if (expires > timeout) { nf_ct_l4proto_log_invalid(skb, ct, hook_state, - "%s", - before(seq, sender->td_maxend + 1) ? - in_recv_win ? - before(sack, receiver->td_end + 1) ? - after(sack, receiver->td_end - MAXACKWINDOW(sender) - 1) ? "BUG" - : "ACK is under the lower bound (possible overly delayed ACK)" - : "ACK is over the upper bound (ACKed data not seen yet)" - : "SEQ is under the lower bound (already ACKed data retransmitted)" - : "SEQ is over the upper bound (over the window of the receiver)"); + "packet (index %d, dir %d) response for index %d lower timeout to %u", + index, dir, ct->proto.tcp.last_index, timeout); + + WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp); } + } else { + ct->proto.tcp.last_index = index; + ct->proto.tcp.last_dir = dir; } - - return res; } /* table of valid flag combinations - PUSH, ECE and CWR are always valid */ @@ -861,6 +933,7 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, struct nf_conntrack_tuple *tuple; enum tcp_conntrack new_state, old_state; unsigned int index, *timeouts; + enum nf_ct_tcp_action res; enum ip_conntrack_dir dir; const struct tcphdr *th; struct tcphdr _tcph; @@ -1126,10 +1199,18 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct, break; } - if (!tcp_in_window(ct, dir, index, - skb, dataoff, th, state)) { + res = tcp_in_window(ct, dir, index, + skb, dataoff, th, state); + switch (res) { + case NFCT_TCP_IGNORE: + spin_unlock_bh(&ct->lock); + return NF_ACCEPT; + case NFCT_TCP_INVALID: + nf_tcp_handle_invalid(ct, dir, index, skb, state); spin_unlock_bh(&ct->lock); return -NF_ACCEPT; + case NFCT_TCP_ACCEPT: + break; } in_window: /* From now on we have got in-window packets */ diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index daf06f71d31c..77f5e82d8e3f 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -477,7 +477,7 @@ static int ct_sip_walk_headers(const struct nf_conn *ct, const char *dptr, return ret; if (ret == 0) break; - dataoff += *matchoff; + dataoff = *matchoff; } *in_header = 0; } @@ -489,7 +489,7 @@ static int ct_sip_walk_headers(const struct nf_conn *ct, const char *dptr, break; if (ret == 0) return ret; - dataoff += *matchoff; + dataoff = *matchoff; } if (in_header) diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 05895878610c..4ffe84c5a82c 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -561,7 +561,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_LOG_INVALID, NF_SYSCTL_CT_EXPECT_MAX, NF_SYSCTL_CT_ACCT, - NF_SYSCTL_CT_HELPER, #ifdef CONFIG_NF_CONNTRACK_EVENTS NF_SYSCTL_CT_EVENTS, #endif @@ -680,14 +679,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_ONE, }, - [NF_SYSCTL_CT_HELPER] = { - .procname = "nf_conntrack_helper", - .maxlen = sizeof(u8), - .mode = 0644, - .proc_handler = proc_dou8vec_minmax, - .extra1 = SYSCTL_ZERO, - .extra2 = SYSCTL_ONE, - }, #ifdef CONFIG_NF_CONNTRACK_EVENTS [NF_SYSCTL_CT_EVENTS] = { .procname = 
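On the ct_sip_walk_headers() fix above (dataoff += *matchoff becoming dataoff = *matchoff): the match offset is absolute within the message, so accumulating it makes the scan position grow roughly geometrically and skip headers. A toy model of the difference, assuming, as the hunk implies, that the search returns absolute offsets; find() here steps past each match in both loops, so the only difference is = versus +=:

#include <stdio.h>
#include <string.h>

/* find() returns the absolute offset of needle at or after 'from', or -1,
 * like ct_sip_get_header() filling *matchoff with an absolute offset.
 */
static int find(const char *buf, int from, const char *needle)
{
        const char *p = strstr(buf + from, needle);
        return p ? (int)(p - buf) : -1;
}

int main(void)
{
        char msg[128] = "";
        int i, n, off, dataoff;

        for (i = 0; i < 6; i++)
                strcat(msg, "Via: host\r\n");   /* six headers, 11 bytes each */

        /* fixed behaviour: dataoff = *matchoff resumes at the match */
        for (n = 0, dataoff = 0; (off = find(msg, dataoff, "Via:")) >= 0; n++)
                dataoff = off + 4;
        printf("assign:     %d headers\n", n);  /* 6 */

        /* old behaviour: dataoff += *matchoff adds absolute offsets */
        for (n = 0, dataoff = 0;
             dataoff < (int)strlen(msg) &&
             (off = find(msg, dataoff, "Via:")) >= 0; n++)
                dataoff += off + 4;
        printf("accumulate: %d headers\n", n);  /* 4: overshoots and skips */
        return 0;
}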
"nf_conntrack_events", @@ -1100,7 +1091,6 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) table[NF_SYSCTL_CT_CHECKSUM].data = &net->ct.sysctl_checksum; table[NF_SYSCTL_CT_LOG_INVALID].data = &net->ct.sysctl_log_invalid; table[NF_SYSCTL_CT_ACCT].data = &net->ct.sysctl_acct; - table[NF_SYSCTL_CT_HELPER].data = &cnet->sysctl_auto_assign_helper; #ifdef CONFIG_NF_CONNTRACK_EVENTS table[NF_SYSCTL_CT_EVENTS].data = &net->ct.sysctl_events; #endif diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index edee7fa944c1..8a29290149bd 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -443,9 +443,9 @@ static int nf_log_proc_dostring(struct ctl_table *table, int write, mutex_lock(&nf_log_mutex); logger = nft_log_dereference(net->nf.nf_loggers[tindex]); if (!logger) - strlcpy(buf, "NONE", sizeof(buf)); + strscpy(buf, "NONE", sizeof(buf)); else - strlcpy(buf, logger->name, sizeof(buf)); + strscpy(buf, logger->name, sizeof(buf)); mutex_unlock(&nf_log_mutex); r = proc_dostring(&tmp, write, buffer, lenp, ppos); } diff --git a/net/netfilter/nf_nat_amanda.c b/net/netfilter/nf_nat_amanda.c index 3bc7e0854efe..98deef6cde69 100644 --- a/net/netfilter/nf_nat_amanda.c +++ b/net/netfilter/nf_nat_amanda.c @@ -44,19 +44,7 @@ static unsigned int help(struct sk_buff *skb, exp->expectfn = nf_nat_follow_master; /* Try to get same port: if not, try to change it. */ - for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { - int res; - - exp->tuple.dst.u.tcp.port = htons(port); - res = nf_ct_expect_related(exp, 0); - if (res == 0) - break; - else if (res != -EBUSY) { - port = 0; - break; - } - } - + port = nf_nat_exp_find_port(exp, ntohs(exp->saved_proto.tcp.port)); if (port == 0) { nf_ct_helper_log(skb, exp->master, "all ports in use"); return NF_DROP; diff --git a/net/netfilter/nf_nat_ftp.c b/net/netfilter/nf_nat_ftp.c index aace6768a64e..c92a436d9c48 100644 --- a/net/netfilter/nf_nat_ftp.c +++ b/net/netfilter/nf_nat_ftp.c @@ -86,22 +86,9 @@ static unsigned int nf_nat_ftp(struct sk_buff *skb, * this one. */ exp->expectfn = nf_nat_follow_master; - /* Try to get same port: if not, try to change it. */ - for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { - int ret; - - exp->tuple.dst.u.tcp.port = htons(port); - ret = nf_ct_expect_related(exp, 0); - if (ret == 0) - break; - else if (ret != -EBUSY) { - port = 0; - break; - } - } - + port = nf_nat_exp_find_port(exp, ntohs(exp->saved_proto.tcp.port)); if (port == 0) { - nf_ct_helper_log(skb, ct, "all ports in use"); + nf_ct_helper_log(skb, exp->master, "all ports in use"); return NF_DROP; } diff --git a/net/netfilter/nf_nat_helper.c b/net/netfilter/nf_nat_helper.c index a263505455fc..a95a25196943 100644 --- a/net/netfilter/nf_nat_helper.c +++ b/net/netfilter/nf_nat_helper.c @@ -198,3 +198,34 @@ void nf_nat_follow_master(struct nf_conn *ct, nf_nat_setup_info(ct, &range, NF_NAT_MANIP_DST); } EXPORT_SYMBOL(nf_nat_follow_master); + +u16 nf_nat_exp_find_port(struct nf_conntrack_expect *exp, u16 port) +{ + static const unsigned int max_attempts = 128; + int range, attempts_left; + u16 min = port; + + range = USHRT_MAX - port; + attempts_left = range; + + if (attempts_left > max_attempts) + attempts_left = max_attempts; + + /* Try to get same port: if not, try to change it. 
*/ + for (;;) { + int res; + + exp->tuple.dst.u.tcp.port = htons(port); + res = nf_ct_expect_related(exp, 0); + if (res == 0) + return port; + + if (res != -EBUSY || (--attempts_left < 0)) + break; + + port = min + prandom_u32_max(range); + } + + return 0; +} +EXPORT_SYMBOL_GPL(nf_nat_exp_find_port); diff --git a/net/netfilter/nf_nat_irc.c b/net/netfilter/nf_nat_irc.c index c691ab8d234c..19c4fcc60c50 100644 --- a/net/netfilter/nf_nat_irc.c +++ b/net/netfilter/nf_nat_irc.c @@ -48,20 +48,8 @@ static unsigned int help(struct sk_buff *skb, exp->dir = IP_CT_DIR_REPLY; exp->expectfn = nf_nat_follow_master; - /* Try to get same port: if not, try to change it. */ - for (port = ntohs(exp->saved_proto.tcp.port); port != 0; port++) { - int ret; - - exp->tuple.dst.u.tcp.port = htons(port); - ret = nf_ct_expect_related(exp, 0); - if (ret == 0) - break; - else if (ret != -EBUSY) { - port = 0; - break; - } - } - + port = nf_nat_exp_find_port(exp, + ntohs(exp->saved_proto.tcp.port)); if (port == 0) { nf_ct_helper_log(skb, ct, "all ports in use"); return NF_DROP; diff --git a/net/netfilter/nf_nat_sip.c b/net/netfilter/nf_nat_sip.c index f0a735e86851..cf4aeb299bde 100644 --- a/net/netfilter/nf_nat_sip.c +++ b/net/netfilter/nf_nat_sip.c @@ -410,19 +410,7 @@ static unsigned int nf_nat_sip_expect(struct sk_buff *skb, unsigned int protoff, exp->dir = !dir; exp->expectfn = nf_nat_sip_expected; - for (; port != 0; port++) { - int ret; - - exp->tuple.dst.u.udp.port = htons(port); - ret = nf_ct_expect_related(exp, NF_CT_EXP_F_SKIP_MASTER); - if (ret == 0) - break; - else if (ret != -EBUSY) { - port = 0; - break; - } - } - + port = nf_nat_exp_find_port(exp, port); if (port == 0) { nf_ct_helper_log(skb, ct, "all ports in use for SIP"); return NF_DROP; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2ee50e23c9b7..a0653a8dfa82 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -742,7 +742,7 @@ __printf(2, 3) int nft_request_module(struct net *net, const char *fmt, return -ENOMEM; req->done = false; - strlcpy(req->module, module_name, MODULE_NAME_LEN); + strscpy(req->module, module_name, MODULE_NAME_LEN); list_add_tail(&req->list, &nft_net->module_list); return -EAGAIN; @@ -2166,8 +2166,10 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family, chain->flags |= NFT_CHAIN_BASE | flags; basechain->policy = NF_ACCEPT; if (chain->flags & NFT_CHAIN_HW_OFFLOAD && - !nft_chain_offload_support(basechain)) + !nft_chain_offload_support(basechain)) { + list_splice_init(&basechain->hook_list, &hook->list); return -EOPNOTSUPP; + } flow_block_init(&basechain->flow_block); @@ -2195,7 +2197,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, struct netlink_ext_ack *extack) { const struct nlattr * const *nla = ctx->nla; - struct nft_stats __percpu *stats = NULL; struct nft_table *table = ctx->table; struct nft_base_chain *basechain; struct net *net = ctx->net; @@ -2210,6 +2211,7 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, return -EOVERFLOW; if (nla[NFTA_CHAIN_HOOK]) { + struct nft_stats __percpu *stats = NULL; struct nft_chain_hook hook; if (flags & NFT_CHAIN_BINDING) @@ -2241,8 +2243,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, if (err < 0) { nft_chain_release_hook(&hook); kfree(basechain); + free_percpu(stats); return err; } + if (stats) + static_branch_inc(&nft_counters_enabled); } else { if (flags & NFT_CHAIN_BASE) return -EINVAL; @@ -2317,9 +2322,6 @@ 
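nf_nat_exp_find_port() above replaces four copies of the "walk upward from the saved port" loop and instead probes randomly within the remaining port range for at most 128 attempts. A userspace sketch of that probing strategy, with try_bind() as a stand-in for nf_ct_expect_related() and rand() for the kernel's prandom_u32_max():

#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

/* Stand-in for nf_ct_expect_related(): pretend ports 5000..5009 are busy. */
static bool try_bind(unsigned short port)
{
        return port < 5000 || port > 5009;
}

/* Try the preferred port, then at most 128 random probes in [min, USHRT_MAX);
 * return 0 if everything tried was busy. Bounding the attempts keeps the
 * worst case O(1) instead of the old loop's walk across the whole range.
 */
static unsigned short find_port(unsigned short port)
{
        const int max_attempts = 128;
        unsigned short min = port;
        int range = USHRT_MAX - port;
        int attempts = range < max_attempts ? range : max_attempts;

        for (;;) {
                if (try_bind(port))
                        return port;
                if (--attempts < 0)
                        return 0;
                port = min + rand() % range;    /* prandom_u32_max() in-kernel */
        }
}

int main(void)
{
        printf("%u\n", find_port(5005));        /* busy, so a random later port */
        return 0;
}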
static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask, goto err_unregister_hook; } - if (stats) - static_branch_inc(&nft_counters_enabled); - table->use++; return 0; diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c index 0fa2e2030427..ee6840bd5933 100644 --- a/net/netfilter/nfnetlink_osf.c +++ b/net/netfilter/nfnetlink_osf.c @@ -269,6 +269,7 @@ bool nf_osf_find(const struct sk_buff *skb, struct nf_osf_hdr_ctx ctx; const struct tcphdr *tcp; struct tcphdr _tcph; + bool found = false; memset(&ctx, 0, sizeof(ctx)); @@ -283,10 +284,11 @@ bool nf_osf_find(const struct sk_buff *skb, data->genre = f->genre; data->version = f->version; + found = true; break; } - return true; + return found; } EXPORT_SYMBOL_GPL(nf_osf_find); diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index b04995c3e17f..a3f01f209a53 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -1089,9 +1089,6 @@ static int nft_ct_helper_obj_init(const struct nft_ctx *ctx, if (err < 0) goto err_put_helper; - /* Avoid the bogus warning, helper will be assigned after CT init */ - nf_ct_set_auto_assign_helper_warned(ctx->net); - return 0; err_put_helper: diff --git a/net/netfilter/nft_osf.c b/net/netfilter/nft_osf.c index 89342ccccdcc..adacf95b6e2b 100644 --- a/net/netfilter/nft_osf.c +++ b/net/netfilter/nft_osf.c @@ -51,7 +51,7 @@ static void nft_osf_eval(const struct nft_expr *expr, struct nft_regs *regs, snprintf(os_match, NFT_OSF_MAXGENRELEN, "%s:%s", data.genre, data.version); else - strlcpy(os_match, data.genre, NFT_OSF_MAXGENRELEN); + strscpy(os_match, data.genre, NFT_OSF_MAXGENRELEN); strncpy((char *)dest, os_match, NFT_OSF_MAXGENRELEN); } diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c index eb0e40c29712..088244f9d838 100644 --- a/net/netfilter/nft_payload.c +++ b/net/netfilter/nft_payload.c @@ -173,10 +173,10 @@ static const struct nla_policy nft_payload_policy[NFTA_PAYLOAD_MAX + 1] = { [NFTA_PAYLOAD_SREG] = { .type = NLA_U32 }, [NFTA_PAYLOAD_DREG] = { .type = NLA_U32 }, [NFTA_PAYLOAD_BASE] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_OFFSET] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_LEN] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_OFFSET] = NLA_POLICY_MAX_BE(NLA_U32, 255), + [NFTA_PAYLOAD_LEN] = NLA_POLICY_MAX_BE(NLA_U32, 255), [NFTA_PAYLOAD_CSUM_TYPE] = { .type = NLA_U32 }, - [NFTA_PAYLOAD_CSUM_OFFSET] = { .type = NLA_U32 }, + [NFTA_PAYLOAD_CSUM_OFFSET] = NLA_POLICY_MAX_BE(NLA_U32, 255), [NFTA_PAYLOAD_CSUM_FLAGS] = { .type = NLA_U32 }, }; diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 54a489f16b17..470282cf3fae 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -766,7 +766,7 @@ void xt_compat_match_from_user(struct xt_entry_match *m, void **dstptr, msize += off; m->u.user.match_size = msize; - strlcpy(name, match->name, sizeof(name)); + strscpy(name, match->name, sizeof(name)); module_put(match->me); strncpy(m->u.user.name, name, sizeof(m->u.user.name)); @@ -1146,7 +1146,7 @@ void xt_compat_target_from_user(struct xt_entry_target *t, void **dstptr, tsize += off; t->u.user.target_size = tsize; - strlcpy(name, target->name, sizeof(name)); + strscpy(name, target->name, sizeof(name)); module_put(target->me); strncpy(t->u.user.name, name, sizeof(t->u.user.name)); @@ -1827,7 +1827,7 @@ int xt_proto_init(struct net *net, u_int8_t af) root_uid = make_kuid(net->user_ns, 0); root_gid = make_kgid(net->user_ns, 0); - strlcpy(buf, xt_prefix[af], sizeof(buf)); + strscpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, 
FORMAT_TABLES, sizeof(buf)); proc = proc_create_net_data(buf, 0440, net->proc_net, &xt_table_seq_ops, sizeof(struct seq_net_private), @@ -1837,7 +1837,7 @@ int xt_proto_init(struct net *net, u_int8_t af) if (uid_valid(root_uid) && gid_valid(root_gid)) proc_set_user(proc, root_uid, root_gid); - strlcpy(buf, xt_prefix[af], sizeof(buf)); + strscpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); proc = proc_create_seq_private(buf, 0440, net->proc_net, &xt_match_seq_ops, sizeof(struct nf_mttg_trav), @@ -1847,7 +1847,7 @@ int xt_proto_init(struct net *net, u_int8_t af) if (uid_valid(root_uid) && gid_valid(root_gid)) proc_set_user(proc, root_uid, root_gid); - strlcpy(buf, xt_prefix[af], sizeof(buf)); + strscpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); proc = proc_create_seq_private(buf, 0440, net->proc_net, &xt_target_seq_ops, sizeof(struct nf_mttg_trav), @@ -1862,12 +1862,12 @@ int xt_proto_init(struct net *net, u_int8_t af) #ifdef CONFIG_PROC_FS out_remove_matches: - strlcpy(buf, xt_prefix[af], sizeof(buf)); + strscpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); remove_proc_entry(buf, net->proc_net); out_remove_tables: - strlcpy(buf, xt_prefix[af], sizeof(buf)); + strscpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); remove_proc_entry(buf, net->proc_net); out: @@ -1881,15 +1881,15 @@ void xt_proto_fini(struct net *net, u_int8_t af) #ifdef CONFIG_PROC_FS char buf[XT_FUNCTION_MAXNAMELEN]; - strlcpy(buf, xt_prefix[af], sizeof(buf)); + strscpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TABLES, sizeof(buf)); remove_proc_entry(buf, net->proc_net); - strlcpy(buf, xt_prefix[af], sizeof(buf)); + strscpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_TARGETS, sizeof(buf)); remove_proc_entry(buf, net->proc_net); - strlcpy(buf, xt_prefix[af], sizeof(buf)); + strscpy(buf, xt_prefix[af], sizeof(buf)); strlcat(buf, FORMAT_MATCHES, sizeof(buf)); remove_proc_entry(buf, net->proc_net); #endif /*CONFIG_PROC_FS*/ diff --git a/net/netfilter/xt_RATEEST.c b/net/netfilter/xt_RATEEST.c index 8aec1b529364..80f6624e2355 100644 --- a/net/netfilter/xt_RATEEST.c +++ b/net/netfilter/xt_RATEEST.c @@ -144,7 +144,7 @@ static int xt_rateest_tg_checkentry(const struct xt_tgchk_param *par) goto err1; gnet_stats_basic_sync_init(&est->bstats); - strlcpy(est->name, info->name, sizeof(est->name)); + strscpy(est->name, info->name, sizeof(est->name)); spin_lock_init(&est->lock); est->refcnt = 1; est->params.interval = info->interval; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 7c136de117eb..39b7c00e4cef 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -739,6 +739,36 @@ out: return err; } +static int genl_header_check(const struct genl_family *family, + struct nlmsghdr *nlh, struct genlmsghdr *hdr, + struct netlink_ext_ack *extack) +{ + u16 flags; + + /* Only for commands added after we started validating */ + if (hdr->cmd < family->resv_start_op) + return 0; + + if (hdr->reserved) { + NL_SET_ERR_MSG(extack, "genlmsghdr.reserved field is not 0"); + return -EINVAL; + } + + /* Old netlink flags have pretty loose semantics, allow only the flags + * consumed by the core where we can enforce the meaning. 
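On the strlcpy() to strscpy() conversions here and throughout: strlcpy() returns strlen(src), so it must traverse the entire (possibly oversized or unterminated) source even when only a few bytes fit, while strscpy() returns the copied length or -E2BIG and never reads past the destination size. A sketch of the semantic difference with a simplified lookalike (my_strscpy() is illustrative; the kernel's implementation lives in lib/string.c):

#include <errno.h>
#include <stdio.h>
#include <string.h>

/* strscpy()-alike: copy at most size-1 bytes, always NUL-terminate, and
 * report truncation as -E2BIG instead of returning strlen(src) the way
 * strlcpy() does.
 */
static long my_strscpy(char *dst, const char *src, size_t size)
{
        size_t i;

        if (!size)
                return -E2BIG;
        for (i = 0; i < size - 1 && src[i]; i++)
                dst[i] = src[i];
        dst[i] = '\0';
        return src[i] ? -E2BIG : (long)i;
}

int main(void)
{
        char buf[8];

        printf("%ld <%s>\n", my_strscpy(buf, "short", sizeof(buf)), buf);
        printf("%ld <%s>\n", my_strscpy(buf, "much too long", sizeof(buf)), buf);
        return 0;
}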
+ */ + flags = nlh->nlmsg_flags; + if ((flags & NLM_F_DUMP) == NLM_F_DUMP) /* DUMP is 2 bits */ + flags &= ~NLM_F_DUMP; + if (flags & ~(NLM_F_REQUEST | NLM_F_ACK | NLM_F_ECHO)) { + NL_SET_ERR_MSG(extack, + "ambiguous or reserved bits set in nlmsg_flags"); + return -EINVAL; + } + + return 0; +} + static int genl_family_rcv_msg(const struct genl_family *family, struct sk_buff *skb, struct nlmsghdr *nlh, @@ -757,7 +787,7 @@ static int genl_family_rcv_msg(const struct genl_family *family, if (nlh->nlmsg_len < nlmsg_msg_size(hdrlen)) return -EINVAL; - if (hdr->cmd >= family->resv_start_op && hdr->reserved) + if (genl_header_check(family, nlh, hdr, extack)) return -EINVAL; if (genl_get_cmd(hdr->cmd, family, &op)) diff --git a/net/nfc/hci/hcp.c b/net/nfc/hci/hcp.c index 05c60988f59a..4902f5064098 100644 --- a/net/nfc/hci/hcp.c +++ b/net/nfc/hci/hcp.c @@ -73,14 +73,12 @@ int nfc_hci_hcp_message_tx(struct nfc_hci_dev *hdev, u8 pipe, if (firstfrag) { firstfrag = false; packet->message.header = HCP_HEADER(type, instruction); - if (ptr) { - memcpy(packet->message.data, ptr, - data_link_len - 1); - ptr += data_link_len - 1; - } } else { - memcpy(&packet->message, ptr, data_link_len); - ptr += data_link_len; + packet->message.header = *ptr++; + } + if (ptr) { + memcpy(packet->message.data, ptr, data_link_len - 1); + ptr += data_link_len - 1; } /* This is the last fragment, set the cb bit */ diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 48e8f5c29b67..cb255d8ed99a 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1982,7 +1982,8 @@ static int ovs_ct_limit_set_zone_limit(struct nlattr *nla_zone_limit, } else { struct ovs_ct_limit *ct_limit; - ct_limit = kmalloc(sizeof(*ct_limit), GFP_KERNEL); + ct_limit = kmalloc(sizeof(*ct_limit), + GFP_KERNEL_ACCOUNT); if (!ct_limit) return -ENOMEM; @@ -2252,14 +2253,16 @@ exit_err: static const struct genl_small_ops ct_limit_genl_ops[] = { { .cmd = OVS_CT_LIMIT_CMD_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN - * privilege. */ + .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN + * privilege. + */ .doit = ovs_ct_limit_cmd_set, }, { .cmd = OVS_CT_LIMIT_CMD_DEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN - * privilege. */ + .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN + * privilege. + */ .doit = ovs_ct_limit_cmd_del, }, { .cmd = OVS_CT_LIMIT_CMD_GET, diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 4c09cf8a0ab2..4a07ab094a84 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -3304,7 +3304,7 @@ static int __ovs_nla_copy_actions(struct net *net, const struct nlattr *attr, /* Disallow subsequent L2.5+ set actions and mpls_pop * actions once the last MPLS label in the packet is - * is popped as there is no check here to ensure that + * popped as there is no check here to ensure that * the new eth type is valid and thus set actions could * write off the end of the packet or otherwise corrupt * it. diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c index 51111a9009bd..6e38f68f88c2 100644 --- a/net/openvswitch/meter.c +++ b/net/openvswitch/meter.c @@ -343,7 +343,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a) return ERR_PTR(-EINVAL); /* Allocate and set up the meter before locking anything. 
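In genl_header_check() above, NLM_F_DUMP is NLM_F_ROOT | NLM_F_MATCH, i.e. two bits, which is why the code tests (flags & NLM_F_DUMP) == NLM_F_DUMP before clearing it: a lone NLM_F_ROOT or NLM_F_MATCH is left behind and rejected. A standalone sketch of that masking, using the flag values from <linux/netlink.h>:

#include <stdio.h>

#define NLM_F_REQUEST   0x01
#define NLM_F_ACK       0x04
#define NLM_F_ECHO      0x08
#define NLM_F_ROOT      0x100
#define NLM_F_MATCH     0x200
#define NLM_F_DUMP      (NLM_F_ROOT | NLM_F_MATCH)

/* NLM_F_DUMP is two bits, so require *both* before clearing; half a DUMP
 * falls through to the reject below.
 */
static int flags_ok(unsigned short flags)
{
        if ((flags & NLM_F_DUMP) == NLM_F_DUMP)
                flags &= ~NLM_F_DUMP;
        return !(flags & ~(NLM_F_REQUEST | NLM_F_ACK | NLM_F_ECHO));
}

int main(void)
{
        printf("%d\n", flags_ok(NLM_F_REQUEST | NLM_F_ACK));    /* 1 */
        printf("%d\n", flags_ok(NLM_F_REQUEST | NLM_F_DUMP));   /* 1 */
        printf("%d\n", flags_ok(NLM_F_REQUEST | NLM_F_ROOT));   /* 0 */
        return 0;
}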
*/ - meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL); + meter = kzalloc(struct_size(meter, bands, n_bands), GFP_KERNEL_ACCOUNT); if (!meter) return ERR_PTR(-ENOMEM); @@ -687,9 +687,9 @@ static const struct genl_small_ops dp_meter_genl_ops[] = { }, { .cmd = OVS_METER_CMD_SET, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN - * privilege. - */ + .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN + * privilege. + */ .doit = ovs_meter_cmd_set, }, { .cmd = OVS_METER_CMD_GET, @@ -699,9 +699,9 @@ static const struct genl_small_ops dp_meter_genl_ops[] = { }, { .cmd = OVS_METER_CMD_DEL, .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP, - .flags = GENL_ADMIN_PERM, /* Requires CAP_NET_ADMIN - * privilege. - */ + .flags = GENL_UNS_ADMIN_PERM, /* Requires CAP_NET_ADMIN + * privilege. + */ .doit = ovs_meter_cmd_del }, }; diff --git a/net/openvswitch/vport-internal_dev.c b/net/openvswitch/vport-internal_dev.c index 35f42c9821c2..74c88a6baa43 100644 --- a/net/openvswitch/vport-internal_dev.c +++ b/net/openvswitch/vport-internal_dev.c @@ -190,7 +190,7 @@ static void internal_dev_destroy(struct vport *vport) rtnl_unlock(); } -static netdev_tx_t internal_dev_recv(struct sk_buff *skb) +static int internal_dev_recv(struct sk_buff *skb) { struct net_device *netdev = skb->dev; diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index 7d276f60c000..6ff45e8a0868 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -132,7 +132,7 @@ struct vport_ops { int (*set_options)(struct vport *, struct nlattr *); int (*get_options)(const struct vport *, struct sk_buff *); - netdev_tx_t (*send) (struct sk_buff *skb); + int (*send)(struct sk_buff *skb); struct module *owner; struct list_head list; }; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 7de46d831349..d3f6db350de7 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -4725,37 +4725,37 @@ static struct pernet_operations packet_net_ops = { static void __exit packet_exit(void) { - unregister_netdevice_notifier(&packet_netdev_notifier); - unregister_pernet_subsys(&packet_net_ops); sock_unregister(PF_PACKET); proto_unregister(&packet_proto); + unregister_netdevice_notifier(&packet_netdev_notifier); + unregister_pernet_subsys(&packet_net_ops); } static int __init packet_init(void) { int rc; - rc = proto_register(&packet_proto, 0); - if (rc) - goto out; - rc = sock_register(&packet_family_ops); - if (rc) - goto out_proto; rc = register_pernet_subsys(&packet_net_ops); if (rc) - goto out_sock; + goto out; rc = register_netdevice_notifier(&packet_netdev_notifier); if (rc) goto out_pernet; + rc = proto_register(&packet_proto, 0); + if (rc) + goto out_notifier; + rc = sock_register(&packet_family_ops); + if (rc) + goto out_proto; return 0; -out_pernet: - unregister_pernet_subsys(&packet_net_ops); -out_sock: - sock_unregister(PF_PACKET); out_proto: proto_unregister(&packet_proto); +out_notifier: + unregister_netdevice_notifier(&packet_netdev_notifier); +out_pernet: + unregister_pernet_subsys(&packet_net_ops); out: return rc; } diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index b239120dd9ca..3ff6995244e5 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -894,7 +894,7 @@ module_exit(rds_exit); u32 rds_gen_num; -static int rds_init(void) +static int __init rds_init(void) { int ret; diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index a9e4ff948a7d..d36f3f6b4351 100644 --- 
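The packet_init()/packet_exit() reordering below registers pernet state and the netdevice notifier before the socket family becomes visible, and tears down in exactly the reverse order, so no packet socket can exist while its pernet data is missing. A generic sketch of the register-in-order, unwind-in-reverse idiom with goto labels (reg_a() through unreg_b() are placeholders):

#include <stdio.h>

static int reg_a(void) { puts("+a"); return 0; }
static int reg_b(void) { puts("+b"); return 0; }
static int reg_c(void) { puts("+c"); return -1; }       /* simulate failure */
static void unreg_a(void) { puts("-a"); }
static void unreg_b(void) { puts("-b"); }

/* Register dependencies first (a, then b, then c); on failure unwind in
 * strictly reverse order, like the fixed packet_init()/packet_exit() pair.
 */
static int init(void)
{
        int rc;

        rc = reg_a();
        if (rc)
                goto out;
        rc = reg_b();
        if (rc)
                goto out_a;
        rc = reg_c();
        if (rc)
                goto out_b;
        return 0;

out_b:
        unreg_b();
out_a:
        unreg_a();
out:
        return rc;
}

int main(void)
{
        return init() ? 1 : 0;
}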
a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -291,7 +291,7 @@ static void rds_rdma_listen_stop(void) #endif } -static int rds_rdma_init(void) +static int __init rds_rdma_init(void) { int ret; @@ -307,7 +307,7 @@ out: } module_init(rds_rdma_init); -static void rds_rdma_exit(void) +static void __exit rds_rdma_exit(void) { /* stop listening first to ensure no new connections are attempted */ rds_rdma_listen_stop(); diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 73ee2771093d..d8754366506a 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -712,7 +712,7 @@ static void rds_tcp_exit(void) } module_exit(rds_tcp_exit); -static int rds_tcp_init(void) +static int __init rds_tcp_init(void) { int ret; diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 571436064cd6..1ad0ec5afb50 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -782,7 +782,6 @@ void rxrpc_delete_call_timer(struct rxrpc_call *call); */ extern const char *const rxrpc_call_states[]; extern const char *const rxrpc_call_completions[]; -extern unsigned int rxrpc_max_call_lifetime; extern struct kmem_cache *rxrpc_call_jar; struct rxrpc_call *rxrpc_find_call_by_user_ID(struct rxrpc_sock *, unsigned long); @@ -982,6 +981,7 @@ void rxrpc_send_keepalive(struct rxrpc_peer *); /* * peer_event.c */ +void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset); void rxrpc_error_report(struct sock *); void rxrpc_peer_keepalive_worker(struct work_struct *); diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c index f8ecad2b730e..2a93e7b5fbd0 100644 --- a/net/rxrpc/call_event.c +++ b/net/rxrpc/call_event.c @@ -166,7 +166,7 @@ static void rxrpc_resend(struct rxrpc_call *call, unsigned long now_j) _enter("{%d,%d}", call->tx_hard_ack, call->tx_top); now = ktime_get_real(); - max_age = ktime_sub(now, jiffies_to_usecs(call->peer->rto_j)); + max_age = ktime_sub_us(now, jiffies_to_usecs(call->peer->rto_j)); spin_lock_bh(&call->lock); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 96ecb7356c0f..38ea98ff426b 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -137,6 +137,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) tuncfg.encap_type = UDP_ENCAP_RXRPC; tuncfg.encap_rcv = rxrpc_input_packet; + tuncfg.encap_err_rcv = rxrpc_encap_err_rcv; tuncfg.sk_user_data = local; setup_udp_tunnel_sock(net, local->socket, &tuncfg); @@ -405,6 +406,9 @@ static void rxrpc_local_processor(struct work_struct *work) container_of(work, struct rxrpc_local, processor); bool again; + if (local->dead) + return; + trace_rxrpc_local(local->debug_id, rxrpc_local_processing, refcount_read(&local->ref), NULL); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index be032850ae8c..32561e9567fe 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -16,22 +16,105 @@ #include <net/sock.h> #include <net/af_rxrpc.h> #include <net/ip.h> +#include <net/icmp.h> #include "ar-internal.h" +static void rxrpc_adjust_mtu(struct rxrpc_peer *, unsigned int); static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *); static void rxrpc_distribute_error(struct rxrpc_peer *, int, enum rxrpc_call_completion); /* - * Find the peer associated with an ICMP packet. + * Find the peer associated with an ICMPv4 packet. 
*/ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, - const struct sk_buff *skb, + struct sk_buff *skb, + unsigned int udp_offset, + unsigned int *info, struct sockaddr_rxrpc *srx) { - struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); + struct iphdr *ip, *ip0 = ip_hdr(skb); + struct icmphdr *icmp = icmp_hdr(skb); + struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset); - _enter(""); + _enter("%u,%u,%u", ip0->protocol, icmp->type, icmp->code); + + switch (icmp->type) { + case ICMP_DEST_UNREACH: + *info = ntohs(icmp->un.frag.mtu); + fallthrough; + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + ip = (struct iphdr *)((void *)icmp + 8); + break; + default: + return NULL; + } + + memset(srx, 0, sizeof(*srx)); + srx->transport_type = local->srx.transport_type; + srx->transport_len = local->srx.transport_len; + srx->transport.family = local->srx.transport.family; + + /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice + * versa? + */ + switch (srx->transport.family) { + case AF_INET: + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; + srx->transport.sin.sin_port = udp->dest; + memcpy(&srx->transport.sin.sin_addr, &ip->daddr, + sizeof(struct in_addr)); + break; + +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; + srx->transport.sin.sin_port = udp->dest; + memcpy(&srx->transport.sin.sin_addr, &ip->daddr, + sizeof(struct in_addr)); + break; +#endif + + default: + WARN_ON_ONCE(1); + return NULL; + } + + _net("ICMP {%pISp}", &srx->transport); + return rxrpc_lookup_peer_rcu(local, srx); +} + +#ifdef CONFIG_AF_RXRPC_IPV6 +/* + * Find the peer associated with an ICMPv6 packet. + */ +static struct rxrpc_peer *rxrpc_lookup_peer_icmp6_rcu(struct rxrpc_local *local, + struct sk_buff *skb, + unsigned int udp_offset, + unsigned int *info, + struct sockaddr_rxrpc *srx) +{ + struct icmp6hdr *icmp = icmp6_hdr(skb); + struct ipv6hdr *ip, *ip0 = ipv6_hdr(skb); + struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset); + + _enter("%u,%u,%u", ip0->nexthdr, icmp->icmp6_type, icmp->icmp6_code); + + switch (icmp->icmp6_type) { + case ICMPV6_DEST_UNREACH: + *info = ntohl(icmp->icmp6_mtu); + fallthrough; + case ICMPV6_PKT_TOOBIG: + case ICMPV6_TIME_EXCEED: + case ICMPV6_PARAMPROB: + ip = (struct ipv6hdr *)((void *)icmp + 8); + break; + default: + return NULL; + } memset(srx, 0, sizeof(*srx)); srx->transport_type = local->srx.transport_type; @@ -43,6 +126,165 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, */ switch (srx->transport.family) { case AF_INET: + _net("Rx ICMP6 on v4 sock"); + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; + srx->transport.sin.sin_port = udp->dest; + memcpy(&srx->transport.sin.sin_addr, + &ip->daddr.s6_addr32[3], sizeof(struct in_addr)); + break; + case AF_INET6: + _net("Rx ICMP6"); + srx->transport.sin.sin_port = udp->dest; + memcpy(&srx->transport.sin6.sin6_addr, &ip->daddr, + sizeof(struct in6_addr)); + break; + default: + WARN_ON_ONCE(1); + return NULL; + } + + _net("ICMP {%pISp}", &srx->transport); + return rxrpc_lookup_peer_rcu(local, srx); +} +#endif /* CONFIG_AF_RXRPC_IPV6 */ + +/* + * Handle an error received on the local endpoint as a tunnel. 
+ */ +void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, + unsigned int udp_offset) +{ + struct sock_extended_err ee; + struct sockaddr_rxrpc srx; + struct rxrpc_local *local; + struct rxrpc_peer *peer; + unsigned int info = 0; + int err; + u8 version = ip_hdr(skb)->version; + u8 type = icmp_hdr(skb)->type; + u8 code = icmp_hdr(skb)->code; + + rcu_read_lock(); + local = rcu_dereference_sk_user_data(sk); + if (unlikely(!local)) { + rcu_read_unlock(); + return; + } + + rxrpc_new_skb(skb, rxrpc_skb_received); + + switch (ip_hdr(skb)->version) { + case IPVERSION: + peer = rxrpc_lookup_peer_icmp_rcu(local, skb, udp_offset, + &info, &srx); + break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case 6: + peer = rxrpc_lookup_peer_icmp6_rcu(local, skb, udp_offset, + &info, &srx); + break; +#endif + default: + rcu_read_unlock(); + return; + } + + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (!peer) { + rcu_read_unlock(); + return; + } + + memset(&ee, 0, sizeof(ee)); + + switch (version) { + case IPVERSION: + switch (type) { + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_FRAG_NEEDED: + rxrpc_adjust_mtu(peer, info); + rcu_read_unlock(); + rxrpc_put_peer(peer); + return; + default: + break; + } + + err = EHOSTUNREACH; + if (code <= NR_ICMP_UNREACH) { + /* Might want to do something different with + * non-fatal errors + */ + //harderr = icmp_err_convert[code].fatal; + err = icmp_err_convert[code].errno; + } + break; + + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + default: + err = EPROTO; + break; + } + + ee.ee_origin = SO_EE_ORIGIN_ICMP; + ee.ee_type = type; + ee.ee_code = code; + ee.ee_errno = err; + break; + +#ifdef CONFIG_AF_RXRPC_IPV6 + case 6: + switch (type) { + case ICMPV6_PKT_TOOBIG: + rxrpc_adjust_mtu(peer, info); + rcu_read_unlock(); + rxrpc_put_peer(peer); + return; + } + + icmpv6_err_convert(type, code, &err); + + if (err == EACCES) + err = EHOSTUNREACH; + + ee.ee_origin = SO_EE_ORIGIN_ICMP6; + ee.ee_type = type; + ee.ee_code = code; + ee.ee_errno = err; + break; +#endif + } + + trace_rxrpc_rx_icmp(peer, &ee, &srx); + + rxrpc_distribute_error(peer, err, RXRPC_CALL_NETWORK_ERROR); + rcu_read_unlock(); + rxrpc_put_peer(peer); +} + +/* + * Find the peer associated with a local error. + */ +static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local, + const struct sk_buff *skb, + struct sockaddr_rxrpc *srx) +{ + struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); + + _enter(""); + + memset(srx, 0, sizeof(*srx)); + srx->transport_type = local->srx.transport_type; + srx->transport_len = local->srx.transport_len; + srx->transport.family = local->srx.transport.family; + + switch (srx->transport.family) { + case AF_INET: srx->transport_len = sizeof(srx->transport.sin); srx->transport.family = AF_INET; srx->transport.sin.sin_port = serr->port; @@ -104,10 +346,8 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, /* * Handle an MTU/fragmentation problem. 
*/ -static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, struct sock_exterr_skb *serr) +static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu) { - u32 mtu = serr->ee.ee_info; - _net("Rx ICMP Fragmentation Needed (%d)", mtu); /* wind down the local interface MTU */ @@ -148,7 +388,7 @@ void rxrpc_error_report(struct sock *sk) struct sock_exterr_skb *serr; struct sockaddr_rxrpc srx; struct rxrpc_local *local; - struct rxrpc_peer *peer; + struct rxrpc_peer *peer = NULL; struct sk_buff *skb; rcu_read_lock(); @@ -172,41 +412,20 @@ void rxrpc_error_report(struct sock *sk) } rxrpc_new_skb(skb, rxrpc_skb_received); serr = SKB_EXT_ERR(skb); - if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { - _leave("UDP empty message"); - rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_freed); - return; - } - peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx); - if (peer && !rxrpc_get_peer_maybe(peer)) - peer = NULL; - if (!peer) { - rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_freed); - _leave(" [no peer]"); - return; - } - - trace_rxrpc_rx_icmp(peer, &serr->ee, &srx); - - if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP && - serr->ee.ee_type == ICMP_DEST_UNREACH && - serr->ee.ee_code == ICMP_FRAG_NEEDED)) { - rxrpc_adjust_mtu(peer, serr); - rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_freed); - rxrpc_put_peer(peer); - _leave(" [MTU update]"); - return; + if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) { + peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx); + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (peer) { + trace_rxrpc_rx_icmp(peer, &serr->ee, &srx); + rxrpc_store_error(peer, serr); + } } - rxrpc_store_error(peer, serr); rcu_read_unlock(); rxrpc_free_skb(skb, rxrpc_skb_freed); rxrpc_put_peer(peer); - _leave(""); } diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index 250f23bc1c07..7e39c262fd79 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -771,46 +771,3 @@ call_complete: goto out; } EXPORT_SYMBOL(rxrpc_kernel_recv_data); - -/** - * rxrpc_kernel_get_reply_time - Get timestamp on first reply packet - * @sock: The socket that the call exists on - * @call: The call to query - * @_ts: Where to put the timestamp - * - * Retrieve the timestamp from the first DATA packet of the reply if it is - * in the ring. Returns true if successful, false if not. - */ -bool rxrpc_kernel_get_reply_time(struct socket *sock, struct rxrpc_call *call, - ktime_t *_ts) -{ - struct sk_buff *skb; - rxrpc_seq_t hard_ack, top, seq; - bool success = false; - - mutex_lock(&call->user_mutex); - - if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_RECV_REPLY) - goto out; - - hard_ack = call->rx_hard_ack; - if (hard_ack != 0) - goto out; - - seq = hard_ack + 1; - top = smp_load_acquire(&call->rx_top); - if (after(seq, top)) - goto out; - - skb = call->rxtx_buffer[seq & RXRPC_RXTX_BUFF_MASK]; - if (!skb) - goto out; - - *_ts = skb_get_ktime(skb); - success = true; - -out: - mutex_unlock(&call->user_mutex); - return success; -} -EXPORT_SYMBOL(rxrpc_kernel_get_reply_time); diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 258917a714c8..78fa0524156f 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -540,7 +540,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb, * directly into the target buffer. 
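The rxkad change below sizes the scatterlist as nr_frags + 1 because skb payload spans the linear head plus the page fragments; counting only the fragments under-sizes the (possibly on-stack) array. A trivial illustration with a toy structure, not the kernel's skb:

#include <stdio.h>

/* Toy skb: data lives in a linear head plus nr_frags page fragments. */
struct toy_skb {
        unsigned int head_len;
        unsigned int nr_frags;
};

/* A scatterlist covering the whole skb needs one entry for the linear head
 * plus one per fragment; sizing it as nr_frags alone is the off-by-one the
 * rxkad fix closes.
 */
static unsigned int sg_needed(const struct toy_skb *skb)
{
        return 1 + skb->nr_frags;
}

int main(void)
{
        struct toy_skb skb = { .head_len = 64, .nr_frags = 3 };

        printf("need %u scatterlist entries\n", sg_needed(&skb));       /* 4 */
        return 0;
}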
*/ sg = _sg; - nsg = skb_shinfo(skb)->nr_frags; + nsg = skb_shinfo(skb)->nr_frags + 1; if (nsg <= 4) { nsg = 4; } else { diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 817065aa2833..9b31a10cc639 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -676,6 +676,31 @@ int tcf_idr_search(struct tc_action_net *tn, struct tc_action **a, u32 index) } EXPORT_SYMBOL(tcf_idr_search); +static int __tcf_generic_walker(struct net *net, struct sk_buff *skb, + struct netlink_callback *cb, int type, + const struct tc_action_ops *ops, + struct netlink_ext_ack *extack) +{ + struct tc_action_net *tn = net_generic(net, ops->net_id); + + if (unlikely(ops->walk)) + return ops->walk(net, skb, cb, type, ops, extack); + + return tcf_generic_walker(tn, skb, cb, type, ops, extack); +} + +static int __tcf_idr_search(struct net *net, + const struct tc_action_ops *ops, + struct tc_action **a, u32 index) +{ + struct tc_action_net *tn = net_generic(net, ops->net_id); + + if (unlikely(ops->lookup)) + return ops->lookup(net, a, index); + + return tcf_idr_search(tn, a, index); +} + static int tcf_idr_delete_index(struct tcf_idrinfo *idrinfo, u32 index) { struct tc_action *p; @@ -926,7 +951,7 @@ int tcf_register_action(struct tc_action_ops *act, struct tc_action_ops *a; int ret; - if (!act->act || !act->dump || !act->init || !act->walk || !act->lookup) + if (!act->act || !act->dump || !act->init) return -EINVAL; /* We have to register pernet ops before making the action ops visible, @@ -1638,7 +1663,7 @@ static struct tc_action *tcf_action_get_1(struct net *net, struct nlattr *nla, goto err_out; } err = -ENOENT; - if (ops->lookup(net, &a, index) == 0) { + if (__tcf_idr_search(net, ops, &a, index) == 0) { NL_SET_ERR_MSG(extack, "TC action with specified index not found"); goto err_mod; } @@ -1703,7 +1728,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, goto out_module_put; } - err = ops->walk(net, skb, &dcb, RTM_DELACTION, ops, extack); + err = __tcf_generic_walker(net, skb, &dcb, RTM_DELACTION, ops, extack); if (err <= 0) { nla_nest_cancel(skb, nest); goto out_module_put; @@ -2121,7 +2146,7 @@ static int tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) if (nest == NULL) goto out_module_put; - ret = a_o->walk(net, skb, cb, RTM_GETACTION, a_o, NULL); + ret = __tcf_generic_walker(net, skb, cb, RTM_GETACTION, a_o, NULL); if (ret < 0) goto out_module_put; diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c index fea2d78b9ddc..b79eee44e24e 100644 --- a/net/sched/act_bpf.c +++ b/net/sched/act_bpf.c @@ -29,7 +29,6 @@ struct tcf_bpf_cfg { bool is_ebpf; }; -static unsigned int bpf_net_id; static struct tc_action_ops act_bpf_ops; static int tcf_bpf_act(struct sk_buff *skb, const struct tc_action *act, @@ -280,7 +279,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, bpf_net_id); + struct tc_action_net *tn = net_generic(net, act_bpf_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -334,7 +333,7 @@ static int tcf_bpf_init(struct net *net, struct nlattr *nla, is_bpf = tb[TCA_ACT_BPF_OPS_LEN] && tb[TCA_ACT_BPF_OPS]; is_ebpf = tb[TCA_ACT_BPF_FD]; - if ((!is_bpf && !is_ebpf) || (is_bpf && is_ebpf)) { + if (is_bpf == is_ebpf) { ret = -EINVAL; goto put_chain; } @@ -390,23 +389,6 @@ static void tcf_bpf_cleanup(struct tc_action *act) tcf_bpf_cfg_cleanup(&tmp); } -static int 
tcf_bpf_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, bpf_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_bpf_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, bpf_net_id); - - return tcf_idr_search(tn, a, index); -} - static struct tc_action_ops act_bpf_ops __read_mostly = { .kind = "bpf", .id = TCA_ID_BPF, @@ -415,27 +397,25 @@ static struct tc_action_ops act_bpf_ops __read_mostly = { .dump = tcf_bpf_dump, .cleanup = tcf_bpf_cleanup, .init = tcf_bpf_init, - .walk = tcf_bpf_walker, - .lookup = tcf_bpf_search, .size = sizeof(struct tcf_bpf), }; static __net_init int bpf_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, bpf_net_id); + struct tc_action_net *tn = net_generic(net, act_bpf_ops.net_id); return tc_action_net_init(net, tn, &act_bpf_ops); } static void __net_exit bpf_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, bpf_net_id); + tc_action_net_exit(net_list, act_bpf_ops.net_id); } static struct pernet_operations bpf_net_ops = { .init = bpf_init_net, .exit_batch = bpf_exit_net, - .id = &bpf_net_id, + .id = &act_bpf_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c index 09e2aafc8943..66b143bb04ac 100644 --- a/net/sched/act_connmark.c +++ b/net/sched/act_connmark.c @@ -25,7 +25,6 @@ #include <net/netfilter/nf_conntrack_core.h> #include <net/netfilter/nf_conntrack_zones.h> -static unsigned int connmark_net_id; static struct tc_action_ops act_connmark_ops; static int tcf_connmark_act(struct sk_buff *skb, const struct tc_action *a, @@ -99,7 +98,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, connmark_net_id); + struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id); struct nlattr *tb[TCA_CONNMARK_MAX + 1]; bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_chain *goto_ch = NULL; @@ -200,23 +199,6 @@ nla_put_failure: return -1; } -static int tcf_connmark_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, connmark_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_connmark_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, connmark_net_id); - - return tcf_idr_search(tn, a, index); -} - static struct tc_action_ops act_connmark_ops = { .kind = "connmark", .id = TCA_ID_CONNMARK, @@ -224,27 +206,25 @@ static struct tc_action_ops act_connmark_ops = { .act = tcf_connmark_act, .dump = tcf_connmark_dump, .init = tcf_connmark_init, - .walk = tcf_connmark_walker, - .lookup = tcf_connmark_search, .size = sizeof(struct tcf_connmark_info), }; static __net_init int connmark_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, connmark_net_id); + struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id); return tc_action_net_init(net, tn, &act_connmark_ops); } static void __net_exit connmark_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, connmark_net_id); + tc_action_net_exit(net_list, 
act_connmark_ops.net_id); } static struct pernet_operations connmark_net_ops = { .init = connmark_init_net, .exit_batch = connmark_exit_net, - .id = &connmark_net_id, + .id = &act_connmark_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index 22847ee009ef..1366adf9b909 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -37,7 +37,6 @@ static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, }; -static unsigned int csum_net_id; static struct tc_action_ops act_csum_ops; static int tcf_csum_init(struct net *net, struct nlattr *nla, @@ -45,7 +44,7 @@ static int tcf_csum_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, csum_net_id); + struct tc_action_net *tn = net_generic(net, act_csum_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_csum_params *params_new; struct nlattr *tb[TCA_CSUM_MAX + 1]; @@ -673,23 +672,6 @@ static void tcf_csum_cleanup(struct tc_action *a) kfree_rcu(params, rcu); } -static int tcf_csum_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, csum_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_csum_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, csum_net_id); - - return tcf_idr_search(tn, a, index); -} - static size_t tcf_csum_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_csum)); @@ -722,8 +704,6 @@ static struct tc_action_ops act_csum_ops = { .dump = tcf_csum_dump, .init = tcf_csum_init, .cleanup = tcf_csum_cleanup, - .walk = tcf_csum_walker, - .lookup = tcf_csum_search, .get_fill_size = tcf_csum_get_fill_size, .offload_act_setup = tcf_csum_offload_act_setup, .size = sizeof(struct tcf_csum), @@ -731,20 +711,20 @@ static struct tc_action_ops act_csum_ops = { static __net_init int csum_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, csum_net_id); + struct tc_action_net *tn = net_generic(net, act_csum_ops.net_id); return tc_action_net_init(net, tn, &act_csum_ops); } static void __net_exit csum_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, csum_net_id); + tc_action_net_exit(net_list, act_csum_ops.net_id); } static struct pernet_operations csum_net_ops = { .init = csum_init_net, .exit_batch = csum_exit_net, - .id = &csum_net_id, + .id = &act_csum_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index d55afb8d14be..b38d91d6b249 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -649,7 +649,6 @@ static void tcf_ct_flow_tables_uninit(void) } static struct tc_action_ops act_ct_ops; -static unsigned int ct_net_id; struct tc_ct_action_net { struct tc_action_net tn; /* Must be first */ @@ -697,7 +696,6 @@ drop_ct: static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family) { unsigned int len; - int err; switch (family) { case NFPROTO_IPV4: @@ -711,9 +709,7 @@ static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family) len = skb->len; } - err = pskb_trim_rcsum(skb, len); - - return err; + return pskb_trim_rcsum(skb, len); } static u8 tcf_ct_skb_nf_family(struct sk_buff *skb) @@ -1255,7 +1251,7 @@ static int 
tcf_ct_fill_params(struct net *net, struct nlattr **tb, struct netlink_ext_ack *extack) { - struct tc_ct_action_net *tn = net_generic(net, ct_net_id); + struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); struct nf_conntrack_zone zone; struct nf_conn *tmpl; int err; @@ -1330,7 +1326,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, ct_net_id); + struct tc_action_net *tn = net_generic(net, act_ct_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct tcf_ct_params *params = NULL; struct nlattr *tb[TCA_CT_MAX + 1]; @@ -1394,7 +1390,7 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, err = tcf_ct_flow_table_get(net, params); if (err) - goto cleanup; + goto cleanup_params; spin_lock_bh(&c->tcf_lock); goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch); @@ -1409,6 +1405,9 @@ static int tcf_ct_init(struct net *net, struct nlattr *nla, return res; +cleanup_params: + if (params->tmpl) + nf_ct_put(params->tmpl); cleanup: if (goto_ch) tcf_chain_put_by_act(goto_ch); @@ -1558,23 +1557,6 @@ nla_put_failure: return -1; } -static int tcf_ct_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, ct_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_ct_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, ct_net_id); - - return tcf_idr_search(tn, a, index); -} - static void tcf_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 drops, u64 lastuse, bool hw) { @@ -1613,8 +1595,6 @@ static struct tc_action_ops act_ct_ops = { .dump = tcf_ct_dump, .init = tcf_ct_init, .cleanup = tcf_ct_cleanup, - .walk = tcf_ct_walker, - .lookup = tcf_ct_search, .stats_update = tcf_stats_update, .offload_act_setup = tcf_ct_offload_act_setup, .size = sizeof(struct tcf_ct), @@ -1623,7 +1603,7 @@ static struct tc_action_ops act_ct_ops = { static __net_init int ct_init_net(struct net *net) { unsigned int n_bits = sizeof_field(struct tcf_ct_params, labels) * 8; - struct tc_ct_action_net *tn = net_generic(net, ct_net_id); + struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); if (nf_connlabels_get(net, n_bits - 1)) { tn->labels = false; @@ -1641,20 +1621,20 @@ static void __net_exit ct_exit_net(struct list_head *net_list) rtnl_lock(); list_for_each_entry(net, net_list, exit_list) { - struct tc_ct_action_net *tn = net_generic(net, ct_net_id); + struct tc_ct_action_net *tn = net_generic(net, act_ct_ops.net_id); if (tn->labels) nf_connlabels_put(net); } rtnl_unlock(); - tc_action_net_exit(net_list, ct_net_id); + tc_action_net_exit(net_list, act_ct_ops.net_id); } static struct pernet_operations ct_net_ops = { .init = ct_init_net, .exit_batch = ct_exit_net, - .id = &ct_net_id, + .id = &act_ct_ops.net_id, .size = sizeof(struct tc_ct_action_net), }; diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c index 0281e45987a4..d4102f0a9abd 100644 --- a/net/sched/act_ctinfo.c +++ b/net/sched/act_ctinfo.c @@ -25,7 +25,6 @@ #include <net/netfilter/nf_conntrack_zones.h> static struct tc_action_ops act_ctinfo_ops; -static unsigned int ctinfo_net_id; static void tcf_ctinfo_dscp_set(struct nf_conn *ct, struct tcf_ctinfo *ca, struct tcf_ctinfo_params *cp, @@ -157,7 +156,7 @@ static int tcf_ctinfo_init(struct net *net, 
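/*
 * act_ct also picks up a real fix in the hunks above: when
 * tcf_ct_flow_table_get() failed, tcf_ct_init() used to jump straight
 * to "cleanup" and leak the conntrack template that
 * tcf_ct_fill_params() had already attached to params.  The new
 * cleanup_params label puts the template and then falls through to the
 * old unwind.  Sketched around the visible hunk, with the surrounding
 * lines reconstructed rather than quoted verbatim:
 */
        err = tcf_ct_fill_params(net, params, parm, tb, extack);
        if (err)
                goto cleanup;

        err = tcf_ct_flow_table_get(net, params);
        if (err)
                goto cleanup_params;    /* params->tmpl may hold a ref */

        /* ... success path ... */

cleanup_params:
        if (params->tmpl)
                nf_ct_put(params->tmpl);
cleanup:
        if (goto_ch)
                tcf_chain_put_by_act(goto_ch);
        kfree(params);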
struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, ctinfo_net_id); + struct tc_action_net *tn = net_generic(net, act_ctinfo_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; u32 dscpmask = 0, dscpstatemask, index; struct nlattr *tb[TCA_CTINFO_MAX + 1]; @@ -342,23 +341,6 @@ nla_put_failure: return -1; } -static int tcf_ctinfo_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, ctinfo_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_ctinfo_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, ctinfo_net_id); - - return tcf_idr_search(tn, a, index); -} - static void tcf_ctinfo_cleanup(struct tc_action *a) { struct tcf_ctinfo *ci = to_ctinfo(a); @@ -377,27 +359,25 @@ static struct tc_action_ops act_ctinfo_ops = { .dump = tcf_ctinfo_dump, .init = tcf_ctinfo_init, .cleanup= tcf_ctinfo_cleanup, - .walk = tcf_ctinfo_walker, - .lookup = tcf_ctinfo_search, .size = sizeof(struct tcf_ctinfo), }; static __net_init int ctinfo_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, ctinfo_net_id); + struct tc_action_net *tn = net_generic(net, act_ctinfo_ops.net_id); return tc_action_net_init(net, tn, &act_ctinfo_ops); } static void __net_exit ctinfo_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, ctinfo_net_id); + tc_action_net_exit(net_list, act_ctinfo_ops.net_id); } static struct pernet_operations ctinfo_net_ops = { .init = ctinfo_init_net, .exit_batch = ctinfo_exit_net, - .id = &ctinfo_net_id, + .id = &act_ctinfo_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index ac29d1065232..abe1bcc5c797 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -19,7 +19,6 @@ #include <linux/tc_act/tc_gact.h> #include <net/tc_act/tc_gact.h> -static unsigned int gact_net_id; static struct tc_action_ops act_gact_ops; #ifdef CONFIG_GACT_PROB @@ -55,7 +54,7 @@ static int tcf_gact_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, gact_net_id); + struct tc_action_net *tn = net_generic(net, act_gact_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GACT_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -222,23 +221,6 @@ nla_put_failure: return -1; } -static int tcf_gact_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, gact_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_gact_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, gact_net_id); - - return tcf_idr_search(tn, a, index); -} - static size_t tcf_gact_get_fill_size(const struct tc_action *act) { size_t sz = nla_total_size(sizeof(struct tc_gact)); /* TCA_GACT_PARMS */ @@ -308,8 +290,6 @@ static struct tc_action_ops act_gact_ops = { .stats_update = tcf_gact_stats_update, .dump = tcf_gact_dump, .init = tcf_gact_init, - .walk = tcf_gact_walker, - .lookup = tcf_gact_search, .get_fill_size = tcf_gact_get_fill_size, .offload_act_setup = tcf_gact_offload_act_setup, 
.size = sizeof(struct tcf_gact), @@ -317,20 +297,20 @@ static struct tc_action_ops act_gact_ops = { static __net_init int gact_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, gact_net_id); + struct tc_action_net *tn = net_generic(net, act_gact_ops.net_id); return tc_action_net_init(net, tn, &act_gact_ops); } static void __net_exit gact_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, gact_net_id); + tc_action_net_exit(net_list, act_gact_ops.net_id); } static struct pernet_operations gact_net_ops = { .init = gact_init_net, .exit_batch = gact_exit_net, - .id = &gact_net_id, + .id = &act_gact_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c index fd5155274733..3049878e7315 100644 --- a/net/sched/act_gate.c +++ b/net/sched/act_gate.c @@ -15,7 +15,6 @@ #include <net/pkt_cls.h> #include <net/tc_act/tc_gate.h> -static unsigned int gate_net_id; static struct tc_action_ops act_gate_ops; static ktime_t gate_get_time(struct tcf_gate *gact) @@ -298,7 +297,7 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, gate_net_id); + struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id); enum tk_offsets tk_offset = TK_OFFS_TAI; bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_GATE_MAX + 1]; @@ -565,16 +564,6 @@ nla_put_failure: return -1; } -static int tcf_gate_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, gate_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 drops, u64 lastuse, bool hw) { @@ -585,13 +574,6 @@ static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u64 packets, tm->lastuse = max_t(u64, tm->lastuse, lastuse); } -static int tcf_gate_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, gate_net_id); - - return tcf_idr_search(tn, a, index); -} - static size_t tcf_gate_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_gate)); @@ -654,30 +636,28 @@ static struct tc_action_ops act_gate_ops = { .dump = tcf_gate_dump, .init = tcf_gate_init, .cleanup = tcf_gate_cleanup, - .walk = tcf_gate_walker, .stats_update = tcf_gate_stats_update, .get_fill_size = tcf_gate_get_fill_size, - .lookup = tcf_gate_search, .offload_act_setup = tcf_gate_offload_act_setup, .size = sizeof(struct tcf_gate), }; static __net_init int gate_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, gate_net_id); + struct tc_action_net *tn = net_generic(net, act_gate_ops.net_id); return tc_action_net_init(net, tn, &act_gate_ops); } static void __net_exit gate_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, gate_net_id); + tc_action_net_exit(net_list, act_gate_ops.net_id); } static struct pernet_operations gate_net_ops = { .init = gate_init_net, .exit_batch = gate_exit_net, - .id = &gate_net_id, + .id = &act_gate_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 41ba55e60b1b..41d63b33461d 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -30,7 +30,6 @@ #include <linux/etherdevice.h> #include <net/ife.h> -static 
unsigned int ife_net_id; static int max_metacnt = IFE_META_MAX + 1; static struct tc_action_ops act_ife_ops; @@ -482,7 +481,7 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, ife_net_id); + struct tc_action_net *tn = net_generic(net, act_ife_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_IFE_MAX + 1]; struct nlattr *tb2[IFE_META_MAX + 1]; @@ -878,23 +877,6 @@ static int tcf_ife_act(struct sk_buff *skb, const struct tc_action *a, return tcf_ife_decode(skb, a, res); } -static int tcf_ife_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, ife_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_ife_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, ife_net_id); - - return tcf_idr_search(tn, a, index); -} - static struct tc_action_ops act_ife_ops = { .kind = "ife", .id = TCA_ID_IFE, @@ -903,27 +885,25 @@ static struct tc_action_ops act_ife_ops = { .dump = tcf_ife_dump, .cleanup = tcf_ife_cleanup, .init = tcf_ife_init, - .walk = tcf_ife_walker, - .lookup = tcf_ife_search, .size = sizeof(struct tcf_ife_info), }; static __net_init int ife_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, ife_net_id); + struct tc_action_net *tn = net_generic(net, act_ife_ops.net_id); return tc_action_net_init(net, tn, &act_ife_ops); } static void __net_exit ife_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, ife_net_id); + tc_action_net_exit(net_list, act_ife_ops.net_id); } static struct pernet_operations ife_net_ops = { .init = ife_init_net, .exit_batch = ife_exit_net, - .id = &ife_net_id, + .id = &act_ife_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 2f3d507c24a1..1625e1037416 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -24,10 +24,7 @@ #include <linux/netfilter_ipv4/ip_tables.h> -static unsigned int ipt_net_id; static struct tc_action_ops act_ipt_ops; - -static unsigned int xt_net_id; static struct tc_action_ops act_xt_ops; static int ipt_init_target(struct net *net, struct xt_entry_target *t, @@ -206,8 +203,8 @@ static int tcf_ipt_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - return __tcf_ipt_init(net, ipt_net_id, nla, est, a, &act_ipt_ops, - tp, flags); + return __tcf_ipt_init(net, act_ipt_ops.net_id, nla, est, + a, &act_ipt_ops, tp, flags); } static int tcf_xt_init(struct net *net, struct nlattr *nla, @@ -215,8 +212,8 @@ static int tcf_xt_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - return __tcf_ipt_init(net, xt_net_id, nla, est, a, &act_xt_ops, - tp, flags); + return __tcf_ipt_init(net, act_xt_ops.net_id, nla, est, + a, &act_xt_ops, tp, flags); } static int tcf_ipt_act(struct sk_buff *skb, const struct tc_action *a, @@ -316,23 +313,6 @@ nla_put_failure: return -1; } -static int tcf_ipt_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, ipt_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, 
extack); -} - -static int tcf_ipt_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, ipt_net_id); - - return tcf_idr_search(tn, a, index); -} - static struct tc_action_ops act_ipt_ops = { .kind = "ipt", .id = TCA_ID_IPT, @@ -341,47 +321,28 @@ static struct tc_action_ops act_ipt_ops = { .dump = tcf_ipt_dump, .cleanup = tcf_ipt_release, .init = tcf_ipt_init, - .walk = tcf_ipt_walker, - .lookup = tcf_ipt_search, .size = sizeof(struct tcf_ipt), }; static __net_init int ipt_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, ipt_net_id); + struct tc_action_net *tn = net_generic(net, act_ipt_ops.net_id); return tc_action_net_init(net, tn, &act_ipt_ops); } static void __net_exit ipt_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, ipt_net_id); + tc_action_net_exit(net_list, act_ipt_ops.net_id); } static struct pernet_operations ipt_net_ops = { .init = ipt_init_net, .exit_batch = ipt_exit_net, - .id = &ipt_net_id, + .id = &act_ipt_ops.net_id, .size = sizeof(struct tc_action_net), }; -static int tcf_xt_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, xt_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_xt_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, xt_net_id); - - return tcf_idr_search(tn, a, index); -} - static struct tc_action_ops act_xt_ops = { .kind = "xt", .id = TCA_ID_XT, @@ -390,27 +351,25 @@ static struct tc_action_ops act_xt_ops = { .dump = tcf_ipt_dump, .cleanup = tcf_ipt_release, .init = tcf_xt_init, - .walk = tcf_xt_walker, - .lookup = tcf_xt_search, .size = sizeof(struct tcf_ipt), }; static __net_init int xt_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, xt_net_id); + struct tc_action_net *tn = net_generic(net, act_xt_ops.net_id); return tc_action_net_init(net, tn, &act_xt_ops); } static void __net_exit xt_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, xt_net_id); + tc_action_net_exit(net_list, act_xt_ops.net_id); } static struct pernet_operations xt_net_ops = { .init = xt_init_net, .exit_batch = xt_exit_net, - .id = &xt_net_id, + .id = &act_xt_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index a1d70cf86843..b8ad6ae282c0 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -86,7 +86,6 @@ static const struct nla_policy mirred_policy[TCA_MIRRED_MAX + 1] = { [TCA_MIRRED_PARMS] = { .len = sizeof(struct tc_mirred) }, }; -static unsigned int mirred_net_id; static struct tc_action_ops act_mirred_ops; static int tcf_mirred_init(struct net *net, struct nlattr *nla, @@ -94,7 +93,7 @@ static int tcf_mirred_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, mirred_net_id); + struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_MIRRED_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -306,8 +305,7 @@ static int tcf_mirred_act(struct sk_buff *skb, const struct tc_action *a, /* let's the caller reinsert the packet, if possible */ if (use_reinsert) { - res->ingress = want_ingress; - err = tcf_mirred_forward(res->ingress, skb); + err = 
tcf_mirred_forward(want_ingress, skb); if (err) tcf_action_inc_overlimit_qstats(&m->common); __this_cpu_dec(mirred_rec_level); @@ -373,23 +371,6 @@ nla_put_failure: return -1; } -static int tcf_mirred_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, mirred_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_mirred_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, mirred_net_id); - - return tcf_idr_search(tn, a, index); -} - static int mirred_device_event(struct notifier_block *unused, unsigned long event, void *ptr) { @@ -510,8 +491,6 @@ static struct tc_action_ops act_mirred_ops = { .dump = tcf_mirred_dump, .cleanup = tcf_mirred_release, .init = tcf_mirred_init, - .walk = tcf_mirred_walker, - .lookup = tcf_mirred_search, .get_fill_size = tcf_mirred_get_fill_size, .offload_act_setup = tcf_mirred_offload_act_setup, .size = sizeof(struct tcf_mirred), @@ -520,20 +499,20 @@ static struct tc_action_ops act_mirred_ops = { static __net_init int mirred_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, mirred_net_id); + struct tc_action_net *tn = net_generic(net, act_mirred_ops.net_id); return tc_action_net_init(net, tn, &act_mirred_ops); } static void __net_exit mirred_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, mirred_net_id); + tc_action_net_exit(net_list, act_mirred_ops.net_id); } static struct pernet_operations mirred_net_ops = { .init = mirred_init_net, .exit_batch = mirred_exit_net, - .id = &mirred_net_id, + .id = &act_mirred_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c index adabeccb63e1..8ad25cc8ccd5 100644 --- a/net/sched/act_mpls.c +++ b/net/sched/act_mpls.c @@ -15,7 +15,6 @@ #include <net/pkt_cls.h> #include <net/tc_act/tc_mpls.h> -static unsigned int mpls_net_id; static struct tc_action_ops act_mpls_ops; #define ACT_MPLS_TTL_DEFAULT 255 @@ -155,7 +154,7 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, mpls_net_id); + struct tc_action_net *tn = net_generic(net, act_mpls_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_MPLS_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -367,23 +366,6 @@ nla_put_failure: return -EMSGSIZE; } -static int tcf_mpls_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, mpls_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_mpls_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, mpls_net_id); - - return tcf_idr_search(tn, a, index); -} - static int tcf_mpls_offload_act_setup(struct tc_action *act, void *entry_data, u32 *index_inc, bool bind, struct netlink_ext_ack *extack) @@ -451,28 +433,26 @@ static struct tc_action_ops act_mpls_ops = { .dump = tcf_mpls_dump, .init = tcf_mpls_init, .cleanup = tcf_mpls_cleanup, - .walk = tcf_mpls_walker, - .lookup = tcf_mpls_search, .offload_act_setup = tcf_mpls_offload_act_setup, .size = sizeof(struct tcf_mpls), }; static __net_init int mpls_init_net(struct net *net) { 
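/*
 * Beyond the boilerplate removal, the act_mirred hunk above carries a
 * small cleanup on the reinsert path: the code used to store
 * want_ingress into the caller-supplied res->ingress and then
 * immediately read it back when calling tcf_mirred_forward().  It now
 * passes the local want_ingress directly, dropping the needless round
 * trip through (and write to) the result structure on this path.
 */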
- struct tc_action_net *tn = net_generic(net, mpls_net_id); + struct tc_action_net *tn = net_generic(net, act_mpls_ops.net_id); return tc_action_net_init(net, tn, &act_mpls_ops); } static void __net_exit mpls_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, mpls_net_id); + tc_action_net_exit(net_list, act_mpls_ops.net_id); } static struct pernet_operations mpls_net_ops = { .init = mpls_init_net, .exit_batch = mpls_exit_net, - .id = &mpls_net_id, + .id = &act_mpls_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c index 2a39b3729e84..9265145f1040 100644 --- a/net/sched/act_nat.c +++ b/net/sched/act_nat.c @@ -26,7 +26,6 @@ #include <net/udp.h> -static unsigned int nat_net_id; static struct tc_action_ops act_nat_ops; static const struct nla_policy nat_policy[TCA_NAT_MAX + 1] = { @@ -37,7 +36,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action **a, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, nat_net_id); + struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_NAT_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -289,23 +288,6 @@ nla_put_failure: return -1; } -static int tcf_nat_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, nat_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_nat_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, nat_net_id); - - return tcf_idr_search(tn, a, index); -} - static struct tc_action_ops act_nat_ops = { .kind = "nat", .id = TCA_ID_NAT, @@ -313,27 +295,25 @@ static struct tc_action_ops act_nat_ops = { .act = tcf_nat_act, .dump = tcf_nat_dump, .init = tcf_nat_init, - .walk = tcf_nat_walker, - .lookup = tcf_nat_search, .size = sizeof(struct tcf_nat), }; static __net_init int nat_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, nat_net_id); + struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id); return tc_action_net_init(net, tn, &act_nat_ops); } static void __net_exit nat_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, nat_net_id); + tc_action_net_exit(net_list, act_nat_ops.net_id); } static struct pernet_operations nat_net_ops = { .init = nat_init_net, .exit_batch = nat_exit_net, - .id = &nat_net_id, + .id = &act_nat_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index 823ee643371c..94ed5857ce67 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -21,7 +21,6 @@ #include <uapi/linux/tc_act/tc_pedit.h> #include <net/pkt_cls.h> -static unsigned int pedit_net_id; static struct tc_action_ops act_pedit_ops; static const struct nla_policy pedit_policy[TCA_PEDIT_MAX + 1] = { @@ -139,7 +138,7 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, pedit_net_id); + struct tc_action_net *tn = net_generic(net, act_pedit_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_PEDIT_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -492,23 +491,6 @@ nla_put_failure: return -1; } -static int 
tcf_pedit_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, pedit_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_pedit_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, pedit_net_id); - - return tcf_idr_search(tn, a, index); -} - static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data, u32 *index_inc, bool bind, struct netlink_ext_ack *extack) @@ -553,28 +535,26 @@ static struct tc_action_ops act_pedit_ops = { .dump = tcf_pedit_dump, .cleanup = tcf_pedit_cleanup, .init = tcf_pedit_init, - .walk = tcf_pedit_walker, - .lookup = tcf_pedit_search, .offload_act_setup = tcf_pedit_offload_act_setup, .size = sizeof(struct tcf_pedit), }; static __net_init int pedit_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, pedit_net_id); + struct tc_action_net *tn = net_generic(net, act_pedit_ops.net_id); return tc_action_net_init(net, tn, &act_pedit_ops); } static void __net_exit pedit_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, pedit_net_id); + tc_action_net_exit(net_list, act_pedit_ops.net_id); } static struct pernet_operations pedit_net_ops = { .init = pedit_init_net, .exit_batch = pedit_exit_net, - .id = &pedit_net_id, + .id = &act_pedit_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_police.c b/net/sched/act_police.c index b759628a47c2..0adb26e366a7 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -22,19 +22,8 @@ /* Each policer is serialized by its individual spinlock */ -static unsigned int police_net_id; static struct tc_action_ops act_police_ops; -static int tcf_police_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, police_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - static const struct nla_policy police_policy[TCA_POLICE_MAX + 1] = { [TCA_POLICE_RATE] = { .len = TC_RTAB_SIZE }, [TCA_POLICE_PEAKRATE] = { .len = TC_RTAB_SIZE }, @@ -58,7 +47,7 @@ static int tcf_police_init(struct net *net, struct nlattr *nla, struct tc_police *parm; struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; - struct tc_action_net *tn = net_generic(net, police_net_id); + struct tc_action_net *tn = net_generic(net, act_police_ops.net_id); struct tcf_police_params *new; bool exists = false; u32 index; @@ -412,13 +401,6 @@ nla_put_failure: return -1; } -static int tcf_police_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, police_net_id); - - return tcf_idr_search(tn, a, index); -} - static int tcf_police_act_to_flow_act(int tc_act, u32 *extval, struct netlink_ext_ack *extack) { @@ -513,8 +495,6 @@ static struct tc_action_ops act_police_ops = { .act = tcf_police_act, .dump = tcf_police_dump, .init = tcf_police_init, - .walk = tcf_police_walker, - .lookup = tcf_police_search, .cleanup = tcf_police_cleanup, .offload_act_setup = tcf_police_offload_act_setup, .size = sizeof(struct tcf_police), @@ -522,20 +502,20 @@ static struct tc_action_ops act_police_ops = { static __net_init int police_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, police_net_id); + struct 
tc_action_net *tn = net_generic(net, act_police_ops.net_id); return tc_action_net_init(net, tn, &act_police_ops); } static void __net_exit police_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, police_net_id); + tc_action_net_exit(net_list, act_police_ops.net_id); } static struct pernet_operations police_net_ops = { .init = police_init_net, .exit_batch = police_exit_net, - .id = &police_net_id, + .id = &act_police_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c index 2f7f5e44d28c..5ba36f70e3a1 100644 --- a/net/sched/act_sample.c +++ b/net/sched/act_sample.c @@ -23,7 +23,6 @@ #include <linux/if_arp.h> -static unsigned int sample_net_id; static struct tc_action_ops act_sample_ops; static const struct nla_policy sample_policy[TCA_SAMPLE_MAX + 1] = { @@ -38,7 +37,7 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, sample_net_id); + struct tc_action_net *tn = net_generic(net, act_sample_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_SAMPLE_MAX + 1]; struct psample_group *psample_group; @@ -241,23 +240,6 @@ nla_put_failure: return -1; } -static int tcf_sample_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, sample_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_sample_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, sample_net_id); - - return tcf_idr_search(tn, a, index); -} - static void tcf_psample_group_put(void *priv) { struct psample_group *group = priv; @@ -321,8 +303,6 @@ static struct tc_action_ops act_sample_ops = { .dump = tcf_sample_dump, .init = tcf_sample_init, .cleanup = tcf_sample_cleanup, - .walk = tcf_sample_walker, - .lookup = tcf_sample_search, .get_psample_group = tcf_sample_get_group, .offload_act_setup = tcf_sample_offload_act_setup, .size = sizeof(struct tcf_sample), @@ -330,20 +310,20 @@ static struct tc_action_ops act_sample_ops = { static __net_init int sample_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, sample_net_id); + struct tc_action_net *tn = net_generic(net, act_sample_ops.net_id); return tc_action_net_init(net, tn, &act_sample_ops); } static void __net_exit sample_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, sample_net_id); + tc_action_net_exit(net_list, act_sample_ops.net_id); } static struct pernet_operations sample_net_ops = { .init = sample_init_net, .exit_batch = sample_exit_net, - .id = &sample_net_id, + .id = &act_sample_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 8c1d60bde93e..18d376135461 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -18,7 +18,6 @@ #include <linux/tc_act/tc_defact.h> #include <net/tc_act/tc_defact.h> -static unsigned int simp_net_id; static struct tc_action_ops act_simp_ops; #define SIMP_MAX_DATA 32 @@ -89,7 +88,7 @@ static int tcf_simp_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, simp_net_id); + struct tc_action_net *tn = net_generic(net, act_simp_ops.net_id); bool bind = flags & 
TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_DEF_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -198,23 +197,6 @@ nla_put_failure: return -1; } -static int tcf_simp_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, simp_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_simp_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, simp_net_id); - - return tcf_idr_search(tn, a, index); -} - static struct tc_action_ops act_simp_ops = { .kind = "simple", .id = TCA_ID_SIMP, @@ -223,27 +205,25 @@ static struct tc_action_ops act_simp_ops = { .dump = tcf_simp_dump, .cleanup = tcf_simp_release, .init = tcf_simp_init, - .walk = tcf_simp_walker, - .lookup = tcf_simp_search, .size = sizeof(struct tcf_defact), }; static __net_init int simp_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, simp_net_id); + struct tc_action_net *tn = net_generic(net, act_simp_ops.net_id); return tc_action_net_init(net, tn, &act_simp_ops); } static void __net_exit simp_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, simp_net_id); + tc_action_net_exit(net_list, act_simp_ops.net_id); } static struct pernet_operations simp_net_ops = { .init = simp_init_net, .exit_batch = simp_exit_net, - .id = &simp_net_id, + .id = &act_simp_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_skbedit.c b/net/sched/act_skbedit.c index e3bd11dfe1ca..7f598784fd30 100644 --- a/net/sched/act_skbedit.c +++ b/net/sched/act_skbedit.c @@ -20,7 +20,6 @@ #include <linux/tc_act/tc_skbedit.h> #include <net/tc_act/tc_skbedit.h> -static unsigned int skbedit_net_id; static struct tc_action_ops act_skbedit_ops; static u16 tcf_skbedit_hash(struct tcf_skbedit_params *params, @@ -118,7 +117,7 @@ static int tcf_skbedit_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 act_flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, skbedit_net_id); + struct tc_action_net *tn = net_generic(net, act_skbedit_ops.net_id); bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct tcf_skbedit_params *params_new; struct nlattr *tb[TCA_SKBEDIT_MAX + 1]; @@ -347,23 +346,6 @@ static void tcf_skbedit_cleanup(struct tc_action *a) kfree_rcu(params, rcu); } -static int tcf_skbedit_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, skbedit_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_skbedit_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, skbedit_net_id); - - return tcf_idr_search(tn, a, index); -} - static size_t tcf_skbedit_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_skbedit)) @@ -428,29 +410,27 @@ static struct tc_action_ops act_skbedit_ops = { .dump = tcf_skbedit_dump, .init = tcf_skbedit_init, .cleanup = tcf_skbedit_cleanup, - .walk = tcf_skbedit_walker, .get_fill_size = tcf_skbedit_get_fill_size, - .lookup = tcf_skbedit_search, .offload_act_setup = tcf_skbedit_offload_act_setup, .size = sizeof(struct tcf_skbedit), }; static __net_init int skbedit_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, 
skbedit_net_id); + struct tc_action_net *tn = net_generic(net, act_skbedit_ops.net_id); return tc_action_net_init(net, tn, &act_skbedit_ops); } static void __net_exit skbedit_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, skbedit_net_id); + tc_action_net_exit(net_list, act_skbedit_ops.net_id); } static struct pernet_operations skbedit_net_ops = { .init = skbedit_init_net, .exit_batch = skbedit_exit_net, - .id = &skbedit_net_id, + .id = &act_skbedit_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_skbmod.c b/net/sched/act_skbmod.c index 2083612d8780..d98758a63934 100644 --- a/net/sched/act_skbmod.c +++ b/net/sched/act_skbmod.c @@ -19,7 +19,6 @@ #include <linux/tc_act/tc_skbmod.h> #include <net/tc_act/tc_skbmod.h> -static unsigned int skbmod_net_id; static struct tc_action_ops act_skbmod_ops; static int tcf_skbmod_act(struct sk_buff *skb, const struct tc_action *a, @@ -103,7 +102,7 @@ static int tcf_skbmod_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, skbmod_net_id); + struct tc_action_net *tn = net_generic(net, act_skbmod_ops.net_id); bool ovr = flags & TCA_ACT_FLAGS_REPLACE; bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_SKBMOD_MAX + 1]; @@ -276,23 +275,6 @@ nla_put_failure: return -1; } -static int tcf_skbmod_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, skbmod_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tcf_skbmod_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, skbmod_net_id); - - return tcf_idr_search(tn, a, index); -} - static struct tc_action_ops act_skbmod_ops = { .kind = "skbmod", .id = TCA_ACT_SKBMOD, @@ -301,27 +283,25 @@ static struct tc_action_ops act_skbmod_ops = { .dump = tcf_skbmod_dump, .init = tcf_skbmod_init, .cleanup = tcf_skbmod_cleanup, - .walk = tcf_skbmod_walker, - .lookup = tcf_skbmod_search, .size = sizeof(struct tcf_skbmod), }; static __net_init int skbmod_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, skbmod_net_id); + struct tc_action_net *tn = net_generic(net, act_skbmod_ops.net_id); return tc_action_net_init(net, tn, &act_skbmod_ops); } static void __net_exit skbmod_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, skbmod_net_id); + tc_action_net_exit(net_list, act_skbmod_ops.net_id); } static struct pernet_operations skbmod_net_ops = { .init = skbmod_init_net, .exit_batch = skbmod_exit_net, - .id = &skbmod_net_id, + .id = &act_skbmod_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index 856dc23cef8c..2691a3d8e451 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -20,7 +20,6 @@ #include <linux/tc_act/tc_tunnel_key.h> #include <net/tc_act/tc_tunnel_key.h> -static unsigned int tunnel_key_net_id; static struct tc_action_ops act_tunnel_key_ops; static int tunnel_key_act(struct sk_buff *skb, const struct tc_action *a, @@ -358,7 +357,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 act_flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + struct tc_action_net *tn = net_generic(net, 
act_tunnel_key_ops.net_id); bool bind = act_flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_TUNNEL_KEY_MAX + 1]; struct tcf_tunnel_key_params *params_new; @@ -770,23 +769,6 @@ nla_put_failure: return -1; } -static int tunnel_key_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - -static int tunnel_key_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); - - return tcf_idr_search(tn, a, index); -} - static void tcf_tunnel_encap_put_tunnel(void *priv) { struct ip_tunnel_info *tunnel = priv; @@ -850,28 +832,26 @@ static struct tc_action_ops act_tunnel_key_ops = { .dump = tunnel_key_dump, .init = tunnel_key_init, .cleanup = tunnel_key_release, - .walk = tunnel_key_walker, - .lookup = tunnel_key_search, .offload_act_setup = tcf_tunnel_key_offload_act_setup, .size = sizeof(struct tcf_tunnel_key), }; static __net_init int tunnel_key_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, tunnel_key_net_id); + struct tc_action_net *tn = net_generic(net, act_tunnel_key_ops.net_id); return tc_action_net_init(net, tn, &act_tunnel_key_ops); } static void __net_exit tunnel_key_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, tunnel_key_net_id); + tc_action_net_exit(net_list, act_tunnel_key_ops.net_id); } static struct pernet_operations tunnel_key_net_ops = { .init = tunnel_key_init_net, .exit_batch = tunnel_key_exit_net, - .id = &tunnel_key_net_id, + .id = &act_tunnel_key_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/act_vlan.c b/net/sched/act_vlan.c index 68b5e772386a..7b24e898a3e6 100644 --- a/net/sched/act_vlan.c +++ b/net/sched/act_vlan.c @@ -16,7 +16,6 @@ #include <linux/tc_act/tc_vlan.h> #include <net/tc_act/tc_vlan.h> -static unsigned int vlan_net_id; static struct tc_action_ops act_vlan_ops; static int tcf_vlan_act(struct sk_buff *skb, const struct tc_action *a, @@ -117,7 +116,7 @@ static int tcf_vlan_init(struct net *net, struct nlattr *nla, struct tcf_proto *tp, u32 flags, struct netlink_ext_ack *extack) { - struct tc_action_net *tn = net_generic(net, vlan_net_id); + struct tc_action_net *tn = net_generic(net, act_vlan_ops.net_id); bool bind = flags & TCA_ACT_FLAGS_BIND; struct nlattr *tb[TCA_VLAN_MAX + 1]; struct tcf_chain *goto_ch = NULL; @@ -333,16 +332,6 @@ nla_put_failure: return -1; } -static int tcf_vlan_walker(struct net *net, struct sk_buff *skb, - struct netlink_callback *cb, int type, - const struct tc_action_ops *ops, - struct netlink_ext_ack *extack) -{ - struct tc_action_net *tn = net_generic(net, vlan_net_id); - - return tcf_generic_walker(tn, skb, cb, type, ops, extack); -} - static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u64 packets, u64 drops, u64 lastuse, bool hw) { @@ -353,13 +342,6 @@ static void tcf_vlan_stats_update(struct tc_action *a, u64 bytes, u64 packets, tm->lastuse = max_t(u64, tm->lastuse, lastuse); } -static int tcf_vlan_search(struct net *net, struct tc_action **a, u32 index) -{ - struct tc_action_net *tn = net_generic(net, vlan_net_id); - - return tcf_idr_search(tn, a, index); -} - static size_t tcf_vlan_get_fill_size(const struct tc_action *act) { return nla_total_size(sizeof(struct tc_vlan)) @@ -438,30 +420,28 @@ static struct tc_action_ops act_vlan_ops = { 
.dump = tcf_vlan_dump, .init = tcf_vlan_init, .cleanup = tcf_vlan_cleanup, - .walk = tcf_vlan_walker, .stats_update = tcf_vlan_stats_update, .get_fill_size = tcf_vlan_get_fill_size, - .lookup = tcf_vlan_search, .offload_act_setup = tcf_vlan_offload_act_setup, .size = sizeof(struct tcf_vlan), }; static __net_init int vlan_init_net(struct net *net) { - struct tc_action_net *tn = net_generic(net, vlan_net_id); + struct tc_action_net *tn = net_generic(net, act_vlan_ops.net_id); return tc_action_net_init(net, tn, &act_vlan_ops); } static void __net_exit vlan_exit_net(struct list_head *net_list) { - tc_action_net_exit(net_list, vlan_net_id); + tc_action_net_exit(net_list, act_vlan_ops.net_id); } static struct pernet_operations vlan_net_ops = { .init = vlan_init_net, .exit_batch = vlan_exit_net, - .id = &vlan_net_id, + .id = &act_vlan_ops.net_id, .size = sizeof(struct tc_action_net), }; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 5d0d57dc5cb7..50566db45949 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -2134,6 +2134,7 @@ replay: } if (chain->tmplt_ops && chain->tmplt_ops != tp->ops) { + tfilter_put(tp, fh); NL_SET_ERR_MSG(extack, "Chain template is set to a different filter kind"); err = -EINVAL; goto errout; diff --git a/net/sched/cls_basic.c b/net/sched/cls_basic.c index 8158fc9ee1ab..d229ce99e554 100644 --- a/net/sched/cls_basic.c +++ b/net/sched/cls_basic.c @@ -251,15 +251,8 @@ static void basic_walk(struct tcf_proto *tp, struct tcf_walker *arg, struct basic_filter *f; list_for_each_entry(f, &head->flist, link) { - if (arg->count < arg->skip) - goto skip; - - if (arg->fn(tp, f, arg) < 0) { - arg->stop = 1; + if (!tc_cls_stats_dump(tp, arg, f)) break; - } -skip: - arg->count++; } } @@ -268,12 +261,7 @@ static void basic_bind_class(void *fh, u32 classid, unsigned long cl, void *q, { struct basic_filter *f = fh; - if (f && f->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &f->res, base); - else - __tcf_unbind_filter(q, &f->res); - } + tc_cls_bind_class(classid, cl, q, &f->res, base); } static int basic_dump(struct net *net, struct tcf_proto *tp, void *fh, diff --git a/net/sched/cls_bpf.c b/net/sched/cls_bpf.c index c85b85a192bf..bc317b3eac12 100644 --- a/net/sched/cls_bpf.c +++ b/net/sched/cls_bpf.c @@ -635,12 +635,7 @@ static void cls_bpf_bind_class(void *fh, u32 classid, unsigned long cl, { struct cls_bpf_prog *prog = fh; - if (prog && prog->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &prog->res, base); - else - __tcf_unbind_filter(q, &prog->res); - } + tc_cls_bind_class(classid, cl, q, &prog->res, base); } static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg, @@ -650,14 +645,8 @@ static void cls_bpf_walk(struct tcf_proto *tp, struct tcf_walker *arg, struct cls_bpf_prog *prog; list_for_each_entry(prog, &head->plist, link) { - if (arg->count < arg->skip) - goto skip; - if (arg->fn(tp, prog, arg) < 0) { - arg->stop = 1; + if (!tc_cls_stats_dump(tp, arg, prog)) break; - } -skip: - arg->count++; } } diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 972303aa8edd..014cd3de7b5d 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -683,14 +683,8 @@ static void flow_walk(struct tcf_proto *tp, struct tcf_walker *arg, struct flow_filter *f; list_for_each_entry(f, &head->filters, list) { - if (arg->count < arg->skip) - goto skip; - if (arg->fn(tp, f, arg) < 0) { - arg->stop = 1; + if (!tc_cls_stats_dump(tp, arg, f)) break; - } -skip: - arg->count++; } } diff --git a/net/sched/cls_flower.c 
b/net/sched/cls_flower.c index 041d63ff809a..25bc57ee6ea1 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -69,6 +69,7 @@ struct fl_flow_key { struct flow_dissector_key_hash hash; struct flow_dissector_key_num_of_vlans num_of_vlans; struct flow_dissector_key_pppoe pppoe; + struct flow_dissector_key_l2tpv3 l2tpv3; } __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */ struct fl_flow_mask_range { @@ -712,6 +713,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = { [TCA_FLOWER_KEY_NUM_OF_VLANS] = { .type = NLA_U8 }, [TCA_FLOWER_KEY_PPPOE_SID] = { .type = NLA_U16 }, [TCA_FLOWER_KEY_PPP_PROTO] = { .type = NLA_U16 }, + [TCA_FLOWER_KEY_L2TPV3_SID] = { .type = NLA_U32 }, }; @@ -1790,6 +1792,11 @@ static int fl_set_key(struct net *net, struct nlattr **tb, fl_set_key_val(tb, key->arp.tha, TCA_FLOWER_KEY_ARP_THA, mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK, sizeof(key->arp.tha)); + } else if (key->basic.ip_proto == IPPROTO_L2TP) { + fl_set_key_val(tb, &key->l2tpv3.session_id, + TCA_FLOWER_KEY_L2TPV3_SID, + &mask->l2tpv3.session_id, TCA_FLOWER_UNSPEC, + sizeof(key->l2tpv3.session_id)); } if (key->basic.ip_proto == IPPROTO_TCP || @@ -1970,6 +1977,8 @@ static void fl_init_dissector(struct flow_dissector *dissector, FLOW_DISSECTOR_KEY_NUM_OF_VLANS, num_of_vlans); FL_KEY_SET_IF_MASKED(mask, keys, cnt, FLOW_DISSECTOR_KEY_PPPOE, pppoe); + FL_KEY_SET_IF_MASKED(mask, keys, cnt, + FLOW_DISSECTOR_KEY_L2TPV3, l2tpv3); skb_flow_dissector_init(dissector, keys, cnt); } @@ -3196,6 +3205,13 @@ static int fl_dump_key(struct sk_buff *skb, struct net *net, mask->arp.tha, TCA_FLOWER_KEY_ARP_THA_MASK, sizeof(key->arp.tha)))) goto nla_put_failure; + else if (key->basic.ip_proto == IPPROTO_L2TP && + fl_dump_key_val(skb, &key->l2tpv3.session_id, + TCA_FLOWER_KEY_L2TPV3_SID, + &mask->l2tpv3.session_id, + TCA_FLOWER_UNSPEC, + sizeof(key->l2tpv3.session_id))) + goto nla_put_failure; if ((key->basic.ip_proto == IPPROTO_TCP || key->basic.ip_proto == IPPROTO_UDP || @@ -3389,12 +3405,7 @@ static void fl_bind_class(void *fh, u32 classid, unsigned long cl, void *q, { struct cls_fl_filter *f = fh; - if (f && f->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &f->res, base); - else - __tcf_unbind_filter(q, &f->res); - } + tc_cls_bind_class(classid, cl, q, &f->res, base); } static bool fl_delete_empty(struct tcf_proto *tp) diff --git a/net/sched/cls_fw.c b/net/sched/cls_fw.c index 8654b0ce997c..a32351da968c 100644 --- a/net/sched/cls_fw.c +++ b/net/sched/cls_fw.c @@ -358,15 +358,8 @@ static void fw_walk(struct tcf_proto *tp, struct tcf_walker *arg, for (f = rtnl_dereference(head->ht[h]); f; f = rtnl_dereference(f->next)) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(tp, f, arg) < 0) { - arg->stop = 1; + if (!tc_cls_stats_dump(tp, arg, f)) return; - } - arg->count++; } } } @@ -423,12 +416,7 @@ static void fw_bind_class(void *fh, u32 classid, unsigned long cl, void *q, { struct fw_filter *f = fh; - if (f && f->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &f->res, base); - else - __tcf_unbind_filter(q, &f->res); - } + tc_cls_bind_class(classid, cl, q, &f->res, base); } static struct tcf_proto_ops cls_fw_ops __read_mostly = { diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c index 06cf22adbab7..39a5d9c170de 100644 --- a/net/sched/cls_matchall.c +++ b/net/sched/cls_matchall.c @@ -313,10 +313,7 @@ static int mall_reoffload(struct tcf_proto *tp, bool add, flow_setup_cb_t *cb, 
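/*
 * The cls_*.c hunks collapse two pieces of boilerplate that every
 * classifier carried: the open-coded skip/count/stop bookkeeping in
 * ->walk() becomes tc_cls_stats_dump(), and the classid-match plus
 * (un)bind sequence in ->bind_class() becomes tc_cls_bind_class().
 * Note that the helpers drop the "if (f)" NULL test the call sites
 * used to make; their caller in sch_api.c, tcf_node_bind(), now checks
 * the node pointer itself (see the "if (n && tp->ops->bind_class)"
 * hunk below).  cls_flower additionally gains an L2TPv3 match: for
 * IPPROTO_L2TP, TCA_FLOWER_KEY_L2TPV3_SID selects on the 32-bit
 * session ID, dissected via FLOW_DISSECTOR_KEY_L2TPV3.  Inferred from
 * the call sites, the two helpers are presumably along these lines
 * (a sketch, not copied from this patch):
 */
static inline bool tc_cls_stats_dump(struct tcf_proto *tp,
                                     struct tcf_walker *arg, void *filter)
{
        if (arg->count >= arg->skip && arg->fn(tp, filter, arg) < 0) {
                arg->stop = 1;
                return false;   /* tell the caller to stop iterating */
        }

        arg->count++;
        return true;
}

static inline void tc_cls_bind_class(u32 classid, unsigned long cl,
                                     void *q, struct tcf_result *res,
                                     unsigned long base)
{
        if (res->classid == classid) {
                if (cl)
                        __tcf_bind_filter(q, res, base);
                else
                        __tcf_unbind_filter(q, res);
        }
}
/*
 * With a matching iproute2, the new flower key would be exercised by
 * something like "tc filter add dev eth0 ingress protocol ip flower
 * ip_proto 115 l2tpv3_sid 1234 action drop" (115 is IPPROTO_L2TP; the
 * l2tpv3_sid keyword is assumed from the netlink attribute name).
 */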
tc_cleanup_offload_action(&cls_mall.rule->action); kfree(cls_mall.rule); - if (err) - return err; - - return 0; + return err; } static void mall_stats_hw_filter(struct tcf_proto *tp, @@ -397,12 +394,7 @@ static void mall_bind_class(void *fh, u32 classid, unsigned long cl, void *q, { struct cls_mall_head *head = fh; - if (head && head->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &head->res, base); - else - __tcf_unbind_filter(q, &head->res); - } + tc_cls_bind_class(classid, cl, q, &head->res, base); } static struct tcf_proto_ops cls_mall_ops __read_mostly = { diff --git a/net/sched/cls_route.c b/net/sched/cls_route.c index 29adac7812fe..9e43b929d4ca 100644 --- a/net/sched/cls_route.c +++ b/net/sched/cls_route.c @@ -587,15 +587,8 @@ static void route4_walk(struct tcf_proto *tp, struct tcf_walker *arg, for (f = rtnl_dereference(b->ht[h1]); f; f = rtnl_dereference(f->next)) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(tp, f, arg) < 0) { - arg->stop = 1; + if (!tc_cls_stats_dump(tp, arg, f)) return; - } - arg->count++; } } } @@ -656,12 +649,7 @@ static void route4_bind_class(void *fh, u32 classid, unsigned long cl, void *q, { struct route4_filter *f = fh; - if (f && f->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &f->res, base); - else - __tcf_unbind_filter(q, &f->res); - } + tc_cls_bind_class(classid, cl, q, &f->res, base); } static struct tcf_proto_ops cls_route4_ops __read_mostly = { diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h index 5cd9d6b143c4..b00a7dbd0587 100644 --- a/net/sched/cls_rsvp.h +++ b/net/sched/cls_rsvp.h @@ -671,15 +671,8 @@ static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg, for (f = rtnl_dereference(s->ht[h1]); f; f = rtnl_dereference(f->next)) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(tp, f, arg) < 0) { - arg->stop = 1; + if (!tc_cls_stats_dump(tp, arg, f)) return; - } - arg->count++; } } } @@ -740,12 +733,7 @@ static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q, { struct rsvp_filter *f = fh; - if (f && f->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &f->res, base); - else - __tcf_unbind_filter(q, &f->res); - } + tc_cls_bind_class(classid, cl, q, &f->res, base); } static struct tcf_proto_ops RSVP_OPS __read_mostly = { diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 742c7d49a958..1c9eeb98d826 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -566,13 +566,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker, for (i = 0; i < p->hash; i++) { if (!p->perfect[i].res.class) continue; - if (walker->count >= walker->skip) { - if (walker->fn(tp, p->perfect + i, walker) < 0) { - walker->stop = 1; - return; - } - } - walker->count++; + if (!tc_cls_stats_dump(tp, walker, p->perfect + i)) + return; } } if (!p->h) @@ -580,13 +575,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker, for (i = 0; i < p->hash; i++) { for (f = rtnl_dereference(p->h[i]); f; f = next) { next = rtnl_dereference(f->next); - if (walker->count >= walker->skip) { - if (walker->fn(tp, &f->result, walker) < 0) { - walker->stop = 1; - return; - } - } - walker->count++; + if (!tc_cls_stats_dump(tp, walker, &f->result)) + return; } } } @@ -701,12 +691,7 @@ static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl, { struct tcindex_filter_result *r = fh; - if (r && r->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &r->res, base); - else - 
__tcf_unbind_filter(q, &r->res); - } + tc_cls_bind_class(classid, cl, q, &r->res, base); } static struct tcf_proto_ops cls_tcindex_ops __read_mostly = { diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 4d27300c287c..34d25f7a0687 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -1040,7 +1040,11 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, } #endif - memcpy(&n->sel, s, sel_size); + unsafe_memcpy(&n->sel, s, sel_size, + /* A composite flex-array structure destination, + * which was correctly sized with struct_size(), + * bounds-checked against nla_len(), and allocated + * above. */); RCU_INIT_POINTER(n->ht_up, ht); n->handle = handle; n->fshift = s->hmask ? ffs(ntohl(s->hmask)) - 1 : 0; @@ -1125,26 +1129,16 @@ static void u32_walk(struct tcf_proto *tp, struct tcf_walker *arg, ht = rtnl_dereference(ht->next)) { if (ht->prio != tp->prio) continue; - if (arg->count >= arg->skip) { - if (arg->fn(tp, ht, arg) < 0) { - arg->stop = 1; - return; - } - } - arg->count++; + + if (!tc_cls_stats_dump(tp, arg, ht)) + return; + for (h = 0; h <= ht->divisor; h++) { for (n = rtnl_dereference(ht->ht[h]); n; n = rtnl_dereference(n->next)) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(tp, n, arg) < 0) { - arg->stop = 1; + if (!tc_cls_stats_dump(tp, arg, n)) return; - } - arg->count++; } } } @@ -1256,12 +1250,7 @@ static void u32_bind_class(void *fh, u32 classid, unsigned long cl, void *q, { struct tc_u_knode *n = fh; - if (n && n->res.classid == classid) { - if (cl) - __tcf_bind_filter(q, &n->res, base); - else - __tcf_unbind_filter(q, &n->res); - } + tc_cls_bind_class(classid, cl, q, &n->res, base); } static int u32_dump(struct net *net, struct tcf_proto *tp, void *fh, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index db1569fac57c..c98af0ada706 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -868,6 +868,23 @@ void qdisc_offload_graft_helper(struct net_device *dev, struct Qdisc *sch, } EXPORT_SYMBOL(qdisc_offload_graft_helper); +void qdisc_offload_query_caps(struct net_device *dev, + enum tc_setup_type type, + void *caps, size_t caps_len) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct tc_query_caps_base base = { + .type = type, + .caps = caps, + }; + + memset(caps, 0, caps_len); + + if (ops->ndo_setup_tc) + ops->ndo_setup_tc(dev, TC_QUERY_CAPS, &base); +} +EXPORT_SYMBOL(qdisc_offload_query_caps); + static void qdisc_offload_graft_root(struct net_device *dev, struct Qdisc *new, struct Qdisc *old, struct netlink_ext_ack *extack) @@ -1898,7 +1915,7 @@ static int tcf_node_bind(struct tcf_proto *tp, void *n, struct tcf_walker *arg) { struct tcf_bind_args *a = (void *)arg; - if (tp->ops->bind_class) { + if (n && tp->ops->bind_class) { struct Qdisc *q = tcf_block_q(tp->chain->block); sch_tree_lock(q); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 816fd0d7ba38..f52255fea652 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -354,12 +354,8 @@ static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker) if (walker->stop) return; list_for_each_entry(flow, &p->flows, list) { - if (walker->count >= walker->skip && - walker->fn(sch, (unsigned long)flow, walker) < 0) { - walker->stop = 1; + if (!tc_qdisc_stats_dump(sch, (unsigned long)flow, walker)) break; - } - walker->count++; } } diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c index 36acc95d611e..55c6879d2c7e 100644 --- a/net/sched/sch_cake.c +++ b/net/sched/sch_cake.c @@ -3061,16 +3061,13 @@ static void 
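/*
 * Two details in the hunks above are worth calling out:
 *
 * - cls_u32 replaces memcpy(&n->sel, s, sel_size) with unsafe_memcpy()
 *   plus a justification comment.  Under FORTIFY_SOURCE, a memcpy()
 *   into a composite structure that ends in a flexible array cannot be
 *   bounds-checked at compile time, so the annotation records why the
 *   copy is known safe: sel_size was computed with struct_size(),
 *   checked against nla_len(), and the destination allocated to hold
 *   it.  The shape of that calculation, reconstructed from context
 *   rather than quoted from this diff, is roughly:
 *
 *      sel_size = struct_size(s, keys, s->nkeys);
 *      n = kzalloc(offsetof(typeof(*n), sel) + sel_size, GFP_KERNEL);
 *
 * - sch_api gains qdisc_offload_query_caps(), which zeroes the
 *   caller's caps structure and, if the device implements
 *   ->ndo_setup_tc, invokes it with TC_QUERY_CAPS so the driver can
 *   fill in what it supports; a driver that does not recognize the
 *   query simply leaves the zeroed (all-off) defaults in place.
 */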
cake_walk(struct Qdisc *sch, struct qdisc_walker *arg) struct cake_tin_data *b = &q->tins[q->tin_order[i]]; for (j = 0; j < CAKE_QUEUES; j++) { - if (list_empty(&b->flows[j].flowchain) || - arg->count < arg->skip) { + if (list_empty(&b->flows[j].flowchain)) { arg->count++; continue; } - if (arg->fn(sch, i * CAKE_QUEUES + j + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, i * CAKE_QUEUES + j + 1, + arg)) break; - } - arg->count++; } } } diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index ba99ce05cd52..6568e17c4c63 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1676,15 +1676,8 @@ static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg) for (h = 0; h < q->clhash.hashsize; h++) { hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, (unsigned long)cl, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) return; - } - arg->count++; } } } diff --git a/net/sched/sch_cbs.c b/net/sched/sch_cbs.c index 459cc240eda9..cac870eb7897 100644 --- a/net/sched/sch_cbs.c +++ b/net/sched/sch_cbs.c @@ -520,13 +520,7 @@ static unsigned long cbs_find(struct Qdisc *sch, u32 classid) static void cbs_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { - if (walker->count >= walker->skip) { - if (walker->fn(sch, 1, walker) < 0) { - walker->stop = 1; - return; - } - } - walker->count++; + tc_qdisc_stats_dump(sch, 1, walker); } } diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index 4e5b1cf11b85..e35a4e90f4e6 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -284,15 +284,8 @@ static void drr_walk(struct Qdisc *sch, struct qdisc_walker *arg) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, (unsigned long)cl, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) return; - } - arg->count++; } } } diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 7da6dc38a382..401ffaf87d62 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -176,16 +176,12 @@ static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker) return; for (i = 0; i < p->indices; i++) { - if (p->mv[i].mask == 0xff && !p->mv[i].value) - goto ignore; - if (walker->count >= walker->skip) { - if (walker->fn(sch, i + 1, walker) < 0) { - walker->stop = 1; - break; - } + if (p->mv[i].mask == 0xff && !p->mv[i].value) { + walker->count++; + continue; } -ignore: - walker->count++; + if (!tc_qdisc_stats_dump(sch, i + 1, walker)) + break; } } diff --git a/net/sched/sch_ets.c b/net/sched/sch_ets.c index a3aea22ef09d..b10efeaf0629 100644 --- a/net/sched/sch_ets.c +++ b/net/sched/sch_ets.c @@ -341,15 +341,8 @@ static void ets_qdisc_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < q->nbands; i++) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, i + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, i + 1, arg)) break; - } - arg->count++; } } diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index eeea8c6d54e2..99d318b60568 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -673,16 +673,12 @@ static void fq_codel_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < q->flows_cnt; i++) { - if (list_empty(&q->flows[i].flowchain) || - 
arg->count < arg->skip) { + if (list_empty(&q->flows[i].flowchain)) { arg->count++; continue; } - if (arg->fn(sch, i + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, i + 1, arg)) break; - } - arg->count++; } } diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index c8bef923c79c..70b0c5873d32 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1349,15 +1349,8 @@ hfsc_walk(struct Qdisc *sch, struct qdisc_walker *arg) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, (unsigned long)cl, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) return; - } - arg->count++; } } } diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 78d0c7549c74..e5b4bbf3ce3d 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -2119,15 +2119,8 @@ static void htb_walk(struct Qdisc *sch, struct qdisc_walker *arg) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, (unsigned long)cl, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) return; - } - arg->count++; } } } diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index 83d2e54bf303..d0bc660d7401 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -247,11 +247,8 @@ static void mq_walk(struct Qdisc *sch, struct qdisc_walker *arg) arg->count = arg->skip; for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) { - if (arg->fn(sch, ntx + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, ntx + 1, arg)) break; - } - arg->count++; } } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index b29f3453c6ea..4c68abaa289b 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -558,11 +558,8 @@ static void mqprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) /* Walk hierarchy with a virtual class per tc */ arg->count = arg->skip; for (ntx = arg->skip; ntx < netdev_get_num_tc(dev); ntx++) { - if (arg->fn(sch, ntx + TC_H_MIN_PRIORITY, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, ntx + TC_H_MIN_PRIORITY, arg)) return; - } - arg->count++; } /* Pad the values and skip over unused traffic classes */ diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index f28050c7f12d..75c9c860182b 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -353,15 +353,8 @@ static void multiq_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (band = 0; band < q->bands; band++) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, band + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, band + 1, arg)) break; - } - arg->count++; } } diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index b70ac04110dd..18f4273a835b 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -1251,12 +1251,8 @@ static unsigned long netem_find(struct Qdisc *sch, u32 classid) static void netem_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { - if (walker->count >= walker->skip) - if (walker->fn(sch, 1, walker) < 0) { - walker->stop = 1; - return; - } - walker->count++; + if (!tc_qdisc_stats_dump(sch, 1, walker)) + return; } } diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 298794c04836..fdc5ef52c3ee 100644 --- a/net/sched/sch_prio.c +++ 
b/net/sched/sch_prio.c @@ -376,15 +376,8 @@ static void prio_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (prio = 0; prio < q->bands; prio++) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, prio + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, prio + 1, arg)) break; - } - arg->count++; } } diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 13246a9dc5c1..cf5ebe43b3b4 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -659,15 +659,8 @@ static void qfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) for (i = 0; i < q->clhash.hashsize; i++) { hlist_for_each_entry(cl, &q->clhash.hash[i], common.hnode) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, (unsigned long)cl, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg)) return; - } - arg->count++; } } } diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 4952406f70b9..a5a401f93c1a 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -516,12 +516,7 @@ static unsigned long red_find(struct Qdisc *sch, u32 classid) static void red_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { - if (walker->count >= walker->skip) - if (walker->fn(sch, 1, walker) < 0) { - walker->stop = 1; - return; - } - walker->count++; + tc_qdisc_stats_dump(sch, 1, walker); } } diff --git a/net/sched/sch_sfb.c b/net/sched/sch_sfb.c index 31717fa45a4f..e2389fa3cff8 100644 --- a/net/sched/sch_sfb.c +++ b/net/sched/sch_sfb.c @@ -135,15 +135,15 @@ static void increment_one_qlen(u32 sfbhash, u32 slot, struct sfb_sched_data *q) } } -static void increment_qlen(const struct sk_buff *skb, struct sfb_sched_data *q) +static void increment_qlen(const struct sfb_skb_cb *cb, struct sfb_sched_data *q) { u32 sfbhash; - sfbhash = sfb_hash(skb, 0); + sfbhash = cb->hashes[0]; if (sfbhash) increment_one_qlen(sfbhash, 0, q); - sfbhash = sfb_hash(skb, 1); + sfbhash = cb->hashes[1]; if (sfbhash) increment_one_qlen(sfbhash, 1, q); } @@ -281,8 +281,10 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, { struct sfb_sched_data *q = qdisc_priv(sch); + unsigned int len = qdisc_pkt_len(skb); struct Qdisc *child = q->qdisc; struct tcf_proto *fl; + struct sfb_skb_cb cb; int i; u32 p_min = ~0; u32 minqlen = ~0; @@ -399,11 +401,12 @@ static int sfb_enqueue(struct sk_buff *skb, struct Qdisc *sch, } enqueue: + memcpy(&cb, sfb_skb_cb(skb), sizeof(cb)); ret = qdisc_enqueue(skb, child, to_free); if (likely(ret == NET_XMIT_SUCCESS)) { - qdisc_qstats_backlog_inc(sch, skb); + sch->qstats.backlog += len; sch->q.qlen++; - increment_qlen(skb, q); + increment_qlen(&cb, q); } else if (net_xmit_drop_count(ret)) { q->stats.childdrop++; qdisc_qstats_drop(sch); @@ -656,12 +659,7 @@ static int sfb_delete(struct Qdisc *sch, unsigned long cl, static void sfb_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { - if (walker->count >= walker->skip) - if (walker->fn(sch, 1, walker) < 0) { - walker->stop = 1; - return; - } - walker->count++; + tc_qdisc_stats_dump(sch, 1, walker); } } diff --git a/net/sched/sch_sfq.c b/net/sched/sch_sfq.c index f8e569f79f13..abd436307d6a 100644 --- a/net/sched/sch_sfq.c +++ b/net/sched/sch_sfq.c @@ -888,16 +888,12 @@ static void sfq_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < q->divisor; i++) { - if (q->ht[i] == SFQ_EMPTY_SLOT || - arg->count < arg->skip) { + if (q->ht[i] == SFQ_EMPTY_SLOT) { arg->count++; continue; } - if 
(arg->fn(sch, i + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, i + 1, arg)) break; - } - arg->count++; } } diff --git a/net/sched/sch_skbprio.c b/net/sched/sch_skbprio.c index df72fb83d9c7..5df2dacb7b1a 100644 --- a/net/sched/sch_skbprio.c +++ b/net/sched/sch_skbprio.c @@ -265,15 +265,8 @@ static void skbprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) return; for (i = 0; i < SKBPRIO_MAX_PRIORITY; i++) { - if (arg->count < arg->skip) { - arg->count++; - continue; - } - if (arg->fn(sch, i + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, i + 1, arg)) break; - } - arg->count++; } } diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index db88a692ef81..435d866fcfa0 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -27,7 +27,6 @@ #include <net/tcp.h> static LIST_HEAD(taprio_list); -static DEFINE_SPINLOCK(taprio_list_lock); #define TAPRIO_ALL_GATES_OPEN -1 @@ -67,6 +66,7 @@ struct taprio_sched { u32 flags; enum tk_offsets tk_offset; int clockid; + bool offloaded; atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+ * speeds it's sub-nanoseconds per byte */ @@ -78,8 +78,8 @@ struct taprio_sched { struct sched_gate_list __rcu *admin_sched; struct hrtimer advance_timer; struct list_head taprio_list; - struct sk_buff *(*dequeue)(struct Qdisc *sch); - struct sk_buff *(*peek)(struct Qdisc *sch); + u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */ + u32 max_sdu[TC_MAX_QUEUE]; /* for dump and offloading */ u32 txtime_delay; }; @@ -417,6 +417,9 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch, struct Qdisc *child, struct sk_buff **to_free) { struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + int prio = skb->priority; + u8 tc; /* sk_flags are only safe to use on full sockets. */ if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) { @@ -428,12 +431,20 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch, return qdisc_drop(skb, sch, to_free); } + /* Devices with full offload are expected to honor this in hardware */ + tc = netdev_get_prio_tc_map(dev, prio); + if (skb->len > q->max_frm_len[tc]) + return qdisc_drop(skb, sch, to_free); + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return qdisc_enqueue(skb, child, to_free); } +/* Will not be called in the full offload case, since the TX queues are + * attached to the Qdisc created using qdisc_create_dflt() + */ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct sk_buff **to_free) { @@ -441,11 +452,6 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, struct Qdisc *child; int queue; - if (unlikely(FULL_OFFLOAD_IS_ENABLED(q->flags))) { - WARN_ONCE(1, "Trying to enqueue skb into the root of a taprio qdisc configured with full offload\n"); - return qdisc_drop(skb, sch, to_free); - } - queue = skb_get_queue_mapping(skb); child = q->qdiscs[queue]; @@ -454,10 +460,10 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, /* Large packets might not be transmitted when the transmission duration * exceeds any configured interval. Therefore, segment the skb into - * smaller chunks. Skip it for the full offload case, as the driver - * and/or the hardware is expected to handle this. + * smaller chunks. Drivers with full offload are expected to handle + * this in hardware. 
*/ - if (skb_is_gso(skb) && !FULL_OFFLOAD_IS_ENABLED(q->flags)) { + if (skb_is_gso(skb)) { unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb); netdev_features_t features = netif_skb_features(skb); struct sk_buff *segs, *nskb; @@ -491,7 +497,10 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch, return taprio_enqueue_one(skb, sch, child, to_free); } -static struct sk_buff *taprio_peek_soft(struct Qdisc *sch) +/* Will not be called in the full offload case, since the TX queues are + * attached to the Qdisc created using qdisc_create_dflt() + */ +static struct sk_buff *taprio_peek(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); @@ -535,20 +544,6 @@ static struct sk_buff *taprio_peek_soft(struct Qdisc *sch) return NULL; } -static struct sk_buff *taprio_peek_offload(struct Qdisc *sch) -{ - WARN_ONCE(1, "Trying to peek into the root of a taprio qdisc configured with full offload\n"); - - return NULL; -} - -static struct sk_buff *taprio_peek(struct Qdisc *sch) -{ - struct taprio_sched *q = qdisc_priv(sch); - - return q->peek(sch); -} - static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) { atomic_set(&entry->budget, @@ -556,7 +551,10 @@ static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry) atomic64_read(&q->picos_per_byte))); } -static struct sk_buff *taprio_dequeue_soft(struct Qdisc *sch) +/* Will not be called in the full offload case, since the TX queues are + * attached to the Qdisc created using qdisc_create_dflt() + */ +static struct sk_buff *taprio_dequeue(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); @@ -644,20 +642,6 @@ done: return skb; } -static struct sk_buff *taprio_dequeue_offload(struct Qdisc *sch) -{ - WARN_ONCE(1, "Trying to dequeue from the root of a taprio qdisc configured with full offload\n"); - - return NULL; -} - -static struct sk_buff *taprio_dequeue(struct Qdisc *sch) -{ - struct taprio_sched *q = qdisc_priv(sch); - - return q->dequeue(sch); -} - static bool should_restart_cycle(const struct sched_gate_list *oper, const struct sched_entry *entry) { @@ -780,6 +764,11 @@ static const struct nla_policy entry_policy[TCA_TAPRIO_SCHED_ENTRY_MAX + 1] = { [TCA_TAPRIO_SCHED_ENTRY_INTERVAL] = { .type = NLA_U32 }, }; +static const struct nla_policy taprio_tc_policy[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { + [TCA_TAPRIO_TC_ENTRY_INDEX] = { .type = NLA_U32 }, + [TCA_TAPRIO_TC_ENTRY_MAX_SDU] = { .type = NLA_U32 }, +}; + static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_PRIOMAP] = { .len = sizeof(struct tc_mqprio_qopt) @@ -792,6 +781,7 @@ static const struct nla_policy taprio_policy[TCA_TAPRIO_ATTR_MAX + 1] = { [TCA_TAPRIO_ATTR_SCHED_CYCLE_TIME_EXTENSION] = { .type = NLA_S64 }, [TCA_TAPRIO_ATTR_FLAGS] = { .type = NLA_U32 }, [TCA_TAPRIO_ATTR_TXTIME_DELAY] = { .type = NLA_U32 }, + [TCA_TAPRIO_ATTR_TC_ENTRY] = { .type = NLA_NESTED }, }; static int fill_sched_entry(struct taprio_sched *q, struct nlattr **tb, @@ -1098,27 +1088,20 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); - struct net_device *qdev; struct taprio_sched *q; - bool found = false; ASSERT_RTNL(); if (event != NETDEV_UP && event != NETDEV_CHANGE) return NOTIFY_DONE; - spin_lock(&taprio_list_lock); list_for_each_entry(q, &taprio_list, taprio_list) { - qdev = qdisc_dev(q->root); - if (qdev == dev) { 
- found = true; - break; - } - } - spin_unlock(&taprio_list_lock); + if (dev != qdisc_dev(q->root)) + continue; - if (found) taprio_set_picos_per_byte(dev, q); + break; + } return NOTIFY_DONE; } @@ -1193,16 +1176,10 @@ static void taprio_offload_config_changed(struct taprio_sched *q) { struct sched_gate_list *oper, *admin; - spin_lock(&q->current_entry_lock); - - oper = rcu_dereference_protected(q->oper_sched, - lockdep_is_held(&q->current_entry_lock)); - admin = rcu_dereference_protected(q->admin_sched, - lockdep_is_held(&q->current_entry_lock)); + oper = rtnl_dereference(q->oper_sched); + admin = rtnl_dereference(q->admin_sched); switch_schedules(q, &admin, &oper); - - spin_unlock(&q->current_entry_lock); } static u32 tc_map_to_queue_mask(struct net_device *dev, u32 tc_mask) @@ -1255,7 +1232,8 @@ static int taprio_enable_offload(struct net_device *dev, { const struct net_device_ops *ops = dev->netdev_ops; struct tc_taprio_qopt_offload *offload; - int err = 0; + struct tc_taprio_caps caps; + int tc, err = 0; if (!ops->ndo_setup_tc) { NL_SET_ERR_MSG(extack, @@ -1263,6 +1241,19 @@ static int taprio_enable_offload(struct net_device *dev, return -EOPNOTSUPP; } + qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO, + &caps, sizeof(caps)); + + if (!caps.supports_queue_max_sdu) { + for (tc = 0; tc < TC_MAX_QUEUE; tc++) { + if (q->max_sdu[tc]) { + NL_SET_ERR_MSG_MOD(extack, + "Device does not handle queueMaxSDU"); + return -EOPNOTSUPP; + } + } + } + offload = taprio_offload_alloc(sched->num_entries); if (!offload) { NL_SET_ERR_MSG(extack, @@ -1272,6 +1263,9 @@ static int taprio_enable_offload(struct net_device *dev, offload->enable = 1; taprio_sched_to_offload(dev, sched, offload); + for (tc = 0; tc < TC_MAX_QUEUE; tc++) + offload->max_sdu[tc] = q->max_sdu[tc]; + err = ops->ndo_setup_tc(dev, TC_SETUP_QDISC_TAPRIO, offload); if (err < 0) { NL_SET_ERR_MSG(extack, @@ -1279,6 +1273,8 @@ static int taprio_enable_offload(struct net_device *dev, goto done; } + q->offloaded = true; + done: taprio_offload_free(offload); @@ -1293,12 +1289,9 @@ static int taprio_disable_offload(struct net_device *dev, struct tc_taprio_qopt_offload *offload; int err; - if (!FULL_OFFLOAD_IS_ENABLED(q->flags)) + if (!q->offloaded) return 0; - if (!ops->ndo_setup_tc) - return -EOPNOTSUPP; - offload = taprio_offload_alloc(0); if (!offload) { NL_SET_ERR_MSG(extack, @@ -1314,6 +1307,8 @@ static int taprio_disable_offload(struct net_device *dev, goto out; } + q->offloaded = false; + out: taprio_offload_free(offload); @@ -1405,6 +1400,89 @@ out: return err; } +static int taprio_parse_tc_entry(struct Qdisc *sch, + struct nlattr *opt, + u32 max_sdu[TC_QOPT_MAX_QUEUE], + unsigned long *seen_tcs, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[TCA_TAPRIO_TC_ENTRY_MAX + 1] = { }; + struct net_device *dev = qdisc_dev(sch); + u32 val = 0; + int err, tc; + + err = nla_parse_nested(tb, TCA_TAPRIO_TC_ENTRY_MAX, opt, + taprio_tc_policy, extack); + if (err < 0) + return err; + + if (!tb[TCA_TAPRIO_TC_ENTRY_INDEX]) { + NL_SET_ERR_MSG_MOD(extack, "TC entry index missing"); + return -EINVAL; + } + + tc = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_INDEX]); + if (tc >= TC_QOPT_MAX_QUEUE) { + NL_SET_ERR_MSG_MOD(extack, "TC entry index out of range"); + return -ERANGE; + } + + if (*seen_tcs & BIT(tc)) { + NL_SET_ERR_MSG_MOD(extack, "Duplicate TC entry"); + return -EINVAL; + } + + *seen_tcs |= BIT(tc); + + if (tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]) + val = nla_get_u32(tb[TCA_TAPRIO_TC_ENTRY_MAX_SDU]); + + if (val > dev->max_mtu) { + 
NL_SET_ERR_MSG_MOD(extack, "TC max SDU exceeds device max MTU"); + return -ERANGE; + } + + max_sdu[tc] = val; + + return 0; +} + +static int taprio_parse_tc_entries(struct Qdisc *sch, + struct nlattr *opt, + struct netlink_ext_ack *extack) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + u32 max_sdu[TC_QOPT_MAX_QUEUE]; + unsigned long seen_tcs = 0; + struct nlattr *n; + int tc, rem; + int err = 0; + + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) + max_sdu[tc] = q->max_sdu[tc]; + + nla_for_each_nested(n, opt, rem) { + if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY) + continue; + + err = taprio_parse_tc_entry(sch, n, max_sdu, &seen_tcs, extack); + if (err) + goto out; + } + + for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) { + q->max_sdu[tc] = max_sdu[tc]; + if (max_sdu[tc]) + q->max_frm_len[tc] = max_sdu[tc] + dev->hard_header_len; + else + q->max_frm_len[tc] = U32_MAX; /* never oversized */ + } + +out: + return err; +} + static int taprio_mqprio_cmp(const struct net_device *dev, const struct tc_mqprio_qopt *mqprio) { @@ -1483,6 +1561,10 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, if (err < 0) return err; + err = taprio_parse_tc_entries(sch, opt, extack); + if (err) + return err; + new_admin = kzalloc(sizeof(*new_admin), GFP_KERNEL); if (!new_admin) { NL_SET_ERR_MSG(extack, "Not enough memory for a new schedule"); @@ -1490,10 +1572,8 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, } INIT_LIST_HEAD(&new_admin->entries); - rcu_read_lock(); - oper = rcu_dereference(q->oper_sched); - admin = rcu_dereference(q->admin_sched); - rcu_read_unlock(); + oper = rtnl_dereference(q->oper_sched); + admin = rtnl_dereference(q->admin_sched); /* no changes - no new mqprio settings */ if (!taprio_mqprio_cmp(dev, mqprio)) @@ -1563,17 +1643,6 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt, q->advance_timer.function = advance_sched; } - if (FULL_OFFLOAD_IS_ENABLED(q->flags)) { - q->dequeue = taprio_dequeue_offload; - q->peek = taprio_peek_offload; - } else { - /* Be sure to always keep the function pointers - * in a consistent state. - */ - q->dequeue = taprio_dequeue_soft; - q->peek = taprio_peek_soft; - } - err = taprio_get_start_time(sch, new_admin, &start); if (err < 0) { NL_SET_ERR_MSG(extack, "Internal error: failed get start time"); @@ -1642,11 +1711,10 @@ static void taprio_destroy(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); struct net_device *dev = qdisc_dev(sch); + struct sched_gate_list *oper, *admin; unsigned int i; - spin_lock(&taprio_list_lock); list_del(&q->taprio_list); - spin_unlock(&taprio_list_lock); /* Note that taprio_reset() might not be called if an error * happens in qdisc_create(), after taprio_init() has been called. 
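Aside: the taprio hunks above thread a new per-traffic-class queueMaxSDU limit from netlink parsing (taprio_parse_tc_entries) into the software enqueue path, where oversized packets are dropped before reaching the child qdisc. The standalone model below mirrors that bookkeeping; TC_QOPT_MAX_QUEUE and the max_frm_len naming follow the diff, while HARD_HEADER_LEN is a hypothetical stand-in for dev->hard_header_len (14 on Ethernet) and the prio-to-TC mapping is elided.

/* Editor's sketch, not part of the patch: models the queueMaxSDU
 * bookkeeping added to taprio. A max_sdu of 0 means "no limit",
 * matching the U32_MAX sentinel in taprio_parse_tc_entries().
 */
#include <stdint.h>
#include <stdio.h>

#define TC_QOPT_MAX_QUEUE 16
#define HARD_HEADER_LEN   14	/* assumed Ethernet dev->hard_header_len */

static uint32_t max_frm_len[TC_QOPT_MAX_QUEUE];

/* mirrors the tail of taprio_parse_tc_entries() */
static void set_max_sdu(int tc, uint32_t max_sdu)
{
	max_frm_len[tc] = max_sdu ? max_sdu + HARD_HEADER_LEN : UINT32_MAX;
}

/* mirrors the length check added to taprio_enqueue_one() */
static int would_drop(int tc, uint32_t skb_len)
{
	return skb_len > max_frm_len[tc];
}

int main(void)
{
	set_max_sdu(0, 200);	/* TC 0: 200-byte SDU limit */
	set_max_sdu(1, 0);	/* TC 1: unlimited */
	printf("%d %d %d\n",
	       would_drop(0, 214),	/* 0: SDU + header exactly fits */
	       would_drop(0, 215),	/* 1: one byte over, dropped */
	       would_drop(1, 100000));	/* 0: no limit configured */
	return 0;
}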
@@ -1665,11 +1733,14 @@ static void taprio_destroy(struct Qdisc *sch) netdev_reset_tc(dev); - if (q->oper_sched) - call_rcu(&q->oper_sched->rcu, taprio_free_sched_cb); + oper = rtnl_dereference(q->oper_sched); + admin = rtnl_dereference(q->admin_sched); - if (q->admin_sched) - call_rcu(&q->admin_sched->rcu, taprio_free_sched_cb); + if (oper) + call_rcu(&oper->rcu, taprio_free_sched_cb); + + if (admin) + call_rcu(&admin->rcu, taprio_free_sched_cb); } static int taprio_init(struct Qdisc *sch, struct nlattr *opt, @@ -1684,9 +1755,6 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, hrtimer_init(&q->advance_timer, CLOCK_TAI, HRTIMER_MODE_ABS); q->advance_timer.function = advance_sched; - q->dequeue = taprio_dequeue_soft; - q->peek = taprio_peek_soft; - q->root = sch; /* We only support static clockids. Use an invalid value as default @@ -1695,15 +1763,17 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt, q->clockid = -1; q->flags = TAPRIO_FLAGS_INVALID; - spin_lock(&taprio_list_lock); list_add(&q->taprio_list, &taprio_list); - spin_unlock(&taprio_list_lock); - if (sch->parent != TC_H_ROOT) + if (sch->parent != TC_H_ROOT) { + NL_SET_ERR_MSG_MOD(extack, "Can only be attached as root qdisc"); return -EOPNOTSUPP; + } - if (!netif_is_multiqueue(dev)) + if (!netif_is_multiqueue(dev)) { + NL_SET_ERR_MSG_MOD(extack, "Multi-queue device is required"); return -EOPNOTSUPP; + } /* pre-allocate qdisc, attachment can't fail */ q->qdiscs = kcalloc(dev->num_tx_queues, @@ -1875,6 +1945,33 @@ error_nest: return -1; } +static int taprio_dump_tc_entries(struct taprio_sched *q, struct sk_buff *skb) +{ + struct nlattr *n; + int tc; + + for (tc = 0; tc < TC_MAX_QUEUE; tc++) { + n = nla_nest_start(skb, TCA_TAPRIO_ATTR_TC_ENTRY); + if (!n) + return -EMSGSIZE; + + if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_INDEX, tc)) + goto nla_put_failure; + + if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_MAX_SDU, + q->max_sdu[tc])) + goto nla_put_failure; + + nla_nest_end(skb, n); + } + + return 0; + +nla_put_failure: + nla_nest_cancel(skb, n); + return -EMSGSIZE; +} + static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) { struct taprio_sched *q = qdisc_priv(sch); @@ -1884,9 +1981,8 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) struct nlattr *nest, *sched_nest; unsigned int i; - rcu_read_lock(); - oper = rcu_dereference(q->oper_sched); - admin = rcu_dereference(q->admin_sched); + oper = rtnl_dereference(q->oper_sched); + admin = rtnl_dereference(q->admin_sched); opt.num_tc = netdev_get_num_tc(dev); memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map)); @@ -1914,6 +2010,9 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay)) goto options_error; + if (taprio_dump_tc_entries(q, skb)) + goto options_error; + if (oper && dump_schedule(skb, oper)) goto options_error; @@ -1930,8 +2029,6 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb) nla_nest_end(skb, sched_nest); done: - rcu_read_unlock(); - return nla_nest_end(skb, nest); admin_error: @@ -1941,18 +2038,19 @@ options_error: nla_nest_cancel(skb, nest); start_error: - rcu_read_unlock(); return -ENOSPC; } static struct Qdisc *taprio_leaf(struct Qdisc *sch, unsigned long cl) { - struct netdev_queue *dev_queue = taprio_queue_get(sch, cl); + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + unsigned int ntx = cl - 1; - if (!dev_queue) + if (ntx >= dev->num_tx_queues) return NULL; - return 
dev_queue->qdisc_sleeping; + return q->qdiscs[ntx]; } static unsigned long taprio_find(struct Qdisc *sch, u32 classid) @@ -2000,11 +2098,8 @@ static void taprio_walk(struct Qdisc *sch, struct qdisc_walker *arg) arg->count = arg->skip; for (ntx = arg->skip; ntx < dev->num_tx_queues; ntx++) { - if (arg->fn(sch, ntx + 1, arg) < 0) { - arg->stop = 1; + if (!tc_qdisc_stats_dump(sch, ntx + 1, arg)) break; - } - arg->count++; } } diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index e031c1a41ea6..277ad11f4d61 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -580,12 +580,7 @@ static unsigned long tbf_find(struct Qdisc *sch, u32 classid) static void tbf_walk(struct Qdisc *sch, struct qdisc_walker *walker) { if (!walker->stop) { - if (walker->count >= walker->skip) - if (walker->fn(sch, 1, walker) < 0) { - walker->stop = 1; - return; - } - walker->count++; + tc_qdisc_stats_dump(sch, 1, walker); } } diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c index 0939cc3b915a..3ccbf3c201cd 100644 --- a/net/smc/af_smc.c +++ b/net/smc/af_smc.c @@ -379,6 +379,8 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock, sk->sk_state = SMC_INIT; sk->sk_destruct = smc_destruct; sk->sk_protocol = protocol; + WRITE_ONCE(sk->sk_sndbuf, READ_ONCE(net->smc.sysctl_wmem)); + WRITE_ONCE(sk->sk_rcvbuf, READ_ONCE(net->smc.sysctl_rmem)); smc = smc_sk(sk); INIT_WORK(&smc->tcp_listen_work, smc_tcp_listen_work); INIT_WORK(&smc->connect_work, smc_connect_work); @@ -427,6 +429,7 @@ static int smc_bind(struct socket *sock, struct sockaddr *uaddr, goto out_rel; smc->clcsock->sk->sk_reuse = sk->sk_reuse; + smc->clcsock->sk->sk_reuseport = sk->sk_reuseport; rc = kernel_bind(smc->clcsock, uaddr, addr_len); out_rel: @@ -3253,9 +3256,6 @@ static int __smc_create(struct net *net, struct socket *sock, int protocol, smc->clcsock = clcsock; } - smc->sk.sk_sndbuf = max(smc->clcsock->sk->sk_sndbuf, SMC_BUF_MIN_SIZE); - smc->sk.sk_rcvbuf = max(smc->clcsock->sk->sk_rcvbuf, SMC_BUF_MIN_SIZE); - out: return rc; } diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c index ff49a11f57b8..e6ee797640b4 100644 --- a/net/smc/smc_core.c +++ b/net/smc/smc_core.c @@ -757,6 +757,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk, lnk->lgr = lgr; smc_lgr_hold(lgr); /* lgr_put in smcr_link_clear() */ lnk->link_idx = link_idx; + lnk->wr_rx_id_compl = 0; smc_ibdev_cnt_inc(lnk); smcr_copy_dev_info_to_link(lnk); atomic_set(&lnk->conn_cnt, 0); @@ -2238,7 +2239,7 @@ out: static int smcr_buf_map_usable_links(struct smc_link_group *lgr, struct smc_buf_desc *buf_desc, bool is_rmb) { - int i, rc = 0; + int i, rc = 0, cnt = 0; /* protect against parallel link reconfiguration */ mutex_lock(&lgr->llc_conf_mutex); @@ -2251,9 +2252,12 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr, rc = -ENOMEM; goto out; } + cnt++; } out: mutex_unlock(&lgr->llc_conf_mutex); + if (!rc && !cnt) + rc = -EINVAL; return rc; } @@ -2306,10 +2310,10 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) if (is_rmb) /* use socket recv buffer size (w/o overhead) as start value */ - sk_buf_size = smc->sk.sk_rcvbuf / 2; + sk_buf_size = smc->sk.sk_rcvbuf; else /* use socket send buffer size (w/o overhead) as start value */ - sk_buf_size = smc->sk.sk_sndbuf / 2; + sk_buf_size = smc->sk.sk_sndbuf; for (bufsize_short = smc_compress_bufsize(sk_buf_size, is_smcd, is_rmb); bufsize_short >= 0; bufsize_short--) { @@ -2368,7 +2372,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) 
if (is_rmb) { conn->rmb_desc = buf_desc; conn->rmbe_size_short = bufsize_short; - smc->sk.sk_rcvbuf = bufsize * 2; + smc->sk.sk_rcvbuf = bufsize; atomic_set(&conn->bytes_to_rcv, 0); conn->rmbe_update_limit = smc_rmb_wnd_update_limit(buf_desc->len); @@ -2376,7 +2380,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb) smc_ism_set_conn(conn); /* map RMB/smcd_dev to conn */ } else { conn->sndbuf_desc = buf_desc; - smc->sk.sk_sndbuf = bufsize * 2; + smc->sk.sk_sndbuf = bufsize; atomic_set(&conn->sndbuf_space, bufsize); } return 0; diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h index fe8b524ad846..285f9bd8e232 100644 --- a/net/smc/smc_core.h +++ b/net/smc/smc_core.h @@ -115,8 +115,10 @@ struct smc_link { dma_addr_t wr_rx_dma_addr; /* DMA address of wr_rx_bufs */ dma_addr_t wr_rx_v2_dma_addr; /* DMA address of v2 rx buf*/ u64 wr_rx_id; /* seq # of last recv WR */ + u64 wr_rx_id_compl; /* seq # of last completed WR */ u32 wr_rx_cnt; /* number of WR recv buffers */ unsigned long wr_rx_tstamp; /* jiffies when last buf rx */ + wait_queue_head_t wr_rx_empty_wait; /* wait for RQ empty */ struct ib_reg_wr wr_reg; /* WR register memory region */ wait_queue_head_t wr_reg_wait; /* wait for wr_reg result */ diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c index 175026ae33ae..524649d0ab65 100644 --- a/net/smc/smc_llc.c +++ b/net/smc/smc_llc.c @@ -2127,7 +2127,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc) init_waitqueue_head(&lgr->llc_flow_waiter); init_waitqueue_head(&lgr->llc_msg_waiter); mutex_init(&lgr->llc_conf_mutex); - lgr->llc_testlink_time = READ_ONCE(net->ipv4.sysctl_tcp_keepalive_time); + lgr->llc_testlink_time = READ_ONCE(net->smc.sysctl_smcr_testlink_time); } /* called after lgr was removed from lgr_list */ diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h index 4404e52b3346..7e7a3162c68b 100644 --- a/net/smc/smc_llc.h +++ b/net/smc/smc_llc.h @@ -19,6 +19,7 @@ #define SMC_LLC_WAIT_FIRST_TIME (5 * HZ) #define SMC_LLC_WAIT_TIME (2 * HZ) +#define SMC_LLC_TESTLINK_DEFAULT_TIME (30 * HZ) enum smc_llc_reqresp { SMC_LLC_REQ, diff --git a/net/smc/smc_sysctl.c b/net/smc/smc_sysctl.c index 0613868fdb97..b6f79fabb9d3 100644 --- a/net/smc/smc_sysctl.c +++ b/net/smc/smc_sysctl.c @@ -16,8 +16,12 @@ #include "smc.h" #include "smc_core.h" +#include "smc_llc.h" #include "smc_sysctl.h" +static int min_sndbuf = SMC_BUF_MIN_SIZE; +static int min_rcvbuf = SMC_BUF_MIN_SIZE; + static struct ctl_table smc_table[] = { { .procname = "autocorking_size", @@ -35,6 +39,29 @@ static struct ctl_table smc_table[] = { .extra1 = SYSCTL_ZERO, .extra2 = SYSCTL_TWO, }, + { + .procname = "smcr_testlink_time", + .data = &init_net.smc.sysctl_smcr_testlink_time, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "wmem", + .data = &init_net.smc.sysctl_wmem, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_sndbuf, + }, + { + .procname = "rmem", + .data = &init_net.smc.sysctl_rmem, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &min_rcvbuf, + }, { } }; @@ -60,6 +87,9 @@ int __net_init smc_sysctl_net_init(struct net *net) net->smc.sysctl_autocorking_size = SMC_AUTOCORKING_DEFAULT_SIZE; net->smc.sysctl_smcr_buf_type = SMCR_PHYS_CONT_BUFS; + net->smc.sysctl_smcr_testlink_time = SMC_LLC_TESTLINK_DEFAULT_TIME; + WRITE_ONCE(net->smc.sysctl_wmem, READ_ONCE(net->ipv4.sysctl_tcp_wmem[1])); + WRITE_ONCE(net->smc.sysctl_rmem, 
READ_ONCE(net->ipv4.sysctl_tcp_rmem[1])); return 0; diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c index 26f8f240d9e8..b0678a417e09 100644 --- a/net/smc/smc_wr.c +++ b/net/smc/smc_wr.c @@ -454,6 +454,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num) for (i = 0; i < num; i++) { link = wc[i].qp->qp_context; + link->wr_rx_id_compl = wc[i].wr_id; if (wc[i].status == IB_WC_SUCCESS) { link->wr_rx_tstamp = jiffies; smc_wr_rx_demultiplex(&wc[i]); @@ -465,6 +466,8 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num) case IB_WC_RNR_RETRY_EXC_ERR: case IB_WC_WR_FLUSH_ERR: smcr_link_down_cond_sched(link); + if (link->wr_rx_id_compl == link->wr_rx_id) + wake_up(&link->wr_rx_empty_wait); break; default: smc_wr_rx_post(link); /* refill WR RX */ @@ -639,6 +642,7 @@ void smc_wr_free_link(struct smc_link *lnk) return; ibdev = lnk->smcibdev->ibdev; + smc_wr_drain_cq(lnk); smc_wr_wakeup_reg_wait(lnk); smc_wr_wakeup_tx_wait(lnk); @@ -889,6 +893,7 @@ int smc_wr_create_link(struct smc_link *lnk) atomic_set(&lnk->wr_tx_refcnt, 0); init_waitqueue_head(&lnk->wr_reg_wait); atomic_set(&lnk->wr_reg_refcnt, 0); + init_waitqueue_head(&lnk->wr_rx_empty_wait); return rc; dma_unmap: diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h index a54e90a1110f..45e9b894d3f8 100644 --- a/net/smc/smc_wr.h +++ b/net/smc/smc_wr.h @@ -73,6 +73,11 @@ static inline void smc_wr_tx_link_put(struct smc_link *link) wake_up_all(&link->wr_tx_wait); } +static inline void smc_wr_drain_cq(struct smc_link *lnk) +{ + wait_event(lnk->wr_rx_empty_wait, lnk->wr_rx_id_compl == lnk->wr_rx_id); +} + static inline void smc_wr_wakeup_tx_wait(struct smc_link *lnk) { wake_up_all(&lnk->wr_tx_wait); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 7d268a291486..c284efa3d1ef 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2873,6 +2873,9 @@ int rpc_clnt_test_and_add_xprt(struct rpc_clnt *clnt, task = rpc_call_null_helper(clnt, xprt, NULL, RPC_TASK_ASYNC, &rpc_cb_add_xprt_call_ops, data); + if (IS_ERR(task)) + return PTR_ERR(task); + data->xps->xps_nunique_destaddr_xprts++; rpc_put_task(task); success: diff --git a/net/sunrpc/xprt.c b/net/sunrpc/xprt.c index d71eec494826..f8fae7815649 100644 --- a/net/sunrpc/xprt.c +++ b/net/sunrpc/xprt.c @@ -1179,11 +1179,8 @@ xprt_request_dequeue_receive_locked(struct rpc_task *task) { struct rpc_rqst *req = task->tk_rqstp; - if (test_and_clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) { + if (test_and_clear_bit(RPC_TASK_NEED_RECV, &task->tk_runstate)) xprt_request_rb_remove(req->rq_xprt, req); - xdr_free_bvec(&req->rq_rcv_buf); - req->rq_private_buf.bvec = NULL; - } } /** @@ -1221,6 +1218,8 @@ void xprt_complete_rqst(struct rpc_task *task, int copied) xprt->stat.recvs++; + xdr_free_bvec(&req->rq_rcv_buf); + req->rq_private_buf.bvec = NULL; req->rq_private_buf.len = copied; /* Ensure all writes are done before we update */ /* req->rq_reply_bytes_recvd */ @@ -1453,6 +1452,7 @@ xprt_request_dequeue_xprt(struct rpc_task *task) xprt_request_dequeue_transmit_locked(task); xprt_request_dequeue_receive_locked(task); spin_unlock(&xprt->queue_lock); + xdr_free_bvec(&req->rq_rcv_buf); } } diff --git a/net/tipc/monitor.c b/net/tipc/monitor.c index 2f4d23238a7e..9618e4429f0f 100644 --- a/net/tipc/monitor.c +++ b/net/tipc/monitor.c @@ -160,7 +160,7 @@ static void map_set(u64 *up_map, int i, unsigned int v) static int map_get(u64 up_map, int i) { - return (up_map & (1 << i)) >> i; + return (up_map & (1ULL << i)) >> i; } static struct tipc_peer *peer_prev(struct tipc_peer 
*peer) diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 8267b751a526..190b49c5cbc3 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -41,14 +41,6 @@ int sysctl_tipc_named_timeout __read_mostly = 2000; -struct distr_queue_item { - struct distr_item i; - u32 dtype; - u32 node; - unsigned long expires; - struct list_head next; -}; - /** * publ_to_item - add publication info to a publication message * @p: publication info diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 0f983e5f7dde..a03d66046ca3 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -902,17 +902,28 @@ static void tls_device_core_ctrl_rx_resync(struct tls_context *tls_ctx, } static int -tls_device_reencrypt(struct sock *sk, struct tls_sw_context_rx *sw_ctx) +tls_device_reencrypt(struct sock *sk, struct tls_context *tls_ctx) { + struct tls_sw_context_rx *sw_ctx = tls_sw_ctx_rx(tls_ctx); + const struct tls_cipher_size_desc *cipher_sz; int err, offset, copy, data_len, pos; struct sk_buff *skb, *skb_iter; struct scatterlist sg[1]; struct strp_msg *rxm; char *orig_buf, *buf; + switch (tls_ctx->crypto_recv.info.cipher_type) { + case TLS_CIPHER_AES_GCM_128: + case TLS_CIPHER_AES_GCM_256: + break; + default: + return -EINVAL; + } + cipher_sz = &tls_cipher_size_desc[tls_ctx->crypto_recv.info.cipher_type]; + rxm = strp_msg(tls_strp_msg(sw_ctx)); - orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + - TLS_CIPHER_AES_GCM_128_IV_SIZE, sk->sk_allocation); + orig_buf = kmalloc(rxm->full_len + TLS_HEADER_SIZE + cipher_sz->iv, + sk->sk_allocation); if (!orig_buf) return -ENOMEM; buf = orig_buf; @@ -927,10 +938,8 @@ tls_device_reencrypt(struct sock *sk, struct tls_sw_context_rx *sw_ctx) sg_init_table(sg, 1); sg_set_buf(&sg[0], buf, - rxm->full_len + TLS_HEADER_SIZE + - TLS_CIPHER_AES_GCM_128_IV_SIZE); - err = skb_copy_bits(skb, offset, buf, - TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE); + rxm->full_len + TLS_HEADER_SIZE + cipher_sz->iv); + err = skb_copy_bits(skb, offset, buf, TLS_HEADER_SIZE + cipher_sz->iv); if (err) goto free_buf; @@ -941,7 +950,7 @@ tls_device_reencrypt(struct sock *sk, struct tls_sw_context_rx *sw_ctx) else err = 0; - data_len = rxm->full_len - TLS_CIPHER_AES_GCM_128_TAG_SIZE; + data_len = rxm->full_len - cipher_sz->tag; if (skb_pagelen(skb) > offset) { copy = min_t(int, skb_pagelen(skb) - offset, data_len); @@ -1024,7 +1033,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx) * likely have initial fragments decrypted, and final ones not * decrypted. We need to reencrypt that single SKB. 
*/ - return tls_device_reencrypt(sk, sw_ctx); + return tls_device_reencrypt(sk, tls_ctx); } /* Return immediately if the record is either entirely plaintext or @@ -1041,7 +1050,7 @@ int tls_device_decrypted(struct sock *sk, struct tls_context *tls_ctx) } ctx->resync_nh_reset = 1; - return tls_device_reencrypt(sk, sw_ctx); + return tls_device_reencrypt(sk, tls_ctx); } static void tls_device_attach(struct tls_context *ctx, struct sock *sk, @@ -1062,9 +1071,9 @@ static void tls_device_attach(struct tls_context *ctx, struct sock *sk, int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) { - u16 nonce_size, tag_size, iv_size, rec_seq_size, salt_size; struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_prot_info *prot = &tls_ctx->prot_info; + const struct tls_cipher_size_desc *cipher_sz; struct tls_record_info *start_marker_record; struct tls_offload_context_tx *offload_ctx; struct tls_crypto_info *crypto_info; @@ -1099,44 +1108,44 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx) switch (crypto_info->cipher_type) { case TLS_CIPHER_AES_GCM_128: - nonce_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; - tag_size = TLS_CIPHER_AES_GCM_128_TAG_SIZE; - iv_size = TLS_CIPHER_AES_GCM_128_IV_SIZE; iv = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->iv; - rec_seq_size = TLS_CIPHER_AES_GCM_128_REC_SEQ_SIZE; - salt_size = TLS_CIPHER_AES_GCM_128_SALT_SIZE; rec_seq = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->rec_seq; break; + case TLS_CIPHER_AES_GCM_256: + iv = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->iv; + rec_seq = + ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->rec_seq; + break; default: rc = -EINVAL; goto release_netdev; } + cipher_sz = &tls_cipher_size_desc[crypto_info->cipher_type]; /* Sanity-check the rec_seq_size for stack allocations */ - if (rec_seq_size > TLS_MAX_REC_SEQ_SIZE) { + if (cipher_sz->rec_seq > TLS_MAX_REC_SEQ_SIZE) { rc = -EINVAL; goto release_netdev; } prot->version = crypto_info->version; prot->cipher_type = crypto_info->cipher_type; - prot->prepend_size = TLS_HEADER_SIZE + nonce_size; - prot->tag_size = tag_size; + prot->prepend_size = TLS_HEADER_SIZE + cipher_sz->iv; + prot->tag_size = cipher_sz->tag; prot->overhead_size = prot->prepend_size + prot->tag_size; - prot->iv_size = iv_size; - prot->salt_size = salt_size; - ctx->tx.iv = kmalloc(iv_size + TLS_CIPHER_AES_GCM_128_SALT_SIZE, - GFP_KERNEL); + prot->iv_size = cipher_sz->iv; + prot->salt_size = cipher_sz->salt; + ctx->tx.iv = kmalloc(cipher_sz->iv + cipher_sz->salt, GFP_KERNEL); if (!ctx->tx.iv) { rc = -ENOMEM; goto release_netdev; } - memcpy(ctx->tx.iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, iv, iv_size); + memcpy(ctx->tx.iv + cipher_sz->salt, iv, cipher_sz->iv); - prot->rec_seq_size = rec_seq_size; - ctx->tx.rec_seq = kmemdup(rec_seq, rec_seq_size, GFP_KERNEL); + prot->rec_seq_size = cipher_sz->rec_seq; + ctx->tx.rec_seq = kmemdup(rec_seq, cipher_sz->rec_seq, GFP_KERNEL); if (!ctx->tx.rec_seq) { rc = -ENOMEM; goto free_iv; diff --git a/net/tls/tls_device_fallback.c b/net/tls/tls_device_fallback.c index 7dfc8023e0f1..cdb391a8754b 100644 --- a/net/tls/tls_device_fallback.c +++ b/net/tls/tls_device_fallback.c @@ -54,13 +54,25 @@ static int tls_enc_record(struct aead_request *aead_req, struct scatter_walk *out, int *in_len, struct tls_prot_info *prot) { - unsigned char buf[TLS_HEADER_SIZE + TLS_CIPHER_AES_GCM_128_IV_SIZE]; + unsigned char buf[TLS_HEADER_SIZE + MAX_IV_SIZE]; + const struct tls_cipher_size_desc *cipher_sz; struct scatterlist sg_in[3]; struct 
scatterlist sg_out[3]; + unsigned int buf_size; u16 len; int rc; - len = min_t(int, *in_len, ARRAY_SIZE(buf)); + switch (prot->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + case TLS_CIPHER_AES_GCM_256: + break; + default: + return -EINVAL; + } + cipher_sz = &tls_cipher_size_desc[prot->cipher_type]; + + buf_size = TLS_HEADER_SIZE + cipher_sz->iv; + len = min_t(int, *in_len, buf_size); scatterwalk_copychunks(buf, in, len, 0); scatterwalk_copychunks(buf, out, len, 1); @@ -73,13 +85,11 @@ static int tls_enc_record(struct aead_request *aead_req, scatterwalk_pagedone(out, 1, 1); len = buf[4] | (buf[3] << 8); - len -= TLS_CIPHER_AES_GCM_128_IV_SIZE; + len -= cipher_sz->iv; - tls_make_aad(aad, len - TLS_CIPHER_AES_GCM_128_TAG_SIZE, - (char *)&rcd_sn, buf[0], prot); + tls_make_aad(aad, len - cipher_sz->tag, (char *)&rcd_sn, buf[0], prot); - memcpy(iv + TLS_CIPHER_AES_GCM_128_SALT_SIZE, buf + TLS_HEADER_SIZE, - TLS_CIPHER_AES_GCM_128_IV_SIZE); + memcpy(iv + cipher_sz->salt, buf + TLS_HEADER_SIZE, cipher_sz->iv); sg_init_table(sg_in, ARRAY_SIZE(sg_in)); sg_init_table(sg_out, ARRAY_SIZE(sg_out)); @@ -90,7 +100,7 @@ static int tls_enc_record(struct aead_request *aead_req, *in_len -= len; if (*in_len < 0) { - *in_len += TLS_CIPHER_AES_GCM_128_TAG_SIZE; + *in_len += cipher_sz->tag; /* the input buffer doesn't contain the entire record. * trim len accordingly. The resulting authentication tag * will contain garbage, but we don't care, so we won't @@ -111,7 +121,7 @@ static int tls_enc_record(struct aead_request *aead_req, scatterwalk_pagedone(out, 1, 1); } - len -= TLS_CIPHER_AES_GCM_128_TAG_SIZE; + len -= cipher_sz->tag; aead_request_set_crypt(aead_req, sg_in, sg_out, len, iv); rc = crypto_aead_encrypt(aead_req); @@ -299,11 +309,14 @@ static void fill_sg_out(struct scatterlist sg_out[3], void *buf, int sync_size, void *dummy_buf) { + const struct tls_cipher_size_desc *cipher_sz = + &tls_cipher_size_desc[tls_ctx->crypto_send.info.cipher_type]; + sg_set_buf(&sg_out[0], dummy_buf, sync_size); sg_set_buf(&sg_out[1], nskb->data + tcp_payload_offset, payload_len); /* Add room for authentication tag produced by crypto */ dummy_buf += sync_size; - sg_set_buf(&sg_out[2], dummy_buf, TLS_CIPHER_AES_GCM_128_TAG_SIZE); + sg_set_buf(&sg_out[2], dummy_buf, cipher_sz->tag); } static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, @@ -315,7 +328,8 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, struct tls_offload_context_tx *ctx = tls_offload_ctx_tx(tls_ctx); int tcp_payload_offset = skb_tcp_all_headers(skb); int payload_len = skb->len - tcp_payload_offset; - void *buf, *iv, *aad, *dummy_buf; + const struct tls_cipher_size_desc *cipher_sz; + void *buf, *iv, *aad, *dummy_buf, *salt; struct aead_request *aead_req; struct sk_buff *nskb = NULL; int buf_len; @@ -324,20 +338,26 @@ static struct sk_buff *tls_enc_skb(struct tls_context *tls_ctx, if (!aead_req) return NULL; - buf_len = TLS_CIPHER_AES_GCM_128_SALT_SIZE + - TLS_CIPHER_AES_GCM_128_IV_SIZE + - TLS_AAD_SPACE_SIZE + - sync_size + - TLS_CIPHER_AES_GCM_128_TAG_SIZE; + switch (tls_ctx->crypto_send.info.cipher_type) { + case TLS_CIPHER_AES_GCM_128: + salt = tls_ctx->crypto_send.aes_gcm_128.salt; + break; + case TLS_CIPHER_AES_GCM_256: + salt = tls_ctx->crypto_send.aes_gcm_256.salt; + break; + default: + return NULL; + } + cipher_sz = &tls_cipher_size_desc[tls_ctx->crypto_send.info.cipher_type]; + buf_len = cipher_sz->salt + cipher_sz->iv + TLS_AAD_SPACE_SIZE + + sync_size + cipher_sz->tag; buf = kmalloc(buf_len, GFP_ATOMIC); if (!buf) goto 
free_req; iv = buf; - memcpy(iv, tls_ctx->crypto_send.aes_gcm_128.salt, - TLS_CIPHER_AES_GCM_128_SALT_SIZE); - aad = buf + TLS_CIPHER_AES_GCM_128_SALT_SIZE + - TLS_CIPHER_AES_GCM_128_IV_SIZE; + memcpy(iv, salt, cipher_sz->salt); + aad = buf + cipher_sz->salt + cipher_sz->iv; dummy_buf = aad + TLS_AAD_SPACE_SIZE; nskb = alloc_skb(skb_headroom(skb) + skb->len, GFP_ATOMIC); @@ -451,6 +471,7 @@ int tls_sw_fallback_init(struct sock *sk, struct tls_offload_context_tx *offload_ctx, struct tls_crypto_info *crypto_info) { + const struct tls_cipher_size_desc *cipher_sz; const u8 *key; int rc; @@ -463,15 +484,23 @@ int tls_sw_fallback_init(struct sock *sk, goto err_out; } - key = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->key; + switch (crypto_info->cipher_type) { + case TLS_CIPHER_AES_GCM_128: + key = ((struct tls12_crypto_info_aes_gcm_128 *)crypto_info)->key; + break; + case TLS_CIPHER_AES_GCM_256: + key = ((struct tls12_crypto_info_aes_gcm_256 *)crypto_info)->key; + break; + default: + return -EINVAL; + } + cipher_sz = &tls_cipher_size_desc[crypto_info->cipher_type]; - rc = crypto_aead_setkey(offload_ctx->aead_send, key, - TLS_CIPHER_AES_GCM_128_KEY_SIZE); + rc = crypto_aead_setkey(offload_ctx->aead_send, key, cipher_sz->key); if (rc) goto free_aead; - rc = crypto_aead_setauthsize(offload_ctx->aead_send, - TLS_CIPHER_AES_GCM_128_TAG_SIZE); + rc = crypto_aead_setauthsize(offload_ctx->aead_send, cipher_sz->tag); if (rc) goto free_aead; diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 08ddf9d837ae..3735cb00905d 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -58,6 +58,23 @@ enum { TLS_NUM_PROTS, }; +#define CIPHER_SIZE_DESC(cipher) [cipher] = { \ + .iv = cipher ## _IV_SIZE, \ + .key = cipher ## _KEY_SIZE, \ + .salt = cipher ## _SALT_SIZE, \ + .tag = cipher ## _TAG_SIZE, \ + .rec_seq = cipher ## _REC_SEQ_SIZE, \ +} + +const struct tls_cipher_size_desc tls_cipher_size_desc[] = { + CIPHER_SIZE_DESC(TLS_CIPHER_AES_GCM_128), + CIPHER_SIZE_DESC(TLS_CIPHER_AES_GCM_256), + CIPHER_SIZE_DESC(TLS_CIPHER_AES_CCM_128), + CIPHER_SIZE_DESC(TLS_CIPHER_CHACHA20_POLY1305), + CIPHER_SIZE_DESC(TLS_CIPHER_SM4_GCM), + CIPHER_SIZE_DESC(TLS_CIPHER_SM4_CCM), +}; + static const struct proto *saved_tcpv6_prot; static DEFINE_MUTEX(tcpv6_prot_mutex); static const struct proto *saved_tcpv4_prot; @@ -507,6 +524,54 @@ static int do_tls_getsockopt_conf(struct sock *sk, char __user *optval, rc = -EFAULT; break; } + case TLS_CIPHER_ARIA_GCM_128: { + struct tls12_crypto_info_aria_gcm_128 * + crypto_info_aria_gcm_128 = + container_of(crypto_info, + struct tls12_crypto_info_aria_gcm_128, + info); + + if (len != sizeof(*crypto_info_aria_gcm_128)) { + rc = -EINVAL; + goto out; + } + lock_sock(sk); + memcpy(crypto_info_aria_gcm_128->iv, + cctx->iv + TLS_CIPHER_ARIA_GCM_128_SALT_SIZE, + TLS_CIPHER_ARIA_GCM_128_IV_SIZE); + memcpy(crypto_info_aria_gcm_128->rec_seq, cctx->rec_seq, + TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE); + release_sock(sk); + if (copy_to_user(optval, + crypto_info_aria_gcm_128, + sizeof(*crypto_info_aria_gcm_128))) + rc = -EFAULT; + break; + } + case TLS_CIPHER_ARIA_GCM_256: { + struct tls12_crypto_info_aria_gcm_256 * + crypto_info_aria_gcm_256 = + container_of(crypto_info, + struct tls12_crypto_info_aria_gcm_256, + info); + + if (len != sizeof(*crypto_info_aria_gcm_256)) { + rc = -EINVAL; + goto out; + } + lock_sock(sk); + memcpy(crypto_info_aria_gcm_256->iv, + cctx->iv + TLS_CIPHER_ARIA_GCM_256_SALT_SIZE, + TLS_CIPHER_ARIA_GCM_256_IV_SIZE); + memcpy(crypto_info_aria_gcm_256->rec_seq, 
cctx->rec_seq, + TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE); + release_sock(sk); + if (copy_to_user(optval, + crypto_info_aria_gcm_256, + sizeof(*crypto_info_aria_gcm_256))) + rc = -EFAULT; + break; + } default: rc = -EINVAL; } @@ -668,6 +733,20 @@ static int do_tls_setsockopt_conf(struct sock *sk, sockptr_t optval, case TLS_CIPHER_SM4_CCM: optsize = sizeof(struct tls12_crypto_info_sm4_ccm); break; + case TLS_CIPHER_ARIA_GCM_128: + if (crypto_info->version != TLS_1_2_VERSION) { + rc = -EINVAL; + goto err_crypto_info; + } + optsize = sizeof(struct tls12_crypto_info_aria_gcm_128); + break; + case TLS_CIPHER_ARIA_GCM_256: + if (crypto_info->version != TLS_1_2_VERSION) { + rc = -EINVAL; + goto err_crypto_info; + } + optsize = sizeof(struct tls12_crypto_info_aria_gcm_256); + break; default: rc = -EINVAL; goto err_crypto_info; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index fe27241cd13f..264cf367e265 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -2629,6 +2629,40 @@ int tls_set_sw_offload(struct sock *sk, struct tls_context *ctx, int tx) cipher_name = "ccm(sm4)"; break; } + case TLS_CIPHER_ARIA_GCM_128: { + struct tls12_crypto_info_aria_gcm_128 *aria_gcm_128_info; + + aria_gcm_128_info = (void *)crypto_info; + nonce_size = TLS_CIPHER_ARIA_GCM_128_IV_SIZE; + tag_size = TLS_CIPHER_ARIA_GCM_128_TAG_SIZE; + iv_size = TLS_CIPHER_ARIA_GCM_128_IV_SIZE; + iv = aria_gcm_128_info->iv; + rec_seq_size = TLS_CIPHER_ARIA_GCM_128_REC_SEQ_SIZE; + rec_seq = aria_gcm_128_info->rec_seq; + keysize = TLS_CIPHER_ARIA_GCM_128_KEY_SIZE; + key = aria_gcm_128_info->key; + salt = aria_gcm_128_info->salt; + salt_size = TLS_CIPHER_ARIA_GCM_128_SALT_SIZE; + cipher_name = "gcm(aria)"; + break; + } + case TLS_CIPHER_ARIA_GCM_256: { + struct tls12_crypto_info_aria_gcm_256 *gcm_256_info; + + gcm_256_info = (void *)crypto_info; + nonce_size = TLS_CIPHER_ARIA_GCM_256_IV_SIZE; + tag_size = TLS_CIPHER_ARIA_GCM_256_TAG_SIZE; + iv_size = TLS_CIPHER_ARIA_GCM_256_IV_SIZE; + iv = gcm_256_info->iv; + rec_seq_size = TLS_CIPHER_ARIA_GCM_256_REC_SEQ_SIZE; + rec_seq = gcm_256_info->rec_seq; + keysize = TLS_CIPHER_ARIA_GCM_256_KEY_SIZE; + key = gcm_256_info->key; + salt = gcm_256_info->salt; + salt_size = TLS_CIPHER_ARIA_GCM_256_SALT_SIZE; + cipher_name = "gcm(aria)"; + break; + } default: rc = -EINVAL; goto free_priv; diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index dea2972c8178..c955c7253d4b 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -2536,32 +2536,18 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, size_t si static int unix_read_skb(struct sock *sk, skb_read_actor_t recv_actor) { - int copied = 0; - - while (1) { - struct unix_sock *u = unix_sk(sk); - struct sk_buff *skb; - int used, err; - - mutex_lock(&u->iolock); - skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); - mutex_unlock(&u->iolock); - if (!skb) - return err; + struct unix_sock *u = unix_sk(sk); + struct sk_buff *skb; + int err, copied; - used = recv_actor(sk, skb); - if (used <= 0) { - if (!copied) - copied = used; - kfree_skb(skb); - break; - } else if (used <= skb->len) { - copied += used; - } + mutex_lock(&u->iolock); + skb = skb_recv_datagram(sk, MSG_DONTWAIT, &err); + mutex_unlock(&u->iolock); + if (!skb) + return err; - kfree_skb(skb); - break; - } + copied = recv_actor(sk, skb); + kfree_skb(skb); return copied; } diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index b14f0ed7427b..842c94286d31 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -951,7 
+951,7 @@ static int vmci_transport_recv_listen(struct sock *sk, * for ourself or any previous connection requests that we received. * If it's the latter, we try to find a socket in our list of pending * connections and, if we do, call the appropriate handler for the - * state that that socket is in. Otherwise we try to service the + * state that socket is in. Otherwise we try to service the * connection request. */ pending = vmci_transport_get_pending(sk, pkt); diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c index 6a5f08f7491e..cca5e1cf089e 100644 --- a/net/wireless/lib80211_crypt_ccmp.c +++ b/net/wireless/lib80211_crypt_ccmp.c @@ -136,7 +136,7 @@ static int ccmp_init_iv_and_aad(const struct ieee80211_hdr *hdr, pos = (u8 *) hdr; aad[0] = pos[0] & 0x8f; aad[1] = pos[1] & 0xc7; - memcpy(aad + 2, hdr->addr1, 3 * ETH_ALEN); + memcpy(aad + 2, &hdr->addrs, 3 * ETH_ALEN); pos = (u8 *) & hdr->seq_ctrl; aad[20] = pos[0] & 0x0f; aad[21] = 0; /* all bits masked */ diff --git a/net/wireless/util.c b/net/wireless/util.c index 0b28d00ba8f5..01493568a21d 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1361,7 +1361,7 @@ static u32 cfg80211_calculate_bitrate_he(struct rate_info *rate) 25599, /* 4.166666... */ 17067, /* 2.777777... */ 12801, /* 2.083333... */ - 11769, /* 1.851851... */ + 11377, /* 1.851725... */ 10239, /* 1.666666... */ 8532, /* 1.388888... */ 7680, /* 1.250000... */ @@ -1444,7 +1444,7 @@ static u32 cfg80211_calculate_bitrate_eht(struct rate_info *rate) 25599, /* 4.166666... */ 17067, /* 2.777777... */ 12801, /* 2.083333... */ - 11769, /* 1.851851... */ + 11377, /* 1.851725... */ 10239, /* 1.666666... */ 8532, /* 1.388888... */ 7680, /* 1.250000... */ diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 869b9b9b9fad..4681e8e8ad94 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -19,8 +19,6 @@ #include "xdp_umem.h" #include "xsk_queue.h" -#define XDP_UMEM_MIN_CHUNK_SIZE 2048 - static DEFINE_IDA(umem_ida); static void xdp_umem_unpin_pages(struct xdp_umem *umem) diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c index 974eb97b77d2..29a540dcb5a7 100644 --- a/net/xfrm/espintcp.c +++ b/net/xfrm/espintcp.c @@ -91,7 +91,7 @@ static void espintcp_rcv(struct strparser *strp, struct sk_buff *skb) } /* remove header, leave non-ESP marker/SPI */ - if (!__pskb_pull(skb, rxm->offset + 2)) { + if (!pskb_pull(skb, rxm->offset + 2)) { XFRM_INC_STATS(sock_net(strp->sk), LINUX_MIB_XFRMINERROR); kfree_skb(skb); return; diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c index 637ca8838436..5f5aafd418af 100644 --- a/net/xfrm/xfrm_device.c +++ b/net/xfrm/xfrm_device.c @@ -207,7 +207,8 @@ struct sk_buff *validate_xmit_xfrm(struct sk_buff *skb, netdev_features_t featur EXPORT_SYMBOL_GPL(validate_xmit_xfrm); int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, - struct xfrm_user_offload *xuo) + struct xfrm_user_offload *xuo, + struct netlink_ext_ack *extack) { int err; struct dst_entry *dst; @@ -216,15 +217,21 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xfrm_address_t *saddr; xfrm_address_t *daddr; - if (!x->type_offload) + if (!x->type_offload) { + NL_SET_ERR_MSG(extack, "Type doesn't support offload"); return -EINVAL; + } /* We don't yet support UDP encapsulation and TFC padding. 
*/ - if (x->encap || x->tfcpad) + if (x->encap || x->tfcpad) { + NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded"); return -EINVAL; + } - if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND)) + if (xuo->flags & ~(XFRM_OFFLOAD_IPV6 | XFRM_OFFLOAD_INBOUND)) { + NL_SET_ERR_MSG(extack, "Unrecognized flags in offload request"); return -EINVAL; + } dev = dev_get_by_index(net, xuo->ifindex); if (!dev) { @@ -256,6 +263,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, if (x->props.flags & XFRM_STATE_ESN && !dev->xfrmdev_ops->xdo_dev_state_advance_esn) { + NL_SET_ERR_MSG(extack, "Device doesn't support offload with ESN"); xso->dev = NULL; dev_put(dev); return -EINVAL; @@ -277,8 +285,10 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x, xso->real_dev = NULL; netdev_put(dev, &xso->dev_tracker); - if (err != -EOPNOTSUPP) + if (err != -EOPNOTSUPP) { + NL_SET_ERR_MSG(extack, "Device failed to offload this state"); return err; + } } return 0; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index b2f4ec9c537f..a3cf85d3f296 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -20,6 +20,7 @@ #include <net/xfrm.h> #include <net/ip_tunnels.h> #include <net/ip6_tunnel.h> +#include <net/dst_metadata.h> #include "xfrm_inout.h" @@ -719,7 +720,8 @@ resume: sp = skb_sec_path(skb); if (sp) sp->olen = 0; - skb_dst_drop(skb); + if (skb_valid_dst(skb)) + skb_dst_drop(skb); gro_cells_receive(&gro_cells, skb); return 0; } else { @@ -737,7 +739,8 @@ resume: sp = skb_sec_path(skb); if (sp) sp->olen = 0; - skb_dst_drop(skb); + if (skb_valid_dst(skb)) + skb_dst_drop(skb); gro_cells_receive(&gro_cells, skb); return err; } diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c index 5113fa0fbcee..5a67b120c4db 100644 --- a/net/xfrm/xfrm_interface.c +++ b/net/xfrm/xfrm_interface.c @@ -41,6 +41,7 @@ #include <net/addrconf.h> #include <net/xfrm.h> #include <net/net_namespace.h> +#include <net/dst_metadata.h> #include <net/netns/generic.h> #include <linux/etherdevice.h> @@ -56,6 +57,89 @@ static const struct net_device_ops xfrmi_netdev_ops; struct xfrmi_net { /* lists for storing interfaces in use */ struct xfrm_if __rcu *xfrmi[XFRMI_HASH_SIZE]; + struct xfrm_if __rcu *collect_md_xfrmi; +}; + +static const struct nla_policy xfrm_lwt_policy[LWT_XFRM_MAX + 1] = { + [LWT_XFRM_IF_ID] = NLA_POLICY_MIN(NLA_U32, 1), + [LWT_XFRM_LINK] = NLA_POLICY_MIN(NLA_U32, 1), +}; + +static void xfrmi_destroy_state(struct lwtunnel_state *lwt) +{ +} + +static int xfrmi_build_state(struct net *net, struct nlattr *nla, + unsigned int family, const void *cfg, + struct lwtunnel_state **ts, + struct netlink_ext_ack *extack) +{ + struct nlattr *tb[LWT_XFRM_MAX + 1]; + struct lwtunnel_state *new_state; + struct xfrm_md_info *info; + int ret; + + ret = nla_parse_nested(tb, LWT_XFRM_MAX, nla, xfrm_lwt_policy, extack); + if (ret < 0) + return ret; + + if (!tb[LWT_XFRM_IF_ID]) { + NL_SET_ERR_MSG(extack, "if_id must be set"); + return -EINVAL; + } + + new_state = lwtunnel_state_alloc(sizeof(*info)); + if (!new_state) { + NL_SET_ERR_MSG(extack, "failed to create encap info"); + return -ENOMEM; + } + + new_state->type = LWTUNNEL_ENCAP_XFRM; + + info = lwt_xfrm_info(new_state); + + info->if_id = nla_get_u32(tb[LWT_XFRM_IF_ID]); + + if (tb[LWT_XFRM_LINK]) + info->link = nla_get_u32(tb[LWT_XFRM_LINK]); + + *ts = new_state; + return 0; +} + +static int xfrmi_fill_encap_info(struct sk_buff *skb, + struct lwtunnel_state *lwt) +{ + struct xfrm_md_info *info = 
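The xfrm_dev_state_add() changes above establish the pattern the rest of this series applies throughout xfrm: every silent "return -EINVAL" grows an NL_SET_ERR_MSG(), so the errno userspace already got now arrives with a reason string in the netlink extended ack. Reduced to its minimal shape:

/* The recurring conversion in this series: same errno as before,
 * plus a human-readable explanation attached to the request's
 * netlink_ext_ack. */
static int check_offloadable(struct xfrm_state *x,
                             struct netlink_ext_ack *extack)
{
    if (x->encap || x->tfcpad) {
        NL_SET_ERR_MSG(extack, "Encapsulation and TFC padding can't be offloaded");
        return -EINVAL;
    }
    return 0;
}

Note that extack may legitimately be NULL on some call paths (xfrm_compile_policy() passes NULL further down); NL_SET_ERR_MSG() tolerates that, so callees need no extra checks.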
lwt_xfrm_info(lwt); + + if (nla_put_u32(skb, LWT_XFRM_IF_ID, info->if_id) || + (info->link && nla_put_u32(skb, LWT_XFRM_LINK, info->link))) + return -EMSGSIZE; + + return 0; +} + +static int xfrmi_encap_nlsize(struct lwtunnel_state *lwtstate) +{ + return nla_total_size(sizeof(u32)) + /* LWT_XFRM_IF_ID */ + nla_total_size(sizeof(u32)); /* LWT_XFRM_LINK */ +} + +static int xfrmi_encap_cmp(struct lwtunnel_state *a, struct lwtunnel_state *b) +{ + struct xfrm_md_info *a_info = lwt_xfrm_info(a); + struct xfrm_md_info *b_info = lwt_xfrm_info(b); + + return memcmp(a_info, b_info, sizeof(*a_info)); +} + +static const struct lwtunnel_encap_ops xfrmi_encap_ops = { + .build_state = xfrmi_build_state, + .destroy_state = xfrmi_destroy_state, + .fill_encap = xfrmi_fill_encap_info, + .get_encap_size = xfrmi_encap_nlsize, + .cmp_encap = xfrmi_encap_cmp, + .owner = THIS_MODULE, }; #define for_each_xfrmi_rcu(start, xi) \ @@ -77,17 +161,23 @@ static struct xfrm_if *xfrmi_lookup(struct net *net, struct xfrm_state *x) return xi; } + xi = rcu_dereference(xfrmn->collect_md_xfrmi); + if (xi && (xi->dev->flags & IFF_UP)) + return xi; + return NULL; } -static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb, - unsigned short family) +static bool xfrmi_decode_session(struct sk_buff *skb, + unsigned short family, + struct xfrm_if_decode_session_result *res) { struct net_device *dev; + struct xfrm_if *xi; int ifindex = 0; if (!secpath_exists(skb) || !skb->dev) - return NULL; + return false; switch (family) { case AF_INET6: @@ -107,11 +197,18 @@ static struct xfrm_if *xfrmi_decode_session(struct sk_buff *skb, } if (!dev || !(dev->flags & IFF_UP)) - return NULL; + return false; if (dev->netdev_ops != &xfrmi_netdev_ops) - return NULL; + return false; - return netdev_priv(dev); + xi = netdev_priv(dev); + res->net = xi->net; + + if (xi->p.collect_md) + res->if_id = xfrm_input_state(skb)->if_id; + else + res->if_id = xi->p.if_id; + return true; } static void xfrmi_link(struct xfrmi_net *xfrmn, struct xfrm_if *xi) @@ -157,7 +254,10 @@ static int xfrmi_create(struct net_device *dev) if (err < 0) goto out; - xfrmi_link(xfrmn, xi); + if (xi->p.collect_md) + rcu_assign_pointer(xfrmn->collect_md_xfrmi, xi); + else + xfrmi_link(xfrmn, xi); return 0; @@ -185,7 +285,10 @@ static void xfrmi_dev_uninit(struct net_device *dev) struct xfrm_if *xi = netdev_priv(dev); struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id); - xfrmi_unlink(xfrmn, xi); + if (xi->p.collect_md) + RCU_INIT_POINTER(xfrmn->collect_md_xfrmi, NULL); + else + xfrmi_unlink(xfrmn, xi); } static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet) @@ -214,6 +317,7 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err) struct xfrm_state *x; struct xfrm_if *xi; bool xnet; + int link; if (err && !secpath_exists(skb)) return 0; @@ -224,6 +328,7 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err) if (!xi) return 1; + link = skb->dev->ifindex; dev = xi->dev; skb->dev = dev; @@ -254,6 +359,17 @@ static int xfrmi_rcv_cb(struct sk_buff *skb, int err) } xfrmi_scrub_packet(skb, xnet); + if (xi->p.collect_md) { + struct metadata_dst *md_dst; + + md_dst = metadata_dst_alloc(0, METADATA_XFRM, GFP_ATOMIC); + if (!md_dst) + return -ENOMEM; + + md_dst->u.xfrm_info.if_id = x->if_id; + md_dst->u.xfrm_info.link = link; + skb_dst_set(skb, (struct dst_entry *)md_dst); + } dev_sw_netstats_rx_add(dev, skb->len); return 0; @@ -269,10 +385,23 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) struct net_device *tdev; struct xfrm_state *x; int 
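In collect_md mode the interface no longer carries a fixed if_id/link pair, so xfrmi_rcv_cb() above attaches them to the packet as a METADATA_XFRM metadata dst (and the xfrm_input change earlier preserves it by dropping only "valid", non-metadata dsts). A later consumer can read the values back along these lines; a sketch:

#include <net/dst_metadata.h>

/* Read the per-packet xfrm metadata attached on receive in
 * collect_md mode. Sketch of a consumer. */
static u32 rx_xfrm_if_id(struct sk_buff *skb)
{
    struct metadata_dst *md_dst = skb_metadata_dst(skb);

    if (!md_dst || md_dst->type != METADATA_XFRM)
        return 0;  /* not a metadata-mode xfrm packet */
    return md_dst->u.xfrm_info.if_id;
}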
err = -1; + u32 if_id; int mtu; + if (xi->p.collect_md) { + struct xfrm_md_info *md_info = skb_xfrm_md_info(skb); + + if (unlikely(!md_info)) + return -EINVAL; + + if_id = md_info->if_id; + fl->flowi_oif = md_info->link; + } else { + if_id = xi->p.if_id; + } + dst_hold(dst); - dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, xi->p.if_id); + dst = xfrm_lookup_with_ifid(xi->net, dst, fl, NULL, 0, if_id); if (IS_ERR(dst)) { err = PTR_ERR(dst); dst = NULL; @@ -283,7 +412,7 @@ xfrmi_xmit2(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) if (!x) goto tx_err_link_failure; - if (x->if_id != xi->p.if_id) + if (x->if_id != if_id) goto tx_err_link_failure; tdev = dst->dev; @@ -633,6 +762,9 @@ static void xfrmi_netlink_parms(struct nlattr *data[], if (data[IFLA_XFRM_IF_ID]) parms->if_id = nla_get_u32(data[IFLA_XFRM_IF_ID]); + + if (data[IFLA_XFRM_COLLECT_METADATA]) + parms->collect_md = true; } static int xfrmi_newlink(struct net *src_net, struct net_device *dev, @@ -645,14 +777,27 @@ static int xfrmi_newlink(struct net *src_net, struct net_device *dev, int err; xfrmi_netlink_parms(data, &p); - if (!p.if_id) { - NL_SET_ERR_MSG(extack, "if_id must be non zero"); - return -EINVAL; - } + if (p.collect_md) { + struct xfrmi_net *xfrmn = net_generic(net, xfrmi_net_id); - xi = xfrmi_locate(net, &p); - if (xi) - return -EEXIST; + if (p.link || p.if_id) { + NL_SET_ERR_MSG(extack, "link and if_id must be zero"); + return -EINVAL; + } + + if (rtnl_dereference(xfrmn->collect_md_xfrmi)) + return -EEXIST; + + } else { + if (!p.if_id) { + NL_SET_ERR_MSG(extack, "if_id must be non zero"); + return -EINVAL; + } + + xi = xfrmi_locate(net, &p); + if (xi) + return -EEXIST; + } xi = netdev_priv(dev); xi->p = p; @@ -682,12 +827,22 @@ static int xfrmi_changelink(struct net_device *dev, struct nlattr *tb[], return -EINVAL; } + if (p.collect_md) { + NL_SET_ERR_MSG(extack, "collect_md can't be changed"); + return -EINVAL; + } + xi = xfrmi_locate(net, &p); if (!xi) { xi = netdev_priv(dev); } else { if (xi->dev != dev) return -EEXIST; + if (xi->p.collect_md) { + NL_SET_ERR_MSG(extack, + "device can't be changed to collect_md"); + return -EINVAL; + } } return xfrmi_update(xi, &p); @@ -700,6 +855,8 @@ static size_t xfrmi_get_size(const struct net_device *dev) nla_total_size(4) + /* IFLA_XFRM_IF_ID */ nla_total_size(4) + + /* IFLA_XFRM_COLLECT_METADATA */ + nla_total_size(0) + 0; } @@ -709,7 +866,8 @@ static int xfrmi_fill_info(struct sk_buff *skb, const struct net_device *dev) struct xfrm_if_parms *parm = &xi->p; if (nla_put_u32(skb, IFLA_XFRM_LINK, parm->link) || - nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id)) + nla_put_u32(skb, IFLA_XFRM_IF_ID, parm->if_id) || + (xi->p.collect_md && nla_put_flag(skb, IFLA_XFRM_COLLECT_METADATA))) goto nla_put_failure; return 0; @@ -725,8 +883,10 @@ static struct net *xfrmi_get_link_net(const struct net_device *dev) } static const struct nla_policy xfrmi_policy[IFLA_XFRM_MAX + 1] = { - [IFLA_XFRM_LINK] = { .type = NLA_U32 }, - [IFLA_XFRM_IF_ID] = { .type = NLA_U32 }, + [IFLA_XFRM_UNSPEC] = { .strict_start_type = IFLA_XFRM_COLLECT_METADATA }, + [IFLA_XFRM_LINK] = { .type = NLA_U32 }, + [IFLA_XFRM_IF_ID] = { .type = NLA_U32 }, + [IFLA_XFRM_COLLECT_METADATA] = { .type = NLA_FLAG }, }; static struct rtnl_link_ops xfrmi_link_ops __read_mostly = { @@ -762,6 +922,9 @@ static void __net_exit xfrmi_exit_batch_net(struct list_head *net_exit_list) xip = &xi->next) unregister_netdevice_queue(xi->dev, &list); } + xi = rtnl_dereference(xfrmn->collect_md_xfrmi); + if (xi) + 
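On transmit the same metadata arrives either from a per-route lwtunnel state (the LWT_XFRM_* attributes parsed by xfrmi_build_state() earlier) or from a metadata dst already on the skb, and xfrmi_xmit2() above fetches it via skb_xfrm_md_info(). In iproute2 terms this corresponds to creating the device in metadata mode and selecting the SA per route, along the lines of "ip link add ipsec0 type xfrm external" followed by "ip route add ... dev ipsec0 encap xfrm if_id 42 link eth0" (command syntax is an assumption about a matching iproute2 release). The metadata itself is small; at minimum it carries what a fixed-mode device kept in xi->p:

/* Sketch of the per-packet metadata used in collect_md mode; the
 * tree's struct xfrm_md_info may carry additional fields. */
struct xfrm_md_info_sketch {
    u32 if_id;  /* SA/policy selector, per packet instead of per device */
    int link;   /* underlying device; becomes fl->flowi_oif on xmit */
};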
unregister_netdevice_queue(xi->dev, &list); } unregister_netdevice_many(&list); rtnl_unlock(); @@ -999,6 +1162,8 @@ static int __init xfrmi_init(void) if (err < 0) goto rtnl_link_failed; + lwtunnel_encap_add_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM); + xfrm_if_register_cb(&xfrm_if_cb); return err; @@ -1017,6 +1182,7 @@ pernet_dev_failed: static void __exit xfrmi_fini(void) { xfrm_if_unregister_cb(); + lwtunnel_encap_del_ops(&xfrmi_encap_ops, LWTUNNEL_ENCAP_XFRM); rtnl_link_unregister(&xfrmi_link_ops); xfrmi4_fini(); xfrmi6_fini(); diff --git a/net/xfrm/xfrm_ipcomp.c b/net/xfrm/xfrm_ipcomp.c index cb40ff0ff28d..656045a87606 100644 --- a/net/xfrm/xfrm_ipcomp.c +++ b/net/xfrm/xfrm_ipcomp.c @@ -325,18 +325,22 @@ void ipcomp_destroy(struct xfrm_state *x) } EXPORT_SYMBOL_GPL(ipcomp_destroy); -int ipcomp_init_state(struct xfrm_state *x) +int ipcomp_init_state(struct xfrm_state *x, struct netlink_ext_ack *extack) { int err; struct ipcomp_data *ipcd; struct xfrm_algo_desc *calg_desc; err = -EINVAL; - if (!x->calg) + if (!x->calg) { + NL_SET_ERR_MSG(extack, "Missing required compression algorithm"); goto out; + } - if (x->encap) + if (x->encap) { + NL_SET_ERR_MSG(extack, "IPComp is not compatible with encapsulation"); goto out; + } err = -ENOMEM; ipcd = kzalloc(sizeof(*ipcd), GFP_KERNEL); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cc6ab79609e2..e392d8d05e0c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1889,7 +1889,7 @@ EXPORT_SYMBOL(xfrm_policy_walk_done); */ static int xfrm_policy_match(const struct xfrm_policy *pol, const struct flowi *fl, - u8 type, u16 family, int dir, u32 if_id) + u8 type, u16 family, u32 if_id) { const struct xfrm_selector *sel = &pol->selector; int ret = -ESRCH; @@ -2014,7 +2014,7 @@ static struct xfrm_policy * __xfrm_policy_eval_candidates(struct hlist_head *chain, struct xfrm_policy *prefer, const struct flowi *fl, - u8 type, u16 family, int dir, u32 if_id) + u8 type, u16 family, u32 if_id) { u32 priority = prefer ? 
prefer->priority : ~0u; struct xfrm_policy *pol; @@ -2028,7 +2028,7 @@ __xfrm_policy_eval_candidates(struct hlist_head *chain, if (pol->priority > priority) break; - err = xfrm_policy_match(pol, fl, type, family, dir, if_id); + err = xfrm_policy_match(pol, fl, type, family, if_id); if (err) { if (err != -ESRCH) return ERR_PTR(err); @@ -2053,7 +2053,7 @@ static struct xfrm_policy * xfrm_policy_eval_candidates(struct xfrm_pol_inexact_candidates *cand, struct xfrm_policy *prefer, const struct flowi *fl, - u8 type, u16 family, int dir, u32 if_id) + u8 type, u16 family, u32 if_id) { struct xfrm_policy *tmp; int i; @@ -2061,8 +2061,7 @@ xfrm_policy_eval_candidates(struct xfrm_pol_inexact_candidates *cand, for (i = 0; i < ARRAY_SIZE(cand->res); i++) { tmp = __xfrm_policy_eval_candidates(cand->res[i], prefer, - fl, type, family, dir, - if_id); + fl, type, family, if_id); if (!tmp) continue; @@ -2101,7 +2100,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, ret = NULL; hlist_for_each_entry_rcu(pol, chain, bydst) { - err = xfrm_policy_match(pol, fl, type, family, dir, if_id); + err = xfrm_policy_match(pol, fl, type, family, if_id); if (err) { if (err == -ESRCH) continue; @@ -2120,7 +2119,7 @@ static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type, goto skip_inexact; pol = xfrm_policy_eval_candidates(&cand, ret, fl, type, - family, dir, if_id); + family, if_id); if (pol) { ret = pol; if (IS_ERR(pol)) @@ -3516,17 +3515,17 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, int xerr_idx = -1; const struct xfrm_if_cb *ifcb; struct sec_path *sp; - struct xfrm_if *xi; u32 if_id = 0; rcu_read_lock(); ifcb = xfrm_if_get_cb(); if (ifcb) { - xi = ifcb->decode_session(skb, family); - if (xi) { - if_id = xi->p.if_id; - net = xi->net; + struct xfrm_if_decode_session_result r; + + if (ifcb->decode_session(skb, family, &r)) { + if_id = r.if_id; + net = r.net; } } rcu_read_unlock(); diff --git a/net/xfrm/xfrm_replay.c b/net/xfrm/xfrm_replay.c index 9277d81b344c..9f4d42eb090f 100644 --- a/net/xfrm/xfrm_replay.c +++ b/net/xfrm/xfrm_replay.c @@ -766,18 +766,22 @@ int xfrm_replay_overflow(struct xfrm_state *x, struct sk_buff *skb) } #endif -int xfrm_init_replay(struct xfrm_state *x) +int xfrm_init_replay(struct xfrm_state *x, struct netlink_ext_ack *extack) { struct xfrm_replay_state_esn *replay_esn = x->replay_esn; if (replay_esn) { if (replay_esn->replay_window > - replay_esn->bmp_len * sizeof(__u32) * 8) + replay_esn->bmp_len * sizeof(__u32) * 8) { + NL_SET_ERR_MSG(extack, "ESN replay window is too large for the chosen bitmap size"); return -EINVAL; + } if (x->props.flags & XFRM_STATE_ESN) { - if (replay_esn->replay_window == 0) + if (replay_esn->replay_window == 0) { + NL_SET_ERR_MSG(extack, "ESN replay window must be > 0"); return -EINVAL; + } x->repl_mode = XFRM_REPLAY_MODE_ESN; } else { x->repl_mode = XFRM_REPLAY_MODE_BMP; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 91c32a3b6924..81df34b3da6e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2611,7 +2611,8 @@ u32 xfrm_state_mtu(struct xfrm_state *x, int mtu) } EXPORT_SYMBOL_GPL(xfrm_state_mtu); -int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) +int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload, + struct netlink_ext_ack *extack) { const struct xfrm_mode *inner_mode; const struct xfrm_mode *outer_mode; @@ -2626,12 +2627,16 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool 
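The __xfrm_policy_check() hunk above is where the new decode_session contract pays off: instead of returning a struct xfrm_if, of which only two fields were ever used, the callback now fills a small result and returns whether it matched, letting a collect_md interface report the input state's per-packet if_id rather than a fixed per-device one. The result type, as consumed here:

/* Filled by the decode_session callback (from this series). */
struct xfrm_if_decode_session_result {
    struct net *net;  /* namespace of the matched interface */
    u32 if_id;        /* fixed xi->p.if_id, or the input state's
                       * if_id in collect_md mode */
};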
offload) if (x->sel.family != AF_UNSPEC) { inner_mode = xfrm_get_mode(x->props.mode, x->sel.family); - if (inner_mode == NULL) + if (inner_mode == NULL) { + NL_SET_ERR_MSG(extack, "Requested mode not found"); goto error; + } if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL) && - family != x->sel.family) + family != x->sel.family) { + NL_SET_ERR_MSG(extack, "Only tunnel modes can accommodate a change of family"); goto error; + } x->inner_mode = *inner_mode; } else { @@ -2639,11 +2644,15 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) int iafamily = AF_INET; inner_mode = xfrm_get_mode(x->props.mode, x->props.family); - if (inner_mode == NULL) + if (inner_mode == NULL) { + NL_SET_ERR_MSG(extack, "Requested mode not found"); goto error; + } - if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) + if (!(inner_mode->flags & XFRM_MODE_FLAG_TUNNEL)) { + NL_SET_ERR_MSG(extack, "Only tunnel modes can accommodate an AF_UNSPEC selector"); goto error; + } x->inner_mode = *inner_mode; @@ -2658,24 +2667,27 @@ int __xfrm_init_state(struct xfrm_state *x, bool init_replay, bool offload) } x->type = xfrm_get_type(x->id.proto, family); - if (x->type == NULL) + if (x->type == NULL) { + NL_SET_ERR_MSG(extack, "Requested type not found"); goto error; + } x->type_offload = xfrm_get_type_offload(x->id.proto, family, offload); - err = x->type->init_state(x); + err = x->type->init_state(x, extack); if (err) goto error; outer_mode = xfrm_get_mode(x->props.mode, family); if (!outer_mode) { + NL_SET_ERR_MSG(extack, "Requested mode not found"); err = -EPROTONOSUPPORT; goto error; } x->outer_mode = *outer_mode; if (init_replay) { - err = xfrm_init_replay(x); + err = xfrm_init_replay(x, extack); if (err) goto error; } @@ -2690,7 +2702,7 @@ int xfrm_init_state(struct xfrm_state *x) { int err; - err = __xfrm_init_state(x, true, false); + err = __xfrm_init_state(x, true, false, NULL); if (!err) x->km.state = XFRM_STATE_VALID; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 2ff017117730..e73f9efc54c1 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -35,7 +35,8 @@ #endif #include <asm/unaligned.h> -static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) +static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type, + struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[type]; struct xfrm_algo *algp; @@ -44,8 +45,10 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) return 0; algp = nla_data(rt); - if (nla_len(rt) < (int)xfrm_alg_len(algp)) + if (nla_len(rt) < (int)xfrm_alg_len(algp)) { + NL_SET_ERR_MSG(extack, "Invalid AUTH/CRYPT/COMP attribute length"); return -EINVAL; + } switch (type) { case XFRMA_ALG_AUTH: @@ -54,6 +57,7 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) break; default: + NL_SET_ERR_MSG(extack, "Invalid algorithm attribute type"); return -EINVAL; } @@ -61,7 +65,8 @@ static int verify_one_alg(struct nlattr **attrs, enum xfrm_attr_type_t type) return 0; } -static int verify_auth_trunc(struct nlattr **attrs) +static int verify_auth_trunc(struct nlattr **attrs, + struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[XFRMA_ALG_AUTH_TRUNC]; struct xfrm_algo_auth *algp; @@ -70,14 +75,16 @@ static int verify_auth_trunc(struct nlattr **attrs) return 0; algp = nla_data(rt); - if (nla_len(rt) < (int)xfrm_alg_auth_len(algp)) + if (nla_len(rt) < (int)xfrm_alg_auth_len(algp)) { + NL_SET_ERR_MSG(extack, "Invalid AUTH_TRUNC attribute length"); return -EINVAL; + 
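For reference, the length check in verify_one_alg() above compares the attribute against xfrm_alg_len(), which is the struct plus the key rounded up from bits to bytes, so a short attribute cannot smuggle in an alg_key_len that points past the buffer. The arithmetic, as a predicate:

/* xfrm_alg_len() sizing: alg_key_len is in bits, so e.g. a 256-bit
 * key needs sizeof(struct xfrm_algo) + 32 bytes of attribute payload. */
static int alg_attr_len_ok(const struct nlattr *rt)
{
    const struct xfrm_algo *algp = nla_data(rt);

    return nla_len(rt) >= (int)(sizeof(*algp) + (algp->alg_key_len + 7) / 8);
}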
} algp->alg_name[sizeof(algp->alg_name) - 1] = '\0'; return 0; } -static int verify_aead(struct nlattr **attrs) +static int verify_aead(struct nlattr **attrs, struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[XFRMA_ALG_AEAD]; struct xfrm_algo_aead *algp; @@ -86,8 +93,10 @@ static int verify_aead(struct nlattr **attrs) return 0; algp = nla_data(rt); - if (nla_len(rt) < (int)aead_len(algp)) + if (nla_len(rt) < (int)aead_len(algp)) { + NL_SET_ERR_MSG(extack, "Invalid AEAD attribute length"); return -EINVAL; + } algp->alg_name[sizeof(algp->alg_name) - 1] = '\0'; return 0; @@ -102,7 +111,7 @@ static void verify_one_addr(struct nlattr **attrs, enum xfrm_attr_type_t type, *addrp = nla_data(rt); } -static inline int verify_sec_ctx_len(struct nlattr **attrs) +static inline int verify_sec_ctx_len(struct nlattr **attrs, struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_user_sec_ctx *uctx; @@ -112,42 +121,59 @@ static inline int verify_sec_ctx_len(struct nlattr **attrs) uctx = nla_data(rt); if (uctx->len > nla_len(rt) || - uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len)) + uctx->len != (sizeof(struct xfrm_user_sec_ctx) + uctx->ctx_len)) { + NL_SET_ERR_MSG(extack, "Invalid security context length"); return -EINVAL; + } return 0; } static inline int verify_replay(struct xfrm_usersa_info *p, - struct nlattr **attrs) + struct nlattr **attrs, + struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[XFRMA_REPLAY_ESN_VAL]; struct xfrm_replay_state_esn *rs; - if (!rt) - return (p->flags & XFRM_STATE_ESN) ? -EINVAL : 0; + if (!rt) { + if (p->flags & XFRM_STATE_ESN) { + NL_SET_ERR_MSG(extack, "Missing required attribute for ESN"); + return -EINVAL; + } + return 0; + } rs = nla_data(rt); - if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) + if (rs->bmp_len > XFRMA_REPLAY_ESN_MAX / sizeof(rs->bmp[0]) / 8) { + NL_SET_ERR_MSG(extack, "ESN bitmap length must be <= 128"); return -EINVAL; + } if (nla_len(rt) < (int)xfrm_replay_state_esn_len(rs) && - nla_len(rt) != sizeof(*rs)) + nla_len(rt) != sizeof(*rs)) { + NL_SET_ERR_MSG(extack, "ESN attribute is too short to fit the full bitmap length"); return -EINVAL; + } /* As only ESP and AH support ESN feature. 
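The verify_replay() checks above and below encode two sizing rules: bmp_len counts __u32 words, so the usable replay window is bmp_len * 32 bits with bmp_len capped at 128 (XFRMA_REPLAY_ESN_MAX / 4 / 8), and the attribute must either carry the whole bitmap or be exactly the bare struct. Worked into one predicate:

/* Mirrors verify_replay()/xfrm_init_replay(): bmp_len __u32 words
 * give bmp_len * 32 bits of replay window, and the netlink
 * attribute must carry the full bitmap (or none of it). */
static bool esn_cfg_ok(const struct xfrm_replay_state_esn *rs, int attrlen)
{
    unsigned int full_len = sizeof(*rs) + rs->bmp_len * sizeof(rs->bmp[0]);

    return rs->replay_window <= rs->bmp_len * sizeof(__u32) * 8 &&
           (attrlen >= (int)full_len || attrlen == (int)sizeof(*rs));
}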
*/ - if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH)) + if ((p->id.proto != IPPROTO_ESP) && (p->id.proto != IPPROTO_AH)) { + NL_SET_ERR_MSG(extack, "ESN only supported for ESP and AH"); return -EINVAL; + } - if (p->replay_window != 0) + if (p->replay_window != 0) { + NL_SET_ERR_MSG(extack, "ESN not compatible with legacy replay_window"); return -EINVAL; + } return 0; } static int verify_newsa_info(struct xfrm_usersa_info *p, - struct nlattr **attrs) + struct nlattr **attrs, + struct netlink_ext_ack *extack) { int err; @@ -161,10 +187,12 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, break; #else err = -EAFNOSUPPORT; + NL_SET_ERR_MSG(extack, "IPv6 support disabled"); goto out; #endif default: + NL_SET_ERR_MSG(extack, "Invalid address family"); goto out; } @@ -173,65 +201,98 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, break; case AF_INET: - if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) { + NL_SET_ERR_MSG(extack, "Invalid prefix length in selector (must be <= 32 for IPv4)"); goto out; + } break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) - if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) { + NL_SET_ERR_MSG(extack, "Invalid prefix length in selector (must be <= 128 for IPv6)"); goto out; + } break; #else + NL_SET_ERR_MSG(extack, "IPv6 support disabled"); err = -EAFNOSUPPORT; goto out; #endif default: + NL_SET_ERR_MSG(extack, "Invalid address family in selector"); goto out; } err = -EINVAL; switch (p->id.proto) { case IPPROTO_AH: - if ((!attrs[XFRMA_ALG_AUTH] && - !attrs[XFRMA_ALG_AUTH_TRUNC]) || - attrs[XFRMA_ALG_AEAD] || + if (!attrs[XFRMA_ALG_AUTH] && + !attrs[XFRMA_ALG_AUTH_TRUNC]) { + NL_SET_ERR_MSG(extack, "Missing required attribute for AH: AUTH_TRUNC or AUTH"); + goto out; + } + + if (attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ALG_COMP] || - attrs[XFRMA_TFCPAD]) + attrs[XFRMA_TFCPAD]) { + NL_SET_ERR_MSG(extack, "Invalid attributes for AH: AEAD, CRYPT, COMP, TFCPAD"); goto out; + } break; case IPPROTO_ESP: - if (attrs[XFRMA_ALG_COMP]) + if (attrs[XFRMA_ALG_COMP]) { + NL_SET_ERR_MSG(extack, "Invalid attribute for ESP: COMP"); goto out; + } + if (!attrs[XFRMA_ALG_AUTH] && !attrs[XFRMA_ALG_AUTH_TRUNC] && !attrs[XFRMA_ALG_CRYPT] && - !attrs[XFRMA_ALG_AEAD]) + !attrs[XFRMA_ALG_AEAD]) { + NL_SET_ERR_MSG(extack, "Missing required attribute for ESP: at least one of AUTH, AUTH_TRUNC, CRYPT, AEAD"); goto out; + } + if ((attrs[XFRMA_ALG_AUTH] || attrs[XFRMA_ALG_AUTH_TRUNC] || attrs[XFRMA_ALG_CRYPT]) && - attrs[XFRMA_ALG_AEAD]) + attrs[XFRMA_ALG_AEAD]) { + NL_SET_ERR_MSG(extack, "Invalid attribute combination for ESP: AEAD can't be used with AUTH, AUTH_TRUNC, CRYPT"); goto out; + } + if (attrs[XFRMA_TFCPAD] && - p->mode != XFRM_MODE_TUNNEL) + p->mode != XFRM_MODE_TUNNEL) { + NL_SET_ERR_MSG(extack, "TFC padding can only be used in tunnel mode"); goto out; + } break; case IPPROTO_COMP: - if (!attrs[XFRMA_ALG_COMP] || - attrs[XFRMA_ALG_AEAD] || + if (!attrs[XFRMA_ALG_COMP]) { + NL_SET_ERR_MSG(extack, "Missing required attribute for COMP: COMP"); + goto out; + } + + if (attrs[XFRMA_ALG_AEAD] || attrs[XFRMA_ALG_AUTH] || attrs[XFRMA_ALG_AUTH_TRUNC] || attrs[XFRMA_ALG_CRYPT] || - attrs[XFRMA_TFCPAD] || - (ntohl(p->id.spi) >= 0x10000)) + attrs[XFRMA_TFCPAD]) { + NL_SET_ERR_MSG(extack, "Invalid attributes for COMP: AEAD, AUTH, AUTH_TRUNC, CRYPT, TFCPAD"); goto out; + } + + if (ntohl(p->id.spi) >= 0x10000) { + 
NL_SET_ERR_MSG(extack, "SPI is too large for COMP (must be < 0x10000)"); + goto out; + } break; #if IS_ENABLED(CONFIG_IPV6) @@ -244,29 +305,36 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, attrs[XFRMA_ALG_CRYPT] || attrs[XFRMA_ENCAP] || attrs[XFRMA_SEC_CTX] || - attrs[XFRMA_TFCPAD] || - !attrs[XFRMA_COADDR]) + attrs[XFRMA_TFCPAD]) { + NL_SET_ERR_MSG(extack, "Invalid attributes for DSTOPTS/ROUTING"); goto out; + } + + if (!attrs[XFRMA_COADDR]) { + NL_SET_ERR_MSG(extack, "Missing required COADDR attribute for DSTOPTS/ROUTING"); + goto out; + } break; #endif default: + NL_SET_ERR_MSG(extack, "Unsupported protocol"); goto out; } - if ((err = verify_aead(attrs))) + if ((err = verify_aead(attrs, extack))) goto out; - if ((err = verify_auth_trunc(attrs))) + if ((err = verify_auth_trunc(attrs, extack))) goto out; - if ((err = verify_one_alg(attrs, XFRMA_ALG_AUTH))) + if ((err = verify_one_alg(attrs, XFRMA_ALG_AUTH, extack))) goto out; - if ((err = verify_one_alg(attrs, XFRMA_ALG_CRYPT))) + if ((err = verify_one_alg(attrs, XFRMA_ALG_CRYPT, extack))) goto out; - if ((err = verify_one_alg(attrs, XFRMA_ALG_COMP))) + if ((err = verify_one_alg(attrs, XFRMA_ALG_COMP, extack))) goto out; - if ((err = verify_sec_ctx_len(attrs))) + if ((err = verify_sec_ctx_len(attrs, extack))) goto out; - if ((err = verify_replay(p, attrs))) + if ((err = verify_replay(p, attrs, extack))) goto out; err = -EINVAL; @@ -278,14 +346,19 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, break; default: + NL_SET_ERR_MSG(extack, "Unsupported mode"); goto out; } err = 0; - if (attrs[XFRMA_MTIMER_THRESH]) - if (!attrs[XFRMA_ENCAP]) + if (attrs[XFRMA_MTIMER_THRESH]) { + if (!attrs[XFRMA_ENCAP]) { + NL_SET_ERR_MSG(extack, "MTIMER_THRESH attribute can only be set on ENCAP states"); err = -EINVAL; + goto out; + } + } out: return err; @@ -293,7 +366,7 @@ out: static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, struct xfrm_algo_desc *(*get_byname)(const char *, int), - struct nlattr *rta) + struct nlattr *rta, struct netlink_ext_ack *extack) { struct xfrm_algo *p, *ualg; struct xfrm_algo_desc *algo; @@ -304,8 +377,10 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, ualg = nla_data(rta); algo = get_byname(ualg->alg_name, 1); - if (!algo) + if (!algo) { + NL_SET_ERR_MSG(extack, "Requested COMP algorithm not found"); return -ENOSYS; + } *props = algo->desc.sadb_alg_id; p = kmemdup(ualg, xfrm_alg_len(ualg), GFP_KERNEL); @@ -317,7 +392,8 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, return 0; } -static int attach_crypt(struct xfrm_state *x, struct nlattr *rta) +static int attach_crypt(struct xfrm_state *x, struct nlattr *rta, + struct netlink_ext_ack *extack) { struct xfrm_algo *p, *ualg; struct xfrm_algo_desc *algo; @@ -328,8 +404,10 @@ static int attach_crypt(struct xfrm_state *x, struct nlattr *rta) ualg = nla_data(rta); algo = xfrm_ealg_get_byname(ualg->alg_name, 1); - if (!algo) + if (!algo) { + NL_SET_ERR_MSG(extack, "Requested CRYPT algorithm not found"); return -ENOSYS; + } x->props.ealgo = algo->desc.sadb_alg_id; p = kmemdup(ualg, xfrm_alg_len(ualg), GFP_KERNEL); @@ -343,7 +421,7 @@ static int attach_crypt(struct xfrm_state *x, struct nlattr *rta) } static int attach_auth(struct xfrm_algo_auth **algpp, u8 *props, - struct nlattr *rta) + struct nlattr *rta, struct netlink_ext_ack *extack) { struct xfrm_algo *ualg; struct xfrm_algo_auth *p; @@ -355,8 +433,10 @@ static int attach_auth(struct xfrm_algo_auth **algpp, u8 *props, ualg = nla_data(rta); algo = 
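The new standalone SPI check for IPPROTO_COMP, split out above, exists because IPComp does not use a 32-bit SPI at all: RFC 3173 defines a 16-bit Compression Parameter Index, which xfrm stores in the low bits of the SPI field. Hence:

/* IPComp carries a 16-bit CPI on the wire (RFC 3173), so an SPI
 * configured for IPPROTO_COMP must fit in 16 bits. */
static bool comp_cpi_ok(__be32 spi)
{
    return ntohl(spi) < 0x10000;
}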
xfrm_aalg_get_byname(ualg->alg_name, 1); - if (!algo) + if (!algo) { + NL_SET_ERR_MSG(extack, "Requested AUTH algorithm not found"); return -ENOSYS; + } *props = algo->desc.sadb_alg_id; p = kmalloc(sizeof(*p) + (ualg->alg_key_len + 7) / 8, GFP_KERNEL); @@ -373,7 +453,7 @@ static int attach_auth(struct xfrm_algo_auth **algpp, u8 *props, } static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props, - struct nlattr *rta) + struct nlattr *rta, struct netlink_ext_ack *extack) { struct xfrm_algo_auth *p, *ualg; struct xfrm_algo_desc *algo; @@ -384,10 +464,14 @@ static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props, ualg = nla_data(rta); algo = xfrm_aalg_get_byname(ualg->alg_name, 1); - if (!algo) + if (!algo) { + NL_SET_ERR_MSG(extack, "Requested AUTH_TRUNC algorithm not found"); return -ENOSYS; - if (ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits) + } + if (ualg->alg_trunc_len > algo->uinfo.auth.icv_fullbits) { + NL_SET_ERR_MSG(extack, "Invalid length requested for truncated ICV"); return -EINVAL; + } *props = algo->desc.sadb_alg_id; p = kmemdup(ualg, xfrm_alg_auth_len(ualg), GFP_KERNEL); @@ -402,7 +486,8 @@ static int attach_auth_trunc(struct xfrm_algo_auth **algpp, u8 *props, return 0; } -static int attach_aead(struct xfrm_state *x, struct nlattr *rta) +static int attach_aead(struct xfrm_state *x, struct nlattr *rta, + struct netlink_ext_ack *extack) { struct xfrm_algo_aead *p, *ualg; struct xfrm_algo_desc *algo; @@ -413,8 +498,10 @@ static int attach_aead(struct xfrm_state *x, struct nlattr *rta) ualg = nla_data(rta); algo = xfrm_aead_get_byname(ualg->alg_name, ualg->alg_icv_len, 1); - if (!algo) + if (!algo) { + NL_SET_ERR_MSG(extack, "Requested AEAD algorithm not found"); return -ENOSYS; + } x->props.ealgo = algo->desc.sadb_alg_id; p = kmemdup(ualg, aead_len(ualg), GFP_KERNEL); @@ -579,7 +666,8 @@ static void xfrm_smark_init(struct nlattr **attrs, struct xfrm_mark *m) static struct xfrm_state *xfrm_state_construct(struct net *net, struct xfrm_usersa_info *p, struct nlattr **attrs, - int *errp) + int *errp, + struct netlink_ext_ack *extack) { struct xfrm_state *x = xfrm_state_alloc(net); int err = -ENOMEM; @@ -606,21 +694,21 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, if (attrs[XFRMA_SA_EXTRA_FLAGS]) x->props.extra_flags = nla_get_u32(attrs[XFRMA_SA_EXTRA_FLAGS]); - if ((err = attach_aead(x, attrs[XFRMA_ALG_AEAD]))) + if ((err = attach_aead(x, attrs[XFRMA_ALG_AEAD], extack))) goto error; if ((err = attach_auth_trunc(&x->aalg, &x->props.aalgo, - attrs[XFRMA_ALG_AUTH_TRUNC]))) + attrs[XFRMA_ALG_AUTH_TRUNC], extack))) goto error; if (!x->props.aalgo) { if ((err = attach_auth(&x->aalg, &x->props.aalgo, - attrs[XFRMA_ALG_AUTH]))) + attrs[XFRMA_ALG_AUTH], extack))) goto error; } - if ((err = attach_crypt(x, attrs[XFRMA_ALG_CRYPT]))) + if ((err = attach_crypt(x, attrs[XFRMA_ALG_CRYPT], extack))) goto error; if ((err = attach_one_algo(&x->calg, &x->props.calgo, xfrm_calg_get_byname, - attrs[XFRMA_ALG_COMP]))) + attrs[XFRMA_ALG_COMP], extack))) goto error; if (attrs[XFRMA_TFCPAD]) @@ -633,7 +721,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, if (attrs[XFRMA_IF_ID]) x->if_id = nla_get_u32(attrs[XFRMA_IF_ID]); - err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV]); + err = __xfrm_init_state(x, false, attrs[XFRMA_OFFLOAD_DEV], extack); if (err) goto error; @@ -653,7 +741,7 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, /* sysctl_xfrm_aevent_etime is in 100ms units */ x->replay_maxage = 
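attach_auth_trunc() above carries the one semantic check in this group: the requested truncated ICV length cannot exceed what the algorithm produces. For example, hmac(sha256) has icv_fullbits of 256, so the common alg_trunc_len of 128 passes, while anything above 256 now fails with the "Invalid length requested for truncated ICV" message. As a predicate:

/* Truncated-ICV bound from attach_auth_trunc(): lengths are in
 * bits, bounded by the algorithm's full ICV width. */
static bool trunc_len_ok(const struct xfrm_algo_desc *algo,
                         unsigned int alg_trunc_len)
{
    return alg_trunc_len <= algo->uinfo.auth.icv_fullbits;
}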
(net->xfrm.sysctl_aevent_etime*HZ)/XFRM_AE_ETH_M; - if ((err = xfrm_init_replay(x))) + if ((err = xfrm_init_replay(x, extack))) goto error; /* override default values from above */ @@ -662,7 +750,8 @@ static struct xfrm_state *xfrm_state_construct(struct net *net, /* configure the hardware if offload is requested */ if (attrs[XFRMA_OFFLOAD_DEV]) { err = xfrm_dev_state_add(net, x, - nla_data(attrs[XFRMA_OFFLOAD_DEV])); + nla_data(attrs[XFRMA_OFFLOAD_DEV]), + extack); if (err) goto error; } @@ -678,7 +767,7 @@ error_no_put: } static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_usersa_info *p = nlmsg_data(nlh); @@ -686,11 +775,11 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, int err; struct km_event c; - err = verify_newsa_info(p, attrs); + err = verify_newsa_info(p, attrs, extack); if (err) return err; - x = xfrm_state_construct(net, p, attrs, &err); + x = xfrm_state_construct(net, p, attrs, &err, extack); if (!x) return err; @@ -757,7 +846,7 @@ static struct xfrm_state *xfrm_user_state_lookup(struct net *net, } static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_state *x; @@ -1254,7 +1343,8 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, } static int xfrm_set_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrmu_spdhthresh *thresh4 = NULL; @@ -1299,7 +1389,8 @@ static int xfrm_set_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, } static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; @@ -1358,7 +1449,8 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, } static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; @@ -1378,7 +1470,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_usersa_id *p = nlmsg_data(nlh); @@ -1402,7 +1494,8 @@ out_noput: } static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_state *x; @@ -1477,7 +1570,7 @@ out_noput: return err; } -static int verify_policy_dir(u8 dir) +static int verify_policy_dir(u8 dir, struct netlink_ext_ack *extack) { switch (dir) { case XFRM_POLICY_IN: @@ -1486,13 +1579,14 @@ static int verify_policy_dir(u8 dir) break; default: + NL_SET_ERR_MSG(extack, "Invalid policy direction"); return -EINVAL; } return 0; } -static int verify_policy_type(u8 type) +static int verify_policy_type(u8 type, struct netlink_ext_ack *extack) { switch (type) { case XFRM_POLICY_TYPE_MAIN: @@ -1502,13 +1596,15 @@ static int verify_policy_type(u8 type) break; default: + NL_SET_ERR_MSG(extack, "Invalid policy 
type"); return -EINVAL; } return 0; } -static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) +static int verify_newpolicy_info(struct xfrm_userpolicy_info *p, + struct netlink_ext_ack *extack) { int ret; @@ -1520,6 +1616,7 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) break; default: + NL_SET_ERR_MSG(extack, "Invalid policy share"); return -EINVAL; } @@ -1529,35 +1626,44 @@ static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) break; default: + NL_SET_ERR_MSG(extack, "Invalid policy action"); return -EINVAL; } switch (p->sel.family) { case AF_INET: - if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) + if (p->sel.prefixlen_d > 32 || p->sel.prefixlen_s > 32) { + NL_SET_ERR_MSG(extack, "Invalid prefix length in selector (must be <= 32 for IPv4)"); return -EINVAL; + } break; case AF_INET6: #if IS_ENABLED(CONFIG_IPV6) - if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) + if (p->sel.prefixlen_d > 128 || p->sel.prefixlen_s > 128) { + NL_SET_ERR_MSG(extack, "Invalid prefix length in selector (must be <= 128 for IPv6)"); return -EINVAL; + } break; #else + NL_SET_ERR_MSG(extack, "IPv6 support disabled"); return -EAFNOSUPPORT; #endif default: + NL_SET_ERR_MSG(extack, "Invalid selector family"); return -EINVAL; } - ret = verify_policy_dir(p->dir); + ret = verify_policy_dir(p->dir, extack); if (ret) return ret; - if (p->index && (xfrm_policy_id2dir(p->index) != p->dir)) + if (p->index && (xfrm_policy_id2dir(p->index) != p->dir)) { + NL_SET_ERR_MSG(extack, "Policy index doesn't match direction"); return -EINVAL; + } return 0; } @@ -1599,13 +1705,16 @@ static void copy_templates(struct xfrm_policy *xp, struct xfrm_user_tmpl *ut, } } -static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) +static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family, + struct netlink_ext_ack *extack) { u16 prev_family; int i; - if (nr > XFRM_MAX_DEPTH) + if (nr > XFRM_MAX_DEPTH) { + NL_SET_ERR_MSG(extack, "Template count must be <= XFRM_MAX_DEPTH (" __stringify(XFRM_MAX_DEPTH) ")"); return -EINVAL; + } prev_family = family; @@ -1625,12 +1734,16 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) case XFRM_MODE_BEET: break; default: - if (ut[i].family != prev_family) + if (ut[i].family != prev_family) { + NL_SET_ERR_MSG(extack, "Mode in template doesn't support a family change"); return -EINVAL; + } break; } - if (ut[i].mode >= XFRM_MODE_MAX) + if (ut[i].mode >= XFRM_MODE_MAX) { + NL_SET_ERR_MSG(extack, "Mode in template must be < XFRM_MODE_MAX (" __stringify(XFRM_MODE_MAX) ")"); return -EINVAL; + } prev_family = ut[i].family; @@ -1642,17 +1755,21 @@ static int validate_tmpl(int nr, struct xfrm_user_tmpl *ut, u16 family) break; #endif default: + NL_SET_ERR_MSG(extack, "Invalid family in template"); return -EINVAL; } - if (!xfrm_id_proto_valid(ut[i].id.proto)) + if (!xfrm_id_proto_valid(ut[i].id.proto)) { + NL_SET_ERR_MSG(extack, "Invalid XFRM protocol in template"); return -EINVAL; + } } return 0; } -static int copy_from_user_tmpl(struct xfrm_policy *pol, struct nlattr **attrs) +static int copy_from_user_tmpl(struct xfrm_policy *pol, struct nlattr **attrs, + struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[XFRMA_TMPL]; @@ -1663,7 +1780,7 @@ static int copy_from_user_tmpl(struct xfrm_policy *pol, struct nlattr **attrs) int nr = nla_len(rt) / sizeof(*utmpl); int err; - err = validate_tmpl(nr, utmpl, pol->family); + err = validate_tmpl(nr, utmpl, pol->family, extack); if (err) return err; @@ -1672,7 +1789,8 
@@ static int copy_from_user_tmpl(struct xfrm_policy *pol, struct nlattr **attrs) return 0; } -static int copy_from_user_policy_type(u8 *tp, struct nlattr **attrs) +static int copy_from_user_policy_type(u8 *tp, struct nlattr **attrs, + struct netlink_ext_ack *extack) { struct nlattr *rt = attrs[XFRMA_POLICY_TYPE]; struct xfrm_userpolicy_type *upt; @@ -1684,7 +1802,7 @@ static int copy_from_user_policy_type(u8 *tp, struct nlattr **attrs) type = upt->type; } - err = verify_policy_type(type); + err = verify_policy_type(type, extack); if (err) return err; @@ -1719,7 +1837,11 @@ static void copy_to_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_i p->share = XFRM_SHARE_ANY; /* XXX xp->share */ } -static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_userpolicy_info *p, struct nlattr **attrs, int *errp) +static struct xfrm_policy *xfrm_policy_construct(struct net *net, + struct xfrm_userpolicy_info *p, + struct nlattr **attrs, + int *errp, + struct netlink_ext_ack *extack) { struct xfrm_policy *xp = xfrm_policy_alloc(net, GFP_KERNEL); int err; @@ -1731,11 +1853,11 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us copy_from_user_policy(xp, p); - err = copy_from_user_policy_type(&xp->type, attrs); + err = copy_from_user_policy_type(&xp->type, attrs, extack); if (err) goto error; - if (!(err = copy_from_user_tmpl(xp, attrs))) + if (!(err = copy_from_user_tmpl(xp, attrs, extack))) err = copy_from_user_sec_ctx(xp, attrs); if (err) goto error; @@ -1754,7 +1876,8 @@ static struct xfrm_policy *xfrm_policy_construct(struct net *net, struct xfrm_us } static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_userpolicy_info *p = nlmsg_data(nlh); @@ -1763,14 +1886,14 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, int err; int excl; - err = verify_newpolicy_info(p); + err = verify_newpolicy_info(p, extack); if (err) return err; - err = verify_sec_ctx_len(attrs); + err = verify_sec_ctx_len(attrs, extack); if (err) return err; - xp = xfrm_policy_construct(net, p, attrs, &err); + xp = xfrm_policy_construct(net, p, attrs, &err, extack); if (!xp) return err; @@ -2015,7 +2138,7 @@ static bool xfrm_userpolicy_is_valid(__u8 policy) } static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_userpolicy_default *up = nlmsg_data(nlh); @@ -2036,7 +2159,7 @@ static int xfrm_set_default(struct sk_buff *skb, struct nlmsghdr *nlh, } static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, struct netlink_ext_ack *extack) { struct sk_buff *r_skb; struct nlmsghdr *r_nlh; @@ -2066,7 +2189,8 @@ static int xfrm_get_default(struct sk_buff *skb, struct nlmsghdr *nlh, } static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_policy *xp; @@ -2081,11 +2205,11 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, p = nlmsg_data(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; - err = copy_from_user_policy_type(&type, attrs); + err = copy_from_user_policy_type(&type, attrs, extack); if (err) return err; - err = 
verify_policy_dir(p->dir); + err = verify_policy_dir(p->dir, extack); if (err) return err; @@ -2101,7 +2225,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; - err = verify_sec_ctx_len(attrs); + err = verify_sec_ctx_len(attrs, extack); if (err) return err; @@ -2149,7 +2273,8 @@ out: } static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct km_event c; @@ -2249,7 +2374,7 @@ out_cancel: } static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_state *x; @@ -2293,7 +2418,7 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, } static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_state *x; @@ -2344,14 +2469,15 @@ out: } static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct km_event c; u8 type = XFRM_POLICY_TYPE_MAIN; int err; - err = copy_from_user_policy_type(&type, attrs); + err = copy_from_user_policy_type(&type, attrs, extack); if (err) return err; @@ -2372,7 +2498,8 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, } static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_policy *xp; @@ -2383,11 +2510,11 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct xfrm_mark m; u32 if_id = 0; - err = copy_from_user_policy_type(&type, attrs); + err = copy_from_user_policy_type(&type, attrs, extack); if (err) return err; - err = verify_policy_dir(p->dir); + err = verify_policy_dir(p->dir, extack); if (err) return err; @@ -2403,7 +2530,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, struct nlattr *rt = attrs[XFRMA_SEC_CTX]; struct xfrm_sec_ctx *ctx; - err = verify_sec_ctx_len(attrs); + err = verify_sec_ctx_len(attrs, extack); if (err) return err; @@ -2438,7 +2565,8 @@ out: } static int xfrm_add_sa_expire(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_state *x; @@ -2472,7 +2600,8 @@ out: } static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, + struct netlink_ext_ack *extack) { struct net *net = sock_net(skb->sk); struct xfrm_policy *xp; @@ -2490,15 +2619,15 @@ static int xfrm_add_acquire(struct sk_buff *skb, struct nlmsghdr *nlh, xfrm_mark_get(attrs, &mark); - err = verify_newpolicy_info(&ua->policy); + err = verify_newpolicy_info(&ua->policy, extack); if (err) goto free_state; - err = verify_sec_ctx_len(attrs); + err = verify_sec_ctx_len(attrs, extack); if (err) goto free_state; /* build an XP */ - xp = xfrm_policy_construct(net, &ua->policy, attrs, &err); + xp = xfrm_policy_construct(net, &ua->policy, attrs, &err, extack); if (!xp) goto free_state; @@ -2577,7 +2706,7 @@ static int 
copy_from_user_migrate(struct xfrm_migrate *ma, } static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, struct netlink_ext_ack *extack) { struct xfrm_userpolicy_id *pi = nlmsg_data(nlh); struct xfrm_migrate m[XFRM_MAX_DEPTH]; @@ -2594,7 +2723,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, kmp = attrs[XFRMA_KMADDRESS] ? &km : NULL; - err = copy_from_user_policy_type(&type, attrs); + err = copy_from_user_policy_type(&type, attrs, extack); if (err) return err; @@ -2623,7 +2752,7 @@ static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, } #else static int xfrm_do_migrate(struct sk_buff *skb, struct nlmsghdr *nlh, - struct nlattr **attrs) + struct nlattr **attrs, struct netlink_ext_ack *extack) { return -ENOPROTOOPT; } @@ -2819,7 +2948,8 @@ static const struct nla_policy xfrma_spd_policy[XFRMA_SPD_MAX+1] = { }; static const struct xfrm_link { - int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **); + int (*doit)(struct sk_buff *, struct nlmsghdr *, struct nlattr **, + struct netlink_ext_ack *); int (*start)(struct netlink_callback *); int (*dump)(struct sk_buff *, struct netlink_callback *); int (*done)(struct netlink_callback *); @@ -2921,7 +3051,7 @@ static int xfrm_user_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, goto err; } - err = link->doit(skb, nlh, attrs); + err = link->doit(skb, nlh, attrs, extack); /* We need to free skb allocated in xfrm_alloc_compat() before * returning from this function, because consume_skb() won't take @@ -3272,11 +3402,11 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, *dir = -EINVAL; if (len < sizeof(*p) || - verify_newpolicy_info(p)) + verify_newpolicy_info(p, NULL)) return NULL; nr = ((len - sizeof(*p)) / sizeof(*ut)); - if (validate_tmpl(nr, ut, p->sel.family)) + if (validate_tmpl(nr, ut, p->sel.family, NULL)) return NULL; if (p->dir > XFRM_POLICY_OUT)
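End to end, the visible effect of threading extack through every doit handler in the dispatch table above: a failed request now comes back as an NLMSG_ERROR whose TLVs include NLMSGERR_ATTR_MSG, which iproute2 prints after "Error:". A rough userspace sketch of digging the string out of the ack, with error handling trimmed and the offset logic subject to the usual NLM_F_CAPPED caveats:

#include <linux/netlink.h>

/* Extract the extack message from an NLMSG_ERROR reply, if the
 * kernel attached one. Sketch only: assumes a well-formed ack and
 * skips the echoed request unless the kernel capped it. */
static const char *extack_msg(const struct nlmsghdr *nlh)
{
    const struct nlmsgerr *err = NLMSG_DATA(nlh);
    unsigned int tlv_off = sizeof(*err);
    const struct nlattr *a;

    if (nlh->nlmsg_type != NLMSG_ERROR ||
        !(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
        return NULL;

    /* the offending message is echoed after the error header
     * unless it was capped to its header alone */
    if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
        tlv_off += err->msg.nlmsg_len - NLMSG_HDRLEN;

    for (a = (const struct nlattr *)((const char *)err + tlv_off);
         (const char *)a + NLA_HDRLEN <= (const char *)nlh + nlh->nlmsg_len;
         a = (const struct nlattr *)((const char *)a + NLA_ALIGN(a->nla_len)))
        if ((a->nla_type & NLA_TYPE_MASK) == NLMSGERR_ATTR_MSG)
            return (const char *)a + NLA_HDRLEN;

    return NULL;
}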