From fee48cf8374569a3888fd8c8536283e6067f0cfb Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Fri, 1 Apr 2016 14:12:12 -0700 Subject: ath10k: fix deadlock when peer cannot be created We must not attempt to send WMI packets while holding the data-lock, as it may deadlock: BUG: sleeping function called from invalid context at drivers/net/wireless/ath/ath10k/wmi.c:1824 in_atomic(): 1, irqs_disabled(): 0, pid: 2878, name: wpa_supplicant ============================================= [ INFO: possible recursive locking detected ] 4.4.6+ #21 Tainted: G W O --------------------------------------------- wpa_supplicant/2878 is trying to acquire lock: (&(&ar->data_lock)->rlock){+.-...}, at: [] ath10k_wmi_tx_beacons_iter+0x26/0x11a [ath10k_core] but task is already holding lock: (&(&ar->data_lock)->rlock){+.-...}, at: [] ath10k_peer_create+0x122/0x1ae [ath10k_core] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&ar->data_lock)->rlock); lock(&(&ar->data_lock)->rlock); *** DEADLOCK *** May be due to missing lock nesting notation 4 locks held by wpa_supplicant/2878: #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x12/0x14 #1: (&ar->conf_mutex){+.+.+.}, at: [] ath10k_add_interface+0x3b/0xbda [ath10k_core] #2: (&(&ar->data_lock)->rlock){+.-...}, at: [] ath10k_peer_create+0x122/0x1ae [ath10k_core] #3: (rcu_read_lock){......}, at: [] rcu_read_lock+0x0/0x66 [mac80211] stack backtrace: CPU: 3 PID: 2878 Comm: wpa_supplicant Tainted: G W O 4.4.6+ #21 Hardware name: To be filled by O.E.M. To be filled by O.E.M./ChiefRiver, BIOS 4.6.5 06/07/2013 0000000000000000 ffff8801fcadf8f0 ffffffff8137086d ffffffff82681720 ffffffff82681720 ffff8801fcadf9b0 ffffffff8112e3be ffff8801fcadf920 0000000100000000 ffffffff82681720 ffffffffa0721500 ffff8801fcb8d348 Call Trace: [] dump_stack+0x81/0xb6 [] __lock_acquire+0xc5b/0xde7 [] ? ath10k_wmi_tx_beacons_iter+0x15/0x11a [ath10k_core] [] ? mark_lock+0x24/0x201 [] lock_acquire+0x132/0x1cb [] ? lock_acquire+0x132/0x1cb [] ? ath10k_wmi_tx_beacons_iter+0x26/0x11a [ath10k_core] [] ? ath10k_wmi_cmd_send_nowait+0x1ce/0x1ce [ath10k_core] [] _raw_spin_lock_bh+0x31/0x40 [] ? ath10k_wmi_tx_beacons_iter+0x26/0x11a [ath10k_core] [] ath10k_wmi_tx_beacons_iter+0x26/0x11a [ath10k_core] [] ? ath10k_wmi_cmd_send_nowait+0x1ce/0x1ce [ath10k_core] [] __iterate_interfaces+0x9d/0x13d [mac80211] [] ieee80211_iterate_active_interfaces_atomic+0x32/0x3e [mac80211] [] ? ath10k_wmi_cmd_send_nowait+0x1ce/0x1ce [ath10k_core] [] ath10k_wmi_tx_beacons_nowait.isra.13+0x14/0x16 [ath10k_core] [] ath10k_wmi_cmd_send+0x71/0x242 [ath10k_core] [] ath10k_wmi_peer_delete+0x3f/0x42 [ath10k_core] [] ath10k_peer_create+0x15e/0x1ae [ath10k_core] [] ath10k_add_interface+0x70d/0xbda [ath10k_core] [] drv_add_interface+0x123/0x1a5 [mac80211] [] ieee80211_do_open+0x351/0x667 [mac80211] [] ieee80211_open+0x49/0x4c [mac80211] [] __dev_open+0x88/0xde [] __dev_change_flags+0xa4/0x13a [] dev_change_flags+0x1f/0x54 [] devinet_ioctl+0x2b9/0x5c9 [] ? copy_to_user+0x32/0x38 [] inet_ioctl+0x81/0x9d [] ? inet_ioctl+0x81/0x9d [] sock_do_ioctl+0x20/0x3d [] sock_ioctl+0x222/0x22e [] do_vfs_ioctl+0x453/0x4d7 [] ? __sys_recvmsg+0x4c/0x5b [] ? 
__fget_light+0x48/0x6c [] SyS_ioctl+0x52/0x74 [] entry_SYSCALL_64_fastpath+0x16/0x7a Signed-off-by: Ben Greear Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/mac.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index 6dd1d26b357f..4040f9413e86 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -679,10 +679,10 @@ static int ath10k_peer_create(struct ath10k *ar, peer = ath10k_peer_find(ar, vdev_id, addr); if (!peer) { + spin_unlock_bh(&ar->data_lock); ath10k_warn(ar, "failed to find peer %pM on vdev %i after creation\n", addr, vdev_id); ath10k_wmi_peer_delete(ar, vdev_id, addr); - spin_unlock_bh(&ar->data_lock); return -ENOENT; } -- cgit v1.2.3-70-g09d2 From 8d0a0710ea0d22881fdb40eb79d346a98cc64ae6 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 2 Jun 2016 17:59:54 +0300 Subject: ath10k: fix crash related to printing features This looks like a regression from commit c4cdf753ed42 ("ath10k: move fw_features to struct ath10k_fw_file"), we were printing the features from a wrong struct. Fixes: c4cdf753ed42 ("ath10k: move fw_features to struct ath10k_fw_file") Signed-off-by: Ben Greear [kvalo@qca.qualcomm.com: improve commit log] Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c index 49af62428c88..a92a0ba829f5 100644 --- a/drivers/net/wireless/ath/ath10k/core.c +++ b/drivers/net/wireless/ath/ath10k/core.c @@ -1083,7 +1083,7 @@ int ath10k_core_fetch_firmware_api_n(struct ath10k *ar, const char *name, } ath10k_dbg_dump(ar, ATH10K_DBG_BOOT, "features", "", - ar->running_fw->fw_file.fw_features, + fw_file->fw_features, sizeof(fw_file->fw_features)); break; case ATH10K_FW_IE_FW_IMAGE: -- cgit v1.2.3-70-g09d2 From 06a84db74c3572cde79eb1b04f301399eafb8226 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 2 May 2016 15:27:34 +0300 Subject: iwlwifi: mvm: increase scan timeout to 20 seconds The 16 seconds timeout we were using turned out to be too short. Recalculations by system show that the total time in both bands should be < 18.5 seconds, even in the slowest cases (e.g. DCM P2P with DTIM=2). Rounding it up to 20 seconds for a bit more safety. Fixes: 728e825f81b1 ("iwlwifi: mvm: add a scan timeout for regular scans") Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c index 6f609dd5c222..e78fc567ff7d 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c @@ -1222,7 +1222,7 @@ static int iwl_mvm_check_running_scans(struct iwl_mvm *mvm, int type) return -EIO; } -#define SCAN_TIMEOUT (16 * HZ) +#define SCAN_TIMEOUT (20 * HZ) void iwl_mvm_scan_timeout(unsigned long data) { -- cgit v1.2.3-70-g09d2 From 7d6a1ab6a2db180122dee8db6c201f2dcf677813 Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 15 May 2016 10:20:29 +0300 Subject: iwlwifi: mvm: fix RCU splat in TKIP's update_key The commit below mistakenly changed an rcu_dereference_check to a rcu_dereference_protected which introduced the following RCU warning: [ INFO: suspicious RCU usage. 
] 4.6.0-rc7-next-20160513-dbg-00004-g8de8b92-dirty #655 Not tainted ------------------------------- drivers/net/wireless/intel/iwlwifi/mvm/mvm.h:1069 suspicious rcu_dereference_protected() usage! Call Trace: [] lockdep_rcu_suspicious+0xf7/0x100 [] iwl_mvm_get_key_sta.part.0+0x5d/0x80 [iwlmvm] [] iwl_mvm_update_tkip_key+0xd3/0x162 [iwlmvm] [] iwl_mvm_mac_update_tkip_key+0x17/0x19 [iwlmvm] [] ieee80211_tkip_decrypt_data+0x22c/0x24b [mac80211] [] ieee80211_crypto_tkip_decrypt+0xc5/0x110 [mac80211] [] ieee80211_rx_handlers+0x9bb/0x1fe1 [mac80211] Fixes: 13303c0fb148 ("iwlwifi: mvm: use helpers to get iwl_mvm_sta") Reported-by: Sergey Senozhatsky Signed-off-by: Emmanuel Grumbach Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index fea4d3437e2f..0454bfe0ef6c 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1852,12 +1852,18 @@ static struct iwl_mvm_sta *iwl_mvm_get_key_sta(struct iwl_mvm *mvm, mvmvif->ap_sta_id != IWL_MVM_STATION_COUNT) { u8 sta_id = mvmvif->ap_sta_id; + sta = rcu_dereference_check(mvm->fw_id_to_mac_id[sta_id], + lockdep_is_held(&mvm->mutex)); + /* * It is possible that the 'sta' parameter is NULL, * for example when a GTK is removed - the sta_id will then * be the AP ID, and no station was passed by mac80211. */ - return iwl_mvm_sta_from_staid_protected(mvm, sta_id); + if (IS_ERR_OR_NULL(sta)) + return NULL; + + return iwl_mvm_sta_from_mac80211(sta); } return NULL; -- cgit v1.2.3-70-g09d2 From 1f9788f335d7c3145bcb59bd570c5b9ef7203ef4 Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Mon, 16 May 2016 14:34:20 +0300 Subject: iwlwifi: mvm: fix potential NULL-dereference in iwl_mvm_reorder() We try to access sta before we check for IS_ERR_OR_NULL(), so we may end up accessing a NULL pointer. To prevent that, move the conversion from sta to mvm_sta below the check. Fixes: b915c10174fb ("iwlwifi: mvm: add reorder buffer per queue") Reported-by: Dan Carpenter Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c index ac2c5718e454..2c61516d06ff 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/rxmq.c @@ -581,7 +581,7 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, struct iwl_rx_mpdu_desc *desc) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; - struct iwl_mvm_sta *mvm_sta = iwl_mvm_sta_from_mac80211(sta); + struct iwl_mvm_sta *mvm_sta; struct iwl_mvm_baid_data *baid_data; struct iwl_mvm_reorder_buffer *buffer; struct sk_buff *tail; @@ -604,6 +604,8 @@ static bool iwl_mvm_reorder(struct iwl_mvm *mvm, if (WARN_ON(IS_ERR_OR_NULL(sta))) return false; + mvm_sta = iwl_mvm_sta_from_mac80211(sta); + /* not a data packet */ if (!ieee80211_is_data_qos(hdr->frame_control) || is_multicast_ether_addr(hdr->addr1)) -- cgit v1.2.3-70-g09d2 From aa950524d501afa28869b7f56e539fd9e744dd9f Mon Sep 17 00:00:00 2001 From: Ayala Beker Date: Wed, 1 Jun 2016 00:28:09 +0300 Subject: iwlwifi: mvm: set the encryption type of an IGTK key The FW expect the driver to set the encryption algorithm type when installing the IGTK key in the HW. 
Currently when installing CMAC IGTK key we don't set the algorithm type and as a result the FW fails to calculate the MIC of multicast management frames. Fix it. Signed-off-by: Ayala Beker Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c index 0454bfe0ef6c..b23ab4a4504f 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c @@ -1961,6 +1961,14 @@ static int iwl_mvm_send_sta_igtk(struct iwl_mvm *mvm, struct ieee80211_key_seq seq; const u8 *pn; + switch (keyconf->cipher) { + case WLAN_CIPHER_SUITE_AES_CMAC: + igtk_cmd.ctrl_flags |= cpu_to_le32(STA_KEY_FLG_CCM); + break; + default: + return -EINVAL; + } + memcpy(igtk_cmd.IGTK, keyconf->key, keyconf->keylen); ieee80211_get_key_rx_seq(keyconf, 0, &seq); pn = seq.aes_cmac.pn; -- cgit v1.2.3-70-g09d2 From 280a3efa82fccc9532c968a77e5162cb9f0af497 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 7 Jun 2016 14:46:37 +0200 Subject: iwlwifi: mvm: fix a few firmware capability checks My cleanup in "iwlwifi: prepare for higher API/CAPA bits" accidentally inverted a few tests - fix them. Fixes: 859d914c8f5c ("iwlwifi: prepare for higher API/CAPA bits") Reported-by: Sara Sharon Signed-off-by: Johannes Berg Signed-off-by: Luca Coelho --- drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c index e5f267b21316..18a8474b5760 100644 --- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c @@ -3851,8 +3851,8 @@ static int iwl_mvm_mac_get_survey(struct ieee80211_hw *hw, int idx, if (idx != 0) return -ENOENT; - if (fw_has_capa(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS)) + if (!fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS)) return -ENOENT; mutex_lock(&mvm->mutex); @@ -3898,8 +3898,8 @@ static void iwl_mvm_mac_sta_statistics(struct ieee80211_hw *hw, struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif); struct iwl_mvm_sta *mvmsta = iwl_mvm_sta_from_mac80211(sta); - if (fw_has_capa(&mvm->fw->ucode_capa, - IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS)) + if (!fw_has_capa(&mvm->fw->ucode_capa, + IWL_UCODE_TLV_CAPA_RADIO_BEACON_STATS)) return; /* if beacon filtering isn't on mac80211 does it anyway */ -- cgit v1.2.3-70-g09d2 From fc0f7e3317c5f406e6d5520209b4689a4ffecfdf Mon Sep 17 00:00:00 2001 From: Manfred Schlaegl Date: Mon, 6 Jun 2016 10:47:47 +0200 Subject: net: phy: smsc: reintroduced unconditional soft reset We detected some problems using the smsc lan8720a in combination with i.MX28 and tracked this down to commit 21009686662f ("net: phy: smsc: move smsc_phy_config_init reset part in a soft_reset function") With 2100968666 the generic soft reset is replaced by a specific function which handles power down state correctly. But additionally the soft reset itself got conditional and is therefore also only performed if the phy is in power down state. This patch keeps the conditional wake up from power down, but re-introduces the unconditional soft reset using the generic soft reset function. It was tested on linux-4.1.25 and linux-4.7.0-rc2. Signed-off-by: Manfred Schlaegl Signed-off-by: David S. 
Miller --- drivers/net/phy/smsc.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) diff --git a/drivers/net/phy/smsc.c b/drivers/net/phy/smsc.c index 2e21e9366f76..b62c4aaee40b 100644 --- a/drivers/net/phy/smsc.c +++ b/drivers/net/phy/smsc.c @@ -75,22 +75,13 @@ static int smsc_phy_reset(struct phy_device *phydev) * in all capable mode before using it. */ if ((rc & MII_LAN83C185_MODE_MASK) == MII_LAN83C185_MODE_POWERDOWN) { - int timeout = 50000; - - /* set "all capable" mode and reset the phy */ + /* set "all capable" mode */ rc |= MII_LAN83C185_MODE_ALL; phy_write(phydev, MII_LAN83C185_SPECIAL_MODES, rc); - phy_write(phydev, MII_BMCR, BMCR_RESET); - - /* wait end of reset (max 500 ms) */ - do { - udelay(10); - if (timeout-- == 0) - return -1; - rc = phy_read(phydev, MII_BMCR); - } while (rc & BMCR_RESET); } - return 0; + + /* reset the phy */ + return genphy_soft_reset(phydev); } static int lan911x_config_init(struct phy_device *phydev) -- cgit v1.2.3-70-g09d2 From 56fae404fb2c306db0a35dad0d16fa24c65678f3 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 7 Jun 2016 12:06:58 +0300 Subject: bridge: Fix incorrect re-injection of STP packets Commit 8626c56c8279 ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict") fixed incorrect usage of NF_HOOK's return value by consuming packets in okfn via br_pass_frame_up(). However, this function re-injects packets to the Rx path with skb->dev set to the bridge device, which breaks kernel's STP, as all STP packets appear to originate from the bridge device itself. Instead, if STP is enabled and bridge isn't a 802.1ad bridge, then learn packet's SMAC and inject it back to the Rx path for further processing by the packet handlers. The patch also makes netfilter's behavior consistent with regards to packets destined to the Bridge Group Address, as no hook registered at LOCAL_IN will ever be called, regardless if STP is enabled or not. Cc: Florian Westphal Cc: Shmulik Ladkani Cc: Toshiaki Makita Fixes: 8626c56c8279 ("bridge: fix potential use-after-free when hook returns QUEUE or STOLEN verdict") Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- net/bridge/br_input.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c index 160797722228..43d2cd862bc2 100644 --- a/net/bridge/br_input.c +++ b/net/bridge/br_input.c @@ -213,8 +213,7 @@ drop: } EXPORT_SYMBOL_GPL(br_handle_frame_finish); -/* note: already called with rcu_read_lock */ -static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +static void __br_handle_local_finish(struct sk_buff *skb) { struct net_bridge_port *p = br_port_get_rcu(skb->dev); u16 vid = 0; @@ -222,6 +221,14 @@ static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_bu /* check if vlan is allowed, to avoid spoofing */ if (p->flags & BR_LEARNING && br_should_learn(p, skb, &vid)) br_fdb_update(p->br, p, eth_hdr(skb)->h_source, vid, false); +} + +/* note: already called with rcu_read_lock */ +static int br_handle_local_finish(struct net *net, struct sock *sk, struct sk_buff *skb) +{ + struct net_bridge_port *p = br_port_get_rcu(skb->dev); + + __br_handle_local_finish(skb); BR_INPUT_SKB_CB(skb)->brdev = p->br->dev; br_pass_frame_up(skb); @@ -274,7 +281,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb) if (p->br->stp_enabled == BR_NO_STP || fwd_mask & (1u << dest[5])) goto forward; - break; + *pskb = skb; + __br_handle_local_finish(skb); + return RX_HANDLER_PASS; case 0x01: /* IEEE MAC (Pause) */ goto drop; -- cgit v1.2.3-70-g09d2 From c3ec5e5ce9cea1f369a5a8ad69d6471680796bc6 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Thu, 9 Jun 2016 18:05:09 +0100 Subject: net: diag: add missing declarations The functions inet_diag_msg_common_fill and inet_diag_msg_attrs_fill seem to have been missed from the include/linux/inet_diag.h header file. Add them to fix the following warnings: net/ipv4/inet_diag.c:69:6: warning: symbol 'inet_diag_msg_common_fill' was not declared. Should it be static? net/ipv4/inet_diag.c:108:5: warning: symbol 'inet_diag_msg_attrs_fill' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: David S. 
Miller --- include/linux/inet_diag.h | 6 ++++++ net/sctp/sctp_diag.c | 6 ------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h index 7c27fa1030e8..feb04ea20f11 100644 --- a/include/linux/inet_diag.h +++ b/include/linux/inet_diag.h @@ -52,6 +52,12 @@ struct sock *inet_diag_find_one_icsk(struct net *net, int inet_diag_bc_sk(const struct nlattr *_bc, struct sock *sk); +void inet_diag_msg_common_fill(struct inet_diag_msg *r, struct sock *sk); + +int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, + struct inet_diag_msg *r, int ext, + struct user_namespace *user_ns); + extern int inet_diag_register(const struct inet_diag_handler *handler); extern void inet_diag_unregister(const struct inet_diag_handler *handler); #endif /* _INET_DIAG_H_ */ diff --git a/net/sctp/sctp_diag.c b/net/sctp/sctp_diag.c index 1ce724b87618..f69edcf219e5 100644 --- a/net/sctp/sctp_diag.c +++ b/net/sctp/sctp_diag.c @@ -3,12 +3,6 @@ #include #include -extern void inet_diag_msg_common_fill(struct inet_diag_msg *r, - struct sock *sk); -extern int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb, - struct inet_diag_msg *r, int ext, - struct user_namespace *user_ns); - static void sctp_diag_get_info(struct sock *sk, struct inet_diag_msg *r, void *info); -- cgit v1.2.3-70-g09d2 From 0b392be9a86560dae3af2e7528f226ff465ab549 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Fri, 10 Jun 2016 12:11:06 +0100 Subject: net: ipconfig: avoid warning by making ic_addrservaddr static The symbol ic_addrservaddr is not static, but has no declaration to match so make it static to fix the following warning: net/ipv4/ipconfig.c:130:8: warning: symbol 'ic_addrservaddr' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- net/ipv4/ipconfig.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index 2ed9dd2b5f2f..eccf9fd190c0 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -127,7 +127,7 @@ __be32 ic_myaddr = NONE; /* My IP address */ static __be32 ic_netmask = NONE; /* Netmask for local subnet */ __be32 ic_gateway = NONE; /* Gateway IP address */ -__be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ +static __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ __be32 ic_servaddr = NONE; /* Boot server IP address */ -- cgit v1.2.3-70-g09d2 From 562c5a70400c75f50d99de631666ac7cc2f03958 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:27:58 +0200 Subject: net: mediatek: add missing return code check The code fails to check if the scratch memory was properly allocated. Add this check and return with an error if the allocation failed. Signed-off-by: John Crispin Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 4763252bbf85..11cd7304e497 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -495,6 +495,9 @@ static int mtk_init_fq_dma(struct mtk_eth *eth) eth->scratch_head = kcalloc(cnt, MTK_QDMA_PAGE_SIZE, GFP_KERNEL); + if (unlikely(!eth->scratch_head)) + return -ENOMEM; + dma_addr = dma_map_single(eth->dev, eth->scratch_head, cnt * MTK_QDMA_PAGE_SIZE, DMA_FROM_DEVICE); -- cgit v1.2.3-70-g09d2 From 605e4fe476956c67ced8518247e6c9601a31b868 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:27:59 +0200 Subject: net: mediatek: fix missing free of scratch memory Scratch memory gets allocated in mtk_init_fq_dma() but the corresponding code to free it is missing inside mtk_dma_free() causing a memory leak. With this patch applied, we can run ifconfig up/down several thousand times without any problems. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 18 +++++++++++++----- drivers/net/ethernet/mediatek/mtk_eth_soc.h | 2 ++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 11cd7304e497..fb8b17db7fa2 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -481,14 +481,14 @@ static inline void mtk_rx_get_desc(struct mtk_rx_dma *rxd, /* the qdma core needs scratch memory to be setup */ static int mtk_init_fq_dma(struct mtk_eth *eth) { - dma_addr_t phy_ring_head, phy_ring_tail; + dma_addr_t phy_ring_tail; int cnt = MTK_DMA_SIZE; dma_addr_t dma_addr; int i; eth->scratch_ring = dma_alloc_coherent(eth->dev, cnt * sizeof(struct mtk_tx_dma), - &phy_ring_head, + ð->phy_scratch_ring, GFP_ATOMIC | __GFP_ZERO); if (unlikely(!eth->scratch_ring)) return -ENOMEM; @@ -505,19 +505,19 @@ static int mtk_init_fq_dma(struct mtk_eth *eth) return -ENOMEM; memset(eth->scratch_ring, 0x0, sizeof(struct mtk_tx_dma) * cnt); - phy_ring_tail = phy_ring_head + + phy_ring_tail = eth->phy_scratch_ring + (sizeof(struct mtk_tx_dma) * (cnt - 1)); for (i = 0; i < cnt; i++) { eth->scratch_ring[i].txd1 = (dma_addr + (i * MTK_QDMA_PAGE_SIZE)); if (i < cnt - 1) - eth->scratch_ring[i].txd2 = (phy_ring_head + + eth->scratch_ring[i].txd2 = (eth->phy_scratch_ring + ((i + 1) * sizeof(struct mtk_tx_dma))); eth->scratch_ring[i].txd3 = TX_DMA_SDL(MTK_QDMA_PAGE_SIZE); } - mtk_w32(eth, phy_ring_head, MTK_QDMA_FQ_HEAD); + mtk_w32(eth, eth->phy_scratch_ring, MTK_QDMA_FQ_HEAD); mtk_w32(eth, phy_ring_tail, MTK_QDMA_FQ_TAIL); mtk_w32(eth, (cnt << 16) | cnt, MTK_QDMA_FQ_CNT); mtk_w32(eth, MTK_QDMA_PAGE_SIZE << 16, MTK_QDMA_FQ_BLEN); @@ -1210,6 +1210,14 @@ static void mtk_dma_free(struct mtk_eth *eth) for (i = 0; i < MTK_MAC_COUNT; i++) if (eth->netdev[i]) netdev_reset_queue(eth->netdev[i]); + if (eth->scratch_ring) { + dma_free_coherent(eth->dev, + MTK_DMA_SIZE * sizeof(struct mtk_tx_dma), + eth->scratch_ring, + eth->phy_scratch_ring); + eth->scratch_ring = NULL; + eth->phy_scratch_ring = 0; + } mtk_tx_clean(eth); mtk_rx_clean(eth); kfree(eth->scratch_head); diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h index eed626d56ea4..57f7e8a3dbe2 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h +++ 
b/drivers/net/ethernet/mediatek/mtk_eth_soc.h @@ -357,6 +357,7 @@ struct mtk_rx_ring { * @rx_ring: Pointer to the memore holding info about the RX ring * @rx_napi: The NAPI struct * @scratch_ring: Newer SoCs need memory for a second HW managed TX ring + * @phy_scratch_ring: physical address of scratch_ring * @scratch_head: The scratch memory that scratch_ring points to. * @clk_ethif: The ethif clock * @clk_esw: The switch clock @@ -384,6 +385,7 @@ struct mtk_eth { struct mtk_rx_ring rx_ring; struct napi_struct rx_napi; struct mtk_tx_dma *scratch_ring; + dma_addr_t phy_scratch_ring; void *scratch_head; struct clk *clk_ethif; struct clk *clk_esw; -- cgit v1.2.3-70-g09d2 From 2fae723cefb8bfe712371978288aa0a70b54802e Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:00 +0200 Subject: net: mediatek: invalid buffer lookup in mtk_tx_map() The lookup of the tx_buffer in the error path inside mtk_tx_map() uses the wrong descriptor pointer. This looks like a copy & paste error. Change the code to use the correct pointer. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index fb8b17db7fa2..8c3e54387948 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -674,7 +674,7 @@ static int mtk_tx_map(struct sk_buff *skb, struct net_device *dev, err_dma: do { - tx_buf = mtk_desc_to_tx_buf(ring, txd); + tx_buf = mtk_desc_to_tx_buf(ring, itxd); /* unmap dma */ mtk_tx_unmap(&dev->dev, tx_buf); -- cgit v1.2.3-70-g09d2 From 94321a9fc9f5b6c6e949cc7c69741538f556ab74 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:01 +0200 Subject: net: mediatek: dropped rx packets are not being counted properly There are two places inside mtk_poll_rx where rx_dropped is not being incremented properly. Fix this by adding the missing code to increment the counter. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 8c3e54387948..b6d3c217722d 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -829,6 +829,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, DMA_FROM_DEVICE); if (unlikely(dma_mapping_error(&netdev->dev, dma_addr))) { skb_free_frag(new_data); + netdev->stats.rx_dropped++; goto release_desc; } @@ -836,6 +837,7 @@ static int mtk_poll_rx(struct napi_struct *napi, int budget, skb = build_skb(data, ring->frag_size); if (unlikely(!skb)) { put_page(virt_to_head_page(new_data)); + netdev->stats.rx_dropped++; goto release_desc; } skb_reserve(skb, NET_SKB_PAD + NET_IP_ALIGN); -- cgit v1.2.3-70-g09d2 From 6675086d04e7c0748cd5884f7c8611b5f0836250 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:02 +0200 Subject: net: mediatek: add next data pointer coherency protection The QDMA engine can fail to update the register pointing to the next TX descriptor if this bit does not get set in the QDMA configuration register. Not setting this bit can result in invalid values inside the TX rings registers which will causes TX stalls. Signed-off-by: John Crispin Signed-off-by: David S. 
Miller
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +-
 drivers/net/ethernet/mediatek/mtk_eth_soc.h | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index b6d3c217722d..f30c2e474b87 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1282,7 +1282,7 @@ static int mtk_start_dma(struct mtk_eth *eth)
 	mtk_w32(eth,
 		MTK_TX_WB_DDONE | MTK_RX_DMA_EN | MTK_TX_DMA_EN |
 		MTK_RX_2B_OFFSET | MTK_DMA_SIZE_16DWORDS |
-		MTK_RX_BT_32DWORDS,
+		MTK_RX_BT_32DWORDS | MTK_NDP_CO_PRO,
 		MTK_QDMA_GLO_CFG);
 
 	return 0;
diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.h b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
index 57f7e8a3dbe2..a5eb7c62306b 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.h
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.h
@@ -91,6 +91,7 @@
 #define MTK_QDMA_GLO_CFG	0x1A04
 #define MTK_RX_2B_OFFSET	BIT(31)
 #define MTK_RX_BT_32DWORDS	(3 << 11)
+#define MTK_NDP_CO_PRO		BIT(10)
 #define MTK_TX_WB_DDONE		BIT(6)
 #define MTK_DMA_SIZE_16DWORDS	(2 << 4)
 #define MTK_RX_DMA_BUSY		BIT(3)
--
cgit v1.2.3-70-g09d2

From 2ff0bb61646f286fa97db2904491974302a14f1f Mon Sep 17 00:00:00 2001
From: John Crispin
Date: Fri, 10 Jun 2016 13:28:03 +0200
Subject: net: mediatek: disable all interrupts during probe

The current code only disables those IRQs that we will later use. To
ensure that we have a predefined state, we really want to disable all
IRQs. Change the code to disable all IRQs to achieve this.

Signed-off-by: John Crispin
Signed-off-by: David S. Miller
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index f30c2e474b87..e3dadae9f6dd 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1396,7 +1396,7 @@ static int __init mtk_hw_init(struct mtk_eth *eth)
 
 	/* disable delay and normal interrupt */
 	mtk_w32(eth, 0, MTK_QDMA_DELAY_INT);
-	mtk_irq_disable(eth, MTK_TX_DONE_INT | MTK_RX_DONE_INT);
+	mtk_irq_disable(eth, ~0);
 	mtk_w32(eth, RST_GL_PSE, MTK_RST_GL);
 	mtk_w32(eth, 0, MTK_RST_GL);
 
--
cgit v1.2.3-70-g09d2

From 04698cccb1de54d5d97fda2e4a1c6ca365da0f70 Mon Sep 17 00:00:00 2001
From: John Crispin
Date: Fri, 10 Jun 2016 13:28:04 +0200
Subject: net: mediatek: fix threshold value

The logic to calculate the threshold value for stopping the TX queue
is bad. Currently it will always use 1/2 of the ring size, which is
way too much. Set the threshold to MAX_SKB_FRAGS. This makes sure that
the queue is stopped when there is not enough room to accept an
additional segment.

Signed-off-by: John Crispin
Signed-off-by: David S.
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index e3dadae9f6dd..032939d211a7 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1033,8 +1033,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth) atomic_set(&ring->free_count, MTK_DMA_SIZE - 2); ring->next_free = &ring->dma[0]; ring->last_free = &ring->dma[MTK_DMA_SIZE - 2]; - ring->thresh = max((unsigned long)MTK_DMA_SIZE >> 2, - MAX_SKB_FRAGS); + ring->thresh = MAX_SKB_FRAGS; /* make sure that all changes to the dma ring are flushed before we * continue -- cgit v1.2.3-70-g09d2 From eaadf9fd3f6390f6ecbd0dfbd5997a0486fc9d5e Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:05 +0200 Subject: net: mediatek: increase watchdog_timeo During stress testing, after reducing the threshold value, we have seen TX timeouts that were caused by the watchdog_timeo value being too low. Increase the value to 5 * HZ which is a value commonly used by many other drivers. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 032939d211a7..3b3ba2e55bdc 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -1709,7 +1709,7 @@ static int mtk_add_mac(struct mtk_eth *eth, struct device_node *np) mac->hw_stats->reg_offset = id * MTK_STAT_OFFSET; SET_NETDEV_DEV(eth->netdev[id], eth->dev); - eth->netdev[id]->watchdog_timeo = HZ; + eth->netdev[id]->watchdog_timeo = 5 * HZ; eth->netdev[id]->netdev_ops = &mtk_netdev_ops; eth->netdev[id]->base_addr = (unsigned long)eth->base; eth->netdev[id]->vlan_features = MTK_HW_FEATURES & -- cgit v1.2.3-70-g09d2 From 12c97c13ea7174db5b5dc4a1ef91d4e9245bb569 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:06 +0200 Subject: net: mediatek: fix off by one in the TX ring allocation The TX ring setup has an off by one error causing it to not utilise all descriptors. This has the side effect that we need to reset the next pointer at runtime to make it work. Fix the off by one and remove the code fixing the ring at runtime. Signed-off-by: John Crispin Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 3b3ba2e55bdc..c097e9e3926c 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -926,7 +926,6 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again) } mtk_tx_unmap(eth->dev, tx_buf); - ring->last_free->txd2 = next_cpu; ring->last_free = desc; atomic_inc(&ring->free_count); @@ -1032,7 +1031,7 @@ static int mtk_tx_alloc(struct mtk_eth *eth) atomic_set(&ring->free_count, MTK_DMA_SIZE - 2); ring->next_free = &ring->dma[0]; - ring->last_free = &ring->dma[MTK_DMA_SIZE - 2]; + ring->last_free = &ring->dma[MTK_DMA_SIZE - 1]; ring->thresh = MAX_SKB_FRAGS; /* make sure that all changes to the dma ring are flushed before we -- cgit v1.2.3-70-g09d2 From ad3cba989e8b1bbefe078eece29f0e8d8aaea1d6 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:07 +0200 Subject: net: mediatek: only wake the queue if it is stopped The current code unconditionally wakes up the queue at the end of each tx_poll action. Change the code to only wake up the queues if any of them have actually been stopped before. Signed-off-by: John Crispin Signed-off-by: David S. Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index c097e9e3926c..40e37b158a69 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -704,6 +704,20 @@ static inline int mtk_cal_txd_req(struct sk_buff *skb) return nfrags; } +static int mtk_queue_stopped(struct mtk_eth *eth) +{ + int i; + + for (i = 0; i < MTK_MAC_COUNT; i++) { + if (!eth->netdev[i]) + continue; + if (netif_queue_stopped(eth->netdev[i])) + return 1; + } + + return 0; +} + static void mtk_wake_queue(struct mtk_eth *eth) { int i; @@ -950,7 +964,8 @@ static int mtk_poll_tx(struct mtk_eth *eth, int budget, bool *tx_again) if (!total) return 0; - if (atomic_read(&ring->free_count) > ring->thresh) + if (mtk_queue_stopped(eth) && + (atomic_read(&ring->free_count) > ring->thresh)) mtk_wake_queue(eth); return total; -- cgit v1.2.3-70-g09d2 From 82c6544dddc6c4bd940917af2f987dee6be0fc17 Mon Sep 17 00:00:00 2001 From: John Crispin Date: Fri, 10 Jun 2016 13:28:08 +0200 Subject: net: mediatek: remove superfluous queue wake up call The code checks if the queue should be stopped because we are below the threshold of free descriptors only to check if it should be started again. If we do end up in a state where we are at the threshold limit, it makes more sense to just stop the queue and wait for the next IRQ to trigger the TX housekeeping again. There is no rush in enqueuing the next packet, it needs to wait for all the others in the queue to be dispatched first anyway. Signed-off-by: John Crispin Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mediatek/mtk_eth_soc.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c index 40e37b158a69..d1cdc2d76151 100644 --- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c +++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c @@ -783,12 +783,9 @@ static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev) if (mtk_tx_map(skb, dev, tx_num, ring, gso) < 0) goto drop; - if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) { + if (unlikely(atomic_read(&ring->free_count) <= ring->thresh)) mtk_stop_queue(eth); - if (unlikely(atomic_read(&ring->free_count) > - ring->thresh)) - mtk_wake_queue(eth); - } + spin_unlock_irqrestore(ð->page_lock, flags); return NETDEV_TX_OK; -- cgit v1.2.3-70-g09d2 From a2f27217e4e60e663b5b971b0ccb287a9548b04e Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Fri, 10 Jun 2016 16:53:05 +0200 Subject: net: au1000_eth: fix PHY detection Commit 7f854420fbfe9d49afe2ffb1df052cfe8e215541 ("phy: Add API for {un}registering an mdio device to a bus.") broke PHY detection on this driver with a copy-paste bug: The code is looking 32 times for a PHY at address 0. Fixes ethernet on AMD DB1100/DB1500/DB1550 boards which have their (autodetected) PHYs at address 31. Cc: Andrew Lunn Signed-off-by: Manuel Lauss Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/au1000_eth.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index e0fb0f1122db..d8c77e8e25ed 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -508,13 +508,12 @@ static int au1000_mii_probe(struct net_device *dev) /* find the first (lowest address) PHY * on the current MAC's MII bus */ - for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) - if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) { - phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr); - if (!aup->phy_search_highest_addr) - /* break out with first one found */ - break; - } + for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) { + phydev = mdiobus_get_phy(aup->mii_bus, phy_addr); + if (phydev && !aup->phy_search_highest_addr) + /* break out with first one found */ + break; + } if (aup->phy1_search_mac0) { /* try harder to find a PHY */ -- cgit v1.2.3-70-g09d2 From 86c5fe4c932a8ef2d32f8b3d32cc9f0476fc54f3 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 10 Jun 2016 23:34:24 -0700 Subject: Revert "net: au1000_eth: fix PHY detection" This reverts commit a2f27217e4e60e663b5b971b0ccb287a9548b04e. I applied the wrong version of this. Signed-off-by: David S. 
Miller --- drivers/net/ethernet/amd/au1000_eth.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index d8c77e8e25ed..e0fb0f1122db 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -508,12 +508,13 @@ static int au1000_mii_probe(struct net_device *dev) /* find the first (lowest address) PHY * on the current MAC's MII bus */ - for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) { - phydev = mdiobus_get_phy(aup->mii_bus, phy_addr); - if (phydev && !aup->phy_search_highest_addr) - /* break out with first one found */ - break; - } + for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) + if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) { + phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr); + if (!aup->phy_search_highest_addr) + /* break out with first one found */ + break; + } if (aup->phy1_search_mac0) { /* try harder to find a PHY */ -- cgit v1.2.3-70-g09d2 From 92ca8241533009e4e05a9f3999a75389678af094 Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Sat, 11 Jun 2016 00:13:04 +0200 Subject: net: au1000_eth: fix PHY detection Commit 7f854420fbfe9d49afe2ffb1df052cfe8e215541 ("phy: Add API for {un}registering an mdio device to a bus.") broke PHY detection on this driver with a copy-paste bug: The code is looking 32 times for a PHY at address 0. Fixes ethernet on AMD DB1100/DB1500/DB1550 boards which have their (autodetected) PHYs at address 31. Cc: Andrew Lunn Signed-off-by: Manuel Lauss Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/au1000_eth.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/amd/au1000_eth.c b/drivers/net/ethernet/amd/au1000_eth.c index e0fb0f1122db..20760e10211a 100644 --- a/drivers/net/ethernet/amd/au1000_eth.c +++ b/drivers/net/ethernet/amd/au1000_eth.c @@ -509,8 +509,8 @@ static int au1000_mii_probe(struct net_device *dev) * on the current MAC's MII bus */ for (phy_addr = 0; phy_addr < PHY_MAX_ADDR; phy_addr++) - if (mdiobus_get_phy(aup->mii_bus, aup->phy_addr)) { - phydev = mdiobus_get_phy(aup->mii_bus, aup->phy_addr); + if (mdiobus_get_phy(aup->mii_bus, phy_addr)) { + phydev = mdiobus_get_phy(aup->mii_bus, phy_addr); if (!aup->phy_search_highest_addr) /* break out with first one found */ break; -- cgit v1.2.3-70-g09d2 From 86ef7f9cbfd564377028098cf20cc1c3ec2c776d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 11 Jun 2016 20:40:24 -0700 Subject: ipconfig: Protect ic_addrservaddr with IPCONFIG_DYNAMIC. >> net/ipv4/ipconfig.c:130:15: warning: 'ic_addrservaddr' defined but not used [-Wunused-variable] static __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ Reported-by: kbuild test robot Signed-off-by: David S. 
Miller --- net/ipv4/ipconfig.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/ipconfig.c b/net/ipv4/ipconfig.c index eccf9fd190c0..1d71c40eaaf3 100644 --- a/net/ipv4/ipconfig.c +++ b/net/ipv4/ipconfig.c @@ -127,7 +127,9 @@ __be32 ic_myaddr = NONE; /* My IP address */ static __be32 ic_netmask = NONE; /* Netmask for local subnet */ __be32 ic_gateway = NONE; /* Gateway IP address */ +#ifdef IPCONFIG_DYNAMIC static __be32 ic_addrservaddr = NONE; /* IP Address of the IP addresses'server */ +#endif __be32 ic_servaddr = NONE; /* Boot server IP address */ -- cgit v1.2.3-70-g09d2 From cbdf451164785c9cf5acd5d2983c1e7c778df4c1 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 12 Jun 2016 16:21:47 -0700 Subject: net_sched: prio: properly report out of memory errors At Qdisc creation or change time, prio_tune() creates missing pfifo qdiscs but does not return an error code if one qdisc could not be allocated. Leaving a qdisc in non operational state without telling user anything about this problem is not good. Also, testing if we replace something different than noop_qdisc a second time makes no sense so I removed useless code. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/sched/sch_prio.c | 32 ++++++++++++-------------------- 1 file changed, 12 insertions(+), 20 deletions(-) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 4b0a82191bc4..071718bccdab 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -202,26 +202,18 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) sch_tree_unlock(sch); for (i = 0; i < q->bands; i++) { - if (q->queues[i] == &noop_qdisc) { - struct Qdisc *child, *old; - - child = qdisc_create_dflt(sch->dev_queue, - &pfifo_qdisc_ops, - TC_H_MAKE(sch->handle, i + 1)); - if (child) { - sch_tree_lock(sch); - old = q->queues[i]; - q->queues[i] = child; - - if (old != &noop_qdisc) { - qdisc_tree_reduce_backlog(old, - old->q.qlen, - old->qstats.backlog); - qdisc_destroy(old); - } - sch_tree_unlock(sch); - } - } + struct Qdisc *child; + + if (q->queues[i] != &noop_qdisc) + continue; + + child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, i + 1)); + if (!child) + return -ENOMEM; + sch_tree_lock(sch); + q->queues[i] = child; + sch_tree_unlock(sch); } return 0; } -- cgit v1.2.3-70-g09d2 From d941ebe88a411aa281cc80477a93feb931a1b50b Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Sat, 11 Jun 2016 01:11:54 +0300 Subject: net: ethernet: ti: cpsw: use destroy ctlr to destroy channels There is no reason to destroy channels that are destroyed while cpdma_ctlr destroy. In this case no need to remember how much channels where created and destroy them by one, as cpdma_ctlr destroys all of them. Signed-off-by: Ivan Khoronzhuk Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ti/cpsw.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c index e6bb0ecb12c7..53190894f17a 100644 --- a/drivers/net/ethernet/ti/cpsw.c +++ b/drivers/net/ethernet/ti/cpsw.c @@ -2505,8 +2505,6 @@ static int cpsw_probe(struct platform_device *pdev) clean_ale_ret: cpsw_ale_destroy(priv->ale); clean_dma_ret: - cpdma_chan_destroy(priv->txch); - cpdma_chan_destroy(priv->rxch); cpdma_ctlr_destroy(priv->dma); clean_runtime_disable_ret: pm_runtime_disable(&pdev->dev); @@ -2534,8 +2532,6 @@ static int cpsw_remove(struct platform_device *pdev) unregister_netdev(ndev); cpsw_ale_destroy(priv->ale); - cpdma_chan_destroy(priv->txch); - cpdma_chan_destroy(priv->rxch); cpdma_ctlr_destroy(priv->dma); pm_runtime_disable(&pdev->dev); device_for_each_child(&pdev->dev, NULL, cpsw_remove_child_device); -- cgit v1.2.3-70-g09d2 From e50525bef593c3dd0564df676c567d77f7c20322 Mon Sep 17 00:00:00 2001 From: Rajkumar Manoharan Date: Thu, 9 Jun 2016 11:33:55 +0530 Subject: ath10k: fix deadlock while processing rx_in_ord_ind commit 5c86d97bcc1d ("ath10k: combine txrx and replenish task") introduced deadlock while processing rx in order indication message for qca6174 based devices. While merging replenish and txrx tasklets, replenish task should be called out of htt rx ring locking since it is also try to acquire the same lock. Unfortunately this issue is not exposed by other solutions (qca988x, qca99x0 & qca4019), as rx_in_ord_ind message is specific to qca6174 based devices. This patch fixes ============================================= [ INFO: possible recursive locking detected ] 4.7.0-rc2-wt-ath+ #1353 Tainted: G E --------------------------------------------- swapper/3/0 is trying to acquire lock: (&(&htt->rx_ring.lock)->rlock){+.-...}, at: [] ath10k_htt_rx_msdu_buff_replenish+0x29/0x90 [ath10k_core] but task is already holding lock: (&(&htt->rx_ring.lock)->rlock){+.-...}, at: [] ath10k_htt_txrx_compl_task+0x21b/0x250 [ath10k_core] other info that might help us debug this: Possible unsafe locking scenario: CPU0 ---- lock(&(&htt->rx_ring.lock)->rlock); lock(&(&htt->rx_ring.lock)->rlock); *** DEADLOCK *** May be due to missing lock nesting notation 1 lock held by swapper/3/0: #0: (&(&htt->rx_ring.lock)->rlock){+.-...}, at: [] ath10k_htt_txrx_compl_task+0x21b/0x250 [ath10k_core] Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=119151 Fixes: 5c86d97bcc1d ("ath10k: combine txrx and replenish task") Reported-by: Mike Lothian Signed-off-by: Rajkumar Manoharan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath10k/htt_rx.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c index cc979a4faeb0..813cdd2621a1 100644 --- a/drivers/net/wireless/ath/ath10k/htt_rx.c +++ b/drivers/net/wireless/ath/ath10k/htt_rx.c @@ -1904,7 +1904,6 @@ static void ath10k_htt_rx_in_ord_ind(struct ath10k *ar, struct sk_buff *skb) return; } } - ath10k_htt_rx_msdu_buff_replenish(htt); } static void ath10k_htt_rx_tx_fetch_resp_id_confirm(struct ath10k *ar, -- cgit v1.2.3-70-g09d2 From e024111f6946f45cf1559a8c6fd48d2d0f696d07 Mon Sep 17 00:00:00 2001 From: Miaoqing Pan Date: Tue, 7 Jun 2016 15:47:07 +0300 Subject: ath9k: fix GPIO mask for AR9462 and AR9565 The incorrect GPIO mask cause kernel warning, when AR9462 access GPIO11. Also fix the mask for AR9565. 
WARNING: CPU: 1 PID: 199 at ../drivers/net/wireless/ath/ath9k/hw.c:2778 ath9k_hw_gpio_get+0x1a9/0x1b0 [ath9k_hw] CPU: 1 PID: 199 Comm: kworker/u16:9 Not tainted 4.7.0-rc1-next-20160530+ #5 Hardware name: Acer TravelMate P243/BA40_HC, BIOS V1.01 04/20/2012 Workqueue: events_power_efficient rfkill_poll 0000000000000000 ffff88002cf73d28 ffffffff813b8ddc 0000000000000000 0000000000000000 ffff88002cf73d68 ffffffff8107a331 00000ada00000086 ffff880148d9c018 000000000000000b ffff880147e68720 0000000000000200 Call Trace: [] dump_stack+0x63/0x87 [] __warn+0xd1/0xf0 [] warn_slowpath_null+0x1d/0x20 [] ath9k_hw_gpio_get+0x1a9/0x1b0 [ath9k_hw] [] ath9k_rfkill_poll_state+0x34/0x60 [ath9k] [] ieee80211_rfkill_poll+0x33/0x40 [mac80211] [] cfg80211_rfkill_poll+0x2a/0xc0 [cfg80211] [] rfkill_poll+0x24/0x50 [] process_one_work+0x153/0x3f0 [] worker_thread+0x12b/0x4b0 [] ? rescuer_thread+0x340/0x340 [] kthread+0xc9/0xe0 [] ret_from_fork+0x1f/0x40 [] ? kthread_park+0x60/0x60 Fixes: a01ab81b09c5 ("ath9k: define correct GPIO numbers and bits mask") Reported-by: Sudip Mukherjee Tested-by: Sudip Mukherjee Signed-off-by: Miaoqing Pan Signed-off-by: Kalle Valo --- drivers/net/wireless/ath/ath9k/reg.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/wireless/ath/ath9k/reg.h b/drivers/net/wireless/ath/ath9k/reg.h index 9272ca90632b..80ff69f99229 100644 --- a/drivers/net/wireless/ath/ath9k/reg.h +++ b/drivers/net/wireless/ath/ath9k/reg.h @@ -1122,12 +1122,12 @@ enum { #define AR9300_NUM_GPIO 16 #define AR9330_NUM_GPIO 16 #define AR9340_NUM_GPIO 23 -#define AR9462_NUM_GPIO 10 +#define AR9462_NUM_GPIO 14 #define AR9485_NUM_GPIO 12 #define AR9531_NUM_GPIO 18 #define AR9550_NUM_GPIO 24 #define AR9561_NUM_GPIO 23 -#define AR9565_NUM_GPIO 12 +#define AR9565_NUM_GPIO 14 #define AR9580_NUM_GPIO 16 #define AR7010_NUM_GPIO 16 @@ -1139,12 +1139,12 @@ enum { #define AR9300_GPIO_MASK 0x0000F4FF #define AR9330_GPIO_MASK 0x0000F4FF #define AR9340_GPIO_MASK 0x0000000F -#define AR9462_GPIO_MASK 0x000003FF +#define AR9462_GPIO_MASK 0x00003FFF #define AR9485_GPIO_MASK 0x00000FFF #define AR9531_GPIO_MASK 0x0000000F #define AR9550_GPIO_MASK 0x0000000F #define AR9561_GPIO_MASK 0x0000000F -#define AR9565_GPIO_MASK 0x00000FFF +#define AR9565_GPIO_MASK 0x00003FFF #define AR9580_GPIO_MASK 0x0000F4FF #define AR7010_GPIO_MASK 0x0000FFFF -- cgit v1.2.3-70-g09d2 From fdecf36fcefa9f48c2f2de21c8176f1a8ce2c960 Mon Sep 17 00:00:00 2001 From: Clemens Gruber Date: Sat, 11 Jun 2016 17:21:26 +0200 Subject: phy: marvell: fix LED configuration via marvell,reg-init Configuring the PHY LED registers for the Marvell 88E1510 and others is not possible, because regardless of the values in marvell,reg-init, it is later overridden in m88e1121_config_aneg with a non-standard default. This patch moves that default configuration to .config_init to allow setting the LED configuration through marvell,reg-init in the device tree, which should override said default if it exists. Signed-off-by: Clemens Gruber Reviewed-by: Andrew Lunn Signed-off-by: David S. 
Miller --- drivers/net/phy/marvell.c | 38 ++++++++++++++++++++++++++------------ 1 file changed, 26 insertions(+), 12 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 280e8795b463..79ccc11facc3 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -407,15 +407,7 @@ static int m88e1121_config_aneg(struct phy_device *phydev) if (err < 0) return err; - oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE); - - phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_88E1121_PHY_LED_PAGE); - phy_write(phydev, MII_88E1121_PHY_LED_CTRL, MII_88E1121_PHY_LED_DEF); - phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage); - - err = genphy_config_aneg(phydev); - - return err; + return genphy_config_aneg(phydev); } static int m88e1318_config_aneg(struct phy_device *phydev) @@ -636,6 +628,28 @@ static int m88e1111_config_init(struct phy_device *phydev) return phy_write(phydev, MII_BMCR, BMCR_RESET); } +static int m88e1121_config_init(struct phy_device *phydev) +{ + int err, oldpage; + + oldpage = phy_read(phydev, MII_MARVELL_PHY_PAGE); + + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_88E1121_PHY_LED_PAGE); + if (err < 0) + return err; + + /* Default PHY LED config: LED[0] .. Link, LED[1] .. Activity */ + err = phy_write(phydev, MII_88E1121_PHY_LED_CTRL, + MII_88E1121_PHY_LED_DEF); + if (err < 0) + return err; + + phy_write(phydev, MII_MARVELL_PHY_PAGE, oldpage); + + /* Set marvell,reg-init configuration from device tree */ + return marvell_config_init(phydev); +} + static int m88e1510_config_init(struct phy_device *phydev) { int err; @@ -668,7 +682,7 @@ static int m88e1510_config_init(struct phy_device *phydev) return err; } - return marvell_config_init(phydev); + return m88e1121_config_init(phydev); } static int m88e1118_config_aneg(struct phy_device *phydev) @@ -1196,7 +1210,7 @@ static struct phy_driver marvell_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, - .config_init = &marvell_config_init, + .config_init = &m88e1121_config_init, .config_aneg = &m88e1121_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, @@ -1215,7 +1229,7 @@ static struct phy_driver marvell_drivers[] = { .features = PHY_GBIT_FEATURES, .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, - .config_init = &marvell_config_init, + .config_init = &m88e1121_config_init, .config_aneg = &m88e1318_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, -- cgit v1.2.3-70-g09d2 From dcb94b88c09ce82a80e188d49bcffdc83ba215a6 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 11 Jun 2016 20:32:06 +0200 Subject: ipv6: fix endianness error in icmpv6_err IPv6 ping socket error handler doesn't correctly convert the new 32 bit mtu to host endianness before using. Cc: Lorenzo Colitti Fixes: 6d0bfe22611602f ("net: ipv6: Add IPv6 support to the ping socket.") Signed-off-by: Hannes Frederic Sowa Acked-by: Lorenzo Colitti Signed-off-by: David S. 
Miller --- net/ipv6/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 4527285fcaa2..a4fa84076969 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -98,7 +98,7 @@ static void icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, if (!(type & ICMPV6_INFOMSG_MASK)) if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST) - ping_err(skb, offset, info); + ping_err(skb, offset, ntohl(info)); } static int icmpv6_rcv(struct sk_buff *skb); -- cgit v1.2.3-70-g09d2 From 5119bd16815d3f0364390a1369392dcc036790e7 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 11 Jun 2016 20:41:38 +0200 Subject: ipv6: tcp: fix endianness annotation in tcp_v6_send_response Cc: Florent Fourcot Fixes: 1d13a96c74fc ("ipv6: tcp: fix flowlabel value in ACK messages send from TIME_WAIT") Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f36c2d076fce..2255d2bf5f6b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -738,7 +738,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, int rst, - u8 tclass, u32 label) + u8 tclass, __be32 label) { const struct tcphdr *th = tcp_hdr(skb); struct tcphdr *t1; @@ -911,7 +911,7 @@ out: static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 tsval, u32 tsecr, int oif, struct tcp_md5sig_key *key, u8 tclass, - u32 label) + __be32 label) { tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0, tclass, label); -- cgit v1.2.3-70-g09d2 From c148d16369ff0095eca950d17968ba1d56a47b53 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sat, 11 Jun 2016 21:15:37 +0200 Subject: ipv6: fix checksum annotation in udp6_csum_init Cc: Tom Herbert Fixes: 4068579e1e098fa ("net: Implmement RFC 6936 (zero RX csums for UDP/IPv6") Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_checksum.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv6/ip6_checksum.c b/net/ipv6/ip6_checksum.c index b2025bf3da4a..c0cbcb259f5a 100644 --- a/net/ipv6/ip6_checksum.c +++ b/net/ipv6/ip6_checksum.c @@ -78,9 +78,12 @@ int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh, int proto) * we accept a checksum of zero here. When we find the socket * for the UDP packet we'll check if that socket allows zero checksum * for IPv6 (set by socket option). + * + * Note, we are only interested in != 0 or == 0, thus the + * force to int. */ - return skb_checksum_init_zero_check(skb, proto, uh->check, - ip6_compute_pseudo); + return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + ip6_compute_pseudo); } EXPORT_SYMBOL(udp6_csum_init); -- cgit v1.2.3-70-g09d2 From b46d9f625b07f843c706c2c7d0210a90ccdf143b Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Sun, 12 Jun 2016 12:02:46 +0200 Subject: ipv4: fix checksum annotation in udp4_csum_init Reported-by: Cong Wang Cc: Cong Wang Cc: Tom Herbert Fixes: 4068579e1e098fa ("net: Implmement RFC 6936 (zero RX csums for UDP/IPv6") Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- net/ipv4/udp.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 0ff31d97d485..ba0d8b8b7690 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1755,8 +1755,11 @@ static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh, return err; } - return skb_checksum_init_zero_check(skb, proto, uh->check, - inet_compute_pseudo); + /* Note, we are only interested in != 0 or == 0, thus the + * force to int. + */ + return (__force int)skb_checksum_init_zero_check(skb, proto, uh->check, + inet_compute_pseudo); } /* -- cgit v1.2.3-70-g09d2 From 881d0327db37ad917a367c77aff1afa1ee41e0a9 Mon Sep 17 00:00:00 2001 From: Feng Tang Date: Sun, 12 Jun 2016 17:36:37 +0800 Subject: net: alx: Work around the DMA RX overflow issue Commit 26c5f03 uses a new skb allocator to avoid the RFD overflow issue. But from debugging without a datasheet, we found the error always happens when the DMA RX address is set to 0x....fc0, which is very likely to be a HW/silicon problem. So one idea is: instead of adding a new allocator, why not just hit the right target by avoiding the error-prone DMA address? This patch will actually * Remove commit 26c5f03 * Allocate the rx skb with 64 bytes of extra space, and if the allocated skb has a 0x...fc0 address, use skb_reserve(skb, 64) to advance the address, so that the RX overflow can be avoided. In theory this method should also apply to the atl1c driver, but I could not find anyone to help test it on real devices. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=70761 Signed-off-by: Feng Tang Suggested-by: Eric Dumazet Tested-by: Ole Lukoie Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/ethernet/atheros/alx/alx.h | 4 --- drivers/net/ethernet/atheros/alx/main.c | 61 ++++++++------------------------- 2 files changed, 14 insertions(+), 51 deletions(-) diff --git a/drivers/net/ethernet/atheros/alx/alx.h b/drivers/net/ethernet/atheros/alx/alx.h index d02c4240b7df..8fc93c5f6abc 100644 --- a/drivers/net/ethernet/atheros/alx/alx.h +++ b/drivers/net/ethernet/atheros/alx/alx.h @@ -96,10 +96,6 @@ struct alx_priv { unsigned int rx_ringsz; unsigned int rxbuf_size; - struct page *rx_page; - unsigned int rx_page_offset; - unsigned int rx_frag_size; - struct napi_struct napi; struct alx_tx_queue txq; struct alx_rx_queue rxq; diff --git a/drivers/net/ethernet/atheros/alx/main.c b/drivers/net/ethernet/atheros/alx/main.c index c98acdc0d14f..e708e360a9e3 100644 --- a/drivers/net/ethernet/atheros/alx/main.c +++ b/drivers/net/ethernet/atheros/alx/main.c @@ -70,35 +70,6 @@ static void alx_free_txbuf(struct alx_priv *alx, int entry) } } -static struct sk_buff *alx_alloc_skb(struct alx_priv *alx, gfp_t gfp) -{ - struct sk_buff *skb; - struct page *page; - - if (alx->rx_frag_size > PAGE_SIZE) - return __netdev_alloc_skb(alx->dev, alx->rxbuf_size, gfp); - - page = alx->rx_page; - if (!page) { - alx->rx_page = page = alloc_page(gfp); - if (unlikely(!page)) - return NULL; - alx->rx_page_offset = 0; - } - - skb = build_skb(page_address(page) + alx->rx_page_offset, - alx->rx_frag_size); - if (likely(skb)) { - alx->rx_page_offset += alx->rx_frag_size; - if (alx->rx_page_offset >= PAGE_SIZE) - alx->rx_page = NULL; - else - get_page(page); - } - return skb; -} - - static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp) { struct alx_rx_queue *rxq = &alx->rxq; @@ -115,9 +86,22 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp) while (!cur_buf->skb && next != rxq->read_idx) { struct
alx_rfd *rfd = &rxq->rfd[cur]; - skb = alx_alloc_skb(alx, gfp); + /* + * When DMA RX address is set to something like + * 0x....fc0, it will be very likely to cause DMA + * RFD overflow issue. + * + * To work around it, we apply rx skb with 64 bytes + * longer space, and offset the address whenever + * 0x....fc0 is detected. + */ + skb = __netdev_alloc_skb(alx->dev, alx->rxbuf_size + 64, gfp); if (!skb) break; + + if (((unsigned long)skb->data & 0xfff) == 0xfc0) + skb_reserve(skb, 64); + dma = dma_map_single(&alx->hw.pdev->dev, skb->data, alx->rxbuf_size, DMA_FROM_DEVICE); @@ -153,7 +137,6 @@ static int alx_refill_rx_ring(struct alx_priv *alx, gfp_t gfp) alx_write_mem16(&alx->hw, ALX_RFD_PIDX, cur); } - return count; } @@ -622,11 +605,6 @@ static void alx_free_rings(struct alx_priv *alx) kfree(alx->txq.bufs); kfree(alx->rxq.bufs); - if (alx->rx_page) { - put_page(alx->rx_page); - alx->rx_page = NULL; - } - dma_free_coherent(&alx->hw.pdev->dev, alx->descmem.size, alx->descmem.virt, @@ -681,7 +659,6 @@ static int alx_request_irq(struct alx_priv *alx) alx->dev->name, alx); if (!err) goto out; - /* fall back to legacy interrupt */ pci_disable_msi(alx->hw.pdev); } @@ -725,7 +702,6 @@ static int alx_init_sw(struct alx_priv *alx) struct pci_dev *pdev = alx->hw.pdev; struct alx_hw *hw = &alx->hw; int err; - unsigned int head_size; err = alx_identify_hw(alx); if (err) { @@ -741,12 +717,7 @@ static int alx_init_sw(struct alx_priv *alx) hw->smb_timer = 400; hw->mtu = alx->dev->mtu; - alx->rxbuf_size = ALX_MAX_FRAME_LEN(hw->mtu); - head_size = SKB_DATA_ALIGN(alx->rxbuf_size + NET_SKB_PAD) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - alx->rx_frag_size = roundup_pow_of_two(head_size); - alx->tx_ringsz = 256; alx->rx_ringsz = 512; hw->imt = 200; @@ -848,7 +819,6 @@ static int alx_change_mtu(struct net_device *netdev, int mtu) { struct alx_priv *alx = netdev_priv(netdev); int max_frame = ALX_MAX_FRAME_LEN(mtu); - unsigned int head_size; if ((max_frame < ALX_MIN_FRAME_SIZE) || (max_frame > ALX_MAX_FRAME_SIZE)) @@ -860,9 +830,6 @@ static int alx_change_mtu(struct net_device *netdev, int mtu) netdev->mtu = mtu; alx->hw.mtu = mtu; alx->rxbuf_size = max(max_frame, ALX_DEF_RXBUF_SIZE); - head_size = SKB_DATA_ALIGN(alx->rxbuf_size + NET_SKB_PAD) + - SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); - alx->rx_frag_size = roundup_pow_of_two(head_size); netdev_update_features(netdev); if (netif_running(netdev)) alx_reinit(alx); -- cgit v1.2.3-70-g09d2 From 6c0d54f1897d229748d4f41ef919078db6db2123 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 12 Jun 2016 20:01:25 -0700 Subject: net_sched: fix pfifo_head_drop behavior vs backlog When the qdisc is full, we drop a packet at the head of the queue, queue the current skb and return NET_XMIT_CN Now we track backlog on upper qdiscs, we need to call qdisc_tree_reduce_backlog(), even if the qlen did not change. Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Signed-off-by: Eric Dumazet Cc: WANG Cong Cc: Jamal Hadi Salim Acked-by: Cong Wang Signed-off-by: David S. 
Miller --- net/sched/sch_fifo.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 2177eac0a61e..2e4bd2c0a50c 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -37,14 +37,18 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc *sch) static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc *sch) { + unsigned int prev_backlog; + if (likely(skb_queue_len(&sch->q) < sch->limit)) return qdisc_enqueue_tail(skb, sch); + prev_backlog = sch->qstats.backlog; /* queue full, remove one skb to fulfill the limit */ __qdisc_queue_drop_head(sch, &sch->q); qdisc_qstats_drop(sch); qdisc_enqueue_tail(skb, sch); + qdisc_tree_reduce_backlog(sch, 0, prev_backlog - sch->qstats.backlog); return NET_XMIT_CN; } -- cgit v1.2.3-70-g09d2 From d1e37288c9146dccff830e3253e403af8705b51f Mon Sep 17 00:00:00 2001 From: "Su, Xuemin" Date: Mon, 13 Jun 2016 11:02:50 +0800 Subject: udp reuseport: fix packet of same flow hashed to different socket There is a corner case in which udp packets belonging to the same flow are hashed to different sockets when hslot->count changes from 10 to 11: 1) When hslot->count <= 10, __udp_lib_lookup() searches udp_table->hash, and always passes 'daddr' to udp_ehashfn(). 2) When hslot->count > 10, __udp_lib_lookup() searches udp_table->hash2, but may pass 'INADDR_ANY' to udp_ehashfn() if the sockets are bound to INADDR_ANY instead of a specific address. That means when hslot->count changes from 10 to 11, the hash calculated by udp_ehashfn() also changes, and the udp packets belonging to the same flow will be hashed to a different socket. This is easily reproduced: 1) Create 10 udp sockets and bind all of them to 0.0.0.0:40000. 2) From the same host send udp packets to 127.0.0.1:40000, record the socket index which receives the packets. 3) Create 1 more udp socket and bind it to 0.0.0.0:44096. The number 44096 is 40000 + UDP_HASH_SIZE(4096); this puts the new socket into the same hslot as the aforementioned 10 sockets, and makes hslot->count change from 10 to 11. 4) From the same host send udp packets to 127.0.0.1:40000, and the socket index which receives the packets will be different from the one recorded in step 2. This should not happen, as the socket bound to 0.0.0.0:44096 should not change the behavior of the sockets bound to 0.0.0.0:40000. It's the same case for IPv6, and this patch also fixes that. Signed-off-by: Su, Xuemin Signed-off-by: Eric Dumazet Signed-off-by: David S.
Miller --- net/ipv4/udp.c | 73 +++++++++++++--------------------------------------------- net/ipv6/udp.c | 71 +++++++++++++------------------------------------------- 2 files changed, 32 insertions(+), 112 deletions(-) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index ba0d8b8b7690..ca5e8ea29538 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -391,9 +391,9 @@ int udp_v4_get_port(struct sock *sk, unsigned short snum) return udp_lib_get_port(sk, snum, ipv4_rcv_saddr_equal, hash2_nulladdr); } -static inline int compute_score(struct sock *sk, struct net *net, - __be32 saddr, unsigned short hnum, __be16 sport, - __be32 daddr, __be16 dport, int dif) +static int compute_score(struct sock *sk, struct net *net, + __be32 saddr, __be16 sport, + __be32 daddr, unsigned short hnum, int dif) { int score; struct inet_sock *inet; @@ -434,52 +434,6 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } -/* - * In this second variant, we check (daddr, dport) matches (inet_rcv_sadd, inet_num) - */ -static inline int compute_score2(struct sock *sk, struct net *net, - __be32 saddr, __be16 sport, - __be32 daddr, unsigned int hnum, int dif) -{ - int score; - struct inet_sock *inet; - - if (!net_eq(sock_net(sk), net) || - ipv6_only_sock(sk)) - return -1; - - inet = inet_sk(sk); - - if (inet->inet_rcv_saddr != daddr || - inet->inet_num != hnum) - return -1; - - score = (sk->sk_family == PF_INET) ? 2 : 1; - - if (inet->inet_daddr) { - if (inet->inet_daddr != saddr) - return -1; - score += 4; - } - - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score += 4; - } - - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score += 4; - } - - if (sk->sk_incoming_cpu == raw_smp_processor_id()) - score++; - - return score; -} - static u32 udp_ehashfn(const struct net *net, const __be32 laddr, const __u16 lport, const __be32 faddr, const __be16 fport) @@ -492,11 +446,11 @@ static u32 udp_ehashfn(const struct net *net, const __be32 laddr, udp_ehash_secret + net_hash_mix(net)); } -/* called with read_rcu_lock() */ +/* called with rcu_read_lock() */ static struct sock *udp4_lib_lookup2(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, unsigned int slot2, + struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; @@ -506,7 +460,7 @@ static struct sock *udp4_lib_lookup2(struct net *net, result = NULL; badness = 0; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - score = compute_score2(sk, net, saddr, sport, + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; @@ -554,17 +508,22 @@ struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr, result = udp4_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, slot2, skb); + hslot2, skb); if (!result) { + unsigned int old_slot2 = slot2; hash2 = udp4_portaddr_hash(net, htonl(INADDR_ANY), hnum); slot2 = hash2 & udptable->mask; + /* avoid searching the same slot again. 
*/ + if (unlikely(slot2 == old_slot2)) + return result; + hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) goto begin; result = udp4_lib_lookup2(net, saddr, sport, - htonl(INADDR_ANY), hnum, dif, - hslot2, slot2, skb); + daddr, hnum, dif, + hslot2, skb); } return result; } @@ -572,8 +531,8 @@ begin: result = NULL; badness = 0; sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, saddr, hnum, sport, - daddr, dport, dif); + score = compute_score(sk, net, saddr, sport, + daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index f421c9f23c5b..005dc82c2138 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -115,11 +115,10 @@ static void udp_v6_rehash(struct sock *sk) udp_lib_rehash(sk, new_hash); } -static inline int compute_score(struct sock *sk, struct net *net, - unsigned short hnum, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, __be16 dport, - int dif) +static int compute_score(struct sock *sk, struct net *net, + const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, unsigned short hnum, + int dif) { int score; struct inet_sock *inet; @@ -162,54 +161,11 @@ static inline int compute_score(struct sock *sk, struct net *net, return score; } -static inline int compute_score2(struct sock *sk, struct net *net, - const struct in6_addr *saddr, __be16 sport, - const struct in6_addr *daddr, - unsigned short hnum, int dif) -{ - int score; - struct inet_sock *inet; - - if (!net_eq(sock_net(sk), net) || - udp_sk(sk)->udp_port_hash != hnum || - sk->sk_family != PF_INET6) - return -1; - - if (!ipv6_addr_equal(&sk->sk_v6_rcv_saddr, daddr)) - return -1; - - score = 0; - inet = inet_sk(sk); - - if (inet->inet_dport) { - if (inet->inet_dport != sport) - return -1; - score++; - } - - if (!ipv6_addr_any(&sk->sk_v6_daddr)) { - if (!ipv6_addr_equal(&sk->sk_v6_daddr, saddr)) - return -1; - score++; - } - - if (sk->sk_bound_dev_if) { - if (sk->sk_bound_dev_if != dif) - return -1; - score++; - } - - if (sk->sk_incoming_cpu == raw_smp_processor_id()) - score++; - - return score; -} - -/* called with read_rcu_lock() */ +/* called with rcu_read_lock() */ static struct sock *udp6_lib_lookup2(struct net *net, const struct in6_addr *saddr, __be16 sport, const struct in6_addr *daddr, unsigned int hnum, int dif, - struct udp_hslot *hslot2, unsigned int slot2, + struct udp_hslot *hslot2, struct sk_buff *skb) { struct sock *sk, *result; @@ -219,7 +175,7 @@ static struct sock *udp6_lib_lookup2(struct net *net, result = NULL; badness = -1; udp_portaddr_for_each_entry_rcu(sk, &hslot2->head) { - score = compute_score2(sk, net, saddr, sport, + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; @@ -268,17 +224,22 @@ struct sock *__udp6_lib_lookup(struct net *net, result = udp6_lib_lookup2(net, saddr, sport, daddr, hnum, dif, - hslot2, slot2, skb); + hslot2, skb); if (!result) { + unsigned int old_slot2 = slot2; hash2 = udp6_portaddr_hash(net, &in6addr_any, hnum); slot2 = hash2 & udptable->mask; + /* avoid searching the same slot again. 
*/ + if (unlikely(slot2 == old_slot2)) + return result; + hslot2 = &udptable->hash2[slot2]; if (hslot->count < hslot2->count) goto begin; result = udp6_lib_lookup2(net, saddr, sport, - &in6addr_any, hnum, dif, - hslot2, slot2, skb); + daddr, hnum, dif, + hslot2, skb); } return result; } @@ -286,7 +247,7 @@ begin: result = NULL; badness = -1; sk_for_each_rcu(sk, &hslot->head) { - score = compute_score(sk, net, hnum, saddr, sport, daddr, dport, dif); + score = compute_score(sk, net, saddr, sport, daddr, hnum, dif); if (score > badness) { reuseport = sk->sk_reuseport; if (reuseport) { -- cgit v1.2.3-70-g09d2 From 106da663ff495e0aea3ac15b8317aa410754fcac Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 13 Jun 2016 10:31:04 +0200 Subject: ovs/gre,geneve: fix error path when creating an iface After ipgre_newlink()/geneve_configure() call, the netdev is registered. Fixes: 7e059158d57b ("vxlan, gre, geneve: Set a large MTU on ovs-created tunnel devices") CC: David Wragg Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/geneve.c | 10 +++++++--- net/ipv4/ip_gre.c | 10 +++++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index cadefe4fdaa2..086c2dae4c3d 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1508,6 +1508,7 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, { struct nlattr *tb[IFLA_MAX + 1]; struct net_device *dev; + LIST_HEAD(list_kill); int err; memset(tb, 0, sizeof(tb)); @@ -1519,8 +1520,10 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, err = geneve_configure(net, dev, &geneve_remote_unspec, 0, 0, 0, 0, htons(dst_port), true, GENEVE_F_UDP_ZERO_CSUM6_RX); - if (err) - goto err; + if (err) { + free_netdev(dev); + return ERR_PTR(err); + } /* openvswitch users expect packet sizes to be unrestricted, * so set the largest MTU we can. @@ -1532,7 +1535,8 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, return dev; err: - free_netdev(dev); + geneve_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(geneve_dev_create_fb); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 4d2025f7ec57..08deba679c8c 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1121,6 +1121,7 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, { struct nlattr *tb[IFLA_MAX + 1]; struct net_device *dev; + LIST_HEAD(list_kill); struct ip_tunnel *t; int err; @@ -1136,8 +1137,10 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, t->collect_md = true; err = ipgre_newlink(net, dev, tb, NULL); - if (err < 0) - goto out; + if (err < 0) { + free_netdev(dev); + return ERR_PTR(err); + } /* openvswitch users expect packet sizes to be unrestricted, * so set the largest MTU we can. @@ -1148,7 +1151,8 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, return dev; out: - free_netdev(dev); + ip_tunnel_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); return ERR_PTR(err); } EXPORT_SYMBOL_GPL(gretap_fb_dev_create); -- cgit v1.2.3-70-g09d2 From cf5da330bbdd0c06b05c525a3d1d58ccd82c87a6 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 13 Jun 2016 10:31:05 +0200 Subject: ovs/vxlan: fix rtnl notifications on iface deletion The function vxlan_dev_create() (only used by ovs) never calls rtnl_configure_link(). 
The consequence is that dev->rtnl_link_state is never set to RTNL_LINK_INITIALIZED. During the deletion phase, the function rollback_registered_many() sends a RTM_DELLINK only if dev->rtnl_link_state is set to RTNL_LINK_INITIALIZED. Note that the function vxlan_dev_create() is moved after the rtnl stuff so that vxlan_dellink() can be called in this function. Fixes: dcc38c033b32 ("openvswitch: Re-add CONFIG_OPENVSWITCH_VXLAN") CC: Thomas Graf CC: Pravin B Shelar Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 58 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index f999db2f97b4..b3b9db68f758 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2952,30 +2952,6 @@ static int vxlan_dev_configure(struct net *src_net, struct net_device *dev, return 0; } -struct net_device *vxlan_dev_create(struct net *net, const char *name, - u8 name_assign_type, struct vxlan_config *conf) -{ - struct nlattr *tb[IFLA_MAX+1]; - struct net_device *dev; - int err; - - memset(&tb, 0, sizeof(tb)); - - dev = rtnl_create_link(net, name, name_assign_type, - &vxlan_link_ops, tb); - if (IS_ERR(dev)) - return dev; - - err = vxlan_dev_configure(net, dev, conf); - if (err < 0) { - free_netdev(dev); - return ERR_PTR(err); - } - - return dev; -} -EXPORT_SYMBOL_GPL(vxlan_dev_create); - static int vxlan_newlink(struct net *src_net, struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) { @@ -3268,6 +3244,40 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = { .get_link_net = vxlan_get_link_net, }; +struct net_device *vxlan_dev_create(struct net *net, const char *name, + u8 name_assign_type, + struct vxlan_config *conf) +{ + struct nlattr *tb[IFLA_MAX + 1]; + struct net_device *dev; + int err; + + memset(&tb, 0, sizeof(tb)); + + dev = rtnl_create_link(net, name, name_assign_type, + &vxlan_link_ops, tb); + if (IS_ERR(dev)) + return dev; + + err = vxlan_dev_configure(net, dev, conf); + if (err < 0) { + free_netdev(dev); + return ERR_PTR(err); + } + + err = rtnl_configure_link(dev, NULL); + if (err < 0) { + LIST_HEAD(list_kill); + + vxlan_dellink(dev, &list_kill); + unregister_netdevice_many(&list_kill); + return ERR_PTR(err); + } + + return dev; +} +EXPORT_SYMBOL_GPL(vxlan_dev_create); + static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn, struct net_device *dev) { -- cgit v1.2.3-70-g09d2 From da6f1da819d4b9c081a477dec74dc468a0b44290 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 13 Jun 2016 10:31:06 +0200 Subject: ovs/gre: fix rtnl notifications on iface deletion The function gretap_fb_dev_create() (only used by ovs) never calls rtnl_configure_link(). The consequence is that dev->rtnl_link_state is never set to RTNL_LINK_INITIALIZED. During the deletion phase, the function rollback_registered_many() sends a RTM_DELLINK only if dev->rtnl_link_state is set to RTNL_LINK_INITIALIZED. Fixes: b2acd1dc3949 ("openvswitch: Use regular GRE net_device instead of vport") CC: Thomas Graf CC: Pravin B Shelar Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- net/ipv4/ip_gre.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 08deba679c8c..07c5cf1838d8 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -1149,6 +1149,10 @@ struct net_device *gretap_fb_dev_create(struct net *net, const char *name, if (err) goto out; + err = rtnl_configure_link(dev, NULL); + if (err < 0) + goto out; + return dev; out: ip_tunnel_dellink(dev, &list_kill); -- cgit v1.2.3-70-g09d2 From 41009481b690493c169ce85f591b9d32c6fd9422 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 13 Jun 2016 10:31:07 +0200 Subject: ovs/geneve: fix rtnl notifications on iface deletion The function geneve_dev_create_fb() (only used by ovs) never calls rtnl_configure_link(). The consequence is that dev->rtnl_link_state is never set to RTNL_LINK_INITIALIZED. During the deletion phase, the function rollback_registered_many() sends a RTM_DELLINK only if dev->rtnl_link_state is set to RTNL_LINK_INITIALIZED. Fixes: e305ac6cf5a1 ("geneve: Add support to collect tunnel metadata.") CC: Pravin B Shelar CC: Jesse Gross CC: Thomas Graf Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/geneve.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 086c2dae4c3d..305a04e45a13 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -1532,6 +1532,10 @@ struct net_device *geneve_dev_create_fb(struct net *net, const char *name, if (err) goto err; + err = rtnl_configure_link(dev, NULL); + if (err < 0) + goto err; + return dev; err: -- cgit v1.2.3-70-g09d2 From 775711497202fe376368c25b0c7296ed8803e0ba Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 10 Jun 2016 17:25:19 +0200 Subject: netfilter: conntrack: destroy kmemcache on module removal I forgot to move the kmem_cache_destroy into the exit path. Fixes: 0c5366b3a8c7 ("netfilter: conntrack: use single slab cache) Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_core.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index db2312eeb2a4..f204274a9b6b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1544,6 +1544,8 @@ void nf_conntrack_cleanup_end(void) nf_conntrack_tstamp_fini(); nf_conntrack_acct_fini(); nf_conntrack_expect_fini(); + + kmem_cache_destroy(nf_conntrack_cachep); } /* -- cgit v1.2.3-70-g09d2 From a46844021f6182cca7b575295ba33a4734b1b9d9 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 11 Jun 2016 12:20:26 +0800 Subject: netfilter: nf_tables: fix wrong check of NFT_SET_MAP in nf_tables_bind_set We should check "i" is used as a dictionary or not, "binding" is already checked before. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 7b7aa871a174..492f6f8efdda 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2946,7 +2946,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, * jumps are already validated for that chain. 
*/ list_for_each_entry(i, &set->bindings, list) { - if (binding->flags & NFT_SET_MAP && + if (i->flags & NFT_SET_MAP && i->chain == binding->chain) goto bind; } -- cgit v1.2.3-70-g09d2 From 8588ac097b49ce8802f11541d9cd6f6667badb34 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sat, 11 Jun 2016 12:20:27 +0800 Subject: netfilter: nf_tables: reject loops from set element jump to chain Liping Zhang says: "Users may add such a wrong nft rules successfully, which will cause an endless jump loop: # nft add rule filter test tcp dport vmap {1: jump test} This is because before we commit, the element in the current anonymous set is inactive, so osp->walk will skip this element and miss the validate check." To resolve this problem, this patch passes the generation mask to the walk function through the iter container structure depending on the code path: 1) If we're dumping the elements, then we have to check if the element is active in the current generation. Thus, we check for the current bit in the genmask. 2) If we're checking for loops, then we have to check if the element is active in the next generation, as we're in the middle of a transaction. Thus, we check for the next bit in the genmask. Based on original patch from Liping Zhang. Reported-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso Tested-by: Liping Zhang --- include/net/netfilter/nf_tables.h | 1 + net/netfilter/nf_tables_api.c | 15 +++++++++------ net/netfilter/nft_hash.c | 3 +-- net/netfilter/nft_rbtree.c | 3 +-- 4 files changed, 12 insertions(+), 10 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 092235458691..f7c291ff4074 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -167,6 +167,7 @@ struct nft_set_elem { struct nft_set; struct nft_set_iter { + u8 genmask; unsigned int count; unsigned int skip; int err; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 492f6f8efdda..0fd69988f00b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2951,6 +2951,7 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, goto bind; } + iter.genmask = nft_genmask_next(ctx->net); iter.skip = 0; iter.count = 0; iter.err = 0; @@ -3192,12 +3193,13 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb) if (nest == NULL) goto nla_put_failure; - args.cb = cb; - args.skb = skb; - args.iter.skip = cb->args[0]; - args.iter.count = 0; - args.iter.err = 0; - args.iter.fn = nf_tables_dump_setelem; + args.cb = cb; + args.skb = skb; + args.iter.genmask = nft_genmask_cur(ctx.net); + args.iter.skip = cb->args[0]; + args.iter.count = 0; + args.iter.err = 0; + args.iter.fn = nf_tables_dump_setelem; set->ops->walk(&ctx, set, &args.iter); nla_nest_end(skb, nest); @@ -4284,6 +4286,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx, binding->chain != chain) continue; + iter.genmask = nft_genmask_next(ctx->net); iter.skip = 0; iter.count = 0; iter.err = 0; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 6fa016564f90..f39c53a159eb 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -189,7 +189,6 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_hash_elem *he; struct rhashtable_iter hti; struct nft_set_elem elem; - u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); int err; err = rhashtable_walk_init(&priv->ht, &hti, GFP_KERNEL); @@ -218,7 +217,7 @@ static void 
nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, goto cont; if (nft_set_elem_expired(&he->ext)) goto cont; - if (!nft_set_elem_active(&he->ext, genmask)) + if (!nft_set_elem_active(&he->ext, iter->genmask)) goto cont; elem.priv = he; diff --git a/net/netfilter/nft_rbtree.c b/net/netfilter/nft_rbtree.c index f762094af7c1..7201d57b5a93 100644 --- a/net/netfilter/nft_rbtree.c +++ b/net/netfilter/nft_rbtree.c @@ -211,7 +211,6 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, struct nft_rbtree_elem *rbe; struct nft_set_elem elem; struct rb_node *node; - u8 genmask = nft_genmask_cur(read_pnet(&set->pnet)); spin_lock_bh(&nft_rbtree_lock); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { @@ -219,7 +218,7 @@ static void nft_rbtree_walk(const struct nft_ctx *ctx, if (iter->count < iter->skip) goto cont; - if (!nft_set_elem_active(&rbe->ext, genmask)) + if (!nft_set_elem_active(&rbe->ext, iter->genmask)) goto cont; elem.priv = rbe; -- cgit v1.2.3-70-g09d2 From a02f424863610a0a7abd80c468839e59cfa4d0d8 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Sat, 11 Jun 2016 12:20:28 +0800 Subject: netfilter: nf_tables: fix wrong destroy anonymous sets if binding fails When we add a nft rule like follows: # nft add rule filter test tcp dport vmap {1: jump test} -ELOOP error will be returned, and the anonymous set will be destroyed. But after that, nf_tables_abort will also try to remove the element and destroy the set, which was already destroyed and freed. If we add a nft wrong rule, nft_tables_abort will do the cleanup work rightly, so nf_tables_set_destroy call here is redundant and wrong, remove it. Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 0fd69988f00b..2c881871db38 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2958,13 +2958,8 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, iter.fn = nf_tables_bind_check_setelem; set->ops->walk(ctx, set, &iter); - if (iter.err < 0) { - /* Destroy anonymous sets if binding fails */ - if (set->flags & NFT_SET_ANONYMOUS) - nf_tables_set_destroy(ctx, set); - + if (iter.err < 0) return iter.err; - } } bind: binding->chain = ctx->chain; -- cgit v1.2.3-70-g09d2 From 8fff1722f705ce5023a0d6d77a31a9d013be2a34 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Tue, 14 Jun 2016 20:13:04 +0800 Subject: netfilter: nf_tables: fix a wrong check to skip the inactive rules nft_genmask_cur has already done left-shift operator on the gencursor, so there's no need to do left-shift operator on it again. Fixes: ea4bd995b0f2 ("netfilter: nf_tables: add transaction helper functions") Cc: Patrick McHardy Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c index e9f8dffcc244..fb8b5892b5ff 100644 --- a/net/netfilter/nf_tables_core.c +++ b/net/netfilter/nf_tables_core.c @@ -143,7 +143,7 @@ next_rule: list_for_each_entry_continue_rcu(rule, &chain->rules, list) { /* This rule is not active, skip. 
*/ - if (unlikely(rule->genmask & (1 << gencursor))) + if (unlikely(rule->genmask & gencursor)) continue; rulenum++; -- cgit v1.2.3-70-g09d2 From 0c5ddb51e8f7be7170600f95a4ea92e5a32afad8 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 07:50:25 -0700 Subject: net/mlx4_en: initialize cmd.context_lock spinlock earlier MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Maciej Żenczykowski reported lockdep warning a spinlock was not registered before being held in mlx4_cmd_wake_completions() cmd.context_lock initialization is not at the right place. 1) mlx4_cmd_use_events() can be called multiple times. Calling spin_lock_init() on a live spinlock can lead to hangs. 2) mlx4_cmd_wake_completions() can be called while lock has not been initialized. Lockdep complains, and current logic is not race prone. It seems better to move the initialization earlier in mlx4_load_one() Signed-off-by: Eric Dumazet Reported-by: Maciej Żenczykowski Cc: Eugenia Emantayev Cc: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 1 - drivers/net/ethernet/mellanox/mlx4/main.c | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index e94ca1c3fc7c..f04a423ff79d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2597,7 +2597,6 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) priv->cmd.free_head = 0; sema_init(&priv->cmd.event_sem, priv->cmd.max_cmds); - spin_lock_init(&priv->cmd.context_lock); for (priv->cmd.token_mask = 1; priv->cmd.token_mask < priv->cmd.max_cmds; diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 12c77a70abdb..372ebfa880f5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3222,6 +3222,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, INIT_LIST_HEAD(&priv->pgdir_list); mutex_init(&priv->pgdir_mutex); + spin_lock_init(&priv->cmd.context_lock); INIT_LIST_HEAD(&priv->bf_list); mutex_init(&priv->bf_mutex); -- cgit v1.2.3-70-g09d2 From 3d7c8257d999bdf8fa77ffd9be775c7b58cc7b69 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 13 Jun 2016 11:33:32 -0700 Subject: net_sched: prio: insure proper transactional behavior Now prio_init() can return -ENOMEM, it also has to make sure any allocated qdiscs are freed, since the caller (qdisc_create()) wont call ->destroy() handler for us. More generally, we want a transactional behavior for "tc qdisc change ...", so prio_tune() should not make modifications if any error is returned. It means that we must validate parameters and allocate missing qdisc(s) before taking root qdisc lock exactly once, to not leave the prio qdisc in an intermediate state. Fixes: cbdf45116478 ("net_sched: prio: properly report out of memory errors") Signed-off-by: Eric Dumazet Reported-by: Cong Wang Acked-by: Cong Wang Signed-off-by: David S. 
Miller --- net/sched/sch_prio.c | 57 +++++++++++++++++++++------------------------------- 1 file changed, 23 insertions(+), 34 deletions(-) diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 071718bccdab..a356450b747b 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -172,8 +172,9 @@ prio_destroy(struct Qdisc *sch) static int prio_tune(struct Qdisc *sch, struct nlattr *opt) { struct prio_sched_data *q = qdisc_priv(sch); + struct Qdisc *queues[TCQ_PRIO_BANDS]; + int oldbands = q->bands, i; struct tc_prio_qopt *qopt; - int i; if (nla_len(opt) < sizeof(*qopt)) return -EINVAL; @@ -187,54 +188,42 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) return -EINVAL; } + /* Before commit, make sure we can allocate all new qdiscs */ + for (i = oldbands; i < qopt->bands; i++) { + queues[i] = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + TC_H_MAKE(sch->handle, i + 1)); + if (!queues[i]) { + while (i > oldbands) + qdisc_destroy(queues[--i]); + return -ENOMEM; + } + } + sch_tree_lock(sch); q->bands = qopt->bands; memcpy(q->prio2band, qopt->priomap, TC_PRIO_MAX+1); - for (i = q->bands; i < TCQ_PRIO_BANDS; i++) { + for (i = q->bands; i < oldbands; i++) { struct Qdisc *child = q->queues[i]; - q->queues[i] = &noop_qdisc; - if (child != &noop_qdisc) { - qdisc_tree_reduce_backlog(child, child->q.qlen, child->qstats.backlog); - qdisc_destroy(child); - } - } - sch_tree_unlock(sch); - for (i = 0; i < q->bands; i++) { - struct Qdisc *child; + qdisc_tree_reduce_backlog(child, child->q.qlen, + child->qstats.backlog); + qdisc_destroy(child); + } - if (q->queues[i] != &noop_qdisc) - continue; + for (i = oldbands; i < q->bands; i++) + q->queues[i] = queues[i]; - child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, - TC_H_MAKE(sch->handle, i + 1)); - if (!child) - return -ENOMEM; - sch_tree_lock(sch); - q->queues[i] = child; - sch_tree_unlock(sch); - } + sch_tree_unlock(sch); return 0; } static int prio_init(struct Qdisc *sch, struct nlattr *opt) { - struct prio_sched_data *q = qdisc_priv(sch); - int i; - - for (i = 0; i < TCQ_PRIO_BANDS; i++) - q->queues[i] = &noop_qdisc; - - if (opt == NULL) { + if (!opt) return -EINVAL; - } else { - int err; - if ((err = prio_tune(sch, opt)) != 0) - return err; - } - return 0; + return prio_tune(sch, opt); } static int prio_dump(struct Qdisc *sch, struct sk_buff *skb) -- cgit v1.2.3-70-g09d2 From d15eccea69b96a5116169688dcc9baf6d1ce2751 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 13 Jun 2016 13:44:14 -0700 Subject: act_ipt: fix a bind refcnt leak And avoid calling tcf_hash_check() twice. Fixes: a57f19d30b2d ("net sched: ipt action fix late binding") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- net/sched/act_ipt.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index 9f002ada7074..d4bd19ee5822 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -121,10 +121,13 @@ static int __tcf_ipt_init(struct tc_action_net *tn, struct nlattr *nla, } td = (struct xt_entry_target *)nla_data(tb[TCA_IPT_TARG]); - if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) + if (nla_len(tb[TCA_IPT_TARG]) < td->u.target_size) { + if (exists) + tcf_hash_release(a, bind); return -EINVAL; + } - if (!tcf_hash_check(tn, index, a, bind)) { + if (!exists) { ret = tcf_hash_create(tn, index, est, a, sizeof(*ipt), bind, false); if (ret) -- cgit v1.2.3-70-g09d2 From ebecaa6662b0a9c3590bd644a4cec6f9d96818b7 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Mon, 13 Jun 2016 18:08:42 -0400 Subject: net sched actions: bug fix dumping actions directly didnt produce NLMSG_DONE This refers to commands to direct action access as follows: sudo tc actions add action drop index 12 sudo tc actions add action pipe index 10 And then dumping them like so: sudo tc actions ls action gact iproute2 worked because it depended on absence of TCA_ACT_TAB TLV as end of message. This fix has been tested with iproute2 and is backward compatible. Signed-off-by: Jamal Hadi Salim Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_api.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 336774a535c3..c7a0b0d481c0 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -1118,7 +1118,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) nla_nest_end(skb, nest); ret = skb->len; } else - nla_nest_cancel(skb, nest); + nlmsg_trim(skb, b); nlh->nlmsg_len = skb_tail_pointer(skb) - b; if (NETLINK_CB(cb->skb).portid && ret) -- cgit v1.2.3-70-g09d2 From 0ee13627f963f9c5c9544ed19d82854836d5e676 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 14 Jun 2016 06:16:27 +0200 Subject: htb: call qdisc_root with rcu read lock held saw a debug splat: net/include/net/sch_generic.h:287 suspicious rcu_dereference_check() usage! other info that might help us debug this: rcu_scheduler_active = 1, debug_locks = 0 2 locks held by kworker/2:1/710: #0: ("events"){.+.+.+}, at: [] #1: ((&q->work)){+.+...}, at: [] process_one_work+0x14d/0x690 Workqueue: events htb_work_func Call Trace: [] dump_stack+0x85/0xc2 [] lockdep_rcu_suspicious+0xe7/0x120 [] htb_work_func+0x67/0x70 Signed-off-by: Florian Westphal Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_htb.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index d4b4218af6b1..62f9d8100c6e 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1007,7 +1007,9 @@ static void htb_work_func(struct work_struct *work) struct htb_sched *q = container_of(work, struct htb_sched, work); struct Qdisc *sch = q->watchdog.qdisc; + rcu_read_lock(); __netif_schedule(qdisc_root(sch)); + rcu_read_unlock(); } static int htb_init(struct Qdisc *sch, struct nlattr *opt) -- cgit v1.2.3-70-g09d2 From b196c22af5c3ff784c472c80f6fb4e5fad67b2ac Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Jun 2016 15:25:14 +0200 Subject: macsec: add rcu_barrier() on module exit Without this, the various uses of call_rcu could cause a kernel panic. 
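For readers unfamiliar with the idiom: a module that frees objects through call_rcu() must wait for every callback it has already queued before its code is unloaded, otherwise a callback can fire after the module text is gone. The following minimal sketch is an editorial illustration of that pattern only, not part of the patch; all names (struct foo, foo_free_rcu, foo_release, foo_exit) are invented.

#include <linux/module.h>
#include <linux/rcupdate.h>
#include <linux/slab.h>

struct foo {
	struct rcu_head rcu;
	/* payload ... */
};

/* Runs from this module's text once a grace period has elapsed. */
static void foo_free_rcu(struct rcu_head *head)
{
	kfree(container_of(head, struct foo, rcu));
}

/* Somewhere in the datapath, objects are released asynchronously. */
static void foo_release(struct foo *f)
{
	call_rcu(&f->rcu, foo_free_rcu);
}

static void __exit foo_exit(void)
{
	/* 1) unregister everything that could still invoke foo_release() */
	/* 2) wait for every already-queued foo_free_rcu() to complete */
	rcu_barrier();
}
module_exit(foo_exit);

rcu_barrier() only waits for callbacks that are already queued, which is why the unregistration steps must come first; the macsec_exit() change below adds the barrier after all its unregister calls for the same reason.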
Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Sabrina Dubroca Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- drivers/net/macsec.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 47ee2c840b55..e80736f6acd7 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -3361,6 +3361,7 @@ static void __exit macsec_exit(void) genl_unregister_family(&macsec_fam); rtnl_link_unregister(&macsec_link_ops); unregister_netdevice_notifier(&macsec_notifier); + rcu_barrier(); } module_init(macsec_init); -- cgit v1.2.3-70-g09d2 From 5d9649b3a524df9ccd15ac25ad6591089260fdbd Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Jun 2016 15:25:15 +0200 Subject: macsec: allocate sg and iv on the heap For the crypto callbacks to work properly, we cannot have sg and iv on the stack. Use kmalloc instead, with a single allocation for aead_request + scatterlist + iv. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Sabrina Dubroca Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- drivers/net/macsec.c | 46 +++++++++++++++++++++++++++++++++++++--------- 1 file changed, 37 insertions(+), 9 deletions(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index e80736f6acd7..189ea3e8e8a0 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -605,12 +605,41 @@ static void macsec_encrypt_done(struct crypto_async_request *base, int err) dev_put(dev); } +static struct aead_request *macsec_alloc_req(struct crypto_aead *tfm, + unsigned char **iv, + struct scatterlist **sg) +{ + size_t size, iv_offset, sg_offset; + struct aead_request *req; + void *tmp; + + size = sizeof(struct aead_request) + crypto_aead_reqsize(tfm); + iv_offset = size; + size += GCM_AES_IV_LEN; + + size = ALIGN(size, __alignof__(struct scatterlist)); + sg_offset = size; + size += sizeof(struct scatterlist) * (MAX_SKB_FRAGS + 1); + + tmp = kmalloc(size, GFP_ATOMIC); + if (!tmp) + return NULL; + + *iv = (unsigned char *)(tmp + iv_offset); + *sg = (struct scatterlist *)(tmp + sg_offset); + req = tmp; + + aead_request_set_tfm(req, tfm); + + return req; +} + static struct sk_buff *macsec_encrypt(struct sk_buff *skb, struct net_device *dev) { int ret; - struct scatterlist sg[MAX_SKB_FRAGS + 1]; - unsigned char iv[GCM_AES_IV_LEN]; + struct scatterlist *sg; + unsigned char *iv; struct ethhdr *eth; struct macsec_eth_header *hh; size_t unprotected_len; @@ -668,8 +697,6 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, macsec_fill_sectag(hh, secy, pn); macsec_set_shortlen(hh, unprotected_len - 2 * ETH_ALEN); - macsec_fill_iv(iv, secy->sci, pn); - skb_put(skb, secy->icv_len); if (skb->len - ETH_HLEN > macsec_priv(dev)->real_dev->mtu) { @@ -684,13 +711,15 @@ static struct sk_buff *macsec_encrypt(struct sk_buff *skb, return ERR_PTR(-EINVAL); } - req = aead_request_alloc(tx_sa->key.tfm, GFP_ATOMIC); + req = macsec_alloc_req(tx_sa->key.tfm, &iv, &sg); if (!req) { macsec_txsa_put(tx_sa); kfree_skb(skb); return ERR_PTR(-ENOMEM); } + macsec_fill_iv(iv, secy->sci, pn); + sg_init_table(sg, MAX_SKB_FRAGS + 1); skb_to_sgvec(skb, sg, 0, skb->len); @@ -861,7 +890,6 @@ static void macsec_decrypt_done(struct crypto_async_request *base, int err) out: macsec_rxsa_put(rx_sa); dev_put(dev); - return; } static struct sk_buff *macsec_decrypt(struct sk_buff *skb, @@ -871,8 +899,8 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, struct macsec_secy *secy) { int ret; - struct scatterlist 
sg[MAX_SKB_FRAGS + 1]; - unsigned char iv[GCM_AES_IV_LEN]; + struct scatterlist *sg; + unsigned char *iv; struct aead_request *req; struct macsec_eth_header *hdr; u16 icv_len = secy->icv_len; @@ -882,7 +910,7 @@ static struct sk_buff *macsec_decrypt(struct sk_buff *skb, if (!skb) return ERR_PTR(-ENOMEM); - req = aead_request_alloc(rx_sa->key.tfm, GFP_ATOMIC); + req = macsec_alloc_req(rx_sa->key.tfm, &iv, &sg); if (!req) { kfree_skb(skb); return ERR_PTR(-ENOMEM); -- cgit v1.2.3-70-g09d2 From 6052f7fbce857e327218a9d8a040e210ea7cc718 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Tue, 14 Jun 2016 15:25:16 +0200 Subject: macsec: fix SA initialization The ASYNC flag prevents initialization on some physical machines. Fixes: c09440f7dcb3 ("macsec: introduce IEEE 802.1AE driver") Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- drivers/net/macsec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c index 189ea3e8e8a0..0e7eff7f1cd2 100644 --- a/drivers/net/macsec.c +++ b/drivers/net/macsec.c @@ -1262,7 +1262,7 @@ static struct crypto_aead *macsec_alloc_tfm(char *key, int key_len, int icv_len) struct crypto_aead *tfm; int ret; - tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); + tfm = crypto_alloc_aead("gcm(aes)", 0, 0); if (!tfm || IS_ERR(tfm)) return NULL; -- cgit v1.2.3-70-g09d2 From 66d95b6705a6347f7b2645e042874ec0bb03b726 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 15 Jun 2016 14:10:57 +0800 Subject: tipc: fix suspicious RCU usage When run tipcTS&tipcTC test suite, the following complaint appears: [ 56.926168] =============================== [ 56.926169] [ INFO: suspicious RCU usage. ] [ 56.926171] 4.7.0-rc1+ #160 Not tainted [ 56.926173] ------------------------------- [ 56.926174] net/tipc/bearer.c:408 suspicious rcu_dereference_protected() usage! [ 56.926175] [ 56.926175] other info that might help us debug this: [ 56.926175] [ 56.926177] [ 56.926177] rcu_scheduler_active = 1, debug_locks = 1 [ 56.926179] 3 locks held by swapper/4/0: [ 56.926180] #0: (((&req->timer))){+.-...}, at: [] call_timer_fn+0x5/0x340 [ 56.926203] #1: (&(&req->lock)->rlock){+.-...}, at: [] disc_timeout+0x1b/0xd0 [tipc] [ 56.926212] #2: (rcu_read_lock){......}, at: [] tipc_bearer_xmit_skb+0xb0/0x2e0 [tipc] [ 56.926218] [ 56.926218] stack backtrace: [ 56.926221] CPU: 4 PID: 0 Comm: swapper/4 Not tainted 4.7.0-rc1+ #160 [ 56.926222] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007 [ 56.926224] 0000000000000000 ffff880016803d28 ffffffff813c4423 ffff8800154252c0 [ 56.926227] 0000000000000001 ffff880016803d58 ffffffff810b7512 ffff8800124d8120 [ 56.926230] ffff880013f8a160 ffff8800132b5ccc ffff8800124d8120 ffff880016803d88 [ 56.926234] Call Trace: [ 56.926235] [] dump_stack+0x67/0x94 [ 56.926250] [] lockdep_rcu_suspicious+0xe2/0x120 [ 56.926256] [] tipc_l2_send_msg+0x131/0x1c0 [tipc] [ 56.926261] [] tipc_bearer_xmit_skb+0x14c/0x2e0 [tipc] [ 56.926266] [] ? tipc_bearer_xmit_skb+0xb0/0x2e0 [tipc] [ 56.926273] [] ? tipc_disc_init_msg+0x1f0/0x1f0 [tipc] [ 56.926278] [] ? tipc_disc_init_msg+0x1f0/0x1f0 [tipc] [ 56.926283] [] disc_timeout+0x56/0xd0 [tipc] [ 56.926288] [] call_timer_fn+0xb8/0x340 [ 56.926291] [] ? call_timer_fn+0x5/0x340 [ 56.926296] [] ? tipc_disc_init_msg+0x1f0/0x1f0 [tipc] [ 56.926300] [] run_timer_softirq+0x23a/0x390 [ 56.926306] [] ? 
clockevents_program_event+0x7f/0x130 [ 56.926316] [] __do_softirq+0xc3/0x4a2 [ 56.926323] [] irq_exit+0x8a/0xb0 [ 56.926327] [] smp_apic_timer_interrupt+0x46/0x60 [ 56.926331] [] apic_timer_interrupt+0x89/0x90 [ 56.926333] [] ? default_idle+0x2a/0x1a0 [ 56.926340] [] ? default_idle+0x28/0x1a0 [ 56.926342] [] arch_cpu_idle+0xf/0x20 [ 56.926345] [] default_idle_call+0x2f/0x50 [ 56.926347] [] cpu_startup_entry+0x215/0x3e0 [ 56.926353] [] start_secondary+0xf9/0x100 The warning appears as rtnl_dereference() is wrongly used in tipc_l2_send_msg() under RCU read lock protection. Instead the proper usage should be that rcu_dereference_rtnl() is called here. Fixes: 5b7066c3dd24 ("tipc: stricter filtering of packets in bearer layer") Acked-by: Jon Maloy Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/bearer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 6f11c62bc8f9..bf8f05c3eb82 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -405,7 +405,7 @@ int tipc_l2_send_msg(struct net *net, struct sk_buff *skb, return 0; /* Send RESET message even if bearer is detached from device */ - tipc_ptr = rtnl_dereference(dev->tipc_ptr); + tipc_ptr = rcu_dereference_rtnl(dev->tipc_ptr); if (unlikely(!tipc_ptr && !msg_is_reset(buf_msg(skb)))) goto drop; -- cgit v1.2.3-70-g09d2 From c91522f860bb9dd4178c8280bbebd4f4321b7199 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 15 Jun 2016 14:11:31 +0800 Subject: tipc: eliminate uninitialized variable warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit net/tipc/link.c: In function ‘tipc_link_timeout’: net/tipc/link.c:744:28: warning: ‘mtyp’ may be used uninitialized in this function [-Wuninitialized] Fixes: 42b18f605fea ("tipc: refactor function tipc_link_timeout()") Acked-by: Jon Maloy Signed-off-by: Ying Xue Signed-off-by: David S. Miller --- net/tipc/link.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/tipc/link.c b/net/tipc/link.c index 7059c94f33c5..67b6ab9f4c8d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -704,7 +704,8 @@ static void link_profile_stats(struct tipc_link *l) */ int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq) { - int mtyp, rc = 0; + int mtyp = 0; + int rc = 0; bool state = false; bool probe = false; bool setup = false; -- cgit v1.2.3-70-g09d2 From 4a1836701fd59476ee1331379e00a9ab51ba4f61 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 15 Jun 2016 17:45:51 +0200 Subject: net: skfb: remove obsolete -I cflag The skfp driver has been moved to drivers/net/fddi/skfp a long time ago, but we still attempt to include headers from the old location, which causes a warning when building with W=1: cc1: error: /git/arm-soc/drivers/net/skfp: No such file or directory [-Werror=missing-include-dirs] cc1: error: drivers/net/skfp: No such file or directory [-Werror=missing-include-dirs] Clearly this include directive is not needed any more, so we can just remove it now. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/fddi/skfp/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/fddi/skfp/Makefile b/drivers/net/fddi/skfp/Makefile index b0be0234abf6..a957a1c7e5ba 100644 --- a/drivers/net/fddi/skfp/Makefile +++ b/drivers/net/fddi/skfp/Makefile @@ -17,4 +17,4 @@ skfp-objs := skfddi.o hwmtm.o fplustm.o smt.o cfm.o \ # projects. 
To keep the source common for all those drivers (and # thus simplify fixes to it), please do not clean it up! -ccflags-y := -Idrivers/net/skfp -DPCI -DMEM_MAPPED_IO -Wno-strict-prototypes +ccflags-y := -DPCI -DMEM_MAPPED_IO -Wno-strict-prototypes -- cgit v1.2.3-70-g09d2 From daddef76c3deaaa7922f9d7b18edbf0a061215c3 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 15 Jun 2016 11:14:53 +0200 Subject: net: Don't forget pr_fmt on net_dbg_ratelimited for CONFIG_DYNAMIC_DEBUG The implementation of net_dbg_ratelimited in the CONFIG_DYNAMIC_DEBUG case was added with 2c94b5373 ("net: Implement net_dbg_ratelimited() for CONFIG_DYNAMIC_DEBUG case"). The implementation strategy was to take the usual definition of the dynamic_pr_debug macro, but alter it by adding a call to "net_ratelimit()" in the if statement. This is, in fact, the correct approach. However, while doing this, the author of the commit forgot to surround fmt by pr_fmt, resulting in unprefixed log messages appearing in the console. So, this commit adds back the pr_fmt(fmt) invocation, making net_dbg_ratelimited properly consistent across DEBUG, no DEBUG, and DYNAMIC_DEBUG cases, and bringing parity with the behavior of dynamic_pr_debug as well. Fixes: 2c94b5373 ("net: Implement net_dbg_ratelimited() for CONFIG_DYNAMIC_DEBUG case") Signed-off-by: Jason A. Donenfeld Cc: Tim Bingham Signed-off-by: David S. Miller --- include/linux/net.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/include/linux/net.h b/include/linux/net.h index 9aa49a05fe38..25aa03b51c4e 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -251,7 +251,8 @@ do { \ DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) && \ net_ratelimit()) \ - __dynamic_pr_debug(&descriptor, fmt, ##__VA_ARGS__); \ + __dynamic_pr_debug(&descriptor, pr_fmt(fmt), \ + ##__VA_ARGS__); \ } while (0) #elif defined(DEBUG) #define net_dbg_ratelimited(fmt, ...) \ -- cgit v1.2.3-70-g09d2 From e582615ad33dbd39623084a02e95567b116e1eea Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 15 Jun 2016 06:24:00 -0700 Subject: gre: fix error handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1) gre_parse_header() can be called from gre_err() At this point transport header points to ICMP header, not the inner header. 2) We can not really change transport header as ipgre_err() will later assume transport header still points to ICMP header (using icmp_hdr()) 3) pskb_may_pull() logic in gre_parse_header() really works if we are interested at zone pointed by skb->data 4) As Jiri explained in commit b7f8fe251e46 ("gre: do not pull header in ICMP error processing") we should not pull headers in error handler. So this fix : A) changes gre_parse_header() to use skb->data instead of skb_transport_header() B) Adds a nhs parameter to gre_parse_header() so that we can skip the not pulled IP header from error path. This offset is 0 for normal receive path. C) remove obsolete IPV6 includes Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Maciej Żenczykowski Cc: Jiri Benc Signed-off-by: David S. 
Miller --- include/net/gre.h | 2 +- net/ipv4/gre_demux.c | 10 +++++----- net/ipv4/ip_gre.c | 12 ++++-------- net/ipv6/ip6_gre.c | 2 +- 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/include/net/gre.h b/include/net/gre.h index 5dce30a6abe3..7a54a31d1d4c 100644 --- a/include/net/gre.h +++ b/include/net/gre.h @@ -26,7 +26,7 @@ int gre_del_protocol(const struct gre_protocol *proto, u8 version); struct net_device *gretap_fb_dev_create(struct net *net, const char *name, u8 name_assign_type); int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err, __be16 proto); + bool *csum_err, __be16 proto, int nhs); static inline int gre_calc_hlen(__be16 o_flags) { diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 4c39f4fd332a..de1d119a4497 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -62,26 +62,26 @@ EXPORT_SYMBOL_GPL(gre_del_protocol); /* Fills in tpi and returns header length to be pulled. */ int gre_parse_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, - bool *csum_err, __be16 proto) + bool *csum_err, __be16 proto, int nhs) { const struct gre_base_hdr *greh; __be32 *options; int hdr_len; - if (unlikely(!pskb_may_pull(skb, sizeof(struct gre_base_hdr)))) + if (unlikely(!pskb_may_pull(skb, nhs + sizeof(struct gre_base_hdr)))) return -EINVAL; - greh = (struct gre_base_hdr *)skb_transport_header(skb); + greh = (struct gre_base_hdr *)(skb->data + nhs); if (unlikely(greh->flags & (GRE_VERSION | GRE_ROUTING))) return -EINVAL; tpi->flags = gre_flags_to_tnl_flags(greh->flags); hdr_len = gre_calc_hlen(tpi->flags); - if (!pskb_may_pull(skb, hdr_len)) + if (!pskb_may_pull(skb, nhs + hdr_len)) return -EINVAL; - greh = (struct gre_base_hdr *)skb_transport_header(skb); + greh = (struct gre_base_hdr *)(skb->data + nhs); tpi->proto = greh->protocol; options = (__be32 *)(greh + 1); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 07c5cf1838d8..1d000af7f561 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -49,12 +49,6 @@ #include #include -#if IS_ENABLED(CONFIG_IPV6) -#include -#include -#include -#endif - /* Problems & solutions -------------------- @@ -217,12 +211,14 @@ static void gre_err(struct sk_buff *skb, u32 info) * by themselves??? */ + const struct iphdr *iph = (struct iphdr *)skb->data; const int type = icmp_hdr(skb)->type; const int code = icmp_hdr(skb)->code; struct tnl_ptk_info tpi; bool csum_err = false; - if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)) < 0) { + if (gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), + iph->ihl * 4) < 0) { if (!csum_err) /* ignore csum errors. 
*/ return; } @@ -338,7 +334,7 @@ static int gre_rcv(struct sk_buff *skb) } #endif - hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP)); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IP), 0); if (hdr_len < 0) goto drop; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index fdc9de276ab1..776d145113e1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -468,7 +468,7 @@ static int gre_rcv(struct sk_buff *skb) bool csum_err = false; int hdr_len; - hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6)); + hdr_len = gre_parse_header(skb, &tpi, &csum_err, htons(ETH_P_IPV6), 0); if (hdr_len < 0) goto drop; -- cgit v1.2.3-70-g09d2 From 19de99f70b87fcc3338da52a89c439b088cbff71 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Jun 2016 18:25:38 -0700 Subject: bpf: fix matching of data/data_end in verifier The ctx structure passed into bpf programs is different depending on bpf program type. The verifier incorrectly marked ctx->data and ctx->data_end access based on ctx offset only. That caused loads in tracing programs int bpf_prog(struct pt_regs *ctx) { .. ctx->ax .. } to be incorrectly marked as PTR_TO_PACKET which later caused verifier to reject the program that was actually valid in tracing context. Fix this by doing program type specific matching of ctx offsets. Fixes: 969bf05eb3ce ("bpf: direct packet access") Reported-by: Sasha Goldshtein Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 28 +++++++++++++++++++++++++++- kernel/bpf/verifier.c | 41 +++++++---------------------------------- kernel/trace/bpf_trace.c | 6 ++++-- net/core/filter.c | 16 ++++++++++++++-- 4 files changed, 52 insertions(+), 39 deletions(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8ee27b8afe81..8269cafc6eb1 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -111,6 +111,31 @@ enum bpf_access_type { BPF_WRITE = 2 }; +/* types of values stored in eBPF registers */ +enum bpf_reg_type { + NOT_INIT = 0, /* nothing was written into register */ + UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */ + PTR_TO_CTX, /* reg points to bpf_context */ + CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ + PTR_TO_MAP_VALUE, /* reg points to map element value */ + PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ + FRAME_PTR, /* reg == frame_pointer */ + PTR_TO_STACK, /* reg == frame_pointer + imm */ + CONST_IMM, /* constant integer value */ + + /* PTR_TO_PACKET represents: + * skb->data + * skb->data + imm + * skb->data + (u16) var + * skb->data + (u16) var + imm + * if (range > 0) then [ptr, ptr + range - off) is safe to access + * if (id > 0) means that some 'var' was added + * if (off > 0) menas that 'imm' was added + */ + PTR_TO_PACKET, + PTR_TO_PACKET_END, /* skb->data + headlen */ +}; + struct bpf_prog; struct bpf_verifier_ops { @@ -120,7 +145,8 @@ struct bpf_verifier_ops { /* return true if 'size' wide access at offset 'off' within bpf_context * with 'type' (read or write) is allowed */ - bool (*is_valid_access)(int off, int size, enum bpf_access_type type); + bool (*is_valid_access)(int off, int size, enum bpf_access_type type, + enum bpf_reg_type *reg_type); u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 668e07903c8f..eec9f90ba030 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -126,31 +126,6 @@ * are set to NOT_INIT to 
indicate that they are no longer readable. */ -/* types of values stored in eBPF registers */ -enum bpf_reg_type { - NOT_INIT = 0, /* nothing was written into register */ - UNKNOWN_VALUE, /* reg doesn't contain a valid pointer */ - PTR_TO_CTX, /* reg points to bpf_context */ - CONST_PTR_TO_MAP, /* reg points to struct bpf_map */ - PTR_TO_MAP_VALUE, /* reg points to map element value */ - PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ - FRAME_PTR, /* reg == frame_pointer */ - PTR_TO_STACK, /* reg == frame_pointer + imm */ - CONST_IMM, /* constant integer value */ - - /* PTR_TO_PACKET represents: - * skb->data - * skb->data + imm - * skb->data + (u16) var - * skb->data + (u16) var + imm - * if (range > 0) then [ptr, ptr + range - off) is safe to access - * if (id > 0) means that some 'var' was added - * if (off > 0) menas that 'imm' was added - */ - PTR_TO_PACKET, - PTR_TO_PACKET_END, /* skb->data + headlen */ -}; - struct reg_state { enum bpf_reg_type type; union { @@ -695,10 +670,10 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off, /* check access to 'struct bpf_context' fields */ static int check_ctx_access(struct verifier_env *env, int off, int size, - enum bpf_access_type t) + enum bpf_access_type t, enum bpf_reg_type *reg_type) { if (env->prog->aux->ops->is_valid_access && - env->prog->aux->ops->is_valid_access(off, size, t)) { + env->prog->aux->ops->is_valid_access(off, size, t, reg_type)) { /* remember the offset of last byte accessed in ctx */ if (env->prog->aux->max_ctx_offset < off + size) env->prog->aux->max_ctx_offset = off + size; @@ -798,21 +773,19 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, mark_reg_unknown_value(state->regs, value_regno); } else if (reg->type == PTR_TO_CTX) { + enum bpf_reg_type reg_type = UNKNOWN_VALUE; + if (t == BPF_WRITE && value_regno >= 0 && is_pointer_value(env, value_regno)) { verbose("R%d leaks addr into ctx\n", value_regno); return -EACCES; } - err = check_ctx_access(env, off, size, t); + err = check_ctx_access(env, off, size, t, ®_type); if (!err && t == BPF_READ && value_regno >= 0) { mark_reg_unknown_value(state->regs, value_regno); - if (off == offsetof(struct __sk_buff, data) && - env->allow_ptr_leaks) + if (env->allow_ptr_leaks) /* note that reg.[id|off|range] == 0 */ - state->regs[value_regno].type = PTR_TO_PACKET; - else if (off == offsetof(struct __sk_buff, data_end) && - env->allow_ptr_leaks) - state->regs[value_regno].type = PTR_TO_PACKET_END; + state->regs[value_regno].type = reg_type; } } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) { diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 720b7bb01d43..e7af6cb9d5cf 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -349,7 +349,8 @@ static const struct bpf_func_proto *kprobe_prog_func_proto(enum bpf_func_id func } /* bpf+kprobe programs can access fields of 'struct pt_regs' */ -static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type) +static bool kprobe_prog_is_valid_access(int off, int size, enum bpf_access_type type, + enum bpf_reg_type *reg_type) { /* check bounds */ if (off < 0 || off >= sizeof(struct pt_regs)) @@ -427,7 +428,8 @@ static const struct bpf_func_proto *tp_prog_func_proto(enum bpf_func_id func_id) } } -static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type) +static bool tp_prog_is_valid_access(int off, int size, enum bpf_access_type type, + enum bpf_reg_type *reg_type) { if (off < 
sizeof(void *) || off >= PERF_MAX_TRACE_SIZE) return false; diff --git a/net/core/filter.c b/net/core/filter.c index 68adb5f52110..c4b330c85c02 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2085,7 +2085,8 @@ static bool __is_valid_access(int off, int size, enum bpf_access_type type) } static bool sk_filter_is_valid_access(int off, int size, - enum bpf_access_type type) + enum bpf_access_type type, + enum bpf_reg_type *reg_type) { switch (off) { case offsetof(struct __sk_buff, tc_classid): @@ -2108,7 +2109,8 @@ static bool sk_filter_is_valid_access(int off, int size, } static bool tc_cls_act_is_valid_access(int off, int size, - enum bpf_access_type type) + enum bpf_access_type type, + enum bpf_reg_type *reg_type) { if (type == BPF_WRITE) { switch (off) { @@ -2123,6 +2125,16 @@ static bool tc_cls_act_is_valid_access(int off, int size, return false; } } + + switch (off) { + case offsetof(struct __sk_buff, data): + *reg_type = PTR_TO_PACKET; + break; + case offsetof(struct __sk_buff, data_end): + *reg_type = PTR_TO_PACKET_END; + break; + } + return __is_valid_access(off, size, type); } -- cgit v1.2.3-70-g09d2 From ad572d174787daa59e24b8b5c83028c09cdb5ddb Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Jun 2016 18:25:39 -0700 Subject: bpf, trace: check event type in bpf_perf_event_read Similar to bpf_perf_event_output(), the bpf_perf_event_read() helper needs to check the type of the perf_event before reading the counter. Fixes: a43eec304259 ("bpf: introduce bpf_perf_event_output() helper") Reported-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/trace/bpf_trace.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index e7af6cb9d5cf..26f603da7e26 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -209,6 +209,10 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5) event->pmu->count) return -EINVAL; + if (unlikely(event->attr.type != PERF_TYPE_HARDWARE && + event->attr.type != PERF_TYPE_RAW)) + return -EINVAL; + /* * we don't know if the function is run successfully by the * return value. It can be judged in other places, such as -- cgit v1.2.3-70-g09d2 From 4e384ac19b5be6ad084b215d298b82b3a91ad24b Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 16 Jun 2016 10:55:17 +0800 Subject: r8152: disable MAC clock speed down Disable MAC clock speed down. It may cause the first control transfer to contain the wrong data, when the power state changes from U1 to U0. Signed-off-by: Hayes Wang Signed-off-by: David S.
Miller --- drivers/net/usb/r8152.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 3f9f6ed3eec4..89dc752f92bd 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -3382,15 +3382,11 @@ static void r8153_init(struct r8152 *tp) r8153_power_cut_en(tp, false); r8153_u1u2en(tp, true); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, ALDPS_SPDWN_RATIO); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, EEE_SPDWN_RATIO); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, - PKT_AVAIL_SPDWN_EN | SUSPEND_SPDWN_EN | - U1U2_SPDWN_EN | L1_SPDWN_EN); - ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, - PWRSAVE_SPDWN_EN | RXDV_SPDWN_EN | TX10MIDLE_EN | - TP100_SPDWN_EN | TP500_SPDWN_EN | TP1000_SPDWN_EN | - EEE_SPDWN_EN); + /* MAC clock speed down */ + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, 0); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, 0); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0); + ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0); r8153_enable_eee(tp); r8153_aldps_en(tp, true); -- cgit v1.2.3-70-g09d2 From 93fe9b1838404fcc3bea320b704bfa461d8e57a6 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 16 Jun 2016 10:55:18 +0800 Subject: r8152: reset the bmu Reset the BMU to clear the rx/tx fifo. This avoids that the unexpected data remains in the hw. Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index 89dc752f92bd..f5bc351e09ef 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -116,6 +116,7 @@ #define USB_TX_DMA 0xd434 #define USB_TOLERANCE 0xd490 #define USB_LPM_CTRL 0xd41a +#define USB_BMU_RESET 0xd4b0 #define USB_UPS_CTRL 0xd800 #define USB_MISC_0 0xd81a #define USB_POWER_CUT 0xd80a @@ -338,6 +339,10 @@ #define TEST_MODE_DISABLE 0x00000001 #define TX_SIZE_ADJUST1 0x00000100 +/* USB_BMU_RESET */ +#define BMU_RESET_EP_IN 0x01 +#define BMU_RESET_EP_OUT 0x02 + /* USB_UPS_CTRL */ #define POWER_CUT 0x0100 @@ -2456,6 +2461,17 @@ static void r8153_teredo_off(struct r8152 *tp) ocp_write_dword(tp, MCU_TYPE_PLA, PLA_TEREDO_TIMER, 0); } +static void rtl_reset_bmu(struct r8152 *tp) +{ + u32 ocp_data; + + ocp_data = ocp_read_byte(tp, MCU_TYPE_USB, USB_BMU_RESET); + ocp_data &= ~(BMU_RESET_EP_IN | BMU_RESET_EP_OUT); + ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data); + ocp_data |= BMU_RESET_EP_IN | BMU_RESET_EP_OUT; + ocp_write_byte(tp, MCU_TYPE_USB, USB_BMU_RESET, ocp_data); +} + static void r8152_aldps_en(struct r8152 *tp, bool enable) { if (enable) { @@ -2681,6 +2697,7 @@ static void r8153_first_init(struct r8152 *tp) r8153_hw_phy_cfg(tp); rtl8152_nic_reset(tp); + rtl_reset_bmu(tp); ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); ocp_data &= ~NOW_IS_OOB; @@ -2742,6 +2759,7 @@ static void r8153_enter_oob(struct r8152 *tp) ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data); rtl_disable(tp); + rtl_reset_bmu(tp); for (i = 0; i < 1000; i++) { ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL); @@ -2803,6 +2821,7 @@ static void rtl8153_disable(struct r8152 *tp) { r8153_aldps_en(tp, false); rtl_disable(tp); + rtl_reset_bmu(tp); r8153_aldps_en(tp, true); usb_enable_lpm(tp->udev); } -- cgit v1.2.3-70-g09d2 From a59e6d815226b70780427648e359da9bbee07171 Mon Sep 17 00:00:00 2001 From: hayeswang Date: Thu, 16 Jun 2016 10:55:19 +0800 Subject: r8152: correct the 
rx early size The rx early size should be (agg_buf_sz - packet size) / 8 Signed-off-by: Hayes Wang Signed-off-by: David S. Miller --- drivers/net/usb/r8152.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index f5bc351e09ef..4e257b8d8f3e 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -31,7 +31,7 @@ #define NETNEXT_VERSION "08" /* Information for net */ -#define NET_VERSION "3" +#define NET_VERSION "4" #define DRIVER_VERSION "v1." NETNEXT_VERSION "." NET_VERSION #define DRIVER_AUTHOR "Realtek linux nic maintainers " @@ -2174,7 +2174,7 @@ static void r8153_set_rx_early_timeout(struct r8152 *tp) static void r8153_set_rx_early_size(struct r8152 *tp) { u32 mtu = tp->netdev->mtu; - u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 4; + u32 ocp_data = (agg_buf_sz - mtu - VLAN_ETH_HLEN - VLAN_HLEN) / 8; ocp_write_word(tp, MCU_TYPE_USB, USB_RX_EARLY_SIZE, ocp_data); } -- cgit v1.2.3-70-g09d2 From c70410cb91de70707a507ee7beef7021a5a89f0d Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Thu, 9 Jun 2016 14:38:50 -0400 Subject: rtl8xxxu: fix typo on variable name, compare against correct variable path_b_ok is being assigned but immediately after path_a_ok is being compared to the value 0x03. This appears to be a typo on the variable name, compare path_b_ok instead. Signed-off-by: Colin Ian King Signed-off-by: Jes Sorensen Signed-off-by: Kalle Valo --- drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c index fe19ace0d6a0..b04cf30f3959 100644 --- a/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c +++ b/drivers/net/wireless/realtek/rtl8xxxu/rtl8xxxu_8192e.c @@ -1149,7 +1149,7 @@ static void rtl8192eu_phy_iqcalibrate(struct rtl8xxxu_priv *priv, for (i = 0; i < retry; i++) { path_b_ok = rtl8192eu_rx_iqk_path_b(priv); - if (path_a_ok == 0x03) { + if (path_b_ok == 0x03) { val32 = rtl8xxxu_read32(priv, REG_RX_POWER_BEFORE_IQK_B_2); result[t][6] = (val32 >> 16) & 0x3ff; -- cgit v1.2.3-70-g09d2 From 17471c7ba5115ed724d624577b21c166d2194272 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 15 Jun 2016 22:31:10 +0200 Subject: net: sfc: avoid -Wtype-limits warning When building with -Wextra, we get a harmless warning from the EFX_EXTRACT_OWORD32 macro: ethernet/sfc/farch.c: In function 'efx_farch_test_registers': ethernet/sfc/farch.c:119:30: error: comparison of unsigned expression < 0 is always false [-Werror=type-limits] ethernet/sfc/farch.c:124:144: error: comparison of unsigned expression < 0 is always false [-Werror=type-limits] ethernet/sfc/farch.c:124:392: error: comparison of unsigned expression < 0 is always false [-Werror=type-limits] ethernet/sfc/farch.c:124:731: error: comparison of unsigned expression < 0 is always false [-Werror=type-limits] The macro and the caller are both correct, but we can avoid the warning by changing the index variable to a signed type. Signed-off-by: Arnd Bergmann Acked-by: Bert Kenward Signed-off-by: David S. 
Miller --- drivers/net/ethernet/sfc/farch.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/sfc/farch.c b/drivers/net/ethernet/sfc/farch.c index 133e9e35be9e..4c83739d158f 100644 --- a/drivers/net/ethernet/sfc/farch.c +++ b/drivers/net/ethernet/sfc/farch.c @@ -104,7 +104,8 @@ int efx_farch_test_registers(struct efx_nic *efx, const struct efx_farch_register_test *regs, size_t n_regs) { - unsigned address = 0, i, j; + unsigned address = 0; + int i, j; efx_oword_t mask, imask, original, reg, buf; for (i = 0; i < n_regs; ++i) { -- cgit v1.2.3-70-g09d2 From a547224dceed4645139f7eff72ff51adb9938bcb Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 15 Jun 2016 14:42:11 -0700 Subject: mlx4e: Do not attempt to offload VXLAN ports that are unrecognized The mlx4e driver does not support more than one port for VXLAN offload. As such expecting the hardware to offload other ports is invalid since it appears the parsing logic is used to perform Tx checksum and segmentation offloads. Use the vxlan_port number to determine in which cases we can apply the offload and in which cases we can not. Signed-off-by: Alexander Duyck Reviewed-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 19ceced6736c..a2c25365a8a3 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2447,9 +2447,14 @@ static netdev_features_t mlx4_en_features_check(struct sk_buff *skb, * strip that feature if this is an IPv6 encapsulated frame. */ if (skb->encapsulation && - (skb->ip_summed == CHECKSUM_PARTIAL) && - (ip_hdr(skb)->version != 4)) - features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + (skb->ip_summed == CHECKSUM_PARTIAL)) { + struct mlx4_en_priv *priv = netdev_priv(dev); + + if (!priv->vxlan_port || + (ip_hdr(skb)->version != 4) || + (udp_hdr(skb)->dest != priv->vxlan_port)) + features &= ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); + } return features; } -- cgit v1.2.3-70-g09d2 From d5d8760b78d0cfafe292f965f599988138b06a70 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Thu, 16 Jun 2016 17:06:19 +0900 Subject: sit: correct IP protocol used in ipip6_err Since 32b8a8e59c9c ("sit: add IPv4 over IPv4 support") ipip6_err() may be called for packets whose IP protocol is IPPROTO_IPIP as well as those whose IP protocol is IPPROTO_IPV6. In the case of IPPROTO_IPIP packets the correct protocol value is not passed to ipv4_update_pmtu() or ipv4_redirect(). This patch resolves this problem by using the IP protocol of the packet rather than a hard-coded value. This appears to be consistent with the usage of the protocol of a packet by icmp_socket_deliver() the caller of ipip6_err(). I was able to exercise the redirect case by using a setup where an ICMP redirect was received for the destination of the encapsulated packet. However, it appears that although incorrect the protocol field is not used in this case and thus no problem manifests. On inspection it does not appear that a problem will manifest in the fragmentation needed/update pmtu case either. In short I believe this is a cosmetic fix. None the less, the use of IPPROTO_IPV6 seems wrong and confusing. Reviewed-by: Dinan Gunawardena Signed-off-by: Simon Horman Acked-by: YOSHIFUJI Hideaki Signed-off-by: David S. 
Miller --- net/ipv6/sit.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 0a5a255277e5..0619ac70836d 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -560,13 +560,13 @@ static int ipip6_err(struct sk_buff *skb, u32 info) if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, - t->parms.link, 0, IPPROTO_IPV6, 0); + t->parms.link, 0, iph->protocol, 0); err = 0; goto out; } if (type == ICMP_REDIRECT) { ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, - IPPROTO_IPV6, 0); + iph->protocol, 0); err = 0; goto out; } -- cgit v1.2.3-70-g09d2 From ce449ba77a2271dce9771fb1f3eb37e4be6a8b81 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 16 Jun 2016 14:42:50 +0100 Subject: nfp: use correct index to mask link state irq We were using an incorrect define to get the irq vector number. NFP_NET_CFG_LSC is a control BAR offset, LSC interrupt vector index is called NFP_NET_IRQ_LSC_IDX. For machines with less than 30 CPUs this meant that we were disabling/enabling IRQ 0. For bigger hosts we were just playing with the 31st RX/TX interrupt. Fixes: 0ba40af963f0 ("nfp: move link state interrupt request/free calls") Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index fa47c14c743a..ba26bb356b8d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2015,7 +2015,7 @@ static void nfp_net_open_stack(struct nfp_net *nn) netif_tx_wake_all_queues(nn->netdev); - enable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector); + enable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector); nfp_net_read_link_status(nn); } @@ -2044,7 +2044,7 @@ static int nfp_net_netdev_open(struct net_device *netdev) NFP_NET_IRQ_LSC_IDX, nn->lsc_handler); if (err) goto err_free_exn; - disable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector); + disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector); nn->rx_rings = kcalloc(nn->num_rx_rings, sizeof(*nn->rx_rings), GFP_KERNEL); @@ -2133,7 +2133,7 @@ static void nfp_net_close_stack(struct nfp_net *nn) { unsigned int r; - disable_irq(nn->irq_entries[NFP_NET_CFG_LSC].vector); + disable_irq(nn->irq_entries[NFP_NET_IRQ_LSC_IDX].vector); netif_carrier_off(nn->netdev); nn->link_up = false; -- cgit v1.2.3-70-g09d2 From 8f45927c3cae4db85887700e5415286f766cbaf9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 17 Jun 2016 12:54:18 +0200 Subject: netfilter: xt_SYNPROXY: add missing header to Kbuild Matt Whitlock says: Without this line, the file xt_SYNPROXY.h does not get installed in /usr/include/linux/netfilter/, and thus user-space programs cannot make use of it. 
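As a rough illustration (not part of the patch), once the header is exported a user-space build can include it directly; a minimal, hypothetical consumer, assuming the usual sanitized-header install path, might look like this (<linux/types.h> is included first because the header uses __u8/__u16 types, which is exactly what the follow-up patch below addresses):

  #include <linux/types.h>
  #include <linux/netfilter/xt_SYNPROXY.h>
  #include <stdio.h>

  int main(void)
  {
          /* XT_SYNPROXY_OPT_MSS comes from the newly installed header. */
          printf("XT_SYNPROXY_OPT_MSS = 0x%02x\n", XT_SYNPROXY_OPT_MSS);
          return 0;
  }

Without the Kbuild entry, such a program fails at the #include line because the header never reaches /usr/include/linux/netfilter/.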
Reported-by: Matt Whitlock Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/Kbuild | 1 + 1 file changed, 1 insertion(+) diff --git a/include/uapi/linux/netfilter/Kbuild b/include/uapi/linux/netfilter/Kbuild index 1d973d2ba417..cd26d7a0fd07 100644 --- a/include/uapi/linux/netfilter/Kbuild +++ b/include/uapi/linux/netfilter/Kbuild @@ -33,6 +33,7 @@ header-y += xt_NFLOG.h header-y += xt_NFQUEUE.h header-y += xt_RATEEST.h header-y += xt_SECMARK.h +header-y += xt_SYNPROXY.h header-y += xt_TCPMSS.h header-y += xt_TCPOPTSTRIP.h header-y += xt_TEE.h -- cgit v1.2.3-70-g09d2 From 1463847e93fe693e89c52b03ab4ede6800d717c1 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 17 Jun 2016 12:54:18 +0200 Subject: netfilter: xt_SYNPROXY: include missing ./usr/include/linux/netfilter/xt_SYNPROXY.h:11: found __[us]{8,16,32,64} type without #include Reported-by: kbuild test robot Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_SYNPROXY.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/include/uapi/linux/netfilter/xt_SYNPROXY.h b/include/uapi/linux/netfilter/xt_SYNPROXY.h index 2d59fbaa93c6..ca67e61d2a61 100644 --- a/include/uapi/linux/netfilter/xt_SYNPROXY.h +++ b/include/uapi/linux/netfilter/xt_SYNPROXY.h @@ -1,6 +1,8 @@ #ifndef _XT_SYNPROXY_H #define _XT_SYNPROXY_H +#include + #define XT_SYNPROXY_OPT_MSS 0x01 #define XT_SYNPROXY_OPT_WSCALE 0x02 #define XT_SYNPROXY_OPT_SACK_PERM 0x04 -- cgit v1.2.3-70-g09d2 From f1d048f24e66ba85d3dabf3d076cefa5f2b546b0 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Fri, 17 Jun 2016 06:35:57 -0400 Subject: tipc: fix socket timer deadlock We sometimes observe a 'deadly embrace' type deadlock occurring between mutually connected sockets on the same node. This happens when the one-hour peer supervision timers happen to expire simultaneously in both sockets. The scenario is as follows: CPU 1: CPU 2: -------- -------- tipc_sk_timeout(sk1) tipc_sk_timeout(sk2) lock(sk1.slock) lock(sk2.slock) msg_create(probe) msg_create(probe) unlock(sk1.slock) unlock(sk2.slock) tipc_node_xmit_skb() tipc_node_xmit_skb() tipc_node_xmit() tipc_node_xmit() tipc_sk_rcv(sk2) tipc_sk_rcv(sk1) lock(sk2.slock) lock((sk1.slock) filter_rcv() filter_rcv() tipc_sk_proto_rcv() tipc_sk_proto_rcv() msg_create(probe_rsp) msg_create(probe_rsp) tipc_sk_respond() tipc_sk_respond() tipc_node_xmit_skb() tipc_node_xmit_skb() tipc_node_xmit() tipc_node_xmit() tipc_sk_rcv(sk1) tipc_sk_rcv(sk2) lock((sk1.slock) lock((sk2.slock) ===> DEADLOCK ===> DEADLOCK Further analysis reveals that there are three different locations in the socket code where tipc_sk_respond() is called within the context of the socket lock, with ensuing risk of similar deadlocks. We now solve this by passing a buffer queue along with all upcalls where sk_lock.slock may potentially be held. Response or rejected message buffers are accumulated into this queue instead of being sent out directly, and only sent once we know we are safely outside the slock context. Reported-by: GUNA Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. Miller --- net/tipc/socket.c | 54 ++++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 12 deletions(-) diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 88bfcd707064..c49b8df438cb 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -796,9 +796,11 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq, * @tsk: receiving socket * @skb: pointer to message buffer. 
*/ -static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb) +static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { struct sock *sk = &tsk->sk; + u32 onode = tsk_own_node(tsk); struct tipc_msg *hdr = buf_msg(skb); int mtyp = msg_type(hdr); bool conn_cong; @@ -811,7 +813,8 @@ static void tipc_sk_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb) if (mtyp == CONN_PROBE) { msg_set_type(hdr, CONN_PROBE_REPLY); - tipc_sk_respond(sk, skb, TIPC_OK); + if (tipc_msg_reverse(onode, &skb, TIPC_OK)) + __skb_queue_tail(xmitq, skb); return; } else if (mtyp == CONN_ACK) { conn_cong = tsk_conn_cong(tsk); @@ -1686,7 +1689,8 @@ static unsigned int rcvbuf_limit(struct sock *sk, struct sk_buff *skb) * * Returns true if message was added to socket receive queue, otherwise false */ -static bool filter_rcv(struct sock *sk, struct sk_buff *skb) +static bool filter_rcv(struct sock *sk, struct sk_buff *skb, + struct sk_buff_head *xmitq) { struct socket *sock = sk->sk_socket; struct tipc_sock *tsk = tipc_sk(sk); @@ -1696,7 +1700,7 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb) int usr = msg_user(hdr); if (unlikely(msg_user(hdr) == CONN_MANAGER)) { - tipc_sk_proto_rcv(tsk, skb); + tipc_sk_proto_rcv(tsk, skb, xmitq); return false; } @@ -1739,7 +1743,8 @@ static bool filter_rcv(struct sock *sk, struct sk_buff *skb) return true; reject: - tipc_sk_respond(sk, skb, err); + if (tipc_msg_reverse(tsk_own_node(tsk), &skb, err)) + __skb_queue_tail(xmitq, skb); return false; } @@ -1755,9 +1760,24 @@ reject: static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) { unsigned int truesize = skb->truesize; + struct sk_buff_head xmitq; + u32 dnode, selector; - if (likely(filter_rcv(sk, skb))) + __skb_queue_head_init(&xmitq); + + if (likely(filter_rcv(sk, skb, &xmitq))) { atomic_add(truesize, &tipc_sk(sk)->dupl_rcvcnt); + return 0; + } + + if (skb_queue_empty(&xmitq)) + return 0; + + /* Send response/rejected message */ + skb = __skb_dequeue(&xmitq); + dnode = msg_destnode(buf_msg(skb)); + selector = msg_origport(buf_msg(skb)); + tipc_node_xmit_skb(sock_net(sk), skb, dnode, selector); return 0; } @@ -1771,12 +1791,13 @@ static int tipc_backlog_rcv(struct sock *sk, struct sk_buff *skb) * Caller must hold socket lock */ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, - u32 dport) + u32 dport, struct sk_buff_head *xmitq) { + unsigned long time_limit = jiffies + 2; + struct sk_buff *skb; unsigned int lim; atomic_t *dcnt; - struct sk_buff *skb; - unsigned long time_limit = jiffies + 2; + u32 onode; while (skb_queue_len(inputq)) { if (unlikely(time_after_eq(jiffies, time_limit))) @@ -1788,7 +1809,7 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, /* Add message directly to receive queue if possible */ if (!sock_owned_by_user(sk)) { - filter_rcv(sk, skb); + filter_rcv(sk, skb, xmitq); continue; } @@ -1801,7 +1822,9 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, continue; /* Overload => reject message back to sender */ - tipc_sk_respond(sk, skb, TIPC_ERR_OVERLOAD); + onode = tipc_own_addr(sock_net(sk)); + if (tipc_msg_reverse(onode, &skb, TIPC_ERR_OVERLOAD)) + __skb_queue_tail(xmitq, skb); break; } } @@ -1814,12 +1837,14 @@ static void tipc_sk_enqueue(struct sk_buff_head *inputq, struct sock *sk, */ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) { + struct sk_buff_head xmitq; u32 dnode, dport = 0; int err; struct tipc_sock *tsk; struct sock *sk; 
struct sk_buff *skb; + __skb_queue_head_init(&xmitq); while (skb_queue_len(inputq)) { dport = tipc_skb_peek_port(inputq, dport); tsk = tipc_sk_lookup(net, dport); @@ -1827,9 +1852,14 @@ void tipc_sk_rcv(struct net *net, struct sk_buff_head *inputq) if (likely(tsk)) { sk = &tsk->sk; if (likely(spin_trylock_bh(&sk->sk_lock.slock))) { - tipc_sk_enqueue(inputq, sk, dport); + tipc_sk_enqueue(inputq, sk, dport, &xmitq); spin_unlock_bh(&sk->sk_lock.slock); } + /* Send pending response/rejected messages, if any */ + while ((skb = __skb_dequeue(&xmitq))) { + dnode = msg_destnode(buf_msg(skb)); + tipc_node_xmit_skb(net, skb, dnode, dport); + } sock_put(sk); continue; } -- cgit v1.2.3-70-g09d2 From 63dcdd35c1552ca0c911e98ba3389a0729a457f4 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Fri, 17 Jun 2016 15:09:05 +0200 Subject: mlxsw: spectrum: Don't count internal TX header bytes to stats Stop the SW TX counter from counting the TX header bytes since they are not being sent out. Fixes: 56ade8fe3fe1 ("mlxsw: spectrum: Add initial support for Spectrum ASIC") Reviewed-by: Ido Schimmel Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 6f9e3ddff4a8..660429ebfbe1 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -408,7 +408,11 @@ static netdev_tx_t mlxsw_sp_port_xmit(struct sk_buff *skb, } mlxsw_sp_txhdr_construct(skb, &tx_info); - len = skb->len; + /* TX header is consumed by HW on the way so we shouldn't count its + * bytes as being sent. + */ + len = skb->len - MLXSW_TXHDR_LEN; + /* Due to a race we might fail here because of a full queue. In that * unlikely case we simply drop the packet. */ -- cgit v1.2.3-70-g09d2 From 4e239fac7c6b9d703e83c81b52ec9fec00c50129 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Fri, 17 Jun 2016 15:09:06 +0200 Subject: mlxsw: switchx2: Don't count internal TX header bytes to stats Stop the SW TX counter from counting the TX header bytes since they are not being sent out. Fixes: e577516b9db3 ("mlxsw: Fix use-after-free bug in mlxsw_sx_port_xmit") Signed-off-by: Nogah Frankel Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c index 3842eab9449a..25f658b3849a 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c +++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c @@ -316,7 +316,10 @@ static netdev_tx_t mlxsw_sx_port_xmit(struct sk_buff *skb, } } mlxsw_sx_txhdr_construct(skb, &tx_info); - len = skb->len; + /* TX header is consumed by HW on the way so we shouldn't count its + * bytes as being sent. + */ + len = skb->len - MLXSW_TXHDR_LEN; /* Due to a race we might fail here because of a full queue. In that * unlikely case we simply drop the packet. */ -- cgit v1.2.3-70-g09d2 From a9836cbb5fc885d3b8f3b91b2d47525f7269dec1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 17 Jun 2016 18:15:30 +0200 Subject: net: tilegx: use correct timespec64 type The conversion to the 64-bit time based ptp methods left two instances of 'struct timespec' in place. 
This is harmless because 64-bit architectures define timespec64 as timespec, and this driver is not used on 32-bit machines. However, using 'struct timespec64' directly is obviously the right thing to do, and will help us remove 'struct timespec' in the future. Signed-off-by: Arnd Bergmann Fixes: b9acf24f779c ("ptp: tilegx: convert to the 64 bit get/set time methods.") Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/net/ethernet/tile/tilegx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/tile/tilegx.c b/drivers/net/ethernet/tile/tilegx.c index 0a15acc075b3..11213a38c795 100644 --- a/drivers/net/ethernet/tile/tilegx.c +++ b/drivers/net/ethernet/tile/tilegx.c @@ -462,7 +462,7 @@ static void tile_tx_timestamp(struct sk_buff *skb, int instance) if (unlikely((shtx->tx_flags & SKBTX_HW_TSTAMP) != 0)) { struct mpipe_data *md = &mpipe_data[instance]; struct skb_shared_hwtstamps shhwtstamps; - struct timespec ts; + struct timespec64 ts; shtx->tx_flags |= SKBTX_IN_PROGRESS; gxio_mpipe_get_timestamp(&md->context, &ts); @@ -886,9 +886,9 @@ static struct ptp_clock_info ptp_mpipe_caps = { /* Sync mPIPE's timestamp up with Linux system time and register PTP clock. */ static void register_ptp_clock(struct net_device *dev, struct mpipe_data *md) { - struct timespec ts; + struct timespec64 ts; - getnstimeofday(&ts); + ktime_get_ts64(&ts); gxio_mpipe_set_timestamp(&md->context, &ts); mutex_init(&md->ptp_lock); -- cgit v1.2.3-70-g09d2 From 3bb549ae4c51028c1930528ae9fcd6eca0474724 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Fri, 17 Jun 2016 16:12:12 -0400 Subject: RDS: TCP: rds_tcp_accept_one() should transition socket from RESETTING to UP The state of the rds_connection after rds_tcp_reset_callbacks() would be RDS_CONN_RESETTING and this is the value that should be passed by rds_tcp_accept_one() to rds_connect_path_complete() to transition the socket to RDS_CONN_UP. Fixes: b5c21c0947c1 ("RDS: TCP: fix race windows in send-path quiescence by rds_tcp_accept_one()") Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/tcp_listen.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 686b1d03a558..245542ca4718 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -138,7 +138,7 @@ int rds_tcp_accept_one(struct socket *sock) rds_tcp_reset_callbacks(new_sock, conn); conn->c_outgoing = 0; /* rds_connect_path_complete() marks RDS_CONN_UP */ - rds_connect_path_complete(conn, RDS_CONN_DISCONNECTING); + rds_connect_path_complete(conn, RDS_CONN_RESETTING); } } else { rds_tcp_set_callbacks(new_sock, conn); -- cgit v1.2.3-70-g09d2 From 4a7d99ea1b27734558feb6833f180cd38a159940 Mon Sep 17 00:00:00 2001 From: Basil Gunn Date: Thu, 16 Jun 2016 09:42:30 -0700 Subject: AX.25: Close socket connection on session completion A socket connection made in ax.25 is not closed when the session is completed. The heartbeat timer is stopped prematurely and this is where the socket gets closed. Allow the heartbeat timer to run so the socket can be closed. Symptom occurs in kernels >= 4.2.0. Originally sent 6/15/2016. Resend with distribution list matching scripts/maintainer.pl output. Signed-off-by: Basil Gunn Signed-off-by: David S.
Miller --- net/ax25/af_ax25.c | 3 ++- net/ax25/ax25_ds_timer.c | 5 ++++- net/ax25/ax25_std_timer.c | 5 ++++- net/ax25/ax25_subr.c | 3 ++- 4 files changed, 12 insertions(+), 4 deletions(-) diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index fbd0acf80b13..2fdebabbfacd 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -976,7 +976,8 @@ static int ax25_release(struct socket *sock) release_sock(sk); ax25_disconnect(ax25, 0); lock_sock(sk); - ax25_destroy_socket(ax25); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_destroy_socket(ax25); break; case AX25_STATE_3: diff --git a/net/ax25/ax25_ds_timer.c b/net/ax25/ax25_ds_timer.c index 951cd57bb07d..5237dff6941d 100644 --- a/net/ax25/ax25_ds_timer.c +++ b/net/ax25/ax25_ds_timer.c @@ -102,6 +102,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25) switch (ax25->state) { case AX25_STATE_0: + case AX25_STATE_2: /* Magic here: If we listen() and a new link dies before it is accepted() it isn't 'dead' so doesn't get removed. */ if (!sk || sock_flag(sk, SOCK_DESTROY) || @@ -111,6 +112,7 @@ void ax25_ds_heartbeat_expiry(ax25_cb *ax25) sock_hold(sk); ax25_destroy_socket(ax25); bh_unlock_sock(sk); + /* Ungrab socket and destroy it */ sock_put(sk); } else ax25_destroy_socket(ax25); @@ -213,7 +215,8 @@ void ax25_ds_t1_timeout(ax25_cb *ax25) case AX25_STATE_2: if (ax25->n2count == ax25->n2) { ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25_disconnect(ax25, ETIMEDOUT); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_disconnect(ax25, ETIMEDOUT); return; } else { ax25->n2count++; diff --git a/net/ax25/ax25_std_timer.c b/net/ax25/ax25_std_timer.c index 004467c9e6e1..2c0d6ef66f9d 100644 --- a/net/ax25/ax25_std_timer.c +++ b/net/ax25/ax25_std_timer.c @@ -38,6 +38,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25) switch (ax25->state) { case AX25_STATE_0: + case AX25_STATE_2: /* Magic here: If we listen() and a new link dies before it is accepted() it isn't 'dead' so doesn't get removed. 
*/ if (!sk || sock_flag(sk, SOCK_DESTROY) || @@ -47,6 +48,7 @@ void ax25_std_heartbeat_expiry(ax25_cb *ax25) sock_hold(sk); ax25_destroy_socket(ax25); bh_unlock_sock(sk); + /* Ungrab socket and destroy it */ sock_put(sk); } else ax25_destroy_socket(ax25); @@ -144,7 +146,8 @@ void ax25_std_t1timer_expiry(ax25_cb *ax25) case AX25_STATE_2: if (ax25->n2count == ax25->n2) { ax25_send_control(ax25, AX25_DISC, AX25_POLLON, AX25_COMMAND); - ax25_disconnect(ax25, ETIMEDOUT); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_disconnect(ax25, ETIMEDOUT); return; } else { ax25->n2count++; diff --git a/net/ax25/ax25_subr.c b/net/ax25/ax25_subr.c index 3b78e8473a01..655a7d4c96e1 100644 --- a/net/ax25/ax25_subr.c +++ b/net/ax25/ax25_subr.c @@ -264,7 +264,8 @@ void ax25_disconnect(ax25_cb *ax25, int reason) { ax25_clear_queues(ax25); - ax25_stop_heartbeat(ax25); + if (!sock_flag(ax25->sk, SOCK_DESTROY)) + ax25_stop_heartbeat(ax25); ax25_stop_t1timer(ax25); ax25_stop_t2timer(ax25); ax25_stop_t3timer(ax25); -- cgit v1.2.3-70-g09d2 From 5c3da57d70f1ef1d9b60900b84a74d77a9cf0774 Mon Sep 17 00:00:00 2001 From: Joshua Houghton Date: Sat, 18 Jun 2016 15:46:31 +0000 Subject: net: rds: fix coding style issues Fix coding style issues in the following files: ib_cm.c: add space loop.c: convert spaces to tabs sysctl.c: add space tcp.h: convert spaces to tabs tcp_connect.c:remove extra indentation in switch statement tcp_recv.c: convert spaces to tabs tcp_send.c: convert spaces to tabs transport.c: move brace up one line on for statement Signed-off-by: Joshua Houghton Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/ib_cm.c | 2 +- net/rds/loop.c | 5 +++-- net/rds/sysctl.c | 3 ++- net/rds/tcp.h | 2 +- net/rds/tcp_connect.c | 26 +++++++++++++------------- net/rds/tcp_recv.c | 2 +- net/rds/tcp_send.c | 14 +++++++------- net/rds/transport.c | 3 +-- 8 files changed, 29 insertions(+), 28 deletions(-) diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index 310cabce2311..7c2a65a6af5c 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -111,7 +111,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even } } - if (conn->c_version < RDS_PROTOCOL(3,1)) { + if (conn->c_version < RDS_PROTOCOL(3, 1)) { printk(KERN_NOTICE "RDS/IB: Connection to %pI4 version %u.%u failed," " no longer supported\n", &conn->c_faddr, diff --git a/net/rds/loop.c b/net/rds/loop.c index 6b12b68541ae..814173b466d9 100644 --- a/net/rds/loop.c +++ b/net/rds/loop.c @@ -95,8 +95,9 @@ out: */ static void rds_loop_inc_free(struct rds_incoming *inc) { - struct rds_message *rm = container_of(inc, struct rds_message, m_inc); - rds_message_put(rm); + struct rds_message *rm = container_of(inc, struct rds_message, m_inc); + + rds_message_put(rm); } /* we need to at least give the thread something to succeed */ diff --git a/net/rds/sysctl.c b/net/rds/sysctl.c index c173f69e1479..e381bbcd9cc1 100644 --- a/net/rds/sysctl.c +++ b/net/rds/sysctl.c @@ -102,7 +102,8 @@ int rds_sysctl_init(void) rds_sysctl_reconnect_min = msecs_to_jiffies(1); rds_sysctl_reconnect_min_jiffies = rds_sysctl_reconnect_min; - rds_sysctl_reg_table = register_net_sysctl(&init_net,"net/rds", rds_sysctl_rds_table); + rds_sysctl_reg_table = + register_net_sysctl(&init_net, "net/rds", rds_sysctl_rds_table); if (!rds_sysctl_reg_table) return -ENOMEM; return 0; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index ec0602b0dc24..7940babf6c71 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -83,7 +83,7 @@ int rds_tcp_inc_copy_to_user(struct rds_incoming *inc, struct 
iov_iter *to); void rds_tcp_xmit_prepare(struct rds_connection *conn); void rds_tcp_xmit_complete(struct rds_connection *conn); int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, - unsigned int hdr_off, unsigned int sg, unsigned int off); + unsigned int hdr_off, unsigned int sg, unsigned int off); void rds_tcp_write_space(struct sock *sk); /* tcp_stats.c */ diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index fba13d0305fb..f6e95d60db54 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -54,19 +54,19 @@ void rds_tcp_state_change(struct sock *sk) rdsdebug("sock %p state_change to %d\n", tc->t_sock, sk->sk_state); - switch(sk->sk_state) { - /* ignore connecting sockets as they make progress */ - case TCP_SYN_SENT: - case TCP_SYN_RECV: - break; - case TCP_ESTABLISHED: - rds_connect_path_complete(conn, RDS_CONN_CONNECTING); - break; - case TCP_CLOSE_WAIT: - case TCP_CLOSE: - rds_conn_drop(conn); - default: - break; + switch (sk->sk_state) { + /* ignore connecting sockets as they make progress */ + case TCP_SYN_SENT: + case TCP_SYN_RECV: + break; + case TCP_ESTABLISHED: + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); + break; + case TCP_CLOSE_WAIT: + case TCP_CLOSE: + rds_conn_drop(conn); + default: + break; } out: read_unlock_bh(&sk->sk_callback_lock); diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c index c3196f9d070a..6e6a7111a034 100644 --- a/net/rds/tcp_recv.c +++ b/net/rds/tcp_recv.c @@ -171,7 +171,7 @@ static int rds_tcp_data_recv(read_descriptor_t *desc, struct sk_buff *skb, while (left) { if (!tinc) { tinc = kmem_cache_alloc(rds_tcp_incoming_slab, - arg->gfp); + arg->gfp); if (!tinc) { desc->error = -ENOMEM; goto out; diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c index 22d0f2020a79..618be69c9c3b 100644 --- a/net/rds/tcp_send.c +++ b/net/rds/tcp_send.c @@ -66,19 +66,19 @@ void rds_tcp_xmit_complete(struct rds_connection *conn) static int rds_tcp_sendmsg(struct socket *sock, void *data, unsigned int len) { struct kvec vec = { - .iov_base = data, - .iov_len = len, + .iov_base = data, + .iov_len = len, + }; + struct msghdr msg = { + .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL, }; - struct msghdr msg = { - .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL, - }; return kernel_sendmsg(sock, &msg, &vec, 1, vec.iov_len); } /* the core send_sem serializes this with other xmit and shutdown */ int rds_tcp_xmit(struct rds_connection *conn, struct rds_message *rm, - unsigned int hdr_off, unsigned int sg, unsigned int off) + unsigned int hdr_off, unsigned int sg, unsigned int off) { struct rds_tcp_connection *tc = conn->c_transport_data; int done = 0; @@ -196,7 +196,7 @@ void rds_tcp_write_space(struct sock *sk) tc->t_last_seen_una = rds_tcp_snd_una(tc); rds_send_drop_acked(conn, rds_tcp_snd_una(tc), rds_tcp_is_acked); - if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) + if ((atomic_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf) queue_delayed_work(rds_wq, &conn->c_send_w, 0); out: diff --git a/net/rds/transport.c b/net/rds/transport.c index f3afd1d60d3c..2ffd3e30c643 100644 --- a/net/rds/transport.c +++ b/net/rds/transport.c @@ -140,8 +140,7 @@ unsigned int rds_trans_stats_info_copy(struct rds_info_iterator *iter, rds_info_iter_unmap(iter); down_read(&rds_trans_sem); - for (i = 0; i < RDS_TRANS_COUNT; i++) - { + for (i = 0; i < RDS_TRANS_COUNT; i++) { trans = transports[i]; if (!trans || !trans->stats_info_copy) continue; -- cgit v1.2.3-70-g09d2 From e6bd89232b4a4c9282da1ac8bdd798c2a30d9a46 Mon Sep 17 00:00:00 2001 From: Yuval Mintz 
Date: Sun, 19 Jun 2016 15:18:11 +0300 Subject: qed: Correct default vlan behavior When no vlan filter is configured, firmware has a configurable default on whether to pass only untagged packets or all packets regardless of their tagging. Driver currently doesn't set this field in the necessary ramrod, causing the default to always be 'receive all'. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 8fba87dd48af..7c199a94cbfc 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -72,6 +72,7 @@ int qed_sp_eth_vport_start(struct qed_hwfn *p_hwfn, p_ramrod->mtu = cpu_to_le16(p_params->mtu); p_ramrod->inner_vlan_removal_en = p_params->remove_inner_vlan; p_ramrod->drop_ttl0_en = p_params->drop_ttl0; + p_ramrod->untagged = p_params->only_untagged; SET_FIELD(rx_mode, ETH_VPORT_RX_MODE_UCAST_DROP_ALL, 1); SET_FIELD(rx_mode, ETH_VPORT_RX_MODE_MCAST_DROP_ALL, 1); -- cgit v1.2.3-70-g09d2 From 326439883e17fca029f4ed05307480bdb6369877 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sun, 19 Jun 2016 15:18:12 +0300 Subject: qed: Prevent VF from Tx-switching 'promisc' Internal loopback in driver is based on two things - first is the willingness of transmitter to use it [in case of VFs, this can be forced based on VEPA/VEB] and secondly on another vport classification configuration which should match the packet's destination. Current code allows non-linux VFs to configure a 'promisc' mode on Tx, meaning all traffic sent by VF would be loopbacked internally by firmware; This isn't considered a valid mode and as such should be prevented by PF. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 7c199a94cbfc..7e7ae83453d5 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -248,10 +248,6 @@ qed_sp_update_accept_mode(struct qed_hwfn *p_hwfn, SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_DROP_ALL, !!(accept_filter & QED_ACCEPT_NONE)); - SET_FIELD(state, ETH_VPORT_TX_MODE_UCAST_ACCEPT_ALL, - (!!(accept_filter & QED_ACCEPT_UCAST_MATCHED) && - !!(accept_filter & QED_ACCEPT_UCAST_UNMATCHED))); - SET_FIELD(state, ETH_VPORT_TX_MODE_MCAST_DROP_ALL, !!(accept_filter & QED_ACCEPT_NONE)); -- cgit v1.2.3-70-g09d2 From a0d26d5a4fc8e13993279f788deeb08069e73b69 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sun, 19 Jun 2016 15:18:13 +0300 Subject: qed*: Don't reset statistics on inner reload Several user APIs can cause driver to perform an inner-reload. Currently, doing this would cause the HW/FW statistics of the adapter to reset, which isn't the expected behavior [statistics should only reset on explicit unloads]. Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/qlogic/qed/qed_l2.c | 3 ++- drivers/net/ethernet/qlogic/qede/qede_main.c | 7 ++++--- include/linux/qed/qed_eth_if.h | 1 + 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_l2.c b/drivers/net/ethernet/qlogic/qed/qed_l2.c index 7e7ae83453d5..aada4c7e095f 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_l2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_l2.c @@ -1745,7 +1745,8 @@ static int qed_start_vport(struct qed_dev *cdev, start.vport_id, start.mtu); } - qed_reset_vport_stats(cdev); + if (params->clear_stats) + qed_reset_vport_stats(cdev); return 0; } diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 5733d1888223..f8e11f953acb 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -3231,7 +3231,7 @@ static int qede_stop_queues(struct qede_dev *edev) return rc; } -static int qede_start_queues(struct qede_dev *edev) +static int qede_start_queues(struct qede_dev *edev, bool clear_stats) { int rc, tc, i; int vlan_removal_en = 1; @@ -3462,6 +3462,7 @@ out: enum qede_load_mode { QEDE_LOAD_NORMAL, + QEDE_LOAD_RELOAD, }; static int qede_load(struct qede_dev *edev, enum qede_load_mode mode) @@ -3500,7 +3501,7 @@ static int qede_load(struct qede_dev *edev, enum qede_load_mode mode) goto err3; DP_INFO(edev, "Setup IRQs succeeded\n"); - rc = qede_start_queues(edev); + rc = qede_start_queues(edev, mode != QEDE_LOAD_RELOAD); if (rc) goto err4; DP_INFO(edev, "Start VPORT, RXQ and TXQ succeeded\n"); @@ -3555,7 +3556,7 @@ void qede_reload(struct qede_dev *edev, if (func) func(edev, args); - qede_load(edev, QEDE_LOAD_NORMAL); + qede_load(edev, QEDE_LOAD_RELOAD); mutex_lock(&edev->qede_lock); qede_config_rx_mode(edev->ndev); diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 6ae8cb4a61d3..6c876a63558d 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -49,6 +49,7 @@ struct qed_start_vport_params { bool drop_ttl0; u8 vport_id; u16 mtu; + bool clear_stats; }; struct qed_stop_rxq_params { -- cgit v1.2.3-70-g09d2 From db511c37d4eb2b4e7312791eb8f9b34ed867445e Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sun, 19 Jun 2016 15:18:14 +0300 Subject: qed: Fix returning unlimited SPQ entries The driver has 2 sets of entries for handling ramrod configurations toward firmware - a regular pre-allocated set of entries and a possible 'unlimited' list of additional pending entries. In most scenarios the 'unlimited' list would not be used, but when it is, the handling of the ramrod completion doesn't properly handle the release of the entry. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_spq.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index acac6626a1b2..67d9893774d8 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -614,7 +614,9 @@ qed_spq_add_entry(struct qed_hwfn *p_hwfn, *p_en2 = *p_ent; - kfree(p_ent); + /* EBLOCK responsible to free the allocated p_ent */ + if (p_ent->comp_mode != QED_SPQ_MODE_EBLOCK) + kfree(p_ent); p_ent = p_en2; } @@ -749,6 +751,15 @@ int qed_spq_post(struct qed_hwfn *p_hwfn, * Thus, after gaining the answer perform the cleanup here.
*/ rc = qed_spq_block(p_hwfn, p_ent, fw_return_code); + + if (p_ent->queue == &p_spq->unlimited_pending) { + /* This is an allocated p_ent which does not need to + * return to pool. + */ + kfree(p_ent); + return rc; + } + if (rc) goto spq_post_fail2; @@ -844,8 +855,12 @@ int qed_spq_completion(struct qed_hwfn *p_hwfn, found->comp_cb.function(p_hwfn, found->comp_cb.cookie, p_data, fw_return_code); - if (found->comp_mode != QED_SPQ_MODE_EBLOCK) - /* EBLOCK is responsible for freeing its own entry */ + if ((found->comp_mode != QED_SPQ_MODE_EBLOCK) || + (found->queue == &p_spq->unlimited_pending)) + /* EBLOCK is responsible for returning its own entry into the + * free list, unless it originally added the entry into the + * unlimited pending list. + */ qed_spq_return_entry(p_hwfn, found); /* Attempt to post pending requests */ -- cgit v1.2.3-70-g09d2 From b639f197210d37905a6018ae4297659eb3f48f8f Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Sun, 19 Jun 2016 15:18:15 +0300 Subject: qed: Add missing port-mode The 'MODULE_FIBER' value replaced several other FIBER values in newer management firmware images, so existing code would fail to properly reflect its mode. Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_hsi.h | 1 + drivers/net/ethernet/qlogic/qed/qed_main.c | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h index 9afc15fdbb02..e29ed5a69566 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h +++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h @@ -3700,6 +3700,7 @@ struct public_port { #define MEDIA_DA_TWINAX 0x3 #define MEDIA_BASE_T 0x4 #define MEDIA_SFP_1G_FIBER 0x5 +#define MEDIA_MODULE_FIBER 0x6 #define MEDIA_KR 0xf0 #define MEDIA_NOT_PRESENT 0xff diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 61cc6869fa65..c7e01b303540 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1085,6 +1085,7 @@ static int qed_get_port_type(u32 media_type) case MEDIA_SFPP_10G_FIBER: case MEDIA_SFP_1G_FIBER: case MEDIA_XFP_FIBER: + case MEDIA_MODULE_FIBER: case MEDIA_KR: port_type = PORT_FIBRE; break; -- cgit v1.2.3-70-g09d2 From 427460c83cdf55069eee49799a0caef7dde8df69 Mon Sep 17 00:00:00 2001 From: Thor Thayer Date: Thu, 16 Jun 2016 11:10:19 -0500 Subject: can: c_can: Update D_CAN TX and RX functions to 32 bit - fix Altera Cyclone access When testing CAN write floods on Altera's CycloneV, the first 2 bytes are sometimes 0x00, 0x00 or corrupted instead of the values sent. Also observed bytes 4 & 5 were corrupted in some cases. The D_CAN Data registers are 32 bits and changing from 16 bit writes to 32 bit writes fixes the problem. Testing performed on Altera CycloneV (D_CAN). Requesting tests on other C_CAN & D_CAN platforms. 
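For clarity (an illustration only, not part of the patch): the new 32-bit path simply assembles four data bytes into one little-endian register word before the single write_reg32() access. A hypothetical helper equivalent to the packing done in the hunk below would be:

  static inline u32 d_can_pack_word(const u8 *b)
  {
          /* least-significant byte first, matching the D_CAN data layout */
          return (u32)b[0] | ((u32)b[1] << 8) |
                 ((u32)b[2] << 16) | ((u32)b[3] << 24);
  }

On a bus where 16-bit accesses proved unreliable (as observed on the Altera Cyclone V), one 32-bit write per word avoids the partial updates that corrupted bytes 0-1 and 4-5.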
Reported-by: Richard Andrysek Signed-off-by: Thor Thayer Cc: Signed-off-by: Marc Kleine-Budde --- drivers/net/can/c_can/c_can.c | 38 +++++++++++++++++++++++++++++++------- 1 file changed, 31 insertions(+), 7 deletions(-) diff --git a/drivers/net/can/c_can/c_can.c b/drivers/net/can/c_can/c_can.c index f91b094288da..e3dccd3200d5 100644 --- a/drivers/net/can/c_can/c_can.c +++ b/drivers/net/can/c_can/c_can.c @@ -332,9 +332,23 @@ static void c_can_setup_tx_object(struct net_device *dev, int iface, priv->write_reg(priv, C_CAN_IFACE(MSGCTRL_REG, iface), ctrl); - for (i = 0; i < frame->can_dlc; i += 2) { - priv->write_reg(priv, C_CAN_IFACE(DATA1_REG, iface) + i / 2, - frame->data[i] | (frame->data[i + 1] << 8)); + if (priv->type == BOSCH_D_CAN) { + u32 data = 0, dreg = C_CAN_IFACE(DATA1_REG, iface); + + for (i = 0; i < frame->can_dlc; i += 4, dreg += 2) { + data = (u32)frame->data[i]; + data |= (u32)frame->data[i + 1] << 8; + data |= (u32)frame->data[i + 2] << 16; + data |= (u32)frame->data[i + 3] << 24; + priv->write_reg32(priv, dreg, data); + } + } else { + for (i = 0; i < frame->can_dlc; i += 2) { + priv->write_reg(priv, + C_CAN_IFACE(DATA1_REG, iface) + i / 2, + frame->data[i] | + (frame->data[i + 1] << 8)); + } } } @@ -402,10 +416,20 @@ static int c_can_read_msg_object(struct net_device *dev, int iface, u32 ctrl) } else { int i, dreg = C_CAN_IFACE(DATA1_REG, iface); - for (i = 0; i < frame->can_dlc; i += 2, dreg ++) { - data = priv->read_reg(priv, dreg); - frame->data[i] = data; - frame->data[i + 1] = data >> 8; + if (priv->type == BOSCH_D_CAN) { + for (i = 0; i < frame->can_dlc; i += 4, dreg += 2) { + data = priv->read_reg32(priv, dreg); + frame->data[i] = data; + frame->data[i + 1] = data >> 8; + frame->data[i + 2] = data >> 16; + frame->data[i + 3] = data >> 24; + } + } else { + for (i = 0; i < frame->can_dlc; i += 2, dreg++) { + data = priv->read_reg(priv, dreg); + frame->data[i] = data; + frame->data[i + 1] = data >> 8; + } } } -- cgit v1.2.3-70-g09d2 From 43200a4480cbbe660309621817f54cbb93907108 Mon Sep 17 00:00:00 2001 From: Wolfgang Grandegger Date: Mon, 13 Jun 2016 15:44:19 +0200 Subject: can: at91_can: RX queue could get stuck at high bus load At high bus load it could happen that "at91_poll()" enters with all RX message boxes filled up. If then at the end the "quota" is exceeded as well, "rx_next" will not be reset to the first RX mailbox and hence the interrupts remain disabled. Signed-off-by: Wolfgang Grandegger Tested-by: Amr Bekhit Cc: Signed-off-by: Marc Kleine-Budde --- drivers/net/can/at91_can.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index 8b3275d7792a..8f5e93cb7975 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -712,9 +712,10 @@ static int at91_poll_rx(struct net_device *dev, int quota) /* upper group completed, look again in lower */ if (priv->rx_next > get_mb_rx_low_last(priv) && - quota > 0 && mb > get_mb_rx_last(priv)) { + mb > get_mb_rx_last(priv)) { priv->rx_next = get_mb_rx_first(priv); - goto again; + if (quota > 0) + goto again; } return received; -- cgit v1.2.3-70-g09d2 From f155d9c0a138463c3c9380d35e54e433c1477336 Mon Sep 17 00:00:00 2001 From: Maximilian Schneider Date: Wed, 8 Jun 2016 18:00:26 +0000 Subject: can: gs_usb: Add Basic support for the bytewerk.org candleLight interface This patch adds basic support for the bytewerk.org candleLight interface, an open-hardware (CERN OHL) USB CAN adapter.
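As a quick host-side check of the new IDs (illustrative only, not part of the patch, and assuming libusb-1.0 is available with its usual header layout), the candleLight's 1209:2323 pair could be probed like this:

  #include <libusb-1.0/libusb.h>
  #include <stdio.h>

  int main(void)
  {
          libusb_context *ctx;
          libusb_device_handle *h;

          if (libusb_init(&ctx))
                  return 1;
          /* 0x1209:0x2323 matches the new entry in gs_usb_table */
          h = libusb_open_device_with_vid_pid(ctx, 0x1209, 0x2323);
          printf("candleLight %sdetected\n", h ? "" : "not ");
          if (h)
                  libusb_close(h);
          libusb_exit(ctx);
          return 0;
  }

Once gs_usb binds the device, it appears as a regular SocketCAN network interface.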
Signed-off-by: Hubert Denkmair Signed-off-by: Maximilian Schneider Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/Kconfig | 3 ++- drivers/net/can/usb/gs_usb.c | 14 +++++++++++--- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig index bcb272f6c68a..2ff0df32b3d1 100644 --- a/drivers/net/can/usb/Kconfig +++ b/drivers/net/can/usb/Kconfig @@ -16,7 +16,8 @@ config CAN_ESD_USB2 config CAN_GS_USB tristate "Geschwister Schneider UG interfaces" ---help--- - This driver supports the Geschwister Schneider USB/CAN devices. + This driver supports the Geschwister Schneider and bytewerk.org + candleLight USB CAN interfaces USB/CAN devices If unsure choose N, choose Y for built in support, M to compile as module (module will be named: gs_usb). diff --git a/drivers/net/can/usb/gs_usb.c b/drivers/net/can/usb/gs_usb.c index 1556d4286235..acb0c8490673 100644 --- a/drivers/net/can/usb/gs_usb.c +++ b/drivers/net/can/usb/gs_usb.c @@ -1,7 +1,9 @@ -/* CAN driver for Geschwister Schneider USB/CAN devices. +/* CAN driver for Geschwister Schneider USB/CAN devices + * and bytewerk.org candleLight USB CAN interfaces. * - * Copyright (C) 2013 Geschwister Schneider Technologie-, + * Copyright (C) 2013-2016 Geschwister Schneider Technologie-, * Entwicklungs- und Vertriebs UG (Haftungsbeschränkt). + * Copyright (C) 2016 Hubert Denkmair * * Many thanks to all socketcan devs! * @@ -29,6 +31,9 @@ #define USB_GSUSB_1_VENDOR_ID 0x1d50 #define USB_GSUSB_1_PRODUCT_ID 0x606f +#define USB_CANDLELIGHT_VENDOR_ID 0x1209 +#define USB_CANDLELIGHT_PRODUCT_ID 0x2323 + #define GSUSB_ENDPOINT_IN 1 #define GSUSB_ENDPOINT_OUT 2 @@ -952,6 +957,8 @@ static void gs_usb_disconnect(struct usb_interface *intf) static const struct usb_device_id gs_usb_table[] = { { USB_DEVICE_INTERFACE_NUMBER(USB_GSUSB_1_VENDOR_ID, USB_GSUSB_1_PRODUCT_ID, 0) }, + { USB_DEVICE_INTERFACE_NUMBER(USB_CANDLELIGHT_VENDOR_ID, + USB_CANDLELIGHT_PRODUCT_ID, 0) }, {} /* Terminating entry */ }; @@ -969,5 +976,6 @@ module_usb_driver(gs_usb_driver); MODULE_AUTHOR("Maximilian Schneider "); MODULE_DESCRIPTION( "Socket CAN device driver for Geschwister Schneider Technologie-, " -"Entwicklungs- und Vertriebs UG. USB2.0 to CAN interfaces."); +"Entwicklungs- und Vertriebs UG. USB2.0 to CAN interfaces\n" +"and bytewerk.org candleLight USB CAN interfaces."); MODULE_LICENSE("GPL v2"); -- cgit v1.2.3-70-g09d2 From 942f64c4c2be19a1b4a0c4295182b42d153899bf Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Mon, 20 Jun 2016 11:53:20 +0300 Subject: team: Fix possible deadlock during team enslave Both dev_uc_sync_multiple() and dev_mc_sync_multiple() require the source device to be locked by netif_addr_lock_bh(), but this is missing in team's enslave function, so add it. This fixes the following lockdep warning: Possible interrupt unsafe locking scenario: CPU0 CPU1 ---- ---- lock(_xmit_ETHER/1); local_irq_disable(); lock(&(&mc->mca_lock)->rlock); lock(&team_netdev_addr_lock_key); lock(&(&mc->mca_lock)->rlock); *** DEADLOCK *** Fixes: cb41c997d444 ("team: team should sync the port's uc/mc addrs when add a port") Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. 
Miller --- drivers/net/team/team.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index 2ace126533cd..fdee77207323 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1203,8 +1203,10 @@ static int team_port_add(struct team *team, struct net_device *port_dev) goto err_dev_open; } + netif_addr_lock_bh(dev); dev_uc_sync_multiple(port_dev, dev); dev_mc_sync_multiple(port_dev, dev); + netif_addr_unlock_bh(dev); err = vlan_vids_add_by_dev(port_dev, dev); if (err) { -- cgit v1.2.3-70-g09d2 From d19af0a76444fde629667ecb823c0ee28f9f67d8 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Mon, 20 Jun 2016 11:36:28 +0200 Subject: kcm: fix /proc memory leak Every open of /proc/net/kcm leaks 16 bytes of memory as is reported by kmemleak: unreferenced object 0xffff88059c0e3458 (size 192): comm "cat", pid 1401, jiffies 4294935742 (age 310.720s) hex dump (first 32 bytes): 28 45 71 96 05 88 ff ff 00 10 00 00 00 00 00 00 (Eq............. 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [] kmem_cache_alloc_trace+0x16e/0x230 [] seq_open+0x79/0x1d0 [] kcm_seq_open+0x0/0x30 [kcm] [] seq_open+0x79/0x1d0 [] __seq_open_private+0x2f/0xa0 [] seq_open_net+0x38/0xa0 ... It is caused by a missing free in the ->release path. So fix it by providing seq_release_net as the ->release method. Signed-off-by: Jiri Slaby Fixes: cd6e111bf5 (kcm: Add statistics and proc interfaces) Cc: "David S. Miller" Cc: Tom Herbert Cc: netdev@vger.kernel.org Signed-off-by: David S. Miller --- net/kcm/kcmproc.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/kcm/kcmproc.c b/net/kcm/kcmproc.c index 738008726cc6..fda7f4715c58 100644 --- a/net/kcm/kcmproc.c +++ b/net/kcm/kcmproc.c @@ -241,6 +241,7 @@ static const struct file_operations kcm_seq_fops = { .open = kcm_seq_open, .read = seq_read, .llseek = seq_lseek, + .release = seq_release_net, }; static struct kcm_seq_muxinfo kcm_seq_muxinfo = { -- cgit v1.2.3-70-g09d2 From 27777daa8b6df0c19aaf591d1536a586b3eb5e36 Mon Sep 17 00:00:00 2001 From: Jon Paul Maloy Date: Mon, 20 Jun 2016 09:20:46 -0400 Subject: tipc: unclone unbundled buffers before forwarding When extracting an individual message from a received "bundle" buffer, we just create a clone of the base buffer, and adjust it to point into the right position of the linearized data area of the latter. This works well for regular message reception, but during periods of extremely high load it may happen that an extracted buffer, e.g, a connection probe, is reversed and forwarded through an external interface while the preceding extracted message is still unhandled. When this happens, the header or data area of the preceding message will be partially overwritten by a MAC header, leading to unpredicatable consequences, such as a link reset. We now fix this by ensuring that the msg_reverse() function never returns a cloned buffer, and that the returned buffer always contains sufficient valid head and tail room to be forwarded. Reported-by: Erik Hugne Acked-by: Ying Xue Signed-off-by: Jon Maloy Signed-off-by: David S. 
Miller --- net/tipc/msg.c | 6 ++++++ net/tipc/msg.h | 11 ----------- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 8740930f0787..17201aa8423d 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -41,6 +41,8 @@ #include "name_table.h" #define MAX_FORWARD_SIZE 1024 +#define BUF_HEADROOM (LL_MAX_HEADER + 48) +#define BUF_TAILROOM 16 static unsigned int align(unsigned int i) { @@ -505,6 +507,10 @@ bool tipc_msg_reverse(u32 own_node, struct sk_buff **skb, int err) msg_set_hdr_sz(hdr, BASIC_H_SIZE); } + if (skb_cloned(_skb) && + pskb_expand_head(_skb, BUF_HEADROOM, BUF_TAILROOM, GFP_KERNEL)) + goto exit; + /* Now reverse the concerned fields */ msg_set_errcode(hdr, err); msg_set_origport(hdr, msg_destport(&ohdr)); diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 024da8af91f0..7cf52fb39bee 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -94,17 +94,6 @@ struct plist; #define TIPC_MEDIA_INFO_OFFSET 5 -/** - * TIPC message buffer code - * - * TIPC message buffer headroom reserves space for the worst-case - * link-level device header (in case the message is sent off-node). - * - * Note: Headroom should be a multiple of 4 to ensure the TIPC header fields - * are word aligned for quicker access - */ -#define BUF_HEADROOM (LL_MAX_HEADER + 48) - struct tipc_skb_cb { void *handle; struct sk_buff *tail; -- cgit v1.2.3-70-g09d2 From 93c098af09455ea7bdc6f0f6b08f6ac14fa06cf4 Mon Sep 17 00:00:00 2001 From: Kamal Heib Date: Tue, 21 Jun 2016 14:20:02 +0300 Subject: net/mlx4_en: Fix the return value of a failure in VLAN VID add/kill Modify mlx4_en_vlan_rx_[add/kill]_vid to return error value in case of failure. Fixes: 8e586137e6b6 ('net: make vlan ndo_vlan_rx_[add/kill]_vid return error value') Signed-off-by: Kamal Heib Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index a2c25365a8a3..43f505e2d291 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -406,14 +406,18 @@ static int mlx4_en_vlan_rx_add_vid(struct net_device *dev, mutex_lock(&mdev->state_lock); if (mdev->device_up && priv->port_up) { err = mlx4_SET_VLAN_FLTR(mdev->dev, priv); - if (err) + if (err) { en_err(priv, "Failed configuring VLAN filter\n"); + goto out; + } } - if (mlx4_register_vlan(mdev->dev, priv->port, vid, &idx)) - en_dbg(HW, priv, "failed adding vlan %d\n", vid); - mutex_unlock(&mdev->state_lock); + err = mlx4_register_vlan(mdev->dev, priv->port, vid, &idx); + if (err) + en_dbg(HW, priv, "Failed adding vlan %d\n", vid); - return 0; +out: + mutex_unlock(&mdev->state_lock); + return err; } static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, @@ -421,7 +425,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; - int err; + int err = 0; en_dbg(HW, priv, "Killing VID:%d\n", vid); @@ -438,7 +442,7 @@ static int mlx4_en_vlan_rx_kill_vid(struct net_device *dev, } mutex_unlock(&mdev->state_lock); - return 0; + return err; } static void mlx4_en_u64_to_mac(unsigned char dst_mac[ETH_ALEN + 2], u64 src_mac) -- cgit v1.2.3-70-g09d2 From 9d76931180557270796f9631e2c79b9c7bb3c9fb Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 21 Jun 2016 14:20:03 +0300 Subject: net/mlx4_en: Avoid unregister_netdev at shutdown flow This allows a clean shutdown, even if some netdev clients do not release their reference from this netdev. It is enough to release the HW resources only as the kernel is shutting down. Fixes: 2ba5fbd62b25 ('net/mlx4_core: Handle AER flow properly') Signed-off-by: Eran Ben Elisha Signed-off-by: Tariq Toukan Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 17 +++++++++++++++-- drivers/net/ethernet/mellanox/mlx4/main.c | 5 ++++- include/linux/mlx4/device.h | 1 + 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 43f505e2d291..0c0dfd6cdca6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2036,11 +2036,20 @@ err: return -ENOMEM; } +static void mlx4_en_shutdown(struct net_device *dev) +{ + rtnl_lock(); + netif_device_detach(dev); + mlx4_en_close(dev); + rtnl_unlock(); +} void mlx4_en_destroy_netdev(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); struct mlx4_en_dev *mdev = priv->mdev; + bool shutdown = mdev->dev->persist->interface_state & + MLX4_INTERFACE_STATE_SHUTDOWN; en_dbg(DRV, priv, "Destroying netdev on port:%d\n", priv->port); @@ -2048,7 +2057,10 @@ void mlx4_en_destroy_netdev(struct net_device *dev) if (priv->registered) { devlink_port_type_clear(mlx4_get_devlink_port(mdev->dev, priv->port)); - unregister_netdev(dev); + if (shutdown) + mlx4_en_shutdown(dev); + else + unregister_netdev(dev); } if (priv->allocated) @@ -2073,7 +2085,8 @@ void mlx4_en_destroy_netdev(struct net_device *dev) kfree(priv->tx_ring); kfree(priv->tx_cq); - free_netdev(dev); + if (!shutdown) + free_netdev(dev); } static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 372ebfa880f5..546fab0ecc3b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -4135,8 +4135,11 @@ static void mlx4_shutdown(struct pci_dev *pdev) mlx4_info(persist->dev, "mlx4_shutdown was called\n"); mutex_lock(&persist->interface_state_mutex); - if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) { + /* Notify mlx4 clients that the kernel is being shut down */ + persist->interface_state |= MLX4_INTERFACE_STATE_SHUTDOWN; mlx4_unload_one(pdev); + } mutex_unlock(&persist->interface_state_mutex); } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 80dec87a94f8..d46a0e7f144d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -466,6 +466,7 @@ enum { enum { MLX4_INTERFACE_STATE_UP = 1 << 0, MLX4_INTERFACE_STATE_DELETION = 1 << 1, + MLX4_INTERFACE_STATE_SHUTDOWN = 1 << 2, }; #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ -- cgit v1.2.3-70-g09d2 From bce271f255dae8335dc4d2ee2c4531e09cc67f5a Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 21 Jun 2016 12:14:07 +0200 Subject: can: fix handling of unmodifiable configuration options fix With upstream commit bb208f144cf3f59 (can: fix handling of unmodifiable configuration options) a new can_validate() function was introduced. When invoking 'ip link set can0 type can' without any configuration data can_validate() tries to validate the content without taking into account that there's totally no content. This patch adds a check for missing content. 
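For context, a sketch of the general ->validate() shape this relies on (the function name is hypothetical and the body is trimmed to the relevant check): rtnetlink passes data == NULL when the request carries no type-specific attributes, which is exactly the "ip link set can0 type can" case described above, so the callback must test it before touching any data[IFLA_CAN_*] entry.

#include <net/netlink.h>

/* Sketch of an rtnl_link_ops ->validate() hook: bail out early when no
 * type-specific attributes were supplied at all.
 */
static int example_validate(struct nlattr *tb[], struct nlattr *data[])
{
	if (!data)
		return 0;	/* nothing type-specific to validate */

	/* ... per-attribute checks, e.g. data[IFLA_CAN_CTRLMODE] ... */
	return 0;
}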
Reported-by: ajneu Signed-off-by: Oliver Hartkopp Cc: Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 910c12e2638e..348dd5001fa4 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -798,6 +798,9 @@ static int can_validate(struct nlattr *tb[], struct nlattr *data[]) * - control mode with CAN_CTRLMODE_FD set */ + if (!data) + return 0; + if (data[IFLA_CAN_CTRLMODE]) { struct can_ctrlmode *cm = nla_data(data[IFLA_CAN_CTRLMODE]); -- cgit v1.2.3-70-g09d2 From 25e1ed6e64f52a692ba3191c4fde650aab3ecc07 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 21 Jun 2016 15:45:47 +0200 Subject: can: fix oops caused by wrong rtnl dellink usage For 'real' hardware CAN devices the netlink interface is used to set CAN specific communication parameters. Real CAN hardware can not be created nor removed with the ip tool ... This patch adds a private dellink function for the CAN device driver interface that does just nothing. It's a follow up to commit 993e6f2fd ("can: fix oops caused by wrong rtnl newlink usage") but for dellink. Reported-by: ajneu Signed-off-by: Oliver Hartkopp Cc: Signed-off-by: Marc Kleine-Budde --- drivers/net/can/dev.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index 348dd5001fa4..ad535a854e5c 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -1011,6 +1011,11 @@ static int can_newlink(struct net *src_net, struct net_device *dev, return -EOPNOTSUPP; } +static void can_dellink(struct net_device *dev, struct list_head *head) +{ + return; +} + static struct rtnl_link_ops can_link_ops __read_mostly = { .kind = "can", .maxtype = IFLA_CAN_MAX, @@ -1019,6 +1024,7 @@ static struct rtnl_link_ops can_link_ops __read_mostly = { .validate = can_validate, .newlink = can_newlink, .changelink = can_changelink, + .dellink = can_dellink, .get_size = can_get_size, .fill_info = can_fill_info, .get_xstats_size = can_get_xstats_size, -- cgit v1.2.3-70-g09d2 From 71873a9b38d1cc6c93e2962149a7bb7272a7cb66 Mon Sep 17 00:00:00 2001 From: Jimmy Assarsson Date: Thu, 23 Jun 2016 07:57:21 +0200 Subject: can: kvaser_usb: Add support for more Kvaser Leaf v2 devices This patch adds support for Kvaser Leaf Light HS v2 OEM, Mini PCI Express 2xHS and USBcan Light 2xHS. 
Signed-off-by: Jimmy Assarsson Signed-off-by: Marc Kleine-Budde --- drivers/net/can/usb/Kconfig | 2 ++ drivers/net/can/usb/kvaser_usb.c | 8 +++++++- 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/drivers/net/can/usb/Kconfig b/drivers/net/can/usb/Kconfig index 2ff0df32b3d1..8483a40e7e9e 100644 --- a/drivers/net/can/usb/Kconfig +++ b/drivers/net/can/usb/Kconfig @@ -47,6 +47,8 @@ config CAN_KVASER_USB - Kvaser USBcan R - Kvaser Leaf Light v2 - Kvaser Mini PCI Express HS + - Kvaser Mini PCI Express 2xHS + - Kvaser USBcan Light 2xHS - Kvaser USBcan II HS/HS - Kvaser USBcan II HS/LS - Kvaser USBcan Rugged ("USBcan Rev B") diff --git a/drivers/net/can/usb/kvaser_usb.c b/drivers/net/can/usb/kvaser_usb.c index 022bfa13ebfa..6f1f3b675ff5 100644 --- a/drivers/net/can/usb/kvaser_usb.c +++ b/drivers/net/can/usb/kvaser_usb.c @@ -59,11 +59,14 @@ #define USB_CAN_R_PRODUCT_ID 39 #define USB_LEAF_LITE_V2_PRODUCT_ID 288 #define USB_MINI_PCIE_HS_PRODUCT_ID 289 +#define USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID 290 +#define USB_USBCAN_LIGHT_2HS_PRODUCT_ID 291 +#define USB_MINI_PCIE_2HS_PRODUCT_ID 292 static inline bool kvaser_is_leaf(const struct usb_device_id *id) { return id->idProduct >= USB_LEAF_DEVEL_PRODUCT_ID && - id->idProduct <= USB_MINI_PCIE_HS_PRODUCT_ID; + id->idProduct <= USB_MINI_PCIE_2HS_PRODUCT_ID; } /* Kvaser USBCan-II devices */ @@ -537,6 +540,9 @@ static const struct usb_device_id kvaser_usb_table[] = { .driver_info = KVASER_HAS_TXRX_ERRORS }, { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LITE_V2_PRODUCT_ID) }, { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_HS_PRODUCT_ID) }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_LEAF_LIGHT_HS_V2_OEM_PRODUCT_ID) }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_LIGHT_2HS_PRODUCT_ID) }, + { USB_DEVICE(KVASER_VENDOR_ID, USB_MINI_PCIE_2HS_PRODUCT_ID) }, /* USBCANII family IDs */ { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN2_PRODUCT_ID), -- cgit v1.2.3-70-g09d2 From 962fcef33b03395051367181a0549d29d109d9a4 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 18 Jun 2016 13:03:36 +0800 Subject: esp: Fix ESN generation under UDP encapsulation Blair Steven noticed that ESN in conjunction with UDP encapsulation is broken because we set the temporary ESP header to the wrong spot. This patch fixes this by first of all using the right spot, i.e., 4 bytes off the real ESP header, and then saving this information so that after encryption we can restore it properly. Fixes: 7021b2e1cddd ("esp4: Switch to new AEAD interface") Reported-by: Blair Steven Signed-off-by: Herbert Xu Acked-by: Steffen Klassert Signed-off-by: David S. Miller --- net/ipv4/esp4.c | 52 ++++++++++++++++++++++++++++++++-------------------- 1 file changed, 32 insertions(+), 20 deletions(-) diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 477937465a20..d95631d09248 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -23,6 +23,11 @@ struct esp_skb_cb { void *tmp; }; +struct esp_output_extra { + __be32 seqhi; + u32 esphoff; +}; + #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0])) static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); @@ -35,11 +40,11 @@ static u32 esp4_get_mtu(struct xfrm_state *x, int mtu); * * TODO: Use spare space in skb for this where possible. 
*/ -static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) +static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int extralen) { unsigned int len; - len = seqhilen; + len = extralen; len += crypto_aead_ivsize(aead); @@ -57,15 +62,16 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqhilen) return kmalloc(len, GFP_ATOMIC); } -static inline __be32 *esp_tmp_seqhi(void *tmp) +static inline void *esp_tmp_extra(void *tmp) { - return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32)); + return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra)); } -static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) + +static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int extralen) { return crypto_aead_ivsize(aead) ? - PTR_ALIGN((u8 *)tmp + seqhilen, - crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; + PTR_ALIGN((u8 *)tmp + extralen, + crypto_aead_alignmask(aead) + 1) : tmp + extralen; } static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv) @@ -99,7 +105,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) { struct ip_esp_hdr *esph = (void *)(skb->data + offset); void *tmp = ESP_SKB_CB(skb)->tmp; - __be32 *seqhi = esp_tmp_seqhi(tmp); + __be32 *seqhi = esp_tmp_extra(tmp); esph->seq_no = esph->spi; esph->spi = *seqhi; @@ -107,7 +113,11 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset) static void esp_output_restore_header(struct sk_buff *skb) { - esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32)); + void *tmp = ESP_SKB_CB(skb)->tmp; + struct esp_output_extra *extra = esp_tmp_extra(tmp); + + esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff - + sizeof(__be32)); } static void esp_output_done_esn(struct crypto_async_request *base, int err) @@ -121,6 +131,7 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err) static int esp_output(struct xfrm_state *x, struct sk_buff *skb) { int err; + struct esp_output_extra *extra; struct ip_esp_hdr *esph; struct crypto_aead *aead; struct aead_request *req; @@ -137,8 +148,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int tfclen; int nfrags; int assoclen; - int seqhilen; - __be32 *seqhi; + int extralen; __be64 seqno; /* skb is pure payload to encrypt */ @@ -166,21 +176,21 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) nfrags = err; assoclen = sizeof(*esph); - seqhilen = 0; + extralen = 0; if (x->props.flags & XFRM_STATE_ESN) { - seqhilen += sizeof(__be32); - assoclen += seqhilen; + extralen += sizeof(*extra); + assoclen += sizeof(__be32); } - tmp = esp_alloc_tmp(aead, nfrags, seqhilen); + tmp = esp_alloc_tmp(aead, nfrags, extralen); if (!tmp) { err = -ENOMEM; goto error; } - seqhi = esp_tmp_seqhi(tmp); - iv = esp_tmp_iv(aead, tmp, seqhilen); + extra = esp_tmp_extra(tmp); + iv = esp_tmp_iv(aead, tmp, extralen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); @@ -247,8 +257,10 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) * encryption. 
*/ if ((x->props.flags & XFRM_STATE_ESN)) { - esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); - *seqhi = esph->spi; + extra->esphoff = (unsigned char *)esph - + skb_transport_header(skb); + esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4); + extra->seqhi = esph->spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); aead_request_set_callback(req, 0, esp_output_done_esn, skb); } @@ -445,7 +457,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) goto out; ESP_SKB_CB(skb)->tmp = tmp; - seqhi = esp_tmp_seqhi(tmp); + seqhi = esp_tmp_extra(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); sg = esp_req_sg(aead, req); -- cgit v1.2.3-70-g09d2 From 067a7cd06f7bf860f2e3415394b065b9a0983802 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 20 Jun 2016 13:37:18 -0700 Subject: act_ife: only acquire tcf_lock for existing actions Alexey reported that we have GFP_KERNEL allocation when holding the spinlock tcf_lock. Actually we don't have to take that spinlock for all the cases, especially for the new one we just create. To modify the existing actions, we still need this spinlock to make sure the whole update is atomic. For net-next, we can get rid of this spinlock because we already hold the RTNL lock on slow path, and on fast path we can use RCU to protect the metalist. Joint work with Jamal. Reported-by: Alexey Khoroshilov Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_ife.h | 6 ++--- net/sched/act_ife.c | 55 +++++++++++++++++++++++++-------------------- 2 files changed, 34 insertions(+), 27 deletions(-) diff --git a/include/net/tc_act/tc_ife.h b/include/net/tc_act/tc_ife.h index dc9a09aefb33..c55facd17b7e 100644 --- a/include/net/tc_act/tc_ife.h +++ b/include/net/tc_act/tc_ife.h @@ -36,7 +36,7 @@ struct tcf_meta_ops { int (*encode)(struct sk_buff *, void *, struct tcf_meta_info *); int (*decode)(struct sk_buff *, void *, u16 len); int (*get)(struct sk_buff *skb, struct tcf_meta_info *mi); - int (*alloc)(struct tcf_meta_info *, void *); + int (*alloc)(struct tcf_meta_info *, void *, gfp_t); void (*release)(struct tcf_meta_info *); int (*validate)(void *val, int len); struct module *owner; @@ -48,8 +48,8 @@ int ife_get_meta_u32(struct sk_buff *skb, struct tcf_meta_info *mi); int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi); int ife_tlv_meta_encode(void *skbdata, u16 attrtype, u16 dlen, const void *dval); -int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval); -int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval); +int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); +int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp); int ife_check_meta_u32(u32 metaval, struct tcf_meta_info *mi); int ife_encode_meta_u32(u32 metaval, void *skbdata, struct tcf_meta_info *mi); int ife_validate_meta_u32(void *val, int len); diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index 658046dfe02d..e4076598f214 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -106,9 +106,9 @@ int ife_get_meta_u16(struct sk_buff *skb, struct tcf_meta_info *mi) } EXPORT_SYMBOL_GPL(ife_get_meta_u16); -int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval) +int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval, gfp_t gfp) { - mi->metaval = kmemdup(metaval, sizeof(u32), GFP_KERNEL); + mi->metaval = kmemdup(metaval, sizeof(u32), gfp); if (!mi->metaval) return -ENOMEM; @@ -116,9 +116,9 
@@ int ife_alloc_meta_u32(struct tcf_meta_info *mi, void *metaval) } EXPORT_SYMBOL_GPL(ife_alloc_meta_u32); -int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval) +int ife_alloc_meta_u16(struct tcf_meta_info *mi, void *metaval, gfp_t gfp) { - mi->metaval = kmemdup(metaval, sizeof(u16), GFP_KERNEL); + mi->metaval = kmemdup(metaval, sizeof(u16), gfp); if (!mi->metaval) return -ENOMEM; @@ -240,10 +240,10 @@ static int ife_validate_metatype(struct tcf_meta_ops *ops, void *val, int len) } /* called when adding new meta information - * under ife->tcf_lock + * under ife->tcf_lock for existing action */ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, - void *val, int len) + void *val, int len, bool exists) { struct tcf_meta_ops *ops = find_ife_oplist(metaid); int ret = 0; @@ -251,11 +251,13 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, if (!ops) { ret = -ENOENT; #ifdef CONFIG_MODULES - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); rtnl_unlock(); request_module("ifemeta%u", metaid); rtnl_lock(); - spin_lock_bh(&ife->tcf_lock); + if (exists) + spin_lock_bh(&ife->tcf_lock); ops = find_ife_oplist(metaid); #endif } @@ -272,10 +274,10 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, } /* called when adding new meta information - * under ife->tcf_lock + * under ife->tcf_lock for existing action */ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len) + int len, bool exists) { struct tcf_meta_info *mi = NULL; struct tcf_meta_ops *ops = find_ife_oplist(metaid); @@ -284,7 +286,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, if (!ops) return -ENOENT; - mi = kzalloc(sizeof(*mi), GFP_KERNEL); + mi = kzalloc(sizeof(*mi), exists ? GFP_ATOMIC : GFP_KERNEL); if (!mi) { /*put back what find_ife_oplist took */ module_put(ops->owner); @@ -294,7 +296,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, mi->metaid = metaid; mi->ops = ops; if (len > 0) { - ret = ops->alloc(mi, metaval); + ret = ops->alloc(mi, metaval, exists ? 
GFP_ATOMIC : GFP_KERNEL); if (ret != 0) { kfree(mi); module_put(ops->owner); @@ -307,14 +309,14 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, return ret; } -static int use_all_metadata(struct tcf_ife_info *ife) +static int use_all_metadata(struct tcf_ife_info *ife, bool exists) { struct tcf_meta_ops *o; int rc = 0; int installed = 0; list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo(ife, o->metaid, NULL, 0); + rc = add_metainfo(ife, o->metaid, NULL, 0, exists); if (rc == 0) installed += 1; } @@ -385,8 +387,9 @@ static void tcf_ife_cleanup(struct tc_action *a, int bind) spin_unlock_bh(&ife->tcf_lock); } -/* under ife->tcf_lock */ -static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb) +/* under ife->tcf_lock for existing action */ +static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb, + bool exists) { int len = 0; int rc = 0; @@ -398,11 +401,11 @@ static int populate_metalist(struct tcf_ife_info *ife, struct nlattr **tb) val = nla_data(tb[i]); len = nla_len(tb[i]); - rc = load_metaops_and_vet(ife, i, val, len); + rc = load_metaops_and_vet(ife, i, val, len, exists); if (rc != 0) return rc; - rc = add_metainfo(ife, i, val, len); + rc = add_metainfo(ife, i, val, len, exists); if (rc) return rc; } @@ -474,7 +477,8 @@ static int tcf_ife_init(struct net *net, struct nlattr *nla, saddr = nla_data(tb[TCA_IFE_SMAC]); } - spin_lock_bh(&ife->tcf_lock); + if (exists) + spin_lock_bh(&ife->tcf_lock); ife->tcf_action = parm->action; if (parm->flags & IFE_ENCODE) { @@ -504,11 +508,12 @@ metadata_parse_err: if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); return err; } - err = populate_metalist(ife, tb2); + err = populate_metalist(ife, tb2, exists); if (err) goto metadata_parse_err; @@ -518,17 +523,19 @@ metadata_parse_err: * as we can. You better have at least one else we are * going to bail out */ - err = use_all_metadata(ife); + err = use_all_metadata(ife, exists); if (err) { if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); return err; } } - spin_unlock_bh(&ife->tcf_lock); + if (exists) + spin_unlock_bh(&ife->tcf_lock); if (ret == ACT_P_CREATED) tcf_hash_insert(tn, a); -- cgit v1.2.3-70-g09d2 From 817e9f2c5c262b2716f5d77020d118ad53315f3e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 20 Jun 2016 13:37:19 -0700 Subject: act_ife: acquire ife_mod_lock before reading ifeoplist Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- net/sched/act_ife.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/net/sched/act_ife.c b/net/sched/act_ife.c index e4076598f214..ea4a2fef1b71 100644 --- a/net/sched/act_ife.c +++ b/net/sched/act_ife.c @@ -277,7 +277,7 @@ static int load_metaops_and_vet(struct tcf_ife_info *ife, u32 metaid, * under ife->tcf_lock for existing action */ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, - int len, bool exists) + int len, bool atomic) { struct tcf_meta_info *mi = NULL; struct tcf_meta_ops *ops = find_ife_oplist(metaid); @@ -286,7 +286,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, if (!ops) return -ENOENT; - mi = kzalloc(sizeof(*mi), exists ? GFP_ATOMIC : GFP_KERNEL); + mi = kzalloc(sizeof(*mi), atomic ? 
GFP_ATOMIC : GFP_KERNEL); if (!mi) { /*put back what find_ife_oplist took */ module_put(ops->owner); @@ -296,7 +296,7 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, mi->metaid = metaid; mi->ops = ops; if (len > 0) { - ret = ops->alloc(mi, metaval, exists ? GFP_ATOMIC : GFP_KERNEL); + ret = ops->alloc(mi, metaval, atomic ? GFP_ATOMIC : GFP_KERNEL); if (ret != 0) { kfree(mi); module_put(ops->owner); @@ -309,17 +309,19 @@ static int add_metainfo(struct tcf_ife_info *ife, u32 metaid, void *metaval, return ret; } -static int use_all_metadata(struct tcf_ife_info *ife, bool exists) +static int use_all_metadata(struct tcf_ife_info *ife) { struct tcf_meta_ops *o; int rc = 0; int installed = 0; + read_lock(&ife_mod_lock); list_for_each_entry(o, &ifeoplist, list) { - rc = add_metainfo(ife, o->metaid, NULL, 0, exists); + rc = add_metainfo(ife, o->metaid, NULL, 0, true); if (rc == 0) installed += 1; } + read_unlock(&ife_mod_lock); if (installed) return 0; @@ -523,7 +525,7 @@ metadata_parse_err: * as we can. You better have at least one else we are * going to bail out */ - err = use_all_metadata(ife, exists); + err = use_all_metadata(ife); if (err) { if (ret == ACT_P_CREATED) _tcf_ife_cleanup(a, bind); -- cgit v1.2.3-70-g09d2 From 21de12ee5568fd1aec47890c72967abf791ac80a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 20 Jun 2016 15:00:43 -0700 Subject: netem: fix a use after free If the packet was dropped by lower qdisc, then we must not access it later. Save qdisc_pkt_len(skb) in a temp variable. Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Signed-off-by: Eric Dumazet Cc: WANG Cong Cc: Jamal Hadi Salim Cc: Stephen Hemminger Signed-off-by: David S. Miller --- net/sched/sch_netem.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 205bed00dd34..178f1630a036 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -650,14 +650,14 @@ deliver: #endif if (q->qdisc) { + unsigned int pkt_len = qdisc_pkt_len(skb); int err = qdisc_enqueue(skb, q->qdisc); - if (unlikely(err != NET_XMIT_SUCCESS)) { - if (net_xmit_drop_count(err)) { - qdisc_qstats_drop(sch); - qdisc_tree_reduce_backlog(sch, 1, - qdisc_pkt_len(skb)); - } + if (err != NET_XMIT_SUCCESS && + net_xmit_drop_count(err)) { + qdisc_qstats_drop(sch); + qdisc_tree_reduce_backlog(sch, 1, + pkt_len); } goto tfifo_dequeue; } -- cgit v1.2.3-70-g09d2 From 52fe705b493d40b472b9e7220a71bdca8537c6b2 Mon Sep 17 00:00:00 2001 From: Chris Packham Date: Tue, 21 Jun 2016 15:39:43 +1200 Subject: net: vrf: replace hard tab with space in assignment The assignment of rth->dst.output in vrf_rt6_create() and vrf_rtable_create() used a hard tab before the '='. The neighboring assignments did not. Make the assignment of rth->dst.output consistent with the surrounding code. Signed-off-by: Chris Packham Acked-by: David Ahern Signed-off-by: David S. 
Miller --- drivers/net/vrf.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index dff08842f26d..8bd8c7e1ee87 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -304,7 +304,7 @@ static int vrf_rt6_create(struct net_device *dev) dst_hold(&rt6->dst); rt6->rt6i_table = rt6i_table; - rt6->dst.output = vrf_output6; + rt6->dst.output = vrf_output6; rcu_assign_pointer(vrf->rt6, rt6); rc = 0; @@ -403,7 +403,7 @@ static int vrf_rtable_create(struct net_device *dev) if (!rth) return -ENOMEM; - rth->dst.output = vrf_output; + rth->dst.output = vrf_output; rth->rt_table_id = vrf->tb_id; rcu_assign_pointer(vrf->rth, rth); -- cgit v1.2.3-70-g09d2 From efeb2267bba8aa893afdadfc9bae4790777c600c Mon Sep 17 00:00:00 2001 From: Haishuang Yan Date: Tue, 21 Jun 2016 16:26:49 +0800 Subject: geneve: fix tx_errors statistics Tx errors present summation of errors encountered while transmitting packets. Signed-off-by: Haishuang Yan Signed-off-by: David S. Miller --- drivers/net/geneve.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 305a04e45a13..cc39cefeae45 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -958,8 +958,8 @@ tx_error: dev->stats.collisions++; else if (err == -ENETUNREACH) dev->stats.tx_carrier_errors++; - else - dev->stats.tx_errors++; + + dev->stats.tx_errors++; return NETDEV_TX_OK; } @@ -1048,8 +1048,8 @@ tx_error: dev->stats.collisions++; else if (err == -ENETUNREACH) dev->stats.tx_carrier_errors++; - else - dev->stats.tx_errors++; + + dev->stats.tx_errors++; return NETDEV_TX_OK; } #endif -- cgit v1.2.3-70-g09d2 From c40e4096bf0a12a76c349cbc4d1b2aa40a3d68cd Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 21 Jun 2016 16:36:06 +0300 Subject: MAINTAINERS: Update Mellanox's mlx4 Eth NIC driver entry Tariq Toukan is replacing Eugenia (Jenny) Emantayev as the mlx4 Ethernet driver maintainer, thanks to Jenny and good luck to him. Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 2ebe195951af..8cffceb61a2d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7406,7 +7406,7 @@ F: drivers/scsi/megaraid.* F: drivers/scsi/megaraid/ MELLANOX ETHERNET DRIVER (mlx4_en) -M: Eugenia Emantayev +M: Tariq Toukan L: netdev@vger.kernel.org S: Supported W: http://www.mellanox.com -- cgit v1.2.3-70-g09d2 From a37503bc387ce2434106d4bf4870bd73081c7355 Mon Sep 17 00:00:00 2001 From: Jeremy Linton Date: Wed, 22 Jun 2016 12:40:50 -0500 Subject: net: smsc911x: Fix bug where PHY interrupts are overwritten by 0 By default, mdiobus_alloc() sets the PHYs to polling mode, but a pointer size memcpy means that a couple IRQs end up being overwritten with a value of 0. This means that PHY_POLL is disabled and results in unpredictable behavior depending on the PHY's location on the MDIO bus. Remove that memcpy and the now unused phy_irq member to force the SMSC911x PHYs into polling mode 100% of the time. Fixes: e7f4dc3536a4 ("mdio: Move allocation of interrupts into core") Signed-off-by: Jeremy Linton Reviewed-by: Andrew Lunn Acked-by: Sergei Shtylyov Signed-off-by: David S. 
Miller --- drivers/net/ethernet/smsc/smsc911x.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/smsc/smsc911x.c b/drivers/net/ethernet/smsc/smsc911x.c index 8af25563f627..b5ab5e120bca 100644 --- a/drivers/net/ethernet/smsc/smsc911x.c +++ b/drivers/net/ethernet/smsc/smsc911x.c @@ -116,7 +116,6 @@ struct smsc911x_data { struct phy_device *phy_dev; struct mii_bus *mii_bus; - int phy_irq[PHY_MAX_ADDR]; unsigned int using_extphy; int last_duplex; int last_carrier; @@ -1073,7 +1072,6 @@ static int smsc911x_mii_init(struct platform_device *pdev, pdata->mii_bus->priv = pdata; pdata->mii_bus->read = smsc911x_mii_read; pdata->mii_bus->write = smsc911x_mii_write; - memcpy(pdata->mii_bus->irq, pdata->phy_irq, sizeof(pdata->mii_bus)); pdata->mii_bus->parent = &pdev->dev; -- cgit v1.2.3-70-g09d2 From 48f1dcb55a7d29aeb8965c567660c14d0dfd1a42 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 23 Jun 2016 15:25:09 +0200 Subject: ipv6: enforce egress device match in per table nexthop lookups with the commit 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups"), net hop lookup is first performed on route creation in the passed-in table. However device match is not enforced in table lookup, so the found route can be later discarded due to egress device mismatch and no global lookup will be performed. This cause the following to fail: ip link add dummy1 type dummy ip link add dummy2 type dummy ip link set dummy1 up ip link set dummy2 up ip route add 2001:db8:8086::/48 dev dummy1 metric 20 ip route add 2001:db8:d34d::/64 via 2001:db8:8086::2 dev dummy1 metric 20 ip route add 2001:db8:8086::/48 dev dummy2 metric 21 ip route add 2001:db8:d34d::/64 via 2001:db8:8086::2 dev dummy2 metric 21 RTNETLINK answers: No route to host This change fixes the issue enforcing device lookup in ip6_nh_lookup_table() v1->v2: updated commit message title Fixes: 8c14586fc320 ("net: ipv6: Use passed in table for nexthop lookups") Reported-and-tested-by: Beniamino Galvani Signed-off-by: Paolo Abeni Acked-by: David Ahern Signed-off-by: David S. Miller --- net/ipv6/route.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 969913da494f..520b7884d0c2 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1782,7 +1782,7 @@ static struct rt6_info *ip6_nh_lookup_table(struct net *net, }; struct fib6_table *table; struct rt6_info *rt; - int flags = 0; + int flags = RT6_LOOKUP_F_IFACE; table = fib6_get_table(net, cfg->fc_table); if (!table) -- cgit v1.2.3-70-g09d2 From 4192f672fae559f32d82de72a677701853cc98a7 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Thu, 23 Jun 2016 16:28:58 +0100 Subject: vsock: make listener child lock ordering explicit There are several places where the listener and pending or accept queue child sockets are accessed at the same time. Lockdep is unhappy that two locks from the same class are held. Tell lockdep that it is safe and document the lock ordering. Originally Claudio Imbrenda sent a similar patch asking whether this is safe. I have audited the code and also covered the vsock_pending_work() function. Suggested-by: Claudio Imbrenda Signed-off-by: Stefan Hajnoczi Signed-off-by: David S. 
Miller --- net/vmw_vsock/af_vsock.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index b5f1221f48d4..b96ac918e0ba 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -61,6 +61,14 @@ * function will also cleanup rejected sockets, those that reach the connected * state but leave it before they have been accepted. * + * - Lock ordering for pending or accept queue sockets is: + * + * lock_sock(listener); + * lock_sock_nested(pending, SINGLE_DEPTH_NESTING); + * + * Using explicit nested locking keeps lockdep happy since normally only one + * lock of a given class may be taken at a time. + * * - Sockets created by user action will be cleaned up when the user process * calls close(2), causing our release implementation to be called. Our release * implementation will perform some cleanup then drop the last reference so our @@ -443,7 +451,7 @@ void vsock_pending_work(struct work_struct *work) cleanup = true; lock_sock(listener); - lock_sock(sk); + lock_sock_nested(sk, SINGLE_DEPTH_NESTING); if (vsock_is_pending(sk)) { vsock_remove_pending(listener, sk); @@ -1292,7 +1300,7 @@ static int vsock_accept(struct socket *sock, struct socket *newsock, int flags) if (connected) { listener->sk_ack_backlog--; - lock_sock(connected); + lock_sock_nested(connected, SINGLE_DEPTH_NESTING); vconnected = vsock_sk(connected); /* If the listener socket has received an error, then we should -- cgit v1.2.3-70-g09d2 From 70a0dec45174c976c64b4c8c1d0898581f759948 Mon Sep 17 00:00:00 2001 From: Tom Goff Date: Thu, 23 Jun 2016 16:11:57 -0400 Subject: ipmr/ip6mr: Initialize the last assert time of mfc entries. This fixes wrong-interface signaling on 32-bit platforms for entries created when jiffies > 2^31 + MFC_ASSERT_THRESH. Signed-off-by: Tom Goff Signed-off-by: David S. Miller --- net/ipv4/ipmr.c | 4 +++- net/ipv6/ip6mr.c | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 21a38e296fe2..5ad48ec77710 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -891,8 +891,10 @@ static struct mfc_cache *ipmr_cache_alloc(void) { struct mfc_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); - if (c) + if (c) { + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXVIFS; + } return c; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index f2e2013f8346..487ef3bc7bbc 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -1074,6 +1074,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void) struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); if (!c) return NULL; + c->mfc_un.res.last_assert = jiffies - MFC_ASSERT_THRESH - 1; c->mfc_un.res.minvif = MAXMIFS; return c; } -- cgit v1.2.3-70-g09d2 From 0622cab0341cac6b30da177b0faa39fae0680e71 Mon Sep 17 00:00:00 2001 From: Jay Vosburgh Date: Thu, 23 Jun 2016 14:20:51 -0700 Subject: bonding: fix 802.3ad aggregator reselection Since commit 7bb11dc9f59d ("bonding: unify all places where actor-oper key needs to be updated."), the logic in bonding to handle selection between multiple aggregators has not functioned. This affects only configurations wherein the bonding slaves connect to two discrete aggregators (e.g., two independent switches, each with LACP enabled), thus creating two separate aggregation groups within a single bond. 
The cause is a change in 7bb11dc9f59d to no longer set AD_PORT_BEGIN on a port after a link state change, which would cause the port to be reselected for attachment to an aggregator as if were newly added to the bond. We cannot restore the prior behavior, as it contradicts IEEE 802.1AX 5.4.12, which requires ports that "become inoperable" (lose carrier, setting port_enabled=false as per 802.1AX 5.4.7) to remain selected (i.e., assigned to the aggregator). As the port now remains selected, the aggregator selection logic is not invoked. A side effect of this change is that aggregators in bonding will now contain ports that are link down. The aggregator selection logic does not currently handle this situation correctly, causing incorrect aggregator selection. This patch makes two changes to repair the aggregator selection logic in bonding to function as documented and within the confines of the standard: First, the aggregator selection and related logic now utilizes the number of active ports per aggregator, not the number of selected ports (as some selected ports may be down). The ad_select "bandwidth" and "count" options only consider ports that are link up. Second, on any carrier state change of any slave, the aggregator selection logic is explicitly called to insure the correct aggregator is active. Reported-by: Veli-Matti Lintu Fixes: 7bb11dc9f59d ("bonding: unify all places where actor-oper key needs to be updated.") Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_3ad.c | 64 +++++++++++++++++++++++++++++------------- 1 file changed, 45 insertions(+), 19 deletions(-) diff --git a/drivers/net/bonding/bond_3ad.c b/drivers/net/bonding/bond_3ad.c index b9304a295f86..ca81f46ea1aa 100644 --- a/drivers/net/bonding/bond_3ad.c +++ b/drivers/net/bonding/bond_3ad.c @@ -657,6 +657,20 @@ static void __set_agg_ports_ready(struct aggregator *aggregator, int val) } } +static int __agg_active_ports(struct aggregator *agg) +{ + struct port *port; + int active = 0; + + for (port = agg->lag_ports; port; + port = port->next_port_in_aggregator) { + if (port->is_enabled) + active++; + } + + return active; +} + /** * __get_agg_bandwidth - get the total bandwidth of an aggregator * @aggregator: the aggregator we're looking at @@ -664,39 +678,40 @@ static void __set_agg_ports_ready(struct aggregator *aggregator, int val) */ static u32 __get_agg_bandwidth(struct aggregator *aggregator) { + int nports = __agg_active_ports(aggregator); u32 bandwidth = 0; - if (aggregator->num_of_ports) { + if (nports) { switch (__get_link_speed(aggregator->lag_ports)) { case AD_LINK_SPEED_1MBPS: - bandwidth = aggregator->num_of_ports; + bandwidth = nports; break; case AD_LINK_SPEED_10MBPS: - bandwidth = aggregator->num_of_ports * 10; + bandwidth = nports * 10; break; case AD_LINK_SPEED_100MBPS: - bandwidth = aggregator->num_of_ports * 100; + bandwidth = nports * 100; break; case AD_LINK_SPEED_1000MBPS: - bandwidth = aggregator->num_of_ports * 1000; + bandwidth = nports * 1000; break; case AD_LINK_SPEED_2500MBPS: - bandwidth = aggregator->num_of_ports * 2500; + bandwidth = nports * 2500; break; case AD_LINK_SPEED_10000MBPS: - bandwidth = aggregator->num_of_ports * 10000; + bandwidth = nports * 10000; break; case AD_LINK_SPEED_20000MBPS: - bandwidth = aggregator->num_of_ports * 20000; + bandwidth = nports * 20000; break; case AD_LINK_SPEED_40000MBPS: - bandwidth = aggregator->num_of_ports * 40000; + bandwidth = nports * 40000; break; case AD_LINK_SPEED_56000MBPS: - bandwidth = 
aggregator->num_of_ports * 56000; + bandwidth = nports * 56000; break; case AD_LINK_SPEED_100000MBPS: - bandwidth = aggregator->num_of_ports * 100000; + bandwidth = nports * 100000; break; default: bandwidth = 0; /* to silence the compiler */ @@ -1530,10 +1545,10 @@ static struct aggregator *ad_agg_selection_test(struct aggregator *best, switch (__get_agg_selection_mode(curr->lag_ports)) { case BOND_AD_COUNT: - if (curr->num_of_ports > best->num_of_ports) + if (__agg_active_ports(curr) > __agg_active_ports(best)) return curr; - if (curr->num_of_ports < best->num_of_ports) + if (__agg_active_ports(curr) < __agg_active_ports(best)) return best; /*FALLTHROUGH*/ @@ -1561,8 +1576,14 @@ static int agg_device_up(const struct aggregator *agg) if (!port) return 0; - return netif_running(port->slave->dev) && - netif_carrier_ok(port->slave->dev); + for (port = agg->lag_ports; port; + port = port->next_port_in_aggregator) { + if (netif_running(port->slave->dev) && + netif_carrier_ok(port->slave->dev)) + return 1; + } + + return 0; } /** @@ -1610,7 +1631,7 @@ static void ad_agg_selection_logic(struct aggregator *agg, agg->is_active = 0; - if (agg->num_of_ports && agg_device_up(agg)) + if (__agg_active_ports(agg) && agg_device_up(agg)) best = ad_agg_selection_test(best, agg); } @@ -1622,7 +1643,7 @@ static void ad_agg_selection_logic(struct aggregator *agg, * answering partner. */ if (active && active->lag_ports && - active->lag_ports->is_enabled && + __agg_active_ports(active) && (__agg_has_partner(active) || (!__agg_has_partner(active) && !__agg_has_partner(best)))) { @@ -2133,7 +2154,7 @@ void bond_3ad_unbind_slave(struct slave *slave) else temp_aggregator->lag_ports = temp_port->next_port_in_aggregator; temp_aggregator->num_of_ports--; - if (temp_aggregator->num_of_ports == 0) { + if (__agg_active_ports(temp_aggregator) == 0) { select_new_active_agg = temp_aggregator->is_active; ad_clear_agg(temp_aggregator); if (select_new_active_agg) { @@ -2432,7 +2453,9 @@ void bond_3ad_adapter_speed_duplex_changed(struct slave *slave) */ void bond_3ad_handle_link_change(struct slave *slave, char link) { + struct aggregator *agg; struct port *port; + bool dummy; port = &(SLAVE_AD_INFO(slave)->port); @@ -2459,6 +2482,9 @@ void bond_3ad_handle_link_change(struct slave *slave, char link) port->is_enabled = false; ad_update_actor_keys(port, true); } + agg = __get_first_agg(port); + ad_agg_selection_logic(agg, &dummy); + netdev_dbg(slave->bond->dev, "Port %d changed link status to %s\n", port->actor_port_number, link == BOND_LINK_UP ? "UP" : "DOWN"); @@ -2499,7 +2525,7 @@ int bond_3ad_set_carrier(struct bonding *bond) active = __get_active_agg(&(SLAVE_AD_INFO(first_slave)->aggregator)); if (active) { /* are enough slaves available to consider link up? */ - if (active->num_of_ports < bond->params.min_links) { + if (__agg_active_ports(active) < bond->params.min_links) { if (netif_carrier_ok(bond->dev)) { netif_carrier_off(bond->dev); goto out; -- cgit v1.2.3-70-g09d2 From d2b13233879ca1268a1c027d4573109e5a777811 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 23 Jun 2016 14:23:12 -0700 Subject: net: bgmac: Fix SOF bit checking We are checking for the Start of Frame bit in the ctl1 word, while this bit is set in the ctl0 word instead. Read the ctl0 word and update the check to verify that. Fixes: 9cde94506eac ("bgmac: implement scatter/gather support") Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/ethernet/broadcom/bgmac.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index ee5f431ab32a..70926c611f25 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -267,15 +267,16 @@ static void bgmac_dma_tx_free(struct bgmac *bgmac, struct bgmac_dma_ring *ring) while (ring->start != ring->end) { int slot_idx = ring->start % BGMAC_TX_RING_SLOTS; struct bgmac_slot_info *slot = &ring->slots[slot_idx]; - u32 ctl1; + u32 ctl0, ctl1; int len; if (slot_idx == empty_slot) break; + ctl0 = le32_to_cpu(ring->cpu_base[slot_idx].ctl0); ctl1 = le32_to_cpu(ring->cpu_base[slot_idx].ctl1); len = ctl1 & BGMAC_DESC_CTL1_LEN; - if (ctl1 & BGMAC_DESC_CTL0_SOF) + if (ctl0 & BGMAC_DESC_CTL0_SOF) /* Unmap no longer used buffer */ dma_unmap_single(dma_dev, slot->dma_addr, len, DMA_TO_DEVICE); -- cgit v1.2.3-70-g09d2 From c3897f2a69e54dd113fc9abd2daf872e5b495798 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 23 Jun 2016 14:25:32 -0700 Subject: net: bgmac: Start transmit queue in bgmac_open The driver does not start the transmit queue in bgmac_open(). If the queue was stopped prior to closing then re-opening the interface, we would never be able to wake-up again. Fixes: dd4544f05469 ("bgmac: driver for GBit MAC core on BCMA bus") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 70926c611f25..85cd07f72ffb 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1314,6 +1314,9 @@ static int bgmac_open(struct net_device *net_dev) phy_start(bgmac->phy_dev); netif_carrier_on(net_dev); + + netif_start_queue(net_dev); + return 0; } -- cgit v1.2.3-70-g09d2 From 3894396e64994f31c3ef5c7e6f63dded0593e567 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 23 Jun 2016 14:25:33 -0700 Subject: net: bgmac: Remove superflous netif_carrier_on() bgmac_open() calls phy_start() to initialize the PHY state machine, which will set the interface's carrier state accordingly, no need to force that as this could be conflicting with the PHY state determined by PHYLIB. Fixes: dd4544f05469 ("bgmac: driver for GBit MAC core on BCMA bus") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bgmac.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bgmac.c b/drivers/net/ethernet/broadcom/bgmac.c index 85cd07f72ffb..a6333d38ecc0 100644 --- a/drivers/net/ethernet/broadcom/bgmac.c +++ b/drivers/net/ethernet/broadcom/bgmac.c @@ -1313,8 +1313,6 @@ static int bgmac_open(struct net_device *net_dev) phy_start(bgmac->phy_dev); - netif_carrier_on(net_dev); - netif_start_queue(net_dev); return 0; -- cgit v1.2.3-70-g09d2 From f299a02d5f13c4deb52c1a7ddf2b42630fe6294a Mon Sep 17 00:00:00 2001 From: Wang Sheng-Hui Date: Fri, 24 Jun 2016 08:52:11 +0800 Subject: net/mlx5: use mlx5_buf_alloc_node instead of mlx5_buf_alloc in mlx5_wq_ll_create Commit 311c7c71c9bb ("net/mlx5e: Allocate DMA coherent memory on reader NUMA node") introduced mlx5_*_alloc_node() but missed changing some calling and warn messages. 
This patch introduces 2 changes: * Use mlx5_buf_alloc_node() instead of mlx5_buf_alloc() in mlx5_wq_ll_create() * Update the failure warn messages with _node postfix for mlx5_*_alloc function names Fixes: 311c7c71c9bb ("net/mlx5e: Allocate DMA coherent memory on reader NUMA node") Signed-off-by: Wang Sheng-Hui Acked-By: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/wq.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.c b/drivers/net/ethernet/mellanox/mlx5/core/wq.c index ce21ee5b2357..821a087c7ae2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.c @@ -75,14 +75,14 @@ int mlx5_wq_cyc_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } err = mlx5_buf_alloc_node(mdev, mlx5_wq_cyc_get_byte_size(wq), &wq_ctrl->buf, param->buf_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err); goto err_db_free; } @@ -111,14 +111,14 @@ int mlx5_cqwq_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } err = mlx5_buf_alloc_node(mdev, mlx5_cqwq_get_byte_size(wq), &wq_ctrl->buf, param->buf_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err); goto err_db_free; } @@ -148,13 +148,14 @@ int mlx5_wq_ll_create(struct mlx5_core_dev *mdev, struct mlx5_wq_param *param, err = mlx5_db_alloc_node(mdev, &wq_ctrl->db, param->db_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_db_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_db_alloc_node() failed, %d\n", err); return err; } - err = mlx5_buf_alloc(mdev, mlx5_wq_ll_get_byte_size(wq), &wq_ctrl->buf); + err = mlx5_buf_alloc_node(mdev, mlx5_wq_ll_get_byte_size(wq), + &wq_ctrl->buf, param->buf_numa_node); if (err) { - mlx5_core_warn(mdev, "mlx5_buf_alloc() failed, %d\n", err); + mlx5_core_warn(mdev, "mlx5_buf_alloc_node() failed, %d\n", err); goto err_db_free; } -- cgit v1.2.3-70-g09d2 From 126e7557328a1cd576be4fca95b133a2695283ff Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sun, 19 Jun 2016 23:51:02 +0300 Subject: mac80211: Fix mesh estab_plinks counting in STA removal case If a user space program (e.g., wpa_supplicant) deletes a STA entry that is currently in NL80211_PLINK_ESTAB state, the number of established plinks counter was not decremented and this could result in rejecting new plink establishment before really hitting the real maximum plink limit. For !user_mpm case, this decrementation is handled by mesh_plink_deactive(). Fix this by decrementing estab_plinks on STA deletion (mesh_sta_cleanup() gets called from there) so that the counter has a correct value and the Beacon frame advertisement in Mesh Configuration element shows the proper value for capability to accept additional peers. 
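The accounting invariant the change restores can be restated with a small sketch (illustrative structure and names only, not mac80211 code): a peer that reached the established state must decrement the counter exactly once when its station entry is removed, regardless of whether the MPM runs in the kernel or in user space.

#include <stdbool.h>

/* Sketch: established-peer-link bookkeeping on station removal. */
struct example_mesh_if {
	unsigned int estab_plinks;
};

static void example_sta_removed(struct example_mesh_if *mif,
				bool plink_established)
{
	if (plink_established && mif->estab_plinks > 0)
		mif->estab_plinks--;	/* one decrement per established peer */
}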
Cc: stable@vger.kernel.org Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 21b1fdf5d01d..6a1603bcdced 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -148,14 +148,17 @@ u32 mesh_accept_plinks_update(struct ieee80211_sub_if_data *sdata) void mesh_sta_cleanup(struct sta_info *sta) { struct ieee80211_sub_if_data *sdata = sta->sdata; - u32 changed; + u32 changed = 0; /* * maybe userspace handles peer allocation and peering, but in either * case the beacon is still generated by the kernel and we might need * an update. */ - changed = mesh_accept_plinks_update(sdata); + if (sdata->u.mesh.user_mpm && + sta->mesh->plink_state == NL80211_PLINK_ESTAB) + changed |= mesh_plink_dec_estab_count(sdata); + changed |= mesh_accept_plinks_update(sdata); if (!sdata->u.mesh.user_mpm) { changed |= mesh_plink_deactivate(sta); del_timer_sync(&sta->mesh->plink_timer); -- cgit v1.2.3-70-g09d2 From 0888d5f3c0f183ea6177355752ada433d370ac89 Mon Sep 17 00:00:00 2001 From: daniel Date: Fri, 24 Jun 2016 12:35:18 +0200 Subject: Bridge: Fix ipv6 mc snooping if bridge has no ipv6 address The bridge is falsely dropping IPv6 multicast packets if there is: 1. No IPv6 address assigned on the bridge. 2. No external MLD querier present. 3. The internal querier enabled. When the bridge fails to build MLD queries because it has no IPv6 address, it silently returns, but keeps the local querier enabled. This specific case causes confusing packet loss. IPv6 multicast snooping can only work if: a) an external querier is present, OR b) the bridge has an IPv6 address and is capable of sending its own queries. Otherwise it has to forward/flood the IPv6 multicast traffic, because snooping cannot work. This patch fixes the issue by adding a flag to the bridge struct that indicates that there is currently no IPv6 address assigned to the bridge, and by returning a false state for the local querier in __br_multicast_querier_exists(). Special thanks to Linus Lüssing. Fixes: d1d81d4c3dd8 ("bridge: check return value of ipv6_dev_get_saddr()") Signed-off-by: Daniel Danzberger Acked-by: Linus Lüssing Signed-off-by: David S.
Miller --- net/bridge/br_multicast.c | 4 ++++ net/bridge/br_private.h | 23 +++++++++++++++++++---- 2 files changed, 23 insertions(+), 4 deletions(-) diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index 6852f3c7009c..43844144c9c4 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -464,8 +464,11 @@ static struct sk_buff *br_ip6_multicast_alloc_query(struct net_bridge *br, if (ipv6_dev_get_saddr(dev_net(br->dev), br->dev, &ip6h->daddr, 0, &ip6h->saddr)) { kfree_skb(skb); + br->has_ipv6_addr = 0; return NULL; } + + br->has_ipv6_addr = 1; ipv6_eth_mc_map(&ip6h->daddr, eth->h_dest); hopopt = (u8 *)(ip6h + 1); @@ -1745,6 +1748,7 @@ void br_multicast_init(struct net_bridge *br) br->ip6_other_query.delay_time = 0; br->ip6_querier.port = NULL; #endif + br->has_ipv6_addr = 1; spin_lock_init(&br->multicast_lock); setup_timer(&br->multicast_router_timer, diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index c7fb5d7a7218..52edecf3c294 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -314,6 +314,7 @@ struct net_bridge u8 multicast_disabled:1; u8 multicast_querier:1; u8 multicast_query_use_ifaddr:1; + u8 has_ipv6_addr:1; u32 hash_elasticity; u32 hash_max; @@ -588,10 +589,22 @@ static inline bool br_multicast_is_router(struct net_bridge *br) static inline bool __br_multicast_querier_exists(struct net_bridge *br, - struct bridge_mcast_other_query *querier) + struct bridge_mcast_other_query *querier, + const bool is_ipv6) { + bool own_querier_enabled; + + if (br->multicast_querier) { + if (is_ipv6 && !br->has_ipv6_addr) + own_querier_enabled = false; + else + own_querier_enabled = true; + } else { + own_querier_enabled = false; + } + return time_is_before_jiffies(querier->delay_time) && - (br->multicast_querier || timer_pending(&querier->timer)); + (own_querier_enabled || timer_pending(&querier->timer)); } static inline bool br_multicast_querier_exists(struct net_bridge *br, @@ -599,10 +612,12 @@ static inline bool br_multicast_querier_exists(struct net_bridge *br, { switch (eth->h_proto) { case (htons(ETH_P_IP)): - return __br_multicast_querier_exists(br, &br->ip4_other_query); + return __br_multicast_querier_exists(br, + &br->ip4_other_query, false); #if IS_ENABLED(CONFIG_IPV6) case (htons(ETH_P_IPV6)): - return __br_multicast_querier_exists(br, &br->ip6_other_query); + return __br_multicast_querier_exists(br, + &br->ip6_other_query, true); #endif default: return false; -- cgit v1.2.3-70-g09d2 From ab8ed951080e8f59b1da4ea10ac7c05ba24a3cbd Mon Sep 17 00:00:00 2001 From: Aaron Campbell Date: Fri, 24 Jun 2016 10:05:32 -0300 Subject: connector: fix out-of-order cn_proc netlink message delivery The proc connector messages include a sequence number, allowing userspace programs to detect lost messages. However, performing this detection is currently more difficult than necessary, since netlink messages can be delivered to the application out-of-order. To fix this, leave pre-emption disabled during cn_netlink_send(), and use GFP_NOWAIT. The following was written as a test case. Building the kernel w/ make -j32 proved a reliable way to generate out-of-order cn_proc messages. 
int main(int argc, char *argv[]) { static uint32_t last_seq[CPU_SETSIZE], seq; int cpu, fd; struct sockaddr_nl sa; struct __attribute__((aligned(NLMSG_ALIGNTO))) { struct nlmsghdr nl_hdr; struct __attribute__((__packed__)) { struct cn_msg cn_msg; struct proc_event cn_proc; }; } rmsg; struct __attribute__((aligned(NLMSG_ALIGNTO))) { struct nlmsghdr nl_hdr; struct __attribute__((__packed__)) { struct cn_msg cn_msg; enum proc_cn_mcast_op cn_mcast; }; } smsg; fd = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); if (fd < 0) { perror("socket"); } sa.nl_family = AF_NETLINK; sa.nl_groups = CN_IDX_PROC; sa.nl_pid = getpid(); if (bind(fd, (struct sockaddr *)&sa, sizeof(sa)) < 0) { perror("bind"); } memset(&smsg, 0, sizeof(smsg)); smsg.nl_hdr.nlmsg_len = sizeof(smsg); smsg.nl_hdr.nlmsg_pid = getpid(); smsg.nl_hdr.nlmsg_type = NLMSG_DONE; smsg.cn_msg.id.idx = CN_IDX_PROC; smsg.cn_msg.id.val = CN_VAL_PROC; smsg.cn_msg.len = sizeof(enum proc_cn_mcast_op); smsg.cn_mcast = PROC_CN_MCAST_LISTEN; if (send(fd, &smsg, sizeof(smsg), 0) != sizeof(smsg)) { perror("send"); } while (recv(fd, &rmsg, sizeof(rmsg), 0) == sizeof(rmsg)) { cpu = rmsg.cn_proc.cpu; if (cpu < 0) { continue; } seq = rmsg.cn_msg.seq; if ((last_seq[cpu] != 0) && (seq != last_seq[cpu] + 1)) { printf("out-of-order seq=%d on cpu=%d\n", seq, cpu); } last_seq[cpu] = seq; } /* NOTREACHED */ perror("recv"); return -1; } Signed-off-by: Aaron Campbell Signed-off-by: David S. Miller --- drivers/connector/cn_proc.c | 43 ++++++++++++++++++++++--------------------- 1 file changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/connector/cn_proc.c b/drivers/connector/cn_proc.c index 15d06fcf0b50..b02f9c606e0b 100644 --- a/drivers/connector/cn_proc.c +++ b/drivers/connector/cn_proc.c @@ -56,11 +56,21 @@ static struct cb_id cn_proc_event_id = { CN_IDX_PROC, CN_VAL_PROC }; /* proc_event_counts is used as the sequence number of the netlink message */ static DEFINE_PER_CPU(__u32, proc_event_counts) = { 0 }; -static inline void get_seq(__u32 *ts, int *cpu) +static inline void send_msg(struct cn_msg *msg) { preempt_disable(); - *ts = __this_cpu_inc_return(proc_event_counts) - 1; - *cpu = smp_processor_id(); + + msg->seq = __this_cpu_inc_return(proc_event_counts) - 1; + ((struct proc_event *)msg->data)->cpu = smp_processor_id(); + + /* + * Preemption remains disabled during send to ensure the messages are + * ordered according to their sequence numbers. + * + * If cn_netlink_send() fails, the data is not sent. 
+ */ + cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_NOWAIT); + preempt_enable(); } @@ -77,7 +87,6 @@ void proc_fork_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_FORK; rcu_read_lock(); @@ -92,8 +101,7 @@ void proc_fork_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - /* If cn_netlink_send() failed, the data is not sent */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_exec_connector(struct task_struct *task) @@ -108,7 +116,6 @@ void proc_exec_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_EXEC; ev->event_data.exec.process_pid = task->pid; @@ -118,7 +125,7 @@ void proc_exec_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_id_connector(struct task_struct *task, int which_id) @@ -150,14 +157,13 @@ void proc_id_connector(struct task_struct *task, int which_id) return; } rcu_read_unlock(); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); memcpy(&msg->id, &cn_proc_event_id, sizeof(msg->id)); msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_sid_connector(struct task_struct *task) @@ -172,7 +178,6 @@ void proc_sid_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_SID; ev->event_data.sid.process_pid = task->pid; @@ -182,7 +187,7 @@ void proc_sid_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_ptrace_connector(struct task_struct *task, int ptrace_id) @@ -197,7 +202,6 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_PTRACE; ev->event_data.ptrace.process_pid = task->pid; @@ -215,7 +219,7 @@ void proc_ptrace_connector(struct task_struct *task, int ptrace_id) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_comm_connector(struct task_struct *task) @@ -230,7 +234,6 @@ void proc_comm_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_COMM; ev->event_data.comm.process_pid = task->pid; @@ -241,7 +244,7 @@ void proc_comm_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 
0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_coredump_connector(struct task_struct *task) @@ -256,7 +259,6 @@ void proc_coredump_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_COREDUMP; ev->event_data.coredump.process_pid = task->pid; @@ -266,7 +268,7 @@ void proc_coredump_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } void proc_exit_connector(struct task_struct *task) @@ -281,7 +283,6 @@ void proc_exit_connector(struct task_struct *task) msg = buffer_to_cn_msg(buffer); ev = (struct proc_event *)msg->data; memset(&ev->event_data, 0, sizeof(ev->event_data)); - get_seq(&msg->seq, &ev->cpu); ev->timestamp_ns = ktime_get_ns(); ev->what = PROC_EVENT_EXIT; ev->event_data.exit.process_pid = task->pid; @@ -293,7 +294,7 @@ void proc_exit_connector(struct task_struct *task) msg->ack = 0; /* not used */ msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } /* @@ -325,7 +326,7 @@ static void cn_proc_ack(int err, int rcvd_seq, int rcvd_ack) msg->ack = rcvd_ack + 1; msg->len = sizeof(*ev); msg->flags = 0; /* not used */ - cn_netlink_send(msg, 0, CN_IDX_PROC, GFP_KERNEL); + send_msg(msg); } /** -- cgit v1.2.3-70-g09d2 From 9a0fee2b552b1235fb1706ae1fc664ae74573be8 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 24 Jun 2016 16:02:35 -0400 Subject: sock_diag: do not broadcast raw socket destruction Diag intends to broadcast tcp_sk and udp_sk socket destruction. Testing sk->sk_protocol for IPPROTO_TCP/IPPROTO_UDP alone is not sufficient for this. Raw sockets can have the same type. Add a test for sk->sk_type. Fixes: eb4cb008529c ("sock_diag: define destruction multicast groups") Signed-off-by: Willem de Bruijn Signed-off-by: David S. Miller --- include/linux/sock_diag.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index 4018b48f2b3b..a0596ca0e80a 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -36,6 +36,9 @@ enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk) { switch (sk->sk_family) { case AF_INET: + if (sk->sk_type == SOCK_RAW) + return SKNLGRP_NONE; + switch (sk->sk_protocol) { case IPPROTO_TCP: return SKNLGRP_INET_TCP_DESTROY; @@ -45,6 +48,9 @@ enum sknetlink_groups sock_diag_destroy_group(const struct sock *sk) return SKNLGRP_NONE; } case AF_INET6: + if (sk->sk_type == SOCK_RAW) + return SKNLGRP_NONE; + switch (sk->sk_protocol) { case IPPROTO_TCP: return SKNLGRP_INET6_TCP_DESTROY; -- cgit v1.2.3-70-g09d2 From 69fc58a57e56bf5e39b48809aefffdaa1b04c945 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 24 Jun 2016 16:25:24 -0700 Subject: net: phy: Manage fixed PHY address space using IDA If we have a system which uses fixed PHY devices and calls fixed_phy_register() and then fixed_phy_unregister(), we can exhaust the number of fixed PHYs available after a while, since we keep incrementing the variable phy_fixed_addr, but we never decrement it. This patch fixes that by converting the fixed PHY address allocation to IDA, which takes care of the allocation/deallocation of the PHY addresses for us.
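For background, a minimal sketch of the IDA allocate/release pattern the patch switches to is shown here (illustrative only; the example_* names are made up, while DEFINE_IDA(), ida_simple_get(), ida_simple_remove() and PHY_MAX_ADDR are the same kernel interfaces used in the diff below). Unlike a bare monotonically increasing counter, IDA hands out the lowest free ID and returns released IDs to the pool for reuse:

        #include <linux/idr.h>
        #include <linux/phy.h>

        static DEFINE_IDA(example_ida);

        /* Allocate the lowest free address in [0, PHY_MAX_ADDR), or return
         * a negative errno when the range is exhausted. */
        static int example_addr_alloc(void)
        {
                return ida_simple_get(&example_ida, 0, PHY_MAX_ADDR, GFP_KERNEL);
        }

        /* Hand the address back so the next caller can reuse it, which is
         * what the old phy_fixed_addr counter never did. */
        static void example_addr_free(int addr)
        {
                ida_simple_remove(&example_ida, addr);
        }

This mirrors the ida_simple_get()/ida_simple_remove() pairing that fixed_phy_register() and fixed_phy_del() use in the patch.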
Fixes: a75951217472 ("net: phy: extend fixed driver with fixed_phy_register()") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/fixed_phy.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/drivers/net/phy/fixed_phy.c b/drivers/net/phy/fixed_phy.c index 2d2e4339f0df..9ec7f7353434 100644 --- a/drivers/net/phy/fixed_phy.c +++ b/drivers/net/phy/fixed_phy.c @@ -23,6 +23,7 @@ #include #include #include +#include #define MII_REGS_NUM 29 @@ -286,6 +287,8 @@ err_regs: } EXPORT_SYMBOL_GPL(fixed_phy_add); +static DEFINE_IDA(phy_fixed_ida); + static void fixed_phy_del(int phy_addr) { struct fixed_mdio_bus *fmb = &platform_fmb; @@ -297,14 +300,12 @@ static void fixed_phy_del(int phy_addr) if (gpio_is_valid(fp->link_gpio)) gpio_free(fp->link_gpio); kfree(fp); + ida_simple_remove(&phy_fixed_ida, phy_addr); return; } } } -static int phy_fixed_addr; -static DEFINE_SPINLOCK(phy_fixed_addr_lock); - struct phy_device *fixed_phy_register(unsigned int irq, struct fixed_phy_status *status, int link_gpio, @@ -319,17 +320,15 @@ struct phy_device *fixed_phy_register(unsigned int irq, return ERR_PTR(-EPROBE_DEFER); /* Get the next available PHY address, up to PHY_MAX_ADDR */ - spin_lock(&phy_fixed_addr_lock); - if (phy_fixed_addr == PHY_MAX_ADDR) { - spin_unlock(&phy_fixed_addr_lock); - return ERR_PTR(-ENOSPC); - } - phy_addr = phy_fixed_addr++; - spin_unlock(&phy_fixed_addr_lock); + phy_addr = ida_simple_get(&phy_fixed_ida, 0, PHY_MAX_ADDR, GFP_KERNEL); + if (phy_addr < 0) + return ERR_PTR(phy_addr); ret = fixed_phy_add(irq, phy_addr, status, link_gpio); - if (ret < 0) + if (ret < 0) { + ida_simple_remove(&phy_fixed_ida, phy_addr); return ERR_PTR(ret); + } phy = get_phy_device(fmb->mii_bus, phy_addr, false); if (IS_ERR(phy)) { @@ -434,6 +433,7 @@ static void __exit fixed_mdio_bus_exit(void) list_del(&fp->node); kfree(fp); } + ida_destroy(&phy_fixed_ida); } module_exit(fixed_mdio_bus_exit); -- cgit v1.2.3-70-g09d2 From 0b3dd7dfb81ad8af53791ea2bb64b83bac1b7d32 Mon Sep 17 00:00:00 2001 From: Simon Wunderlich Date: Sun, 26 Jun 2016 11:16:09 +0200 Subject: batman-adv: replace WARN with rate limited output on non-existing VLAN If a VLAN tagged frame is received and the corresponding VLAN is not configured on the soft interface, it will splat a WARN on every packet received. This is a quite annoying behaviour for some scenarios, e.g. if bat0 is bridged with eth0, and there are arbitrary VLAN tagged frames from Ethernet coming in without having any VLAN configuration on bat0. The code should probably create vlan objects on the fly and transparently transport these VLAN-tagged Ethernet frames, but until this is done, at least the WARN splat should be replaced by a rate limited output. Fixes: 354136bcc3c4 ("batman-adv: fix kernel crash due to missing NULL checks") Signed-off-by: Simon Wunderlich Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: David S. 
Miller --- net/batman-adv/translation-table.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index feaf492b01ca..72abab7b01eb 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -650,8 +650,10 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, /* increase the refcounter of the related vlan */ vlan = batadv_softif_vlan_get(bat_priv, vid); - if (WARN(!vlan, "adding TT local entry %pM to non-existent VLAN %d", - addr, BATADV_PRINT_VID(vid))) { + if (!vlan) { + net_ratelimited_function(batadv_info, soft_iface, + "adding TT local entry %pM to non-existent VLAN %d\n", + addr, BATADV_PRINT_VID(vid)); kfree(tt_local); tt_local = NULL; goto out; -- cgit v1.2.3-70-g09d2 From 9c4604a298e0a9807eaf2cd912d1ebf24d98fbeb Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 26 Jun 2016 11:16:10 +0200 Subject: batman-adv: Fix use-after-free/double-free of tt_req_node The tt_req_node is added and removed from a list inside a spinlock. But the locking is sometimes removed even when the object is still referenced and will be used later via this reference. For example batadv_send_tt_request can create a new tt_req_node (including add to a list) and later re-acquires the lock to remove it from the list and to free it. But at this time another context could have already removed this tt_req_node from the list and freed it. CPU#0 batadv_batman_skb_recv from net_device 0 -> batadv_iv_ogm_receive -> batadv_iv_ogm_process -> batadv_iv_ogm_process_per_outif -> batadv_tvlv_ogm_receive -> batadv_tvlv_ogm_receive -> batadv_tvlv_containers_process -> batadv_tvlv_call_handler -> batadv_tt_tvlv_ogm_handler_v1 -> batadv_tt_update_orig -> batadv_send_tt_request -> batadv_tt_req_node_new spin_lock(...) allocates new tt_req_node and adds it to list spin_unlock(...) return tt_req_node CPU#1 batadv_batman_skb_recv from net_device 1 -> batadv_recv_unicast_tvlv -> batadv_tvlv_containers_process -> batadv_tvlv_call_handler -> batadv_tt_tvlv_unicast_handler_v1 -> batadv_handle_tt_response spin_lock(...) tt_req_node gets removed from list and is freed spin_unlock(...) CPU#0 <- returned to batadv_send_tt_request spin_lock(...) tt_req_node gets removed from list and is freed MEMORY CORRUPTION/SEGFAULT/... spin_unlock(...) This can only be solved via reference counting to allow multiple contexts to handle the list manipulation while making sure that only the last context holding a reference will free the object. Fixes: a73105b8d4c7 ("batman-adv: improved client announcement mechanism") Signed-off-by: Sven Eckelmann Tested-by: Martin Weinelt Tested-by: Amadeus Alfa Signed-off-by: Marek Lindner Signed-off-by: David S. 
Miller --- net/batman-adv/translation-table.c | 43 ++++++++++++++++++++++++++++++++------ net/batman-adv/types.h | 2 ++ 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index 72abab7b01eb..cfb5ccdfd62b 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -2271,6 +2271,29 @@ static u32 batadv_tt_local_crc(struct batadv_priv *bat_priv, return crc; } +/** + * batadv_tt_req_node_release - free tt_req node entry + * @ref: kref pointer of the tt req_node entry + */ +static void batadv_tt_req_node_release(struct kref *ref) +{ + struct batadv_tt_req_node *tt_req_node; + + tt_req_node = container_of(ref, struct batadv_tt_req_node, refcount); + + kfree(tt_req_node); +} + +/** + * batadv_tt_req_node_put - decrement the tt_req_node refcounter and + * possibly release it + * @tt_req_node: tt_req_node to be free'd + */ +static void batadv_tt_req_node_put(struct batadv_tt_req_node *tt_req_node) +{ + kref_put(&tt_req_node->refcount, batadv_tt_req_node_release); +} + static void batadv_tt_req_list_free(struct batadv_priv *bat_priv) { struct batadv_tt_req_node *node; @@ -2280,7 +2303,7 @@ static void batadv_tt_req_list_free(struct batadv_priv *bat_priv) hlist_for_each_entry_safe(node, safe, &bat_priv->tt.req_list, list) { hlist_del_init(&node->list); - kfree(node); + batadv_tt_req_node_put(node); } spin_unlock_bh(&bat_priv->tt.req_list_lock); @@ -2317,7 +2340,7 @@ static void batadv_tt_req_purge(struct batadv_priv *bat_priv) if (batadv_has_timed_out(node->issued_at, BATADV_TT_REQUEST_TIMEOUT)) { hlist_del_init(&node->list); - kfree(node); + batadv_tt_req_node_put(node); } } spin_unlock_bh(&bat_priv->tt.req_list_lock); @@ -2349,9 +2372,11 @@ batadv_tt_req_node_new(struct batadv_priv *bat_priv, if (!tt_req_node) goto unlock; + kref_init(&tt_req_node->refcount); ether_addr_copy(tt_req_node->addr, orig_node->orig); tt_req_node->issued_at = jiffies; + kref_get(&tt_req_node->refcount); hlist_add_head(&tt_req_node->list, &bat_priv->tt.req_list); unlock: spin_unlock_bh(&bat_priv->tt.req_list_lock); @@ -2615,13 +2640,19 @@ static bool batadv_send_tt_request(struct batadv_priv *bat_priv, out: if (primary_if) batadv_hardif_put(primary_if); + if (ret && tt_req_node) { spin_lock_bh(&bat_priv->tt.req_list_lock); - /* hlist_del_init() verifies tt_req_node still is in the list */ - hlist_del_init(&tt_req_node->list); + if (!hlist_unhashed(&tt_req_node->list)) { + hlist_del_init(&tt_req_node->list); + batadv_tt_req_node_put(tt_req_node); + } spin_unlock_bh(&bat_priv->tt.req_list_lock); - kfree(tt_req_node); } + + if (tt_req_node) + batadv_tt_req_node_put(tt_req_node); + kfree(tvlv_tt_data); return ret; } @@ -3057,7 +3088,7 @@ static void batadv_handle_tt_response(struct batadv_priv *bat_priv, if (!batadv_compare_eth(node->addr, resp_src)) continue; hlist_del_init(&node->list); - kfree(node); + batadv_tt_req_node_put(node); } spin_unlock_bh(&bat_priv->tt.req_list_lock); diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h index 6a577f4f8ba7..ba846b078af8 100644 --- a/net/batman-adv/types.h +++ b/net/batman-adv/types.h @@ -1137,11 +1137,13 @@ struct batadv_tt_change_node { * struct batadv_tt_req_node - data to keep track of the tt requests in flight * @addr: mac address address of the originator this request was sent to * @issued_at: timestamp used for purging stale tt requests + * @refcount: number of contexts the object is used by * @list: list node for batadv_priv_tt::req_list */ struct 
batadv_tt_req_node { u8 addr[ETH_ALEN]; unsigned long issued_at; + struct kref refcount; struct hlist_node list; }; -- cgit v1.2.3-70-g09d2 From baceced93274ff2f846eae991664f9094425ffa8 Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Sun, 26 Jun 2016 11:16:11 +0200 Subject: batman-adv: Fix double-put of vlan object Each batadv_tt_local_entry holds a single reference to a batadv_softif_vlan. In case a new entry cannot be added to the hash table, the error path puts the reference, but the reference will also now be dropped by batadv_tt_local_entry_release(). Fixes: a33d970d0b54 ("batman-adv: Fix reference counting of vlan object for tt_local_entry") Signed-off-by: Ben Hutchings Signed-off-by: Marek Lindner Signed-off-by: Sven Eckelmann Signed-off-by: David S. Miller --- net/batman-adv/translation-table.c | 1 - 1 file changed, 1 deletion(-) diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c index cfb5ccdfd62b..57ec87f37050 100644 --- a/net/batman-adv/translation-table.c +++ b/net/batman-adv/translation-table.c @@ -693,7 +693,6 @@ bool batadv_tt_local_add(struct net_device *soft_iface, const u8 *addr, if (unlikely(hash_added != 0)) { /* remove the reference for the hash */ batadv_tt_local_entry_put(tt_local); - batadv_softif_vlan_put(vlan); goto out; } -- cgit v1.2.3-70-g09d2 From 3b55e4422087f9f7b241031d758a0c65584e4297 Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 26 Jun 2016 11:16:12 +0200 Subject: batman-adv: Fix ICMP RR ethernet access after skb_linearize skb_linearize() may reallocate the skb, which makes the previously calculated ethhdr pointer invalid. But the pointer is used later to fill in the RR field of the batadv_icmp_packet_rr packet. Instead, re-evaluate eth_hdr() after the skb_linearize()+skb_cow() calls to fix the pointer and avoid the invalid read. Fixes: da6b8c20a5b8 ("batman-adv: generalize batman-adv icmp packet handling") Signed-off-by: Sven Eckelmann Signed-off-by: Marek Lindner Signed-off-by: David S. Miller --- net/batman-adv/routing.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c index e3857ed4057f..6c2901a86230 100644 --- a/net/batman-adv/routing.c +++ b/net/batman-adv/routing.c @@ -374,6 +374,7 @@ int batadv_recv_icmp_packet(struct sk_buff *skb, if (skb_cow(skb, ETH_HLEN) < 0) goto out; + ethhdr = eth_hdr(skb); icmph = (struct batadv_icmp_header *)skb->data; icmp_packet_rr = (struct batadv_icmp_packet_rr *)icmph; if (icmp_packet_rr->rr_cur >= BATADV_RR_LEN) -- cgit v1.2.3-70-g09d2 From 420cb1b764f9169c5d2601b4af90e4a1702345ee Mon Sep 17 00:00:00 2001 From: Sven Eckelmann Date: Sun, 26 Jun 2016 11:16:13 +0200 Subject: batman-adv: Clean up untagged vlan when destroying via rtnl-link The untagged vlan object is only destroyed when the interface is removed via the legacy sysfs interface. But it also has to be destroyed when the standard rtnl-link interface is used. Fixes: 5d2c05b21337 ("batman-adv: add per VLAN interface attribute framework") Signed-off-by: Sven Eckelmann Acked-by: Antonio Quartulli Signed-off-by: Marek Lindner Signed-off-by: David S.
Miller --- net/batman-adv/soft-interface.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c index 343d2c904399..287a3879ed7e 100644 --- a/net/batman-adv/soft-interface.c +++ b/net/batman-adv/soft-interface.c @@ -1033,7 +1033,9 @@ void batadv_softif_destroy_sysfs(struct net_device *soft_iface) static void batadv_softif_destroy_netlink(struct net_device *soft_iface, struct list_head *head) { + struct batadv_priv *bat_priv = netdev_priv(soft_iface); struct batadv_hard_iface *hard_iface; + struct batadv_softif_vlan *vlan; list_for_each_entry(hard_iface, &batadv_hardif_list, list) { if (hard_iface->soft_iface == soft_iface) @@ -1041,6 +1043,13 @@ static void batadv_softif_destroy_netlink(struct net_device *soft_iface, BATADV_IF_CLEANUP_KEEP); } + /* destroy the "untagged" VLAN */ + vlan = batadv_softif_vlan_get(bat_priv, BATADV_NO_FLAGS); + if (vlan) { + batadv_softif_destroy_vlan(bat_priv, vlan); + batadv_softif_vlan_put(vlan); + } + batadv_sysfs_del_meshif(soft_iface); unregister_netdevice_queue(soft_iface, head); } -- cgit v1.2.3-70-g09d2 From 3ec0a0f10ceb77c78f540ded1d13bf0cf7f89a07 Mon Sep 17 00:00:00 2001 From: Harini Katakam Date: Mon, 27 Jun 2016 13:09:59 +0530 Subject: net: marvell: Add separate config ANEG function for Marvell 88E1111 Marvell 88E1111 currently uses the generic marvell config ANEG function. This function has a sequence accessing Page 5 and Register 31, both of which are not defined or reserved for this PHY. Hence this patch adds a new config ANEG function for Marvell 88E1111 without these erroneous accesses. Signed-off-by: Harini Katakam Signed-off-by: David S. Miller --- drivers/net/phy/marvell.c | 44 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 43 insertions(+), 1 deletion(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 79ccc11facc3..ec2c1eee6405 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -285,6 +285,48 @@ static int marvell_config_aneg(struct phy_device *phydev) return 0; } +static int m88e1111_config_aneg(struct phy_device *phydev) +{ + int err; + + /* The Marvell PHY has an errata which requires + * that certain registers get written in order + * to restart autonegotiation + */ + err = phy_write(phydev, MII_BMCR, BMCR_RESET); + + err = marvell_set_polarity(phydev, phydev->mdix); + if (err < 0) + return err; + + err = phy_write(phydev, MII_M1111_PHY_LED_CONTROL, + MII_M1111_PHY_LED_DIRECT); + if (err < 0) + return err; + + err = genphy_config_aneg(phydev); + if (err < 0) + return err; + + if (phydev->autoneg != AUTONEG_ENABLE) { + int bmcr; + + /* A write to speed/duplex bits (that is performed by + * genphy_config_aneg() call above) must be followed by + * a software reset. Otherwise, the write has no effect. 
+ */ + bmcr = phy_read(phydev, MII_BMCR); + if (bmcr < 0) + return bmcr; + + err = phy_write(phydev, MII_BMCR, bmcr | BMCR_RESET); + if (err < 0) + return err; + } + + return 0; +} + #ifdef CONFIG_OF_MDIO /* * Set and/or override some configuration registers based on the @@ -1175,7 +1217,7 @@ static struct phy_driver marvell_drivers[] = { .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &m88e1111_config_init, - .config_aneg = &marvell_config_aneg, + .config_aneg = &m88e1111_config_aneg, .read_status = &marvell_read_status, .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, -- cgit v1.2.3-70-g09d2 From 5be1ea899da4f11de92897d2ea706e0820609b9e Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 27 Jun 2016 12:08:32 +0300 Subject: net/mlx5: Update command strings Add a command string for MODIFY_FLOW_TABLE, which is used by the driver. Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index dcd2df6518de..0b4986268cc9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -545,6 +545,7 @@ const char *mlx5_command_str(int command) MLX5_COMMAND_STR_CASE(ALLOC_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(DEALLOC_FLOW_COUNTER); MLX5_COMMAND_STR_CASE(QUERY_FLOW_COUNTER); + MLX5_COMMAND_STR_CASE(MODIFY_FLOW_TABLE); default: return "unknown command opcode"; } } -- cgit v1.2.3-70-g09d2 From 7092fe866907f4f243122ab388cbf0e77305afaa Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Mon, 27 Jun 2016 12:08:33 +0300 Subject: net/mlx5: Add ConnectX-5 PCIe 4.0 to list of supported devices Add the upcoming ConnectX-5 PCIe 4.0 device to the list of devices supported by the mlx5 driver. Signed-off-by: Majd Dibbiny Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index a19b59348dd6..c65f4a13e17e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -1508,8 +1508,9 @@ static const struct pci_device_id mlx5_core_pci_table[] = { { PCI_VDEVICE(MELLANOX, 0x1014), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4 VF */ { PCI_VDEVICE(MELLANOX, 0x1015) }, /* ConnectX-4LX */ { PCI_VDEVICE(MELLANOX, 0x1016), MLX5_PCI_DEV_IS_VF}, /* ConnectX-4LX VF */ - { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5 */ + { PCI_VDEVICE(MELLANOX, 0x1017) }, /* ConnectX-5, PCIe 3.0 */ { PCI_VDEVICE(MELLANOX, 0x1018), MLX5_PCI_DEV_IS_VF}, /* ConnectX-5 VF */ + { PCI_VDEVICE(MELLANOX, 0x1019) }, /* ConnectX-5, PCIe 4.0 */ { 0, } }; -- cgit v1.2.3-70-g09d2 From e0f46eb9f64ca2e97d242b6e02b8ae2c7fe6dc56 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Mon, 27 Jun 2016 12:08:34 +0300 Subject: net/mlx5e: Change enum to better reflect usage Change MLX5E_STATE_ASYNC_EVENTS_ENABLE to MLX5E_STATE_ASYNC_EVENTS_ENABLED since it represents a state and not an operation. Fixes: acff797cd1874 ('net/mlx5: Extend mlx5_core to support ConnectX-4 Ethernet functionality') Signed-off-by: Eli Cohen Signed-off-by: Saeed Mahameed Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index e8a6c3325b39..baa991a23475 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -401,7 +401,7 @@ enum mlx5e_traffic_types { }; enum { - MLX5E_STATE_ASYNC_EVENTS_ENABLE, + MLX5E_STATE_ASYNC_EVENTS_ENABLED, MLX5E_STATE_OPENED, MLX5E_STATE_DESTROYING, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index f5c8d5db25a8..4383f8c737e5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -244,7 +244,7 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, { struct mlx5e_priv *priv = vpriv; - if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state)) + if (!test_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state)) return; switch (event) { @@ -260,12 +260,12 @@ static void mlx5e_async_event(struct mlx5_core_dev *mdev, void *vpriv, static void mlx5e_enable_async_events(struct mlx5e_priv *priv) { - set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); + set_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); } static void mlx5e_disable_async_events(struct mlx5e_priv *priv) { - clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLE, &priv->state); + clear_bit(MLX5E_STATE_ASYNC_EVENTS_ENABLED, &priv->state); synchronize_irq(mlx5_get_msix_vec(priv->mdev, MLX5_EQ_VEC_ASYNC)); } -- cgit v1.2.3-70-g09d2 From fd4782c21359cfd52af4c3180a2bb6bad55c1eba Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 27 Jun 2016 12:08:35 +0300 Subject: net/mlx5e: Check for BlueFlame capability before allocating SQ uar Previous to this patch mapping was always set to write combining without checking whether BlueFlame is supported in the device. Fixes: 0ba422410bbf ('net/mlx5: Fix global UAR mapping') Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 4383f8c737e5..793d7a1f92b5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -580,7 +580,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c, void *sqc_wq = MLX5_ADDR_OF(sqc, sqc, wq); int err; - err = mlx5_alloc_map_uar(mdev, &sq->uar, true); + err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf)); if (err) return err; -- cgit v1.2.3-70-g09d2 From 9ceec359e4ebdbbfd35375203c5bd06d602225f7 Mon Sep 17 00:00:00 2001 From: Matthew Finlay Date: Mon, 27 Jun 2016 12:08:36 +0300 Subject: net/mlx5e: Prevent adding the same vxlan port Do not allow the same vxlan udp port to be added to the device more than once. Fixes: b3f63c3d5e2c ("net/mlx5e: Add netdev support for VXLAN tunneling") Signed-off-by: Matthew Finlay Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx5/core/vxlan.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c index f2fd1ef16da7..05de77267d58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/vxlan.c @@ -105,6 +105,9 @@ static void mlx5e_vxlan_add_port(struct work_struct *work) struct mlx5e_vxlan *vxlan; int err; + if (mlx5e_vxlan_lookup_port(priv, port)) + goto free_work; + if (mlx5e_vxlan_core_add_port_cmd(priv->mdev, port)) goto free_work; -- cgit v1.2.3-70-g09d2 From ed80ec4c179d1f44cc1b36c7a102353ae6103793 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 27 Jun 2016 12:08:37 +0300 Subject: net/mlx5e: Fix number of PFC counters reported to ethtool Number of PFC counters used to count only number of priorities with PFC enabled, but each priority has more than one counter, hence the need to multiply it by the number of PFC counters per priority. Fixes: cf678570d5a1 ('net/mlx5e: Add per priority group to PPort counters') Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index fc7dcc03b1de..c709d41c4b70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -184,7 +184,9 @@ static unsigned long mlx5e_query_pfc_combined(struct mlx5e_priv *priv) #define MLX5E_NUM_SQ_STATS(priv) \ (NUM_SQ_STATS * priv->params.num_channels * priv->params.num_tc * \ test_bit(MLX5E_STATE_OPENED, &priv->state)) -#define MLX5E_NUM_PFC_COUNTERS(priv) hweight8(mlx5e_query_pfc_combined(priv)) +#define MLX5E_NUM_PFC_COUNTERS(priv) \ + (hweight8(mlx5e_query_pfc_combined(priv)) * \ + NUM_PPORT_PER_PRIO_PFC_COUNTERS) static int mlx5e_get_sset_count(struct net_device *dev, int sset) { -- cgit v1.2.3-70-g09d2 From bfe6d8d1d433cbd5513a93132695e6dbdd79e6f2 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Mon, 27 Jun 2016 12:08:38 +0300 Subject: net/mlx5e: Reorganize ethtool statistics Categorize and reorganize ethtool statistics counters by renaming to "rx_*" and "tx_*" and removing redundant and duplicated counters, this way they are easier to grasp and more user friendly. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- .../net/ethernet/mellanox/mlx5/core/en_ethtool.c | 30 ++- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 27 ++- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 4 +- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 228 +++++++++------------ drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 4 +- 5 files changed, 130 insertions(+), 163 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index c709d41c4b70..e667a870e0c2 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -213,42 +213,41 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) /* SW counters */ for (i = 0; i < NUM_SW_COUNTERS; i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].name); + strcpy(data + (idx++) * ETH_GSTRING_LEN, sw_stats_desc[i].format); /* Q counters */ for (i = 0; i < MLX5E_NUM_Q_CNTRS(priv); i++) - strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].name); + strcpy(data + (idx++) * ETH_GSTRING_LEN, q_stats_desc[i].format); /* VPORT counters */ for (i = 0; i < NUM_VPORT_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - vport_stats_desc[i].name); + vport_stats_desc[i].format); /* PPORT counters */ for (i = 0; i < NUM_PPORT_802_3_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_802_3_stats_desc[i].name); + pport_802_3_stats_desc[i].format); for (i = 0; i < NUM_PPORT_2863_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_2863_stats_desc[i].name); + pport_2863_stats_desc[i].format); for (i = 0; i < NUM_PPORT_2819_COUNTERS; i++) strcpy(data + (idx++) * ETH_GSTRING_LEN, - pport_2819_stats_desc[i].name); + pport_2819_stats_desc[i].format); for (prio = 0; prio < NUM_PPORT_PRIO; prio++) { for (i = 0; i < NUM_PPORT_PER_PRIO_TRAFFIC_COUNTERS; i++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s", - prio, - pport_per_prio_traffic_stats_desc[i].name); + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_traffic_stats_desc[i].format, prio); } pfc_combined = mlx5e_query_pfc_combined(priv); for_each_set_bit(prio, &pfc_combined, NUM_PPORT_PRIO) { for (i = 0; i < NUM_PPORT_PER_PRIO_PFC_COUNTERS; i++) { - sprintf(data + (idx++) * ETH_GSTRING_LEN, "prio%d_%s", - prio, pport_per_prio_pfc_stats_desc[i].name); + sprintf(data + (idx++) * ETH_GSTRING_LEN, + pport_per_prio_pfc_stats_desc[i].format, prio); } } @@ -258,16 +257,15 @@ static void mlx5e_fill_stats_strings(struct mlx5e_priv *priv, uint8_t *data) /* per channel counters */ for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_RQ_STATS; j++) - sprintf(data + (idx++) * ETH_GSTRING_LEN, "rx%d_%s", i, - rq_stats_desc[j].name); + sprintf(data + (idx++) * ETH_GSTRING_LEN, + rq_stats_desc[j].format, i); for (tc = 0; tc < priv->params.num_tc; tc++) for (i = 0; i < priv->params.num_channels; i++) for (j = 0; j < NUM_SQ_STATS; j++) sprintf(data + (idx++) * ETH_GSTRING_LEN, - "tx%d_%s", - priv->channeltc_to_txq_map[i][tc], - sq_stats_desc[j].name); + sq_stats_desc[j].format, + priv->channeltc_to_txq_map[i][tc]); } static void mlx5e_get_strings(struct net_device *dev, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 793d7a1f92b5..cb6defd71fc1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -105,11 +105,11 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv 
*priv) s->rx_packets += rq_stats->packets; s->rx_bytes += rq_stats->bytes; - s->lro_packets += rq_stats->lro_packets; - s->lro_bytes += rq_stats->lro_bytes; + s->rx_lro_packets += rq_stats->lro_packets; + s->rx_lro_bytes += rq_stats->lro_bytes; s->rx_csum_none += rq_stats->csum_none; - s->rx_csum_sw += rq_stats->csum_sw; - s->rx_csum_inner += rq_stats->csum_inner; + s->rx_csum_complete += rq_stats->csum_complete; + s->rx_csum_unnecessary_inner += rq_stats->csum_unnecessary_inner; s->rx_wqe_err += rq_stats->wqe_err; s->rx_mpwqe_filler += rq_stats->mpwqe_filler; s->rx_mpwqe_frag += rq_stats->mpwqe_frag; @@ -122,24 +122,23 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv) s->tx_packets += sq_stats->packets; s->tx_bytes += sq_stats->bytes; - s->tso_packets += sq_stats->tso_packets; - s->tso_bytes += sq_stats->tso_bytes; - s->tso_inner_packets += sq_stats->tso_inner_packets; - s->tso_inner_bytes += sq_stats->tso_inner_bytes; + s->tx_tso_packets += sq_stats->tso_packets; + s->tx_tso_bytes += sq_stats->tso_bytes; + s->tx_tso_inner_packets += sq_stats->tso_inner_packets; + s->tx_tso_inner_bytes += sq_stats->tso_inner_bytes; s->tx_queue_stopped += sq_stats->stopped; s->tx_queue_wake += sq_stats->wake; s->tx_queue_dropped += sq_stats->dropped; - s->tx_csum_inner += sq_stats->csum_offload_inner; - tx_offload_none += sq_stats->csum_offload_none; + s->tx_csum_partial_inner += sq_stats->csum_partial_inner; + tx_offload_none += sq_stats->csum_none; } } /* Update calculated offload counters */ - s->tx_csum_offload = s->tx_packets - tx_offload_none - s->tx_csum_inner; - s->rx_csum_good = s->rx_packets - s->rx_csum_none - - s->rx_csum_sw; + s->tx_csum_partial = s->tx_packets - tx_offload_none - s->tx_csum_partial_inner; + s->rx_csum_unnecessary = s->rx_packets - s->rx_csum_none - s->rx_csum_complete; - s->link_down_events = MLX5_GET(ppcnt_reg, + s->link_down_events_phy = MLX5_GET(ppcnt_reg, priv->stats.pport.phy_counters, counter_set.phys_layer_cntrs.link_down_events); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index bd947704b59c..022acc2e8922 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -689,7 +689,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, if (is_first_ethertype_ip(skb)) { skb->ip_summed = CHECKSUM_COMPLETE; skb->csum = csum_unfold((__force __sum16)cqe->check_sum); - rq->stats.csum_sw++; + rq->stats.csum_complete++; return; } @@ -699,7 +699,7 @@ static inline void mlx5e_handle_csum(struct net_device *netdev, if (cqe_is_tunneled(cqe)) { skb->csum_level = 1; skb->encapsulation = 1; - rq->stats.csum_inner++; + rq->stats.csum_unnecessary_inner++; } return; } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 83bc32b25849..fcd490cc5610 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -42,9 +42,11 @@ be64_to_cpu(*(__be32 *)((char *)ptr + dsc[i].offset)) #define MLX5E_DECLARE_STAT(type, fld) #fld, offsetof(type, fld) +#define MLX5E_DECLARE_RX_STAT(type, fld) "rx%d_"#fld, offsetof(type, fld) +#define MLX5E_DECLARE_TX_STAT(type, fld) "tx%d_"#fld, offsetof(type, fld) struct counter_desc { - char name[ETH_GSTRING_LEN]; + char format[ETH_GSTRING_LEN]; int offset; /* Byte offset */ }; @@ -53,18 +55,18 @@ struct mlx5e_sw_stats { u64 rx_bytes; u64 tx_packets; u64 tx_bytes; - u64 tso_packets; - 
u64 tso_bytes; - u64 tso_inner_packets; - u64 tso_inner_bytes; - u64 lro_packets; - u64 lro_bytes; - u64 rx_csum_good; + u64 tx_tso_packets; + u64 tx_tso_bytes; + u64 tx_tso_inner_packets; + u64 tx_tso_inner_bytes; + u64 rx_lro_packets; + u64 rx_lro_bytes; + u64 rx_csum_unnecessary; u64 rx_csum_none; - u64 rx_csum_sw; - u64 rx_csum_inner; - u64 tx_csum_offload; - u64 tx_csum_inner; + u64 rx_csum_complete; + u64 rx_csum_unnecessary_inner; + u64 tx_csum_partial; + u64 tx_csum_partial_inner; u64 tx_queue_stopped; u64 tx_queue_wake; u64 tx_queue_dropped; @@ -76,7 +78,7 @@ struct mlx5e_sw_stats { u64 rx_cqe_compress_pkts; /* Special handling counters */ - u64 link_down_events; + u64 link_down_events_phy; }; static const struct counter_desc sw_stats_desc[] = { @@ -84,18 +86,18 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_bytes) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_packets) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tso_inner_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, lro_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_good) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_tso_inner_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_packets) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_lro_bytes) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_none) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_sw) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_inner) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_offload) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_complete) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_csum_unnecessary_inner) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_csum_partial_inner) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_stopped) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_wake) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, tx_queue_dropped) }, @@ -105,7 +107,7 @@ static const struct counter_desc sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) }, { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events) }, + { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events_phy) }, }; struct mlx5e_qcounter_stats { @@ -125,12 +127,6 @@ struct mlx5e_vport_stats { }; static const struct counter_desc vport_stats_desc[] = { - { "rx_vport_error_packets", - VPORT_COUNTER_OFF(received_errors.packets) }, - { "rx_vport_error_bytes", VPORT_COUNTER_OFF(received_errors.octets) }, - { "tx_vport_error_packets", - VPORT_COUNTER_OFF(transmit_errors.packets) }, - { "tx_vport_error_bytes", 
VPORT_COUNTER_OFF(transmit_errors.octets) }, { "rx_vport_unicast_packets", VPORT_COUNTER_OFF(received_eth_unicast.packets) }, { "rx_vport_unicast_bytes", @@ -192,94 +188,68 @@ struct mlx5e_pport_stats { }; static const struct counter_desc pport_802_3_stats_desc[] = { - { "frames_tx", PPORT_802_3_OFF(a_frames_transmitted_ok) }, - { "frames_rx", PPORT_802_3_OFF(a_frames_received_ok) }, - { "check_seq_err", PPORT_802_3_OFF(a_frame_check_sequence_errors) }, - { "alignment_err", PPORT_802_3_OFF(a_alignment_errors) }, - { "octets_tx", PPORT_802_3_OFF(a_octets_transmitted_ok) }, - { "octets_received", PPORT_802_3_OFF(a_octets_received_ok) }, - { "multicast_xmitted", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) }, - { "broadcast_xmitted", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) }, - { "multicast_rx", PPORT_802_3_OFF(a_multicast_frames_received_ok) }, - { "broadcast_rx", PPORT_802_3_OFF(a_broadcast_frames_received_ok) }, - { "in_range_len_errors", PPORT_802_3_OFF(a_in_range_length_errors) }, - { "out_of_range_len", PPORT_802_3_OFF(a_out_of_range_length_field) }, - { "too_long_errors", PPORT_802_3_OFF(a_frame_too_long_errors) }, - { "symbol_err", PPORT_802_3_OFF(a_symbol_error_during_carrier) }, - { "mac_control_tx", PPORT_802_3_OFF(a_mac_control_frames_transmitted) }, - { "mac_control_rx", PPORT_802_3_OFF(a_mac_control_frames_received) }, - { "unsupported_op_rx", - PPORT_802_3_OFF(a_unsupported_opcodes_received) }, - { "pause_ctrl_rx", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) }, - { "pause_ctrl_tx", - PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) }, + { "tx_packets_phy", PPORT_802_3_OFF(a_frames_transmitted_ok) }, + { "rx_packets_phy", PPORT_802_3_OFF(a_frames_received_ok) }, + { "rx_crc_errors_phy", PPORT_802_3_OFF(a_frame_check_sequence_errors) }, + { "tx_bytes_phy", PPORT_802_3_OFF(a_octets_transmitted_ok) }, + { "rx_bytes_phy", PPORT_802_3_OFF(a_octets_received_ok) }, + { "tx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_xmitted_ok) }, + { "tx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_xmitted_ok) }, + { "rx_multicast_phy", PPORT_802_3_OFF(a_multicast_frames_received_ok) }, + { "rx_broadcast_phy", PPORT_802_3_OFF(a_broadcast_frames_received_ok) }, + { "rx_in_range_len_errors_phy", PPORT_802_3_OFF(a_in_range_length_errors) }, + { "rx_out_of_range_len_phy", PPORT_802_3_OFF(a_out_of_range_length_field) }, + { "rx_oversize_pkts_phy", PPORT_802_3_OFF(a_frame_too_long_errors) }, + { "rx_symbol_err_phy", PPORT_802_3_OFF(a_symbol_error_during_carrier) }, + { "tx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_transmitted) }, + { "rx_mac_control_phy", PPORT_802_3_OFF(a_mac_control_frames_received) }, + { "rx_unsupported_op_phy", PPORT_802_3_OFF(a_unsupported_opcodes_received) }, + { "rx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_received) }, + { "tx_pause_ctrl_phy", PPORT_802_3_OFF(a_pause_mac_ctrl_frames_transmitted) }, }; static const struct counter_desc pport_2863_stats_desc[] = { - { "in_octets", PPORT_2863_OFF(if_in_octets) }, - { "in_ucast_pkts", PPORT_2863_OFF(if_in_ucast_pkts) }, - { "in_discards", PPORT_2863_OFF(if_in_discards) }, - { "in_errors", PPORT_2863_OFF(if_in_errors) }, - { "in_unknown_protos", PPORT_2863_OFF(if_in_unknown_protos) }, - { "out_octets", PPORT_2863_OFF(if_out_octets) }, - { "out_ucast_pkts", PPORT_2863_OFF(if_out_ucast_pkts) }, - { "out_discards", PPORT_2863_OFF(if_out_discards) }, - { "out_errors", PPORT_2863_OFF(if_out_errors) }, - { "in_multicast_pkts", PPORT_2863_OFF(if_in_multicast_pkts) }, - { 
"in_broadcast_pkts", PPORT_2863_OFF(if_in_broadcast_pkts) }, - { "out_multicast_pkts", PPORT_2863_OFF(if_out_multicast_pkts) }, - { "out_broadcast_pkts", PPORT_2863_OFF(if_out_broadcast_pkts) }, + { "rx_discards_phy", PPORT_2863_OFF(if_in_discards) }, + { "tx_discards_phy", PPORT_2863_OFF(if_out_discards) }, + { "tx_errors_phy", PPORT_2863_OFF(if_out_errors) }, }; static const struct counter_desc pport_2819_stats_desc[] = { - { "drop_events", PPORT_2819_OFF(ether_stats_drop_events) }, - { "octets", PPORT_2819_OFF(ether_stats_octets) }, - { "pkts", PPORT_2819_OFF(ether_stats_pkts) }, - { "broadcast_pkts", PPORT_2819_OFF(ether_stats_broadcast_pkts) }, - { "multicast_pkts", PPORT_2819_OFF(ether_stats_multicast_pkts) }, - { "crc_align_errors", PPORT_2819_OFF(ether_stats_crc_align_errors) }, - { "undersize_pkts", PPORT_2819_OFF(ether_stats_undersize_pkts) }, - { "oversize_pkts", PPORT_2819_OFF(ether_stats_oversize_pkts) }, - { "fragments", PPORT_2819_OFF(ether_stats_fragments) }, - { "jabbers", PPORT_2819_OFF(ether_stats_jabbers) }, - { "collisions", PPORT_2819_OFF(ether_stats_collisions) }, - { "p64octets", PPORT_2819_OFF(ether_stats_pkts64octets) }, - { "p65to127octets", PPORT_2819_OFF(ether_stats_pkts65to127octets) }, - { "p128to255octets", PPORT_2819_OFF(ether_stats_pkts128to255octets) }, - { "p256to511octets", PPORT_2819_OFF(ether_stats_pkts256to511octets) }, - { "p512to1023octets", PPORT_2819_OFF(ether_stats_pkts512to1023octets) }, - { "p1024to1518octets", - PPORT_2819_OFF(ether_stats_pkts1024to1518octets) }, - { "p1519to2047octets", - PPORT_2819_OFF(ether_stats_pkts1519to2047octets) }, - { "p2048to4095octets", - PPORT_2819_OFF(ether_stats_pkts2048to4095octets) }, - { "p4096to8191octets", - PPORT_2819_OFF(ether_stats_pkts4096to8191octets) }, - { "p8192to10239octets", - PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, + { "rx_undersize_pkts_phy", PPORT_2819_OFF(ether_stats_undersize_pkts) }, + { "rx_fragments_phy", PPORT_2819_OFF(ether_stats_fragments) }, + { "rx_jabbers_phy", PPORT_2819_OFF(ether_stats_jabbers) }, + { "rx_64_bytes_phy", PPORT_2819_OFF(ether_stats_pkts64octets) }, + { "rx_65_to_127_bytes_phy", PPORT_2819_OFF(ether_stats_pkts65to127octets) }, + { "rx_128_to_255_bytes_phy", PPORT_2819_OFF(ether_stats_pkts128to255octets) }, + { "rx_256_to_511_bytes_phy", PPORT_2819_OFF(ether_stats_pkts256to511octets) }, + { "rx_512_to_1023_bytes_phy", PPORT_2819_OFF(ether_stats_pkts512to1023octets) }, + { "rx_1024_to_1518_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1024to1518octets) }, + { "rx_1519_to_2047_bytes_phy", PPORT_2819_OFF(ether_stats_pkts1519to2047octets) }, + { "rx_2048_to_4095_bytes_phy", PPORT_2819_OFF(ether_stats_pkts2048to4095octets) }, + { "rx_4096_to_8191_bytes_phy", PPORT_2819_OFF(ether_stats_pkts4096to8191octets) }, + { "rx_8192_to_10239_bytes_phy", PPORT_2819_OFF(ether_stats_pkts8192to10239octets) }, }; static const struct counter_desc pport_per_prio_traffic_stats_desc[] = { - { "rx_octets", PPORT_PER_PRIO_OFF(rx_octets) }, - { "rx_frames", PPORT_PER_PRIO_OFF(rx_frames) }, - { "tx_octets", PPORT_PER_PRIO_OFF(tx_octets) }, - { "tx_frames", PPORT_PER_PRIO_OFF(tx_frames) }, + { "rx_prio%d_bytes", PPORT_PER_PRIO_OFF(rx_octets) }, + { "rx_prio%d_packets", PPORT_PER_PRIO_OFF(rx_frames) }, + { "tx_prio%d_bytes", PPORT_PER_PRIO_OFF(tx_octets) }, + { "tx_prio%d_packets", PPORT_PER_PRIO_OFF(tx_frames) }, }; static const struct counter_desc pport_per_prio_pfc_stats_desc[] = { - { "rx_pause", PPORT_PER_PRIO_OFF(rx_pause) }, - { "rx_pause_duration", 
PPORT_PER_PRIO_OFF(rx_pause_duration) }, - { "tx_pause", PPORT_PER_PRIO_OFF(tx_pause) }, - { "tx_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, - { "rx_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, + { "rx_prio%d_pause", PPORT_PER_PRIO_OFF(rx_pause) }, + { "rx_prio%d_pause_duration", PPORT_PER_PRIO_OFF(rx_pause_duration) }, + { "tx_prio%d_pause", PPORT_PER_PRIO_OFF(tx_pause) }, + { "tx_prio%d_pause_duration", PPORT_PER_PRIO_OFF(tx_pause_duration) }, + { "rx_prio%d_pause_transition", PPORT_PER_PRIO_OFF(rx_pause_transition) }, }; struct mlx5e_rq_stats { u64 packets; u64 bytes; - u64 csum_sw; - u64 csum_inner; + u64 csum_complete; + u64 csum_unnecessary_inner; u64 csum_none; u64 lro_packets; u64 lro_bytes; @@ -292,19 +262,19 @@ struct mlx5e_rq_stats { }; static const struct counter_desc rq_stats_desc[] = { - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_sw) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_inner) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, csum_none) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, lro_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, wqe_err) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_frag) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, - { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_complete) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_unnecessary_inner) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, csum_none) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_packets) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, lro_bytes) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, wqe_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_filler) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, mpwqe_frag) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, buff_alloc_err) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_blks) }, + { MLX5E_DECLARE_RX_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) }, }; struct mlx5e_sq_stats { @@ -315,28 +285,28 @@ struct mlx5e_sq_stats { u64 tso_bytes; u64 tso_inner_packets; u64 tso_inner_bytes; - u64 csum_offload_inner; + u64 csum_partial_inner; u64 nop; /* less likely accessed in data path */ - u64 csum_offload_none; + u64 csum_none; u64 stopped; u64 wake; u64 dropped; }; static const struct counter_desc sq_stats_desc[] = { - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_inner) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, nop) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, csum_offload_none) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, stopped) }, - { MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, wake) }, - { 
MLX5E_DECLARE_STAT(struct mlx5e_sq_stats, dropped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_packets) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, tso_inner_bytes) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_partial_inner) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, nop) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, csum_none) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, stopped) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, wake) }, + { MLX5E_DECLARE_TX_STAT(struct mlx5e_sq_stats, dropped) }, }; #define NUM_SW_COUNTERS ARRAY_SIZE(sw_stats_desc) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index b000ddc29553..5a750b9cd006 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -192,12 +192,12 @@ static netdev_tx_t mlx5e_sq_xmit(struct mlx5e_sq *sq, struct sk_buff *skb) if (skb->encapsulation) { eseg->cs_flags |= MLX5_ETH_WQE_L3_INNER_CSUM | MLX5_ETH_WQE_L4_INNER_CSUM; - sq->stats.csum_offload_inner++; + sq->stats.csum_partial_inner++; } else { eseg->cs_flags |= MLX5_ETH_WQE_L4_CSUM; } } else - sq->stats.csum_offload_none++; + sq->stats.csum_none++; if (sq->cc != sq->prev_cc) { sq->prev_cc = sq->cc; -- cgit v1.2.3-70-g09d2 From 3f4c68cfde30caa1f6d8368fd19590671411ade2 Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Mon, 27 Jun 2016 15:30:02 +0530 Subject: net: thunderx: Fix link status reporting Check for SMU RX local/remote faults along with SPU LINK status. Otherwise at times link is UP at our end but DOWN at link partner's side. Also due to an issue in BGX it's rarely seen that initialization doesn't happen properly and SMU RX reports faults with everything fine at SPU. This patch tries to reinitialize LMAC to fix it. Also fixed LMAC disable sequence to properly bring down link. Signed-off-by: Sunil Goutham Signed-off-by: Tao Wang Signed-off-by: David S. 
Miller --- drivers/net/ethernet/cavium/thunder/thunder_bgx.c | 91 +++++++++++++++-------- drivers/net/ethernet/cavium/thunder/thunder_bgx.h | 2 + 2 files changed, 62 insertions(+), 31 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c index 3ed21988626b..63a39ac97d53 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.c +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.c @@ -551,7 +551,9 @@ static int bgx_xaui_check_link(struct lmac *lmac) } /* Clear rcvflt bit (latching high) and read it back */ - bgx_reg_modify(bgx, lmacid, BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT); + if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) + bgx_reg_modify(bgx, lmacid, + BGX_SPUX_STATUS2, SPU_STATUS2_RCVFLT); if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) { dev_err(&bgx->pdev->dev, "Receive fault, retry training\n"); if (bgx->use_training) { @@ -570,13 +572,6 @@ static int bgx_xaui_check_link(struct lmac *lmac) return -1; } - /* Wait for MAC RX to be ready */ - if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_RX_CTL, - SMU_RX_CTL_STATUS, true)) { - dev_err(&bgx->pdev->dev, "SMU RX link not okay\n"); - return -1; - } - /* Wait for BGX RX to be idle */ if (bgx_poll_reg(bgx, lmacid, BGX_SMUX_CTL, SMU_CTL_RX_IDLE, false)) { dev_err(&bgx->pdev->dev, "SMU RX not idle\n"); @@ -589,29 +584,30 @@ static int bgx_xaui_check_link(struct lmac *lmac) return -1; } - if (bgx_reg_read(bgx, lmacid, BGX_SPUX_STATUS2) & SPU_STATUS2_RCVFLT) { - dev_err(&bgx->pdev->dev, "Receive fault\n"); - return -1; - } - - /* Receive link is latching low. Force it high and verify it */ - bgx_reg_modify(bgx, lmacid, BGX_SPUX_STATUS1, SPU_STATUS1_RCV_LNK); - if (bgx_poll_reg(bgx, lmacid, BGX_SPUX_STATUS1, - SPU_STATUS1_RCV_LNK, false)) { - dev_err(&bgx->pdev->dev, "SPU receive link down\n"); - return -1; - } - + /* Clear receive packet disable */ cfg = bgx_reg_read(bgx, lmacid, BGX_SPUX_MISC_CONTROL); cfg &= ~SPU_MISC_CTL_RX_DIS; bgx_reg_write(bgx, lmacid, BGX_SPUX_MISC_CONTROL, cfg); - return 0; + + /* Check for MAC RX faults */ + cfg = bgx_reg_read(bgx, lmacid, BGX_SMUX_RX_CTL); + /* 0 - Link is okay, 1 - Local fault, 2 - Remote fault */ + cfg &= SMU_RX_CTL_STATUS; + if (!cfg) + return 0; + + /* Rx local/remote fault seen. 
+ * Do lmac reinit to see if condition recovers + */ + bgx_lmac_xaui_init(bgx, lmacid, bgx->lmac_type); + + return -1; } static void bgx_poll_for_link(struct work_struct *work) { struct lmac *lmac; - u64 link; + u64 spu_link, smu_link; lmac = container_of(work, struct lmac, dwork.work); @@ -621,8 +617,11 @@ static void bgx_poll_for_link(struct work_struct *work) bgx_poll_reg(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1, SPU_STATUS1_RCV_LNK, false); - link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1); - if (link & SPU_STATUS1_RCV_LNK) { + spu_link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SPUX_STATUS1); + smu_link = bgx_reg_read(lmac->bgx, lmac->lmacid, BGX_SMUX_RX_CTL); + + if ((spu_link & SPU_STATUS1_RCV_LNK) && + !(smu_link & SMU_RX_CTL_STATUS)) { lmac->link_up = 1; if (lmac->bgx->lmac_type == BGX_MODE_XLAUI) lmac->last_speed = 40000; @@ -636,9 +635,15 @@ static void bgx_poll_for_link(struct work_struct *work) } if (lmac->last_link != lmac->link_up) { + if (lmac->link_up) { + if (bgx_xaui_check_link(lmac)) { + /* Errors, clear link_up state */ + lmac->link_up = 0; + lmac->last_speed = SPEED_UNKNOWN; + lmac->last_duplex = DUPLEX_UNKNOWN; + } + } lmac->last_link = lmac->link_up; - if (lmac->link_up) - bgx_xaui_check_link(lmac); } queue_delayed_work(lmac->check_link, &lmac->dwork, HZ * 2); @@ -710,7 +715,7 @@ static int bgx_lmac_enable(struct bgx *bgx, u8 lmacid) static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) { struct lmac *lmac; - u64 cmrx_cfg; + u64 cfg; lmac = &bgx->lmac[lmacid]; if (lmac->check_link) { @@ -719,9 +724,33 @@ static void bgx_lmac_disable(struct bgx *bgx, u8 lmacid) destroy_workqueue(lmac->check_link); } - cmrx_cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); - cmrx_cfg &= ~(1 << 15); - bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cmrx_cfg); + /* Disable packet reception */ + cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); + cfg &= ~CMR_PKT_RX_EN; + bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); + + /* Give chance for Rx/Tx FIFO to get drained */ + bgx_poll_reg(bgx, lmacid, BGX_CMRX_RX_FIFO_LEN, (u64)0x1FFF, true); + bgx_poll_reg(bgx, lmacid, BGX_CMRX_TX_FIFO_LEN, (u64)0x3FFF, true); + + /* Disable packet transmission */ + cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); + cfg &= ~CMR_PKT_TX_EN; + bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); + + /* Disable serdes lanes */ + if (!lmac->is_sgmii) + bgx_reg_modify(bgx, lmacid, + BGX_SPUX_CONTROL1, SPU_CTL_LOW_POWER); + else + bgx_reg_modify(bgx, lmacid, + BGX_GMP_PCS_MRX_CTL, PCS_MRX_CTL_PWR_DN); + + /* Disable LMAC */ + cfg = bgx_reg_read(bgx, lmacid, BGX_CMRX_CFG); + cfg &= ~CMR_EN; + bgx_reg_write(bgx, lmacid, BGX_CMRX_CFG, cfg); + bgx_flush_dmac_addrs(bgx, lmacid); if ((bgx->lmac_type != BGX_MODE_XFI) && diff --git a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h index 149e179363a1..42010d2e5ddf 100644 --- a/drivers/net/ethernet/cavium/thunder/thunder_bgx.h +++ b/drivers/net/ethernet/cavium/thunder/thunder_bgx.h @@ -41,6 +41,7 @@ #define BGX_CMRX_RX_STAT10 0xC0 #define BGX_CMRX_RX_BP_DROP 0xC8 #define BGX_CMRX_RX_DMAC_CTL 0x0E8 +#define BGX_CMRX_RX_FIFO_LEN 0x108 #define BGX_CMR_RX_DMACX_CAM 0x200 #define RX_DMACX_CAM_EN BIT_ULL(48) #define RX_DMACX_CAM_LMACID(x) (x << 49) @@ -50,6 +51,7 @@ #define BGX_CMR_CHAN_MSK_AND 0x450 #define BGX_CMR_BIST_STATUS 0x460 #define BGX_CMR_RX_LMACS 0x468 +#define BGX_CMRX_TX_FIFO_LEN 0x518 #define BGX_CMRX_TX_STAT0 0x600 #define BGX_CMRX_TX_STAT1 0x608 #define BGX_CMRX_TX_STAT2 0x610 -- cgit v1.2.3-70-g09d2 From 
3e29adba567306504e99b8363edf2d47c19dbe1b Mon Sep 17 00:00:00 2001 From: Sunil Goutham Date: Mon, 27 Jun 2016 15:30:03 +0530 Subject: net: thunderx: Fix TL4 configuration for secondary Qsets TL4 calculation for a given SQ of secondary Qsets is incorrect and goes out of bounds and also for some SQ's TL4 chosen will transmit data via a different BGX interface and not same as primary Qset's interface. This patch fixes this issue. Signed-off-by: Sunil Goutham Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nic_main.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/cavium/thunder/nic_main.c b/drivers/net/ethernet/cavium/thunder/nic_main.c index 95f17f8cadac..16ed20357c5c 100644 --- a/drivers/net/ethernet/cavium/thunder/nic_main.c +++ b/drivers/net/ethernet/cavium/thunder/nic_main.c @@ -499,6 +499,7 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, u32 rr_quantum; u8 sq_idx = sq->sq_num; u8 pqs_vnic; + int svf; if (sq->sqs_mode) pqs_vnic = nic->pqs_vf[vnic]; @@ -511,10 +512,19 @@ static void nic_tx_channel_cfg(struct nicpf *nic, u8 vnic, /* 24 bytes for FCS, IPG and preamble */ rr_quantum = ((NIC_HW_MAX_FRS + 24) / 4); - tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX); + if (!sq->sqs_mode) { + tl4 = (lmac * NIC_TL4_PER_LMAC) + (bgx * NIC_TL4_PER_BGX); + } else { + for (svf = 0; svf < MAX_SQS_PER_VF; svf++) { + if (nic->vf_sqs[pqs_vnic][svf] == vnic) + break; + } + tl4 = (MAX_LMAC_PER_BGX * NIC_TL4_PER_LMAC); + tl4 += (lmac * NIC_TL4_PER_LMAC * MAX_SQS_PER_VF); + tl4 += (svf * NIC_TL4_PER_LMAC); + tl4 += (bgx * NIC_TL4_PER_BGX); + } tl4 += sq_idx; - if (sq->sqs_mode) - tl4 += vnic * 8; tl3 = tl4 / (NIC_MAX_TL4 / NIC_MAX_TL3); nic_reg_write(nic, NIC_PF_QSET_0_127_SQ_0_7_CFG2 | -- cgit v1.2.3-70-g09d2 From 96183182ad05d1ce31b9048921c12bf4ad621eaf Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Mon, 27 Jun 2016 20:48:53 +0800 Subject: ibmvnic: fix to use list_for_each_safe() when delete items Since we will remove items off the list using list_del() we need to use a safe version of the list_for_each() macro aptly named list_for_each_safe(). Signed-off-by: Wei Yongjun Signed-off-by: David S. 
Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 864cb21351a4..ecdb6854a898 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -2121,7 +2121,7 @@ static void handle_error_info_rsp(union ibmvnic_crq *crq, struct ibmvnic_adapter *adapter) { struct device *dev = &adapter->vdev->dev; - struct ibmvnic_error_buff *error_buff; + struct ibmvnic_error_buff *error_buff, *tmp; unsigned long flags; bool found = false; int i; @@ -2133,7 +2133,7 @@ static void handle_error_info_rsp(union ibmvnic_crq *crq, } spin_lock_irqsave(&adapter->error_list_lock, flags); - list_for_each_entry(error_buff, &adapter->errors, list) + list_for_each_entry_safe(error_buff, tmp, &adapter->errors, list) if (error_buff->error_id == crq->request_error_rsp.error_id) { found = true; list_del(&error_buff->list); @@ -3141,14 +3141,14 @@ static void handle_request_ras_comp_num_rsp(union ibmvnic_crq *crq, static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter) { - struct ibmvnic_inflight_cmd *inflight_cmd; + struct ibmvnic_inflight_cmd *inflight_cmd, *tmp1; struct device *dev = &adapter->vdev->dev; - struct ibmvnic_error_buff *error_buff; + struct ibmvnic_error_buff *error_buff, *tmp2; unsigned long flags; unsigned long flags2; spin_lock_irqsave(&adapter->inflight_lock, flags); - list_for_each_entry(inflight_cmd, &adapter->inflight, list) { + list_for_each_entry_safe(inflight_cmd, tmp1, &adapter->inflight, list) { switch (inflight_cmd->crq.generic.cmd) { case LOGIN: dma_unmap_single(dev, adapter->login_buf_token, @@ -3165,8 +3165,8 @@ static void ibmvnic_free_inflight(struct ibmvnic_adapter *adapter) break; case REQUEST_ERROR_INFO: spin_lock_irqsave(&adapter->error_list_lock, flags2); - list_for_each_entry(error_buff, &adapter->errors, - list) { + list_for_each_entry_safe(error_buff, tmp2, + &adapter->errors, list) { dma_unmap_single(dev, error_buff->dma, error_buff->len, DMA_FROM_DEVICE); -- cgit v1.2.3-70-g09d2 From a3d2e9f8eb1487f4191ff08ce2d3d63702c65a90 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Jun 2016 17:38:50 +0200 Subject: tcp: do not send too big packets at retransmit time Arjun reported a bug in TCP stack and bisected it to a recent commit. In case where we process SACK, we can coalesce multiple skbs into fat ones (tcp_shift_skb_data()), to lower write queue overhead, because we do not expect to retransmit these packets. However, SACK reneging can happen, forcing the sender to retransmit all these packets. If skb->len is above 64KB, we then send buggy IP packets that could hang TSO engine on cxgb4. Neal suggested to use tcp_tso_autosize() instead of tp->gso_segs so that we cook packets of optimal size vs TCP/pacing. Thanks to Arjun for reporting the bug and running the tests ! Fixes: 10d3be569243 ("tcp-tso: do not split TSO packets at retransmit time") Signed-off-by: Eric Dumazet Reported-by: Arjun V Tested-by: Arjun V Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- net/ipv4/tcp_output.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 8bd9911fdd16..e00e972c4e6a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2751,7 +2751,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; struct sk_buff *hole = NULL; - u32 last_lost; + u32 max_segs, last_lost; int mib_idx; int fwd_rexmitting = 0; @@ -2771,6 +2771,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk) last_lost = tp->snd_una; } + max_segs = tcp_tso_autosize(sk, tcp_current_mss(sk)); tcp_for_write_queue_from(skb, sk) { __u8 sacked = TCP_SKB_CB(skb)->sacked; int segs; @@ -2784,6 +2785,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk) segs = tp->snd_cwnd - tcp_packets_in_flight(tp); if (segs <= 0) return; + /* In case tcp_shift_skb_data() have aggregated large skbs, + * we need to make sure not sending too bigs TSO packets + */ + segs = min_t(int, segs, max_segs); if (fwd_rexmitting) { begin_fwd: -- cgit v1.2.3-70-g09d2 From 565ce8f32ac4a233b474f401e1d3e7e1de0a31fd Mon Sep 17 00:00:00 2001 From: Nikolay Aleksandrov Date: Mon, 27 Jun 2016 18:34:42 +0200 Subject: net: bridge: fix vlan stats continue counter I made a dumb off-by-one mistake when I added the vlan stats counter dumping code. The increment should happen before the check, not after otherwise we miss one entry when we continue dumping. Fixes: a60c090361ea ("bridge: netlink: export per-vlan stats") Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index a5343c7232bf..85e89f693589 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -1273,7 +1273,7 @@ static int br_fill_linkxstats(struct sk_buff *skb, const struct net_device *dev, struct bridge_vlan_xstats vxi; struct br_vlan_stats stats; - if (vl_idx++ < *prividx) + if (++vl_idx < *prividx) continue; memset(&vxi, 0, sizeof(vxi)); vxi.vid = v->vid; -- cgit v1.2.3-70-g09d2 From ceb56070359b7329b5678b5d95a376fcb24767be Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 27 Jun 2016 21:38:11 +0200 Subject: bpf, perf: delay release of BPF prog after grace period Commit dead9f29ddcc ("perf: Fix race in BPF program unregister") moved destruction of BPF program from free_event_rcu() callback to __free_event(), which is problematic if used with tail calls: if prog A is attached as trace event directly, but at the same time present in a tail call map used by another trace event program elsewhere, then we need to delay destruction via RCU grace period since it can still be in use by the program doing the tail call (the prog first needs to be dropped from the tail call map, then trace event with prog A attached destroyed, so we get immediate destruction). Fixes: dead9f29ddcc ("perf: Fix race in BPF program unregister") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Jann Horn Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 4 ++++ kernel/events/core.c | 2 +- 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8269cafc6eb1..0de4de6dd43e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -264,6 +264,10 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd) static inline void bpf_prog_put(struct bpf_prog *prog) { } + +static inline void bpf_prog_put_rcu(struct bpf_prog *prog) +{ +} #endif /* CONFIG_BPF_SYSCALL */ /* verifier prototypes for helper functions called from eBPF programs */ diff --git a/kernel/events/core.c b/kernel/events/core.c index 274450efea90..d00c47b67a97 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -7531,7 +7531,7 @@ static void perf_event_free_bpf_prog(struct perf_event *event) prog = event->tp_event->prog; if (prog) { event->tp_event->prog = NULL; - bpf_prog_put(prog); + bpf_prog_put_rcu(prog); } } -- cgit v1.2.3-70-g09d2 From 5b4d10f5e0369ed79434593b7cd8e85eebbe473f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 27 Jun 2016 23:50:29 +0300 Subject: qlcnic: use the correct ring in qlcnic_83xx_process_rcv_ring_diag() There is a static checker warning here "warn: mask and shift to zero" and the code sets "ring" to zero every time. From looking at how QLCNIC_FETCH_RING_ID() is used in qlcnic_83xx_process_rcv_ring() the qlcnic_83xx_hndl() should be removed. Fixes: 4be41e92f7c6 ('qlcnic: 83xx data path routines') Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index 7bd6f25b4625..607bb7d4514d 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -2220,7 +2220,7 @@ void qlcnic_83xx_process_rcv_ring_diag(struct qlcnic_host_sds_ring *sds_ring) if (!opcode) return; - ring = QLCNIC_FETCH_RING_ID(qlcnic_83xx_hndl(sts_data[0])); + ring = QLCNIC_FETCH_RING_ID(sts_data[0]); qlcnic_83xx_process_rcv_diag(adapter, ring, sts_data); desc = &sds_ring->desc_head[consumer]; desc->status_desc_data[0] = cpu_to_le64(STATUS_OWNER_PHANTOM); -- cgit v1.2.3-70-g09d2 From c041778c966c92c964033f1cdfee60a9f2b5e465 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 29 Jun 2016 10:36:39 +0200 Subject: cfg80211: fix proto in ieee80211_data_to_8023 for frames without LLC header The PDU length of incoming LLC frames is set to the total skb payload size in __ieee80211_data_to_8023() of net/wireless/util.c which incorrectly includes the length of the IEEE 802.11 header. The resulting LLC frame header has a too large PDU length, causing the llc_fixup_skb() function of net/llc/llc_input.c to reject the incoming skb, effectively breaking STP. Solve the problem by properly subtracting the IEEE 802.11 frame header size from the PDU length, allowing the LLC processor to pick up the incoming control messages. Special thanks to Gerry Rozema for tracking down the regression and proposing a suitable patch. 
Fixes: 2d1c304cb2d5 ("cfg80211: add function for 802.3 conversion with separate output buffer") Cc: stable@vger.kernel.org Reported-by: Gerry Rozema Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/wireless/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/wireless/util.c b/net/wireless/util.c index 4e809e978b7d..2443ee30ba5b 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -509,7 +509,7 @@ static int __ieee80211_data_to_8023(struct sk_buff *skb, struct ethhdr *ehdr, * replace EtherType */ hdrlen += ETH_ALEN + 2; else - tmp.h_proto = htons(skb->len); + tmp.h_proto = htons(skb->len - hdrlen); pskb_pull(skb, hdrlen); -- cgit v1.2.3-70-g09d2 From 889ad4566604610804df984e1a3dd5e2c66256e5 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Tue, 28 Jun 2016 20:41:31 -0700 Subject: e1000e: keep VLAN interfaces functional after rxvlan off I've got a bug report about an e1000e interface, where a VLAN interface is set up on top of it: $ ip link add link ens1f0 name ens1f0.99 type vlan id 99 $ ip link set ens1f0 up $ ip link set ens1f0.99 up $ ip addr add 192.168.99.92 dev ens1f0.99 At this point, I can ping another host on vlan 99, ip 192.168.99.91. However, if I do the following: $ ethtool -K ens1f0 rxvlan off Then no traffic passes on ens1f0.99. It comes back if I toggle rxvlan on again. I'm not sure if this is actually intended behavior, or if there's a lack of software VLAN stripping fallback, or what, but things continue to work if I simply don't call e1000e_vlan_strip_disable() if there are active VLANs (plagiarizing a function from the e1000 driver here) on the interface. Also slipped a related-ish fix to the kerneldoc text for e1000e_vlan_strip_disable here... Signed-off-by: Jarod Wilson Tested-by: Aaron Brown Signed-off-by: Jeff Kirsher Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/e1000e/netdev.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 75e60897b7e7..73f745205a1c 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -154,6 +154,16 @@ void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val) writel(val, hw->hw_addr + reg); } +static bool e1000e_vlan_used(struct e1000_adapter *adapter) +{ + u16 vid; + + for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) + return true; + + return false; +} + /** * e1000_regdump - register printout routine * @hw: pointer to the HW structure @@ -2789,7 +2799,7 @@ static void e1000e_vlan_filter_enable(struct e1000_adapter *adapter) } /** - * e1000e_vlan_strip_enable - helper to disable HW VLAN stripping + * e1000e_vlan_strip_disable - helper to disable HW VLAN stripping * @adapter: board private structure to initialize **/ static void e1000e_vlan_strip_disable(struct e1000_adapter *adapter) @@ -3443,7 +3453,8 @@ static void e1000e_set_rx_mode(struct net_device *netdev) ew32(RCTL, rctl); - if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX) + if (netdev->features & NETIF_F_HW_VLAN_CTAG_RX || + e1000e_vlan_used(adapter)) e1000e_vlan_strip_enable(adapter); else e1000e_vlan_strip_disable(adapter); -- cgit v1.2.3-70-g09d2 From b560f03ddfb072bca65e9440ff0dc4f9b1d1f056 Mon Sep 17 00:00:00 2001 From: David Barroso Date: Tue, 28 Jun 2016 11:16:43 +0300 Subject: neigh: Explicitly declare RCU-bh read side critical section in neigh_xmit() neigh_xmit() expects to be called inside an RCU-bh read side critical section, and while one of its two current callers gets this right, the other one doesn't. More specifically, neigh_xmit() has two callers, mpls_forward() and mpls_output(), and while both callers call neigh_xmit() under rcu_read_lock(), this provides sufficient protection for neigh_xmit() only in the case of mpls_forward(), as that is always called from softirq context and therefore doesn't need explicit BH protection, while mpls_output() can be called from process context with softirqs enabled. When mpls_output() is called from process context, with softirqs enabled, we can be preempted by a softirq at any time, and RCU-bh considers the completion of a softirq as signaling the end of any pending read-side critical sections, so if we do get a softirq while we are in the part of neigh_xmit() that expects to be run inside an RCU-bh read side critical section, we can end up with an unexpected RCU grace period running right in the middle of that critical section, making things go boom. This patch fixes this impedance mismatch in the callee, by making neigh_xmit() always take rcu_read_{,un}lock_bh() around the code that expects to be treated as an RCU-bh read side critical section, as this seems a safer option than fixing it in the callers. Fixes: 4fd3d7d9e868f ("neigh: Add helper function neigh_xmit") Signed-off-by: David Barroso Signed-off-by: Lennert Buytenhek Acked-by: David Ahern Acked-by: Robert Shearman Signed-off-by: David S. 
Miller --- net/core/neighbour.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 29dd8cc22bbf..510cd62fcb99 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2469,13 +2469,17 @@ int neigh_xmit(int index, struct net_device *dev, tbl = neigh_tables[index]; if (!tbl) goto out; + rcu_read_lock_bh(); neigh = __neigh_lookup_noref(tbl, addr, dev); if (!neigh) neigh = __neigh_create(tbl, addr, dev, false); err = PTR_ERR(neigh); - if (IS_ERR(neigh)) + if (IS_ERR(neigh)) { + rcu_read_unlock_bh(); goto out_kfree_skb; + } err = neigh->output(neigh, skb); + rcu_read_unlock_bh(); } else if (index == NEIGH_LINK_TABLE) { err = dev_hard_header(skb, dev, ntohs(skb->protocol), -- cgit v1.2.3-70-g09d2 From 34c7bb4705a0a2d344b0c82eaf3d3bfa2bc9da45 Mon Sep 17 00:00:00 2001 From: Sudarsana Reddy Kalluru Date: Tue, 28 Jun 2016 07:46:03 -0400 Subject: qed: Protect the doorbell BAR with the write barriers. SPQ doorbell is currently protected with the compilation barrier. Under the stress scenarios, we may get into a state where (due to the weak ordering) several ramrod doorbells were written to the BAR with an out-of-order producer values. Need to change the barrier type to a write barrier to make sure that the write buffer is flushed after each doorbell. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_spq.c | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c index 67d9893774d8..b122f6013b6c 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_spq.c +++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c @@ -213,19 +213,15 @@ static int qed_spq_hw_post(struct qed_hwfn *p_hwfn, SET_FIELD(db.params, CORE_DB_DATA_AGG_VAL_SEL, DQ_XCM_CORE_SPQ_PROD_CMD); db.agg_flags = DQ_XCM_CORE_DQ_CF_CMD; - - /* validate producer is up to-date */ - rmb(); - db.spq_prod = cpu_to_le16(qed_chain_get_prod_idx(p_chain)); - /* do not reorder */ - barrier(); + /* make sure the SPQE is updated before the doorbell */ + wmb(); DOORBELL(p_hwfn, qed_db_addr(p_spq->cid, DQ_DEMS_LEGACY), *(u32 *)&db); /* make sure doorbell is rang */ - mmiowb(); + wmb(); DP_VERBOSE(p_hwfn, QED_MSG_SPQ, "Doorbelled [0x%08x, CID 0x%08x] with Flags: %02x agg_params: %02x, prod: %04x\n", -- cgit v1.2.3-70-g09d2 From d913d3a763a6f66a862a6eafcf6da89a7905832a Mon Sep 17 00:00:00 2001 From: Samuel Gauthier Date: Tue, 28 Jun 2016 17:22:26 +0200 Subject: openvswitch: fix conntrack netlink event delivery Only the first and last netlink message for a particular conntrack are actually sent. The first message is sent through nf_conntrack_confirm when the conntrack is committed. The last one is sent when the conntrack is destroyed on timeout. The other conntrack state change messages are not advertised. When the conntrack subsystem is used from netfilter, nf_conntrack_confirm is called for each packet, from the postrouting hook, which in turn calls nf_ct_deliver_cached_events to send the state change netlink messages. This commit fixes the problem by calling nf_ct_deliver_cached_events in the non-commit case as well. Fixes: 7f8a436eaa2c ("openvswitch: Add conntrack action") CC: Joe Stringer CC: Justin Pettit CC: Andy Zhou CC: Thomas Graf Signed-off-by: Samuel Gauthier Acked-by: Joe Stringer Signed-off-by: David S. 
Miller --- net/openvswitch/conntrack.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 3d5feede962d..d84312584ee4 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -818,8 +818,18 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key, */ state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED; __ovs_ct_update_key(key, state, &info->zone, exp->master); - } else - return __ovs_ct_lookup(net, key, info, skb); + } else { + struct nf_conn *ct; + int err; + + err = __ovs_ct_lookup(net, key, info, skb); + if (err) + return err; + + ct = (struct nf_conn *)skb->nfct; + if (ct) + nf_ct_deliver_cached_events(ct); + } return 0; } -- cgit v1.2.3-70-g09d2
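
A note on the descriptor-table pattern behind the mlx5e ethtool stats rename above: each user-visible string is paired with the byte offset of a u64 counter inside a stats struct, so the strings loop and the values loop can never drift apart. The following standalone C sketch shows the same idea; the names demo_rq_stats, DEMO_STAT and read_counter are invented for illustration and are not part of the driver.

#include <stdio.h>
#include <stddef.h>
#include <stdint.h>

struct demo_rq_stats {
    uint64_t packets;
    uint64_t bytes;
    uint64_t csum_none;
};

struct counter_desc {
    const char *name;
    size_t offset;
};

/* pair an ethtool-style name with the field's offset in the stats struct */
#define DEMO_STAT(type, fld) { #fld, offsetof(type, fld) }

static const struct counter_desc rq_desc[] = {
    DEMO_STAT(struct demo_rq_stats, packets),
    DEMO_STAT(struct demo_rq_stats, bytes),
    DEMO_STAT(struct demo_rq_stats, csum_none),
};

/* read counter described by d out of an arbitrary stats blob via its offset */
static uint64_t read_counter(const void *stats, const struct counter_desc *d)
{
    return *(const uint64_t *)((const char *)stats + d->offset);
}

int main(void)
{
    struct demo_rq_stats s = { .packets = 10, .bytes = 1500, .csum_none = 1 };
    size_t i;

    for (i = 0; i < sizeof(rq_desc) / sizeof(rq_desc[0]); i++)
        printf("%-10s %llu\n", rq_desc[i].name,
               (unsigned long long)read_counter(&s, &rq_desc[i]));
    return 0;
}

Renaming a counter then touches only the descriptor table; the dump code, like rq_stats_desc above, stays untouched.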
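
The ibmvnic change above is an instance of a general rule: if an iteration may list_del() and free the current node, the successor has to be cached before the node is removed, which is exactly what list_for_each_entry_safe() does. A minimal userspace sketch of the same discipline on a hand-rolled list follows; demo_node and delete_matching are illustrative names, not kernel API.

#include <stdio.h>
#include <stdlib.h>

struct demo_node {
    int id;
    struct demo_node *next;
};

/* Remove every node whose id matches. The "safe" part is saving n->next
 * into tmp before free(n), mirroring list_for_each_entry_safe(). */
static void delete_matching(struct demo_node **head, int id)
{
    struct demo_node **pp = head;
    struct demo_node *n, *tmp;

    for (n = *head; n; n = tmp) {
        tmp = n->next;          /* cache the successor first */
        if (n->id == id) {
            *pp = tmp;          /* unlink ... */
            free(n);            /* ... then it is safe to free */
        } else {
            pp = &n->next;
        }
    }
}

int main(void)
{
    struct demo_node *head = NULL, *n;
    int i;

    for (i = 3; i >= 1; i--) {  /* build 1 -> 2 -> 3 */
        n = malloc(sizeof(*n));
        n->id = i;
        n->next = head;
        head = n;
    }
    delete_matching(&head, 2);
    for (n = head; n; n = n->next)
        printf("%d\n", n->id);  /* prints 1 and 3 */
    while (head) {              /* cleanup */
        n = head->next;
        free(head);
        head = n;
    }
    return 0;
}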
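
The tcp_xmit_retransmit_queue() fix above boils down to clamping each retransmit burst: SACK processing may have coalesced skbs far beyond what one TSO packet should carry, so the congestion-window budget is capped by the autosized segment limit before every send. Here is a rough userspace sketch of that clamping, assuming made-up numbers; retransmit(), cwnd_budget and max_segs are illustrative, not the kernel's variables.

#include <stdio.h>

/* Split a retransmit of seg_count MSS-sized segments into bursts of at
 * most max_segs segments, the way the fix caps "segs" with the
 * tcp_tso_autosize() result before each retransmission. */
static void retransmit(int seg_count, int cwnd_budget, int max_segs)
{
    while (seg_count > 0 && cwnd_budget > 0) {
        int segs = cwnd_budget;

        if (segs > max_segs)
            segs = max_segs;    /* the added min_t() clamp */
        if (segs > seg_count)
            segs = seg_count;
        printf("send %d segments\n", segs);
        seg_count -= segs;
        cwnd_budget -= segs;
    }
}

int main(void)
{
    /* 45 coalesced segments to resend, window allows 40, device-friendly
     * burst size is 8: no single transmission exceeds 8 segments */
    retransmit(45, 40, 8);
    return 0;
}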
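
The one-character bridge fix above (++vl_idx < *prividx instead of vl_idx++ < *prividx) is about resumable dumps: the saved cursor names the entry that failed to fit, so the next pass must retry exactly that entry. A small standalone C sketch of the cursor discipline follows; dump_some, NUM_ENTRIES and MAX_PER_PASS are invented for illustration.

#include <stdio.h>

#define NUM_ENTRIES 7
#define MAX_PER_PASS 3  /* pretend the message buffer fills after 3 entries */

/* Emit entries, skipping the ones already sent in earlier passes.
 * Returns 0 when done, -1 when it ran out of room for this pass. */
static int dump_some(int *prividx)
{
    int idx = 0, room = MAX_PER_PASS, i;

    for (i = 0; i < NUM_ENTRIES; i++) {
        /* pre-increment, as in the fixed bridge code: idx becomes the
         * 1-based position of entry i before the skip test */
        if (++idx < *prividx)
            continue;           /* dumped in an earlier pass */
        if (!room--) {
            *prividx = idx;     /* remember the entry that did not fit */
            return -1;
        }
        printf("entry %d\n", i);
    }
    *prividx = 0;
    return 0;
}

int main(void)
{
    int prividx = 0, pass = 0;

    while (dump_some(&prividx) < 0)
        printf("pass %d full, resume cursor %d\n", ++pass, prividx);
    return 0;
}

With the post-increment form the skip test compares the 0-based index against a 1-based cursor, so the entry recorded in the cursor is skipped on resume and one entry is silently lost, which is the bug the commit describes.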