From 6b127c71fbdd3daacfd8b9f80b8e6ebfb70a889e Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Wed, 10 Dec 2014 21:26:10 +0530 Subject: mac80211: Move IEEE80211_TX_CTL_PS_RESPONSE Move IEEE80211_TX_CTL_PS_RESPONSE to info->control.flags since this is used only in the TX path (by ath9k). This frees up a bit which can be used for other purposes. Signed-off-by: Sujith Manoharan Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 58d719ddaa60..b36e60d4c518 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -505,8 +505,6 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_DONTFRAG: Don't fragment this packet even if it * would be fragmented by size (this is optional, only used for * monitor injection). - * @IEEE80211_TX_CTL_PS_RESPONSE: This frame is a response to a poll - * frame (PS-Poll or uAPSD). * * Note: If you have to add new flags to the enumeration, then don't * forget to update %IEEE80211_TX_TEMPORARY_FLAGS when necessary. @@ -542,7 +540,6 @@ enum mac80211_tx_info_flags { IEEE80211_TX_STATUS_EOSP = BIT(28), IEEE80211_TX_CTL_USE_MINRATE = BIT(29), IEEE80211_TX_CTL_DONTFRAG = BIT(30), - IEEE80211_TX_CTL_PS_RESPONSE = BIT(31), }; #define IEEE80211_TX_CTL_STBC_SHIFT 23 @@ -552,11 +549,14 @@ enum mac80211_tx_info_flags { * * @IEEE80211_TX_CTRL_PORT_CTRL_PROTO: this frame is a port control * protocol frame (e.g. EAP) + * @IEEE80211_TX_CTRL_PS_RESPONSE: This frame is a response to a poll + * frame (PS-Poll or uAPSD). * * These flags are used in tx_info->control.flags. */ enum mac80211_tx_control_flags { IEEE80211_TX_CTRL_PORT_CTRL_PROTO = BIT(0), + IEEE80211_TX_CTRL_PS_RESPONSE = BIT(1), }; /* -- cgit v1.2.3-70-g09d2 From 5cf16616e152dd5c274a65954c77f64892d025a8 Mon Sep 17 00:00:00 2001 From: Sujith Manoharan Date: Wed, 10 Dec 2014 21:26:11 +0530 Subject: mac80211: Fix accounting of multicast frames Since multicast frames are marked as no-ack, using IEEE80211_TX_STAT_ACK to check if they have been successfully transmitted by the driver is incorrect since a driver can choose to ignore transmission status for no-ack frames. This results in incorrect accounting for such frames. To fix this issue, this patch introduces a new flag that can be used by drivers to indicate error-free transmission of no-ack frames. Signed-off-by: Sujith Manoharan [add a note about not setting the flag for non-no-ack frames] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++++++ net/mac80211/status.c | 9 ++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b36e60d4c518..b24ef577aed9 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -505,6 +505,11 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_DONTFRAG: Don't fragment this packet even if it * would be fragmented by size (this is optional, only used for * monitor injection). + * @IEEE80211_TX_STAT_NOACK_TRANSMITTED: A frame that was marked with + * IEEE80211_TX_CTL_NO_ACK has been successfully transmitted without + * any errors (like issues specific to the driver/HW). + * This flag must not be set for frames that don't request no-ack + * behaviour with IEEE80211_TX_CTL_NO_ACK. * * Note: If you have to add new flags to the enumeration, then don't * forget to update %IEEE80211_TX_TEMPORARY_FLAGS when necessary. 
@@ -540,6 +545,7 @@ enum mac80211_tx_info_flags { IEEE80211_TX_STATUS_EOSP = BIT(28), IEEE80211_TX_CTL_USE_MINRATE = BIT(29), IEEE80211_TX_CTL_DONTFRAG = BIT(30), + IEEE80211_TX_STAT_NOACK_TRANSMITTED = BIT(31), }; #define IEEE80211_TX_CTL_STBC_SHIFT 23 diff --git a/net/mac80211/status.c b/net/mac80211/status.c index bb146f377ee4..d64037c96729 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -664,13 +664,15 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, struct ieee80211_supported_band *sband; int retry_count; int rates_idx; - bool acked; + bool acked, noack_success; rates_idx = ieee80211_tx_get_rates(hw, info, &retry_count); sband = hw->wiphy->bands[info->band]; acked = !!(info->flags & IEEE80211_TX_STAT_ACK); + noack_success = !!(info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED); + if (pubsta) { struct sta_info *sta; @@ -696,7 +698,7 @@ void ieee80211_tx_status_noskb(struct ieee80211_hw *hw, rate_control_tx_status_noskb(local, sband, sta, info); } - if (acked) { + if (acked || noack_success) { local->dot11TransmittedFrameCount++; if (!pubsta) local->dot11MulticastTransmittedFrameCount++; @@ -856,7 +858,8 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) * Fragments are passed to low-level drivers as separate skbs, so these * are actually fragments, not frames. Update frame counters only for * the first fragment of the frame. */ - if (info->flags & IEEE80211_TX_STAT_ACK) { + if ((info->flags & IEEE80211_TX_STAT_ACK) || + (info->flags & IEEE80211_TX_STAT_NOACK_TRANSMITTED)) { if (ieee80211_is_first_frag(hdr->seq_ctrl)) { local->dot11TransmittedFrameCount++; if (is_multicast_ether_addr(hdr->addr1)) -- cgit v1.2.3-70-g09d2 From 848955ccf0bdf42fff33e021a76f6daec98fe59b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 Nov 2014 12:48:42 +0100 Subject: mac80211: move U-APSD enablement to vif flags In order to let drivers have more dynamic U-APSD support, move the enablement flag to the virtual interface driver flags. This lets drivers not only set it up differently for different interfaces, but also enable/disable on the fly if needed. 
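
As an illustration (not part of this patch), the driver-side pattern now looks roughly like the sketch below; the ath10k and cw1200 hunks in the diff do exactly this, and the iwlwifi hunk additionally gates the flag on a firmware capability. The "mydrv" names and the fw_supports_uapsd capability bit are hypothetical.

    #include <net/mac80211.h>

    /*
     * Sketch only: instead of setting IEEE80211_HW_SUPPORTS_UAPSD once in
     * hw->flags at registration time, a driver now advertises U-APSD per
     * interface, and may set or clear the bit as late as authentication.
     */
    static int mydrv_add_interface(struct ieee80211_hw *hw,
                                   struct ieee80211_vif *vif)
    {
            struct mydrv_priv *priv = hw->priv;     /* hypothetical driver data */

            if (priv->fw_supports_uapsd)            /* hypothetical capability flag */
                    vif->driver_flags |= IEEE80211_VIF_SUPPORTS_UAPSD;

            return 0;
    }
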
Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/mac.c | 3 ++- drivers/net/wireless/cw1200/main.c | 1 - drivers/net/wireless/cw1200/sta.c | 1 + drivers/net/wireless/iwlwifi/mvm/mac80211.c | 13 ++++++------- drivers/net/wireless/ti/wl1251/main.c | 5 ++--- drivers/net/wireless/ti/wlcore/main.c | 2 +- include/net/mac80211.h | 15 +++++++-------- net/mac80211/debugfs.c | 2 -- net/mac80211/main.c | 4 ---- net/mac80211/mlme.c | 7 ++++++- 10 files changed, 25 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c index c4005670cba2..2619db1e3e74 100644 --- a/drivers/net/wireless/ath/ath10k/mac.c +++ b/drivers/net/wireless/ath/ath10k/mac.c @@ -2871,6 +2871,8 @@ static int ath10k_add_interface(struct ieee80211_hw *hw, int bit; u32 vdev_param; + vif->driver_flags |= IEEE80211_VIF_SUPPORTS_UAPSD; + mutex_lock(&ar->conf_mutex); memset(arvif, 0, sizeof(*arvif)); @@ -5024,7 +5026,6 @@ int ath10k_mac_register(struct ath10k *ar) ar->hw->flags = IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_SUPPORTS_PS | IEEE80211_HW_SUPPORTS_DYNAMIC_PS | - IEEE80211_HW_SUPPORTS_UAPSD | IEEE80211_HW_MFP_CAPABLE | IEEE80211_HW_REPORTS_TX_ACK_STATUS | IEEE80211_HW_HAS_RATE_CONTROL | diff --git a/drivers/net/wireless/cw1200/main.c b/drivers/net/wireless/cw1200/main.c index 3e78cc3ccb78..0da6e423da63 100644 --- a/drivers/net/wireless/cw1200/main.c +++ b/drivers/net/wireless/cw1200/main.c @@ -282,7 +282,6 @@ static struct ieee80211_hw *cw1200_init_common(const u8 *macaddr, IEEE80211_HW_SUPPORTS_PS | IEEE80211_HW_SUPPORTS_DYNAMIC_PS | IEEE80211_HW_REPORTS_TX_ACK_STATUS | - IEEE80211_HW_SUPPORTS_UAPSD | IEEE80211_HW_CONNECTION_MONITOR | IEEE80211_HW_AMPDU_AGGREGATION | IEEE80211_HW_TX_AMPDU_SETUP_IN_HW | diff --git a/drivers/net/wireless/cw1200/sta.c b/drivers/net/wireless/cw1200/sta.c index 5b84664db13b..a1e3237c0be8 100644 --- a/drivers/net/wireless/cw1200/sta.c +++ b/drivers/net/wireless/cw1200/sta.c @@ -213,6 +213,7 @@ int cw1200_add_interface(struct ieee80211_hw *dev, /* __le32 auto_calibration_mode = __cpu_to_le32(1); */ vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER | + IEEE80211_VIF_SUPPORTS_UAPSD | IEEE80211_VIF_SUPPORTS_CQM_RSSI; mutex_lock(&priv->conf_mutex); diff --git a/drivers/net/wireless/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/iwlwifi/mvm/mac80211.c index 31a5b3f4266c..346331d3c696 100644 --- a/drivers/net/wireless/iwlwifi/mvm/mac80211.c +++ b/drivers/net/wireless/iwlwifi/mvm/mac80211.c @@ -326,6 +326,8 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) hw->radiotap_vht_details |= IEEE80211_RADIOTAP_VHT_KNOWN_STBC | IEEE80211_RADIOTAP_VHT_KNOWN_BEAMFORMED; hw->rate_control_algorithm = "iwl-mvm-rs"; + hw->uapsd_queues = IWL_MVM_UAPSD_QUEUES; + hw->uapsd_max_sp_len = IWL_UAPSD_MAX_SP; /* * Enable 11w if advertised by firmware and software crypto @@ -336,13 +338,6 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm) !iwlwifi_mod_params.sw_crypto) hw->flags |= IEEE80211_HW_MFP_CAPABLE; - if (mvm->fw->ucode_capa.flags & IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT && - !iwlwifi_mod_params.uapsd_disable) { - hw->flags |= IEEE80211_HW_SUPPORTS_UAPSD; - hw->uapsd_queues = IWL_MVM_UAPSD_QUEUES; - hw->uapsd_max_sp_len = IWL_UAPSD_MAX_SP; - } - if (mvm->fw->ucode_capa.api[0] & IWL_UCODE_TLV_API_LMAC_SCAN || mvm->fw->ucode_capa.capa[0] & IWL_UCODE_TLV_CAPA_UMAC_SCAN) { hw->flags |= IEEE80211_SINGLE_HW_SCAN_ON_ALL_BANDS; @@ -1147,6 +1142,10 @@ static int iwl_mvm_mac_add_interface(struct ieee80211_hw *hw, mvm->bf_allowed_vif = 
mvmvif; vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER | IEEE80211_VIF_SUPPORTS_CQM_RSSI; + if (mvm->fw->ucode_capa.flags & + IWL_UCODE_TLV_FLAGS_UAPSD_SUPPORT && + !iwlwifi_mod_params.uapsd_disable) + vif->driver_flags |= IEEE80211_VIF_SUPPORTS_UAPSD; } /* diff --git a/drivers/net/wireless/ti/wl1251/main.c b/drivers/net/wireless/ti/wl1251/main.c index 0b30a7b4d663..d4ba009ac9aa 100644 --- a/drivers/net/wireless/ti/wl1251/main.c +++ b/drivers/net/wireless/ti/wl1251/main.c @@ -500,6 +500,7 @@ static int wl1251_op_add_interface(struct ieee80211_hw *hw, int ret = 0; vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER | + IEEE80211_VIF_SUPPORTS_UAPSD | IEEE80211_VIF_SUPPORTS_CQM_RSSI; wl1251_debug(DEBUG_MAC80211, "mac80211 add interface type %d mac %pM", @@ -1480,9 +1481,7 @@ int wl1251_init_ieee80211(struct wl1251 *wl) /* unit us */ /* FIXME: find a proper value */ - wl->hw->flags = IEEE80211_HW_SIGNAL_DBM | - IEEE80211_HW_SUPPORTS_PS | - IEEE80211_HW_SUPPORTS_UAPSD; + wl->hw->flags = IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_SUPPORTS_PS; wl->hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_ADHOC); diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 6ad3fcedab9b..2a99456b6b8f 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -2508,6 +2508,7 @@ static int wl1271_op_add_interface(struct ieee80211_hw *hw, } vif->driver_flags |= IEEE80211_VIF_BEACON_FILTER | + IEEE80211_VIF_SUPPORTS_UAPSD | IEEE80211_VIF_SUPPORTS_CQM_RSSI; wl1271_debug(DEBUG_MAC80211, "mac80211 add interface type %d mac %pM", @@ -5776,7 +5777,6 @@ static int wl1271_init_ieee80211(struct wl1271 *wl) wl->hw->flags = IEEE80211_HW_SIGNAL_DBM | IEEE80211_HW_SUPPORTS_PS | IEEE80211_HW_SUPPORTS_DYNAMIC_PS | - IEEE80211_HW_SUPPORTS_UAPSD | IEEE80211_HW_HAS_RATE_CONTROL | IEEE80211_HW_CONNECTION_MONITOR | IEEE80211_HW_REPORTS_TX_ACK_STATUS | diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b24ef577aed9..4913c00539fb 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1187,10 +1187,15 @@ struct ieee80211_channel_switch { * monitoring on this virtual interface -- i.e. it can monitor * connection quality related parameters, such as the RSSI level and * provide notifications if configured trigger levels are reached. + * @IEEE80211_VIF_SUPPORTS_UAPSD: The device can do U-APSD for this + * interface. This flag should be set during interface addition, + * but may be set/cleared as late as authentication to an AP. It is + * only valid for managed/station mode interfaces. */ enum ieee80211_vif_flags { IEEE80211_VIF_BEACON_FILTER = BIT(0), IEEE80211_VIF_SUPPORTS_CQM_RSSI = BIT(1), + IEEE80211_VIF_SUPPORTS_UAPSD = BIT(2), }; /** @@ -1589,11 +1594,6 @@ struct ieee80211_tx_control { * @IEEE80211_HW_MFP_CAPABLE: * Hardware supports management frame protection (MFP, IEEE 802.11w). * - * @IEEE80211_HW_SUPPORTS_UAPSD: - * Hardware supports Unscheduled Automatic Power Save Delivery - * (U-APSD) in managed mode. The mode is configured with - * conf_tx() operation. - * * @IEEE80211_HW_REPORTS_TX_ACK_STATUS: * Hardware can provide ack status reports of Tx frames to * the stack. 
@@ -1679,8 +1679,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_MFP_CAPABLE = 1<<13, IEEE80211_HW_WANT_MONITOR_VIF = 1<<14, IEEE80211_HW_NO_AUTO_VIF = 1<<15, - /* free slot */ - IEEE80211_HW_SUPPORTS_UAPSD = 1<<17, + /* free slots */ IEEE80211_HW_REPORTS_TX_ACK_STATUS = 1<<18, IEEE80211_HW_CONNECTION_MONITOR = 1<<19, IEEE80211_HW_QUEUE_CONTROL = 1<<20, @@ -2032,7 +2031,7 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * enabled whenever user has enabled powersave. * * Driver informs U-APSD client support by enabling - * %IEEE80211_HW_SUPPORTS_UAPSD flag. The mode is configured through the + * %IEEE80211_VIF_SUPPORTS_UAPSD flag. The mode is configured through the * uapsd parameter in conf_tx() operation. Hardware needs to send the QoS * Nullfunc frames and stay awake until the service period has ended. To * utilize U-APSD, dynamic powersave is disabled for voip AC and all frames diff --git a/net/mac80211/debugfs.c b/net/mac80211/debugfs.c index 54a189f0393e..eeb0bbd69d98 100644 --- a/net/mac80211/debugfs.c +++ b/net/mac80211/debugfs.c @@ -303,8 +303,6 @@ static ssize_t hwflags_read(struct file *file, char __user *user_buf, sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_DYNAMIC_PS\n"); if (local->hw.flags & IEEE80211_HW_MFP_CAPABLE) sf += scnprintf(buf + sf, mxln - sf, "MFP_CAPABLE\n"); - if (local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) - sf += scnprintf(buf + sf, mxln - sf, "SUPPORTS_UAPSD\n"); if (local->hw.flags & IEEE80211_HW_REPORTS_TX_ACK_STATUS) sf += scnprintf(buf + sf, mxln - sf, "REPORTS_TX_ACK_STATUS\n"); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 6ab99da38db9..d9ce33663c73 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -916,10 +916,6 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) } } - WARN((local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) - && (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK), - "U-APSD not supported with HW_PS_NULLFUNC_STACK\n"); - /* * Calculate scan IE length -- we need this to alloc * memory and to subtract from the driver limit. It diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 75a9bf50207e..f495b800b92c 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -4667,8 +4667,13 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, ifmgd->flags |= IEEE80211_STA_DISABLE_VHT; rcu_read_unlock(); + if (WARN((sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD) && + (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK), + "U-APSD not supported with HW_PS_NULLFUNC_STACK\n")) + sdata->vif.driver_flags &= ~IEEE80211_VIF_SUPPORTS_UAPSD; + if (bss->wmm_used && bss->uapsd_supported && - (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) { + (sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD)) { assoc_data->uapsd = true; ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED; } else { -- cgit v1.2.3-70-g09d2 From ad30ca2c03cecfb1b0749874bdceead269542de6 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Mon, 15 Dec 2014 19:25:59 +0200 Subject: cfg80211: allow usermode to query wiphy specific regdom If a wiphy-idx is specified, the kernel will return the wiphy specific regdomain, if such exists. Otherwise return the global regdom. When no wiphy-idx is specified, return the global regdomain as well as all wiphy-specific regulatory domains in the system, via a new nested list of attributes. Add a new attribute for each wiphy-specific regdomain, for usermode to identify it as such. 
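
A rough userspace sketch of the query (libnl-genl-3, not part of this patch; reply parsing, cleanup and most error handling are omitted): passing NL80211_ATTR_WIPHY selects one device's regdomain, while a dump request returns the global regdomain plus all wiphy-specific ones.

    #include <stdint.h>
    #include <netlink/genl/genl.h>
    #include <netlink/genl/ctrl.h>
    #include <linux/nl80211.h>

    /* Ask for the regdomain of one wiphy; with this patch the kernel returns
     * the wiphy-specific regdomain if the device has one, otherwise the
     * global one.  Omitting NL80211_ATTR_WIPHY (or using NLM_F_DUMP) keeps
     * the global behaviour, respectively lists all regdomains.
     */
    static int query_wiphy_regdom(uint32_t wiphy_idx)
    {
            struct nl_sock *sk = nl_socket_alloc();
            struct nl_msg *msg;
            int family;

            if (!sk || genl_connect(sk) < 0)
                    return -1;

            family = genl_ctrl_resolve(sk, "nl80211");
            msg = nlmsg_alloc();
            if (family < 0 || !msg)
                    return -1;

            genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
                        NL80211_CMD_GET_REG, 0);
            nla_put_u32(msg, NL80211_ATTR_WIPHY, wiphy_idx);

            nl_send_auto(sk, msg);
            return nl_recvmsgs_default(sk);         /* reply handling not shown */
    }
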
Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 10 ++- net/wireless/nl80211.c | 173 ++++++++++++++++++++++++++++++++++--------- net/wireless/reg.c | 2 +- net/wireless/reg.h | 1 + 4 files changed, 151 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b37bd5a1cb82..2d384d041224 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -252,7 +252,15 @@ * %NL80211_ATTR_IFINDEX. * * @NL80211_CMD_GET_REG: ask the wireless core to send us its currently set - * regulatory domain. + * regulatory domain. If %NL80211_ATTR_WIPHY is specified and the device + * has a private regulatory domain, it will be returned. Otherwise, the + * global regdomain will be returned. + * A device will have a private regulatory domain if it uses the + * regulatory_hint() API. Even when a private regdomain is used the channel + * information will still be mended according to further hints from + * the regulatory core to help with compliance. A dump version of this API + * is now available which will returns the global regdomain as well as + * all private regdomains of present wiphys (for those that have it). * @NL80211_CMD_SET_REG: Set current regulatory domain. CRDA sends this command * after being queried by the kernel. CRDA replies by sending a regulatory * domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a17d6bc6b22c..2d5dc428c5ab 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -5327,42 +5327,20 @@ static int nl80211_update_mesh_config(struct sk_buff *skb, return err; } -static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) +static int nl80211_put_regdom(const struct ieee80211_regdomain *regdom, + struct sk_buff *msg) { - const struct ieee80211_regdomain *regdom; - struct sk_buff *msg; - void *hdr = NULL; struct nlattr *nl_reg_rules; unsigned int i; - if (!cfg80211_regdomain) - return -EINVAL; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return -ENOBUFS; - - hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, - NL80211_CMD_GET_REG); - if (!hdr) - goto put_failure; - - if (reg_last_request_cell_base() && - nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, - NL80211_USER_REG_HINT_CELL_BASE)) - goto nla_put_failure; - - rcu_read_lock(); - regdom = rcu_dereference(cfg80211_regdomain); - if (nla_put_string(msg, NL80211_ATTR_REG_ALPHA2, regdom->alpha2) || (regdom->dfs_region && nla_put_u8(msg, NL80211_ATTR_DFS_REGION, regdom->dfs_region))) - goto nla_put_failure_rcu; + goto nla_put_failure; nl_reg_rules = nla_nest_start(msg, NL80211_ATTR_REG_RULES); if (!nl_reg_rules) - goto nla_put_failure_rcu; + goto nla_put_failure; for (i = 0; i < regdom->n_reg_rules; i++) { struct nlattr *nl_reg_rule; @@ -5377,7 +5355,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) nl_reg_rule = nla_nest_start(msg, i); if (!nl_reg_rule) - goto nla_put_failure_rcu; + goto nla_put_failure; max_bandwidth_khz = freq_range->max_bandwidth_khz; if (!max_bandwidth_khz) @@ -5398,13 +5376,64 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) power_rule->max_eirp) || nla_put_u32(msg, NL80211_ATTR_DFS_CAC_TIME, reg_rule->dfs_cac_ms)) - goto nla_put_failure_rcu; + goto nla_put_failure; nla_nest_end(msg, nl_reg_rule); } - rcu_read_unlock(); nla_nest_end(msg, nl_reg_rules); + return 0; + +nla_put_failure: + 
return -EMSGSIZE; +} + +static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info) +{ + const struct ieee80211_regdomain *regdom = NULL; + struct cfg80211_registered_device *rdev; + struct wiphy *wiphy = NULL; + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOBUFS; + + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, + NL80211_CMD_GET_REG); + if (!hdr) + goto put_failure; + + if (info->attrs[NL80211_ATTR_WIPHY]) { + rdev = cfg80211_get_dev_from_info(genl_info_net(info), info); + if (IS_ERR(rdev)) { + nlmsg_free(msg); + return PTR_ERR(rdev); + } + + wiphy = &rdev->wiphy; + regdom = get_wiphy_regdom(wiphy); + + if (regdom && + nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy))) + goto nla_put_failure; + } + + if (!wiphy && reg_last_request_cell_base() && + nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, + NL80211_USER_REG_HINT_CELL_BASE)) + goto nla_put_failure; + + rcu_read_lock(); + + if (!regdom) + regdom = rcu_dereference(cfg80211_regdomain); + + if (nl80211_put_regdom(regdom, msg)) + goto nla_put_failure_rcu; + + rcu_read_unlock(); genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); @@ -5418,6 +5447,79 @@ put_failure: return -EMSGSIZE; } +static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb, + u32 seq, int flags, struct wiphy *wiphy, + const struct ieee80211_regdomain *regdom) +{ + void *hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags, + NL80211_CMD_GET_REG); + + if (!hdr) + return -1; + + genl_dump_check_consistent(cb, hdr, &nl80211_fam); + + if (nl80211_put_regdom(regdom, msg)) + goto nla_put_failure; + + if (!wiphy && reg_last_request_cell_base() && + nla_put_u32(msg, NL80211_ATTR_USER_REG_HINT_TYPE, + NL80211_USER_REG_HINT_CELL_BASE)) + goto nla_put_failure; + + if (wiphy && + nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy))) + goto nla_put_failure; + + return genlmsg_end(msg, hdr); + +nla_put_failure: + genlmsg_cancel(msg, hdr); + return -EMSGSIZE; +} + +static int nl80211_get_reg_dump(struct sk_buff *skb, + struct netlink_callback *cb) +{ + const struct ieee80211_regdomain *regdom = NULL; + struct cfg80211_registered_device *rdev; + int err, reg_idx, start = cb->args[2]; + + rtnl_lock(); + + if (cfg80211_regdomain && start == 0) { + err = nl80211_send_regdom(skb, cb, cb->nlh->nlmsg_seq, + NLM_F_MULTI, NULL, + rtnl_dereference(cfg80211_regdomain)); + if (err < 0) + goto out_err; + } + + /* the global regdom is idx 0 */ + reg_idx = 1; + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + regdom = get_wiphy_regdom(&rdev->wiphy); + if (!regdom) + continue; + + if (++reg_idx <= start) + continue; + + err = nl80211_send_regdom(skb, cb, cb->nlh->nlmsg_seq, + NLM_F_MULTI, &rdev->wiphy, regdom); + if (err < 0) { + reg_idx--; + break; + } + } + + cb->args[2] = reg_idx; + err = skb->len; +out_err: + rtnl_unlock(); + return err; +} + static int nl80211_set_reg(struct sk_buff *skb, struct genl_info *info) { struct nlattr *tb[NL80211_REG_RULE_ATTR_MAX + 1]; @@ -10225,7 +10327,8 @@ static const struct genl_ops nl80211_ops[] = { }, { .cmd = NL80211_CMD_GET_REG, - .doit = nl80211_get_reg, + .doit = nl80211_get_reg_do, + .dumpit = nl80211_get_reg_dump, .policy = nl80211_policy, .internal_flags = NL80211_FLAG_NEED_RTNL, /* can be retrieved by unprivileged users */ @@ -10983,9 +11086,13 @@ void nl80211_send_reg_change_event(struct regulatory_request *request) goto nla_put_failure; } - if (request->wiphy_idx != WIPHY_IDX_INVALID && - 
nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx)) - goto nla_put_failure; + if (request->wiphy_idx != WIPHY_IDX_INVALID) { + struct wiphy *wiphy = wiphy_idx_to_wiphy(request->wiphy_idx); + + if (wiphy && + nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx)) + goto nla_put_failure; + } genlmsg_end(msg, hdr); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index d83480b6efde..2d9760084b74 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -142,7 +142,7 @@ static const struct ieee80211_regdomain *get_cfg80211_regdom(void) return rtnl_dereference(cfg80211_regdomain); } -static const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) +const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy) { return rtnl_dereference(wiphy->regd); } diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 5e48031ccb9a..4b45d6e61d24 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -38,6 +38,7 @@ unsigned int reg_get_max_bandwidth(const struct ieee80211_regdomain *rd, const struct ieee80211_reg_rule *rule); bool reg_last_request_cell_base(void); +const struct ieee80211_regdomain *get_wiphy_regdom(struct wiphy *wiphy); /** * regulatory_hint_found_beacon - hints a beacon was found on a channel -- cgit v1.2.3-70-g09d2 From b0d7aa59592b4270531de5ce65dcf18338a2d98c Mon Sep 17 00:00:00 2001 From: Jonathan Doron Date: Mon, 15 Dec 2014 19:26:00 +0200 Subject: cfg80211: allow wiphy specific regdomain management Add a new regulatory flag that allows a driver to manage regdomain changes/updates for its own wiphy. A self-managed wiphys only employs regulatory information obtained from the FW and driver and does not use other cfg80211 sources like beacon-hints, country-code IEs and hints from other devices on the same system. Conversely, a self-managed wiphy does not share its regulatory hints with other devices in the system. If a system contains several devices, one or more of which are self-managed, there might be contradictory regulatory settings between them. Usage of flag is generally discouraged. Only use it if the FW/driver is incompatible with non-locally originated hints. A new API lets the driver send a complete regdomain, to be applied on its wiphy only. After a wiphy-specific regdomain change takes place, usermode will get a new type of change notification. The regulatory core also takes care enforce regulatory restrictions, in case some interfaces are on forbidden channels. Signed-off-by: Jonathan Doron Signed-off-by: Arik Nemtsov Reviewed-by: Luis R. 
Rodriguez Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 14 +++++++ include/net/regulatory.h | 19 ++++++++++ include/uapi/linux/nl80211.h | 6 +++ net/wireless/core.c | 8 ++++ net/wireless/core.h | 7 ++++ net/wireless/nl80211.c | 49 +++++++++++++++--------- net/wireless/nl80211.h | 16 +++++++- net/wireless/reg.c | 88 ++++++++++++++++++++++++++++++++++++++++++++ 8 files changed, 188 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4ebb816241fa..4bc1fc9971a5 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3807,6 +3807,20 @@ const u8 *cfg80211_find_vendor_ie(unsigned int oui, u8 oui_type, */ int regulatory_hint(struct wiphy *wiphy, const char *alpha2); +/** + * regulatory_set_wiphy_regd - set regdom info for self managed drivers + * @wiphy: the wireless device we want to process the regulatory domain on + * @rd: the regulatory domain informatoin to use for this wiphy + * + * Set the regulatory domain information for self-managed wiphys, only they + * may use this function. See %REGULATORY_WIPHY_SELF_MANAGED for more + * information. + * + * Return: 0 on success. -EINVAL, -EPERM + */ +int regulatory_set_wiphy_regd(struct wiphy *wiphy, + struct ieee80211_regdomain *rd); + /** * wiphy_apply_custom_regulatory - apply a custom driver regulatory domain * @wiphy: the wireless device we want to process the regulatory domain on diff --git a/include/net/regulatory.h b/include/net/regulatory.h index b776d72d84be..ebc5a2ed8631 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -147,6 +147,24 @@ struct regulatory_request { * NL80211_IFTYPE_P2P_CLIENT, NL80211_IFTYPE_P2P_GO, * NL80211_IFTYPE_P2P_DEVICE. The flag will be set by default if a device * includes any modes unsupported for enforcement checking. + * @REGULATORY_WIPHY_SELF_MANAGED: for devices that employ wiphy-specific + * regdom management. These devices will ignore all regdom changes not + * originating from their own wiphy. + * A self-managed wiphys only employs regulatory information obtained from + * the FW and driver and does not use other cfg80211 sources like + * beacon-hints, country-code IEs and hints from other devices on the same + * system. Conversely, a self-managed wiphy does not share its regulatory + * hints with other devices in the system. If a system contains several + * devices, one or more of which are self-managed, there might be + * contradictory regulatory settings between them. Usage of flag is + * generally discouraged. Only use it if the FW/driver is incompatible + * with non-locally originated hints. + * This flag is incompatible with the flags: %REGULATORY_CUSTOM_REG, + * %REGULATORY_STRICT_REG, %REGULATORY_COUNTRY_IE_FOLLOW_POWER, + * %REGULATORY_COUNTRY_IE_IGNORE and %REGULATORY_DISABLE_BEACON_HINTS. + * Mixing any of the above flags with this flag will result in a failure + * to register the wiphy. This flag implies + * %REGULATORY_DISABLE_BEACON_HINTS and %REGULATORY_COUNTRY_IE_IGNORE. 
*/ enum ieee80211_regulatory_flags { REGULATORY_CUSTOM_REG = BIT(0), @@ -156,6 +174,7 @@ enum ieee80211_regulatory_flags { REGULATORY_COUNTRY_IE_IGNORE = BIT(4), REGULATORY_ENABLE_RELAX_NO_IR = BIT(5), REGULATORY_IGNORE_STALE_KICKOFF = BIT(6), + REGULATORY_WIPHY_SELF_MANAGED = BIT(7), }; struct ieee80211_freq_range { diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 2d384d041224..fb58e654f523 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -782,6 +782,10 @@ * peer given by %NL80211_ATTR_MAC. Both peers must be on the base channel * when this command completes. * + * @NL80211_CMD_WIPHY_REG_CHANGE: Similar to %NL80211_CMD_REG_CHANGE, but used + * as an event to indicate changes for devices with wiphy-specific regdom + * management. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -966,6 +970,8 @@ enum nl80211_commands { NL80211_CMD_TDLS_CHANNEL_SWITCH, NL80211_CMD_TDLS_CANCEL_CHANNEL_SWITCH, + NL80211_CMD_WIPHY_REG_CHANGE, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ diff --git a/net/wireless/core.c b/net/wireless/core.c index 4910758baab1..b661fcce7e3c 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -561,6 +561,14 @@ int wiphy_register(struct wiphy *wiphy) BIT(NL80211_IFTYPE_MONITOR))) wiphy->regulatory_flags |= REGULATORY_IGNORE_STALE_KICKOFF; + if (WARN_ON((wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) && + (wiphy->regulatory_flags & + (REGULATORY_CUSTOM_REG | + REGULATORY_STRICT_REG | + REGULATORY_COUNTRY_IE_FOLLOW_POWER | + REGULATORY_COUNTRY_IE_IGNORE)))) + return -EINVAL; + if (WARN_ON(wiphy->coalesce && (!wiphy->coalesce->n_rules || !wiphy->coalesce->n_patterns) && diff --git a/net/wireless/core.h b/net/wireless/core.h index faa5b1609aae..e87cae57a83b 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -36,6 +36,13 @@ struct cfg80211_registered_device { * the country on the country IE changed. */ char country_ie_alpha2[2]; + /* + * the driver requests the regulatory core to set this regulatory + * domain as the wiphy's. Only used for %REGULATORY_WIPHY_SELF_MANAGED + * devices using the regulatory_set_wiphy_regd() API + */ + const struct ieee80211_regdomain *requested_regd; + /* If a Country IE has been received this tells us the environment * which its telling us its in. This defaults to ENVIRON_ANY */ enum environment_cap env; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 2d5dc428c5ab..eebb7e422989 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -11042,25 +11042,9 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, NL80211_MCGRP_SCAN, GFP_KERNEL); } -/* - * This can happen on global regulatory changes or device specific settings - * based on custom world regulatory domains. 
- */ -void nl80211_send_reg_change_event(struct regulatory_request *request) +static bool nl80211_reg_change_event_fill(struct sk_buff *msg, + struct regulatory_request *request) { - struct sk_buff *msg; - void *hdr; - - msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); - if (!msg) - return; - - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_REG_CHANGE); - if (!hdr) { - nlmsg_free(msg); - return; - } - /* Userspace can always count this one always being set */ if (nla_put_u8(msg, NL80211_ATTR_REG_INITIATOR, request->initiator)) goto nla_put_failure; @@ -11094,6 +11078,35 @@ void nl80211_send_reg_change_event(struct regulatory_request *request) goto nla_put_failure; } + return true; + +nla_put_failure: + return false; +} + +/* + * This can happen on global regulatory changes or device specific settings + * based on custom regulatory domains. + */ +void nl80211_common_reg_change_event(enum nl80211_commands cmd_id, + struct regulatory_request *request) +{ + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return; + + hdr = nl80211hdr_put(msg, 0, 0, 0, cmd_id); + if (!hdr) { + nlmsg_free(msg); + return; + } + + if (nl80211_reg_change_event_fill(msg, request) == false) + goto nla_put_failure; + genlmsg_end(msg, hdr); rcu_read_lock(); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 7ad70d6f0cc6..84d4edf1d545 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -17,7 +17,21 @@ void nl80211_send_sched_scan(struct cfg80211_registered_device *rdev, struct net_device *netdev, u32 cmd); void nl80211_send_sched_scan_results(struct cfg80211_registered_device *rdev, struct net_device *netdev); -void nl80211_send_reg_change_event(struct regulatory_request *request); +void nl80211_common_reg_change_event(enum nl80211_commands cmd_id, + struct regulatory_request *request); + +static inline void +nl80211_send_reg_change_event(struct regulatory_request *request) +{ + nl80211_common_reg_change_event(NL80211_CMD_REG_CHANGE, request); +} + +static inline void +nl80211_send_wiphy_reg_change_event(struct regulatory_request *request) +{ + nl80211_common_reg_change_event(NL80211_CMD_WIPHY_REG_CHANGE, request); +} + void nl80211_send_rx_auth(struct cfg80211_registered_device *rdev, struct net_device *netdev, const u8 *buf, size_t len, gfp_t gfp); diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 2d9760084b74..c040f8a0f1ed 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -1307,6 +1307,9 @@ static bool ignore_reg_update(struct wiphy *wiphy, { struct regulatory_request *lr = get_last_request(); + if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + return true; + if (!lr) { REG_DBG_PRINT("Ignoring regulatory request set by %s " "since last_request is not set\n", @@ -2147,11 +2150,52 @@ static void reg_process_pending_beacon_hints(void) spin_unlock_bh(®_pending_beacons_lock); } +static void reg_process_self_managed_hints(void) +{ + struct cfg80211_registered_device *rdev; + struct wiphy *wiphy; + const struct ieee80211_regdomain *tmp; + const struct ieee80211_regdomain *regd; + enum ieee80211_band band; + struct regulatory_request request = {}; + + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + wiphy = &rdev->wiphy; + + spin_lock(®_requests_lock); + regd = rdev->requested_regd; + rdev->requested_regd = NULL; + spin_unlock(®_requests_lock); + + if (regd == NULL) + continue; + + tmp = get_wiphy_regdom(wiphy); + rcu_assign_pointer(wiphy->regd, regd); + rcu_free_regdom(tmp); + + for (band = 0; band < 
IEEE80211_NUM_BANDS; band++) + handle_band_custom(wiphy, wiphy->bands[band], regd); + + reg_process_ht_flags(wiphy); + + request.wiphy_idx = get_wiphy_idx(wiphy); + request.alpha2[0] = regd->alpha2[0]; + request.alpha2[1] = regd->alpha2[1]; + request.initiator = NL80211_REGDOM_SET_BY_DRIVER; + + nl80211_send_wiphy_reg_change_event(&request); + } + + reg_check_channels(); +} + static void reg_todo(struct work_struct *work) { rtnl_lock(); reg_process_pending_hints(); reg_process_pending_beacon_hints(); + reg_process_self_managed_hints(); rtnl_unlock(); } @@ -2432,6 +2476,8 @@ static void restore_regulatory_settings(bool reset_user) world_alpha2[1] = cfg80211_world_regdom->alpha2[1]; list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + if (rdev->wiphy.regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + continue; if (rdev->wiphy.regulatory_flags & REGULATORY_CUSTOM_REG) restore_custom_reg_settings(&rdev->wiphy); } @@ -2835,10 +2881,52 @@ int set_regdom(const struct ieee80211_regdomain *rd) return 0; } +int regulatory_set_wiphy_regd(struct wiphy *wiphy, + struct ieee80211_regdomain *rd) +{ + const struct ieee80211_regdomain *regd; + const struct ieee80211_regdomain *prev_regd; + struct cfg80211_registered_device *rdev; + + if (WARN_ON(!wiphy || !rd)) + return -EINVAL; + + if (WARN(!(wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED), + "wiphy should have REGULATORY_WIPHY_SELF_MANAGED\n")) + return -EPERM; + + if (WARN(!is_valid_rd(rd), "Invalid regulatory domain detected\n")) { + print_regdomain_info(rd); + return -EINVAL; + } + + regd = reg_copy_regd(rd); + if (IS_ERR(regd)) + return PTR_ERR(regd); + + rdev = wiphy_to_rdev(wiphy); + + spin_lock(®_requests_lock); + prev_regd = rdev->requested_regd; + rdev->requested_regd = regd; + spin_unlock(®_requests_lock); + + kfree(prev_regd); + + schedule_work(®_work); + return 0; +} +EXPORT_SYMBOL(regulatory_set_wiphy_regd); + void wiphy_regulatory_register(struct wiphy *wiphy) { struct regulatory_request *lr; + /* self-managed devices ignore external hints */ + if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) + wiphy->regulatory_flags |= REGULATORY_DISABLE_BEACON_HINTS | + REGULATORY_COUNTRY_IE_IGNORE; + if (!reg_dev_ignore_cell_hint(wiphy)) reg_num_devs_support_basehint++; -- cgit v1.2.3-70-g09d2 From 1bdd716cbccabc8127fbbaaa663c3090302ef78b Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Mon, 15 Dec 2014 19:26:01 +0200 Subject: cfg80211: return private regdom for self-managed devices If a device has self-managed regulatory, insist on returning the wiphy specific regdomain if a wiphy-idx is specified. The global regdomain is meaningless for such devices. Also add an attribute for self-managed devices, so usermode can distinguish them as such. Signed-off-by: Arik Nemtsov Reviewed-by: Luis R. Rodriguez Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 10 ++++++++++ net/wireless/nl80211.c | 24 ++++++++++++++++++++++++ 2 files changed, 34 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index fb58e654f523..b3ada0b3a276 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -261,6 +261,9 @@ * the regulatory core to help with compliance. A dump version of this API * is now available which will returns the global regdomain as well as * all private regdomains of present wiphys (for those that have it). + * If a wiphy is self-managed (%NL80211_ATTR_WIPHY_SELF_MANAGED_REG), then + * its private regdomain is the only valid one for it. 
The regulatory + * core is not used to help with compliance in this case. * @NL80211_CMD_SET_REG: Set current regulatory domain. CRDA sends this command * after being queried by the kernel. CRDA replies by sending a regulatory * domain structure which consists of %NL80211_ATTR_REG_ALPHA set to our @@ -1702,6 +1705,11 @@ enum nl80211_commands { * * @NL80211_ATTR_MAC_MASK: MAC address mask * + * @NL80211_ATTR_WIPHY_SELF_MANAGED_REG: flag attribute indicating this device + * is self-managing its regulatory information and any regulatory domain + * obtained from it is coming from the device's wiphy and not the global + * cfg80211 regdomain. + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2059,6 +2067,8 @@ enum nl80211_attrs { NL80211_ATTR_MAC_MASK, + NL80211_ATTR_WIPHY_SELF_MANAGED_REG, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index eebb7e422989..5b1907f4c181 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -396,6 +396,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 }, [NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 }, [NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN }, + [NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG }, }; /* policy for the key attributes */ @@ -1701,6 +1702,10 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, rdev->wiphy.max_num_csa_counters)) goto nla_put_failure; + if (rdev->wiphy.regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && + nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; @@ -5406,6 +5411,8 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info) goto put_failure; if (info->attrs[NL80211_ATTR_WIPHY]) { + bool self_managed; + rdev = cfg80211_get_dev_from_info(genl_info_net(info), info); if (IS_ERR(rdev)) { nlmsg_free(msg); @@ -5413,8 +5420,16 @@ static int nl80211_get_reg_do(struct sk_buff *skb, struct genl_info *info) } wiphy = &rdev->wiphy; + self_managed = wiphy->regulatory_flags & + REGULATORY_WIPHY_SELF_MANAGED; regdom = get_wiphy_regdom(wiphy); + /* a self-managed-reg device must have a private regdom */ + if (WARN_ON(!regdom && self_managed)) { + nlmsg_free(msg); + return -EINVAL; + } + if (regdom && nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy))) goto nla_put_failure; @@ -5471,6 +5486,10 @@ static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb, nla_put_u32(msg, NL80211_ATTR_WIPHY, get_wiphy_idx(wiphy))) goto nla_put_failure; + if (wiphy && wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && + nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) + goto nla_put_failure; + return genlmsg_end(msg, hdr); nla_put_failure: @@ -11076,6 +11095,11 @@ static bool nl80211_reg_change_event_fill(struct sk_buff *msg, if (wiphy && nla_put_u32(msg, NL80211_ATTR_WIPHY, request->wiphy_idx)) goto nla_put_failure; + + if (wiphy && + wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED && + nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) + goto nla_put_failure; } return true; -- cgit v1.2.3-70-g09d2 From 31a60ed1e95ab8afbadb65599bef12b195080a0c Mon Sep 17 00:00:00 2001 From: Jukka Rissanen Date: Mon, 15 Dec 2014 13:25:38 +0200 Subject: nl80211: Convert sched_scan_req pointer to 
RCU pointer Because of possible races when accessing sched_scan_req pointer in rdev, the sched_scan_req is converted to RCU pointer. Signed-off-by: Jukka Rissanen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 2 ++ net/wireless/core.c | 10 +++++++--- net/wireless/core.h | 2 +- net/wireless/nl80211.c | 19 +++++++++++-------- net/wireless/scan.c | 13 ++++++++----- 5 files changed, 29 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4bc1fc9971a5..45d4d7292e53 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1516,6 +1516,7 @@ struct cfg80211_match_set { * @mac_addr_mask: MAC address mask used with randomisation, bits that * are 0 in the mask should be randomised, bits that are 1 should * be taken from the @mac_addr + * @rcu_head: RCU callback used to free the struct */ struct cfg80211_sched_scan_request { struct cfg80211_ssid *ssids; @@ -1537,6 +1538,7 @@ struct cfg80211_sched_scan_request { struct wiphy *wiphy; struct net_device *dev; unsigned long scan_start; + struct rcu_head rcu_head; /* keep last */ struct ieee80211_channel *channels[0]; diff --git a/net/wireless/core.c b/net/wireless/core.c index b661fcce7e3c..0743449405ca 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -867,6 +867,7 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; + struct cfg80211_sched_scan_request *sched_scan_req; ASSERT_RTNL(); ASSERT_WDEV_LOCK(wdev); @@ -877,7 +878,8 @@ void __cfg80211_leave(struct cfg80211_registered_device *rdev, break; case NL80211_IFTYPE_P2P_CLIENT: case NL80211_IFTYPE_STATION: - if (rdev->sched_scan_req && dev == rdev->sched_scan_req->dev) + sched_scan_req = rtnl_dereference(rdev->sched_scan_req); + if (sched_scan_req && dev == sched_scan_req->dev) __cfg80211_stop_sched_scan(rdev, false); #ifdef CONFIG_CFG80211_WEXT @@ -956,6 +958,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, struct net_device *dev = netdev_notifier_info_to_dev(ptr); struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev; + struct cfg80211_sched_scan_request *sched_scan_req; if (!wdev) return NOTIFY_DONE; @@ -1021,8 +1024,9 @@ static int cfg80211_netdev_notifier_call(struct notifier_block *nb, ___cfg80211_scan_done(rdev, false); } - if (WARN_ON(rdev->sched_scan_req && - rdev->sched_scan_req->dev == wdev->netdev)) { + sched_scan_req = rtnl_dereference(rdev->sched_scan_req); + if (WARN_ON(sched_scan_req && + sched_scan_req->dev == wdev->netdev)) { __cfg80211_stop_sched_scan(rdev, false); } diff --git a/net/wireless/core.h b/net/wireless/core.h index e87cae57a83b..e82030c32311 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -70,7 +70,7 @@ struct cfg80211_registered_device { u32 bss_generation; struct cfg80211_scan_request *scan_req; /* protected by RTNL */ struct sk_buff *scan_msg; - struct cfg80211_sched_scan_request *sched_scan_req; + struct cfg80211_sched_scan_request __rcu *sched_scan_req; unsigned long suspend_at; struct work_struct scan_done_wk; struct work_struct sched_scan_results_wk; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 5b1907f4c181..bacdf22fa472 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6190,6 +6190,7 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = 
dev->ieee80211_ptr; + struct cfg80211_sched_scan_request *sched_scan_req; int err; if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) || @@ -6199,27 +6200,29 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, if (rdev->sched_scan_req) return -EINPROGRESS; - rdev->sched_scan_req = nl80211_parse_sched_scan(&rdev->wiphy, wdev, - info->attrs); - err = PTR_ERR_OR_ZERO(rdev->sched_scan_req); + sched_scan_req = nl80211_parse_sched_scan(&rdev->wiphy, wdev, + info->attrs); + + err = PTR_ERR_OR_ZERO(sched_scan_req); if (err) goto out_err; - err = rdev_sched_scan_start(rdev, dev, rdev->sched_scan_req); + err = rdev_sched_scan_start(rdev, dev, sched_scan_req); if (err) goto out_free; - rdev->sched_scan_req->dev = dev; - rdev->sched_scan_req->wiphy = &rdev->wiphy; + sched_scan_req->dev = dev; + sched_scan_req->wiphy = &rdev->wiphy; + + rcu_assign_pointer(rdev->sched_scan_req, sched_scan_req); nl80211_send_sched_scan(rdev, dev, NL80211_CMD_START_SCHED_SCAN); return 0; out_free: - kfree(rdev->sched_scan_req); + kfree(sched_scan_req); out_err: - rdev->sched_scan_req = NULL; return err; } diff --git a/net/wireless/scan.c b/net/wireless/scan.c index bda39f149810..c705c3e2b751 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -257,7 +257,7 @@ void __cfg80211_sched_scan_results(struct work_struct *wk) rtnl_lock(); - request = rdev->sched_scan_req; + request = rtnl_dereference(rdev->sched_scan_req); /* we don't have sched_scan_req anymore if the scan is stopping */ if (request) { @@ -279,7 +279,8 @@ void cfg80211_sched_scan_results(struct wiphy *wiphy) { trace_cfg80211_sched_scan_results(wiphy); /* ignore if we're not scanning */ - if (wiphy_to_rdev(wiphy)->sched_scan_req) + + if (rcu_access_pointer(wiphy_to_rdev(wiphy)->sched_scan_req)) queue_work(cfg80211_wq, &wiphy_to_rdev(wiphy)->sched_scan_results_wk); } @@ -308,6 +309,7 @@ EXPORT_SYMBOL(cfg80211_sched_scan_stopped); int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, bool driver_initiated) { + struct cfg80211_sched_scan_request *sched_scan_req; struct net_device *dev; ASSERT_RTNL(); @@ -315,7 +317,8 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, if (!rdev->sched_scan_req) return -ENOENT; - dev = rdev->sched_scan_req->dev; + sched_scan_req = rtnl_dereference(rdev->sched_scan_req); + dev = sched_scan_req->dev; if (!driver_initiated) { int err = rdev_sched_scan_stop(rdev, dev); @@ -325,8 +328,8 @@ int __cfg80211_stop_sched_scan(struct cfg80211_registered_device *rdev, nl80211_send_sched_scan(rdev, dev, NL80211_CMD_SCHED_SCAN_STOPPED); - kfree(rdev->sched_scan_req); - rdev->sched_scan_req = NULL; + RCU_INIT_POINTER(rdev->sched_scan_req, NULL); + kfree_rcu(sched_scan_req, rcu_head); return 0; } -- cgit v1.2.3-70-g09d2 From 93a1e86ce10e4898f9ca9cd09d659a8a7780ee5e Mon Sep 17 00:00:00 2001 From: Jukka Rissanen Date: Mon, 15 Dec 2014 13:25:39 +0200 Subject: nl80211: Stop scheduled scan if netlink client disappears An attribute NL80211_ATTR_SOCKET_OWNER can be set by the scan initiator. If present, the attribute will cause the scan to be stopped if the client dies. 
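
Illustrative userspace fragment (libnl-genl-3, not from the patch) showing where the flag goes when starting a scheduled scan; building the rest of the request (SSIDs, scan interval, channels) is not shown, and the function and parameter names are the caller's own.

    #include <netlink/genl/genl.h>
    #include <linux/nl80211.h>

    /* Tie the scheduled scan to the requesting socket: with this patch the
     * kernel stops the scan automatically if the socket's owner goes away.
     */
    static void build_owned_sched_scan(struct nl_msg *msg, int nl80211_family)
    {
            genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, nl80211_family, 0, 0,
                        NL80211_CMD_START_SCHED_SCAN, 0);
            nla_put_flag(msg, NL80211_ATTR_SOCKET_OWNER);
            /* SSIDs, scan interval and channel attributes would be added here. */
    }
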
Signed-off-by: Jukka Rissanen Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 +++ include/uapi/linux/nl80211.h | 3 +++ net/wireless/core.c | 16 ++++++++++++++++ net/wireless/core.h | 2 ++ net/wireless/nl80211.c | 16 ++++++++++++++++ 5 files changed, 40 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 45d4d7292e53..bd672ea08c9a 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1517,6 +1517,8 @@ struct cfg80211_match_set { * are 0 in the mask should be randomised, bits that are 1 should * be taken from the @mac_addr * @rcu_head: RCU callback used to free the struct + * @owner_nlportid: netlink portid of owner (if this should is a request + * owned by a particular socket) */ struct cfg80211_sched_scan_request { struct cfg80211_ssid *ssids; @@ -1539,6 +1541,7 @@ struct cfg80211_sched_scan_request { struct net_device *dev; unsigned long scan_start; struct rcu_head rcu_head; + u32 owner_nlportid; /* keep last */ struct ieee80211_channel *channels[0]; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b3ada0b3a276..c0383e983544 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1672,6 +1672,9 @@ enum nl80211_commands { * @NL80211_ATTR_SOCKET_OWNER: Flag attribute, if set during interface * creation then the new interface will be owned by the netlink socket * that created it and will be destroyed when the socket is closed. + * If set during scheduled scan start then the new scan req will be + * owned by the netlink socket that created it and the scheduled scan will + * be stopped when the socket is closed. * * @NL80211_ATTR_TDLS_INITIATOR: flag attribute indicating the current end is * the TDLS link initiator. diff --git a/net/wireless/core.c b/net/wireless/core.c index 0743449405ca..456e4c38c279 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -321,6 +321,20 @@ static void cfg80211_destroy_iface_wk(struct work_struct *work) rtnl_unlock(); } +static void cfg80211_sched_scan_stop_wk(struct work_struct *work) +{ + struct cfg80211_registered_device *rdev; + + rdev = container_of(work, struct cfg80211_registered_device, + sched_scan_stop_wk); + + rtnl_lock(); + + __cfg80211_stop_sched_scan(rdev, false); + + rtnl_unlock(); +} + /* exported functions */ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, @@ -407,6 +421,7 @@ use_default_name: INIT_LIST_HEAD(&rdev->destroy_list); spin_lock_init(&rdev->destroy_list_lock); INIT_WORK(&rdev->destroy_work, cfg80211_destroy_iface_wk); + INIT_WORK(&rdev->sched_scan_stop_wk, cfg80211_sched_scan_stop_wk); #ifdef CONFIG_CFG80211_DEFAULT_PS rdev->wiphy.flags |= WIPHY_FLAG_PS_ON_BY_DEFAULT; @@ -787,6 +802,7 @@ void wiphy_unregister(struct wiphy *wiphy) flush_work(&rdev->event_work); cancel_delayed_work_sync(&rdev->dfs_update_channels_wk); flush_work(&rdev->destroy_work); + flush_work(&rdev->sched_scan_stop_wk); #ifdef CONFIG_PM if (rdev->wiphy.wowlan_config && rdev->ops->set_wakeup) diff --git a/net/wireless/core.h b/net/wireless/core.h index e82030c32311..801cd49c5a0c 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -91,6 +91,8 @@ struct cfg80211_registered_device { struct list_head destroy_list; struct work_struct destroy_work; + struct work_struct sched_scan_stop_wk; + /* must be last because of the way we do wiphy_priv(), * and it should at least be aligned to NETDEV_ALIGN */ struct wiphy wiphy __aligned(NETDEV_ALIGN); diff --git a/net/wireless/nl80211.c 
b/net/wireless/nl80211.c index bacdf22fa472..702920134b34 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6214,6 +6214,9 @@ static int nl80211_start_sched_scan(struct sk_buff *skb, sched_scan_req->dev = dev; sched_scan_req->wiphy = &rdev->wiphy; + if (info->attrs[NL80211_ATTR_SOCKET_OWNER]) + sched_scan_req->owner_nlportid = info->snd_portid; + rcu_assign_pointer(rdev->sched_scan_req, sched_scan_req); nl80211_send_sched_scan(rdev, dev, @@ -12618,6 +12621,13 @@ static int nl80211_netlink_notify(struct notifier_block * nb, list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { bool schedule_destroy_work = false; + bool schedule_scan_stop = false; + struct cfg80211_sched_scan_request *sched_scan_req = + rcu_dereference(rdev->sched_scan_req); + + if (sched_scan_req && notify->portid && + sched_scan_req->owner_nlportid == notify->portid) + schedule_scan_stop = true; list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) { cfg80211_mlme_unregister_socket(wdev, notify->portid); @@ -12648,6 +12658,12 @@ static int nl80211_netlink_notify(struct notifier_block * nb, spin_unlock(&rdev->destroy_list_lock); schedule_work(&rdev->destroy_work); } + } else if (schedule_scan_stop) { + sched_scan_req->owner_nlportid = 0; + + if (rdev->ops->sched_scan_stop && + rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_SCHED_SCAN) + schedule_work(&rdev->sched_scan_stop_wk); } } -- cgit v1.2.3-70-g09d2 From 79f241b41b5f581c6be66785ab8b9c8e3b1651c7 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 17 Dec 2014 18:00:44 +0200 Subject: nl80211: increase the max number of rules in regdomain Some network cards (Intel) produce per-channel regdomains and rely on cfg80211 to merge rules as needed. This hits the max rules limit and fails. Signed-off-by: Arik Nemtsov Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index c0383e983544..18cb0aa06351 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2112,7 +2112,7 @@ enum nl80211_attrs { #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_HT_RATES 77 -#define NL80211_MAX_SUPP_REG_RULES 32 +#define NL80211_MAX_SUPP_REG_RULES 64 #define NL80211_TKIP_DATA_OFFSET_ENCR_KEY 0 #define NL80211_TKIP_DATA_OFFSET_TX_MIC_KEY 16 #define NL80211_TKIP_DATA_OFFSET_RX_MIC_KEY 24 -- cgit v1.2.3-70-g09d2 From b40d6376ff470572e2fafb20ca06a68f2d7940cb Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 10 Dec 2014 15:33:11 +0100 Subject: nl802154: introduce cca mode enums This patch adds enums for 802.15.4 specific CCA settings. 
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/nl802154.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include') diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 6dbd406ca41b..86c1b2f15b57 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -119,4 +119,47 @@ enum nl802154_iftype { NL802154_IFTYPE_MAX = NUM_NL802154_IFTYPES - 1 }; +/** + * enum nl802154_cca_modes - cca modes + * + * @__NL802154_CCA_INVALID: cca mode number 0 is reserved + * @NL802154_CCA_ENERGY: Energy above threshold + * @NL802154_CCA_CARRIER: Carrier sense only + * @NL802154_CCA_ENERGY_CARRIER: Carrier sense with energy above threshold + * @NL802154_CCA_ALOHA: CCA shall always report an idle medium + * @NL802154_CCA_UWB_SHR: UWB preamble sense based on the SHR of a frame + * @NL802154_CCA_UWB_MULTIPEXED: UWB preamble sense based on the packet with + * the multiplexed preamble + * @__NL802154_CCA_ATTR_AFTER_LAST: Internal + * @NL802154_CCA_ATTR_MAX: Maximum CCA attribute number + */ +enum nl802154_cca_modes { + __NL802154_CCA_INVALID, + NL802154_CCA_ENERGY, + NL802154_CCA_CARRIER, + NL802154_CCA_ENERGY_CARRIER, + NL802154_CCA_ALOHA, + NL802154_CCA_UWB_SHR, + NL802154_CCA_UWB_MULTIPEXED, + + /* keep last */ + __NL802154_CCA_ATTR_AFTER_LAST, + NL802154_CCA_ATTR_MAX = __NL802154_CCA_ATTR_AFTER_LAST - 1 +}; + +/** + * enum nl802154_cca_opts - additional options for cca modes + * + * @NL802154_CCA_OPT_ENERGY_CARRIER_OR: NL802154_CCA_ENERGY_CARRIER with OR + * @NL802154_CCA_OPT_ENERGY_CARRIER_AND: NL802154_CCA_ENERGY_CARRIER with AND + */ +enum nl802154_cca_opts { + NL802154_CCA_OPT_ENERGY_CARRIER_AND, + NL802154_CCA_OPT_ENERGY_CARRIER_OR, + + /* keep last */ + __NL802154_CCA_OPT_ATTR_AFTER_LAST, + NL802154_CCA_OPT_ATTR_MAX = __NL802154_CCA_OPT_ATTR_AFTER_LAST - 1 +}; + #endif /* __NL802154_H */ -- cgit v1.2.3-70-g09d2 From 7fe9a3882bb37195c41ab125a0f2852398d2646a Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 10 Dec 2014 15:33:12 +0100 Subject: ieee802154: rework cca setting The current cca setting handle is a driver specific call. We need to introduce some 802.15.4 specific layer and mapping 802.15.4 cca modes to driver specific ones inside the 802.15.4 driver. This patch will add such 802.15.4 layer and mapping the cca settings to driver specific ones. 
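
For illustration only (the at86rf230 hunk below is the real in-tree example): the shape a driver's set_cca_mode callback takes after this change, translating the generic struct wpan_phy_cca into a hardware-specific value. All "mydrv" names and register constants are hypothetical.

    #include <net/mac802154.h>

    static int mydrv_set_cca_mode(struct ieee802154_hw *hw,
                                  const struct wpan_phy_cca *cca)
    {
            struct mydrv_priv *lp = hw->priv;       /* hypothetical private data */
            u8 val;

            /* map the generic 802.15.4 CCA mode to this chip's register value */
            switch (cca->mode) {
            case NL802154_CCA_ENERGY:
                    val = MYDRV_CCA_ED;             /* hypothetical constants */
                    break;
            case NL802154_CCA_CARRIER:
                    val = MYDRV_CCA_CS;
                    break;
            case NL802154_CCA_ENERGY_CARRIER:
                    val = cca->opt == NL802154_CCA_OPT_ENERGY_CARRIER_AND ?
                          MYDRV_CCA_ED_AND_CS : MYDRV_CCA_ED_OR_CS;
                    break;
            default:
                    return -EINVAL;                 /* mode not supported by this chip */
            }

            return mydrv_write_cca_reg(lp, val);    /* hypothetical register write */
    }
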
Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- drivers/net/ieee802154/at86rf230.c | 30 ++++++++++++++++++++++++++++-- include/net/cfg802154.h | 7 ++++++- include/net/ieee802154_netdev.h | 4 +++- include/net/mac802154.h | 5 ++++- net/ieee802154/nl-mac.c | 4 ++-- net/ieee802154/nl802154.c | 2 +- net/ieee802154/sysfs.c | 2 +- net/mac802154/driver-ops.h | 5 +++-- net/mac802154/mac_cmd.c | 6 +++--- 9 files changed, 51 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/net/ieee802154/at86rf230.c b/drivers/net/ieee802154/at86rf230.c index 1c0135620c62..1ac46ba41fd8 100644 --- a/drivers/net/ieee802154/at86rf230.c +++ b/drivers/net/ieee802154/at86rf230.c @@ -1146,11 +1146,37 @@ at86rf230_set_lbt(struct ieee802154_hw *hw, bool on) } static int -at86rf230_set_cca_mode(struct ieee802154_hw *hw, u8 mode) +at86rf230_set_cca_mode(struct ieee802154_hw *hw, + const struct wpan_phy_cca *cca) { struct at86rf230_local *lp = hw->priv; + u8 val; - return at86rf230_write_subreg(lp, SR_CCA_MODE, mode); + /* mapping 802.15.4 to driver spec */ + switch (cca->mode) { + case NL802154_CCA_ENERGY: + val = 1; + break; + case NL802154_CCA_CARRIER: + val = 2; + break; + case NL802154_CCA_ENERGY_CARRIER: + switch (cca->opt) { + case NL802154_CCA_OPT_ENERGY_CARRIER_AND: + val = 3; + break; + case NL802154_CCA_OPT_ENERGY_CARRIER_OR: + val = 0; + break; + default: + return -EINVAL; + } + break; + default: + return -EINVAL; + } + + return at86rf230_write_subreg(lp, SR_CCA_MODE, val); } static int diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 7f713acfa106..6ee2618ac78a 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -56,6 +56,11 @@ struct cfg802154_ops { struct wpan_dev *wpan_dev, bool mode); }; +struct wpan_phy_cca { + enum nl802154_cca_modes mode; + enum nl802154_cca_opts opt; +}; + struct wpan_phy { struct mutex pib_lock; @@ -76,7 +81,7 @@ struct wpan_phy { u8 current_page; u32 channels_supported[IEEE802154_MAX_PAGE + 1]; s8 transmit_power; - u8 cca_mode; + struct wpan_phy_cca cca; __le64 perm_extended_addr; diff --git a/include/net/ieee802154_netdev.h b/include/net/ieee802154_netdev.h index 83bb8a73d23c..94a297052442 100644 --- a/include/net/ieee802154_netdev.h +++ b/include/net/ieee802154_netdev.h @@ -28,6 +28,8 @@ #include #include +#include + struct ieee802154_sechdr { #if defined(__LITTLE_ENDIAN_BITFIELD) u8 level:3, @@ -337,7 +339,7 @@ struct ieee802154_mac_params { s8 frame_retries; bool lbt; - u8 cca_mode; + struct wpan_phy_cca cca; s32 cca_ed_level; }; diff --git a/include/net/mac802154.h b/include/net/mac802154.h index c823d910b46c..850647811749 100644 --- a/include/net/mac802154.h +++ b/include/net/mac802154.h @@ -20,6 +20,8 @@ #include #include +#include + /* General MAC frame format: * 2 bytes: Frame Control * 1 byte: Sequence Number @@ -212,7 +214,8 @@ struct ieee802154_ops { unsigned long changed); int (*set_txpower)(struct ieee802154_hw *hw, int db); int (*set_lbt)(struct ieee802154_hw *hw, bool on); - int (*set_cca_mode)(struct ieee802154_hw *hw, u8 mode); + int (*set_cca_mode)(struct ieee802154_hw *hw, + const struct wpan_phy_cca *cca); int (*set_cca_ed_level)(struct ieee802154_hw *hw, s32 level); int (*set_csma_params)(struct ieee802154_hw *hw, diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index cd919493c976..3c902e9516fb 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -121,7 +121,7 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, params.transmit_power) || 
nla_put_u8(msg, IEEE802154_ATTR_LBT_ENABLED, params.lbt) || nla_put_u8(msg, IEEE802154_ATTR_CCA_MODE, - params.cca_mode) || + params.cca.mode) || nla_put_s32(msg, IEEE802154_ATTR_CCA_ED_LEVEL, params.cca_ed_level) || nla_put_u8(msg, IEEE802154_ATTR_CSMA_RETRIES, @@ -516,7 +516,7 @@ int ieee802154_set_macparams(struct sk_buff *skb, struct genl_info *info) params.lbt = nla_get_u8(info->attrs[IEEE802154_ATTR_LBT_ENABLED]); if (info->attrs[IEEE802154_ATTR_CCA_MODE]) - params.cca_mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]); + params.cca.mode = nla_get_u8(info->attrs[IEEE802154_ATTR_CCA_MODE]); if (info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]) params.cca_ed_level = nla_get_s32(info->attrs[IEEE802154_ATTR_CCA_ED_LEVEL]); diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 889647744697..1efbe4250024 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -291,7 +291,7 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, /* cca mode */ if (nla_put_u8(msg, NL802154_ATTR_CCA_MODE, - rdev->wpan_phy.cca_mode)) + rdev->wpan_phy.cca.mode)) goto nla_put_failure; if (nla_put_s8(msg, NL802154_ATTR_TX_POWER, diff --git a/net/ieee802154/sysfs.c b/net/ieee802154/sysfs.c index 1613b9c65dfa..dff55c2d87f3 100644 --- a/net/ieee802154/sysfs.c +++ b/net/ieee802154/sysfs.c @@ -68,7 +68,7 @@ static DEVICE_ATTR_RO(name) MASTER_SHOW(current_channel, "%d"); MASTER_SHOW(current_page, "%d"); MASTER_SHOW(transmit_power, "%d +- 1 dB"); -MASTER_SHOW(cca_mode, "%d"); +MASTER_SHOW_COMPLEX(cca_mode, "%d", phy->cca.mode); static ssize_t channels_supported_show(struct device *dev, struct device_attribute *attr, diff --git a/net/mac802154/driver-ops.h b/net/mac802154/driver-ops.h index f21e864613d0..98180a9fff4a 100644 --- a/net/mac802154/driver-ops.h +++ b/net/mac802154/driver-ops.h @@ -70,7 +70,8 @@ static inline int drv_set_tx_power(struct ieee802154_local *local, s8 dbm) return local->ops->set_txpower(&local->hw, dbm); } -static inline int drv_set_cca_mode(struct ieee802154_local *local, u8 cca_mode) +static inline int drv_set_cca_mode(struct ieee802154_local *local, + const struct wpan_phy_cca *cca) { might_sleep(); @@ -79,7 +80,7 @@ static inline int drv_set_cca_mode(struct ieee802154_local *local, u8 cca_mode) return -EOPNOTSUPP; } - return local->ops->set_cca_mode(&local->hw, cca_mode); + return local->ops->set_cca_mode(&local->hw, cca); } static inline int drv_set_lbt_mode(struct ieee802154_local *local, bool mode) diff --git a/net/mac802154/mac_cmd.c b/net/mac802154/mac_cmd.c index 6aacb1816889..bdccb4ecd30f 100644 --- a/net/mac802154/mac_cmd.c +++ b/net/mac802154/mac_cmd.c @@ -81,7 +81,7 @@ static int mac802154_set_mac_params(struct net_device *dev, /* PHY */ wpan_dev->wpan_phy->transmit_power = params->transmit_power; - wpan_dev->wpan_phy->cca_mode = params->cca_mode; + wpan_dev->wpan_phy->cca = params->cca; wpan_dev->wpan_phy->cca_ed_level = params->cca_ed_level; /* MAC */ @@ -98,7 +98,7 @@ static int mac802154_set_mac_params(struct net_device *dev, } if (local->hw.flags & IEEE802154_HW_CCA_MODE) { - ret = drv_set_cca_mode(local, params->cca_mode); + ret = drv_set_cca_mode(local, ¶ms->cca); if (ret < 0) return ret; } @@ -122,7 +122,7 @@ static void mac802154_get_mac_params(struct net_device *dev, /* PHY */ params->transmit_power = wpan_dev->wpan_phy->transmit_power; - params->cca_mode = wpan_dev->wpan_phy->cca_mode; + params->cca = wpan_dev->wpan_phy->cca; params->cca_ed_level = wpan_dev->wpan_phy->cca_ed_level; /* MAC */ -- cgit v1.2.3-70-g09d2 
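[Editorial sketch, not from the patch: how another ieee802154 driver might implement the reworked set_cca_mode() op. struct wpan_phy_cca, enum nl802154_cca_modes and struct ieee802154_hw are the types introduced above; the xxx_* names, register offset and value are hypothetical.]

#include <linux/regmap.h>
#include <net/mac802154.h>

/* Hypothetical driver-private state. */
struct xxx_priv {
	struct regmap *regmap;
};

#define XXX_REG_CCA_CTRL	0x2c	/* made-up register */
#define XXX_CCA_ED		0x01	/* made-up "energy detect" value */

static int xxx_set_cca_mode(struct ieee802154_hw *hw,
			    const struct wpan_phy_cca *cca)
{
	struct xxx_priv *priv = hw->priv;

	/* This hardware only implements energy-detection CCA, so reject
	 * every other generic mode instead of silently ignoring it,
	 * mirroring the at86rf230 mapping above.
	 */
	if (cca->mode != NL802154_CCA_ENERGY)
		return -EOPNOTSUPP;

	return regmap_write(priv->regmap, XXX_REG_CCA_CTRL, XXX_CCA_ED);
}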
From ba2a9506a76450568cbc0d51626d94cf8528c0c7 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 10 Dec 2014 15:33:13 +0100 Subject: nl802154: introduce support for cca settings This patch adds support for setting cca parameters via nl802154. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/net/cfg802154.h | 3 +++ include/net/nl802154.h | 2 +- net/ieee802154/nl802154.c | 46 +++++++++++++++++++++++++++++++++++++++++++--- net/ieee802154/rdev-ops.h | 7 +++++++ net/mac802154/cfg.c | 21 +++++++++++++++++++++ 5 files changed, 75 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/cfg802154.h b/include/net/cfg802154.h index 6ee2618ac78a..eeda67652766 100644 --- a/include/net/cfg802154.h +++ b/include/net/cfg802154.h @@ -25,6 +25,7 @@ #include struct wpan_phy; +struct wpan_phy_cca; struct cfg802154_ops { struct net_device * (*add_virtual_intf_deprecated)(struct wpan_phy *wpan_phy, @@ -39,6 +40,8 @@ struct cfg802154_ops { int (*del_virtual_intf)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev); int (*set_channel)(struct wpan_phy *wpan_phy, u8 page, u8 channel); + int (*set_cca_mode)(struct wpan_phy *wpan_phy, + const struct wpan_phy_cca *cca); int (*set_pan_id)(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 pan_id); int (*set_short_addr)(struct wpan_phy *wpan_phy, diff --git a/include/net/nl802154.h b/include/net/nl802154.h index 86c1b2f15b57..f8b5bc997959 100644 --- a/include/net/nl802154.h +++ b/include/net/nl802154.h @@ -82,7 +82,7 @@ enum nl802154_attrs { NL802154_ATTR_TX_POWER, NL802154_ATTR_CCA_MODE, - NL802154_ATTR_CCA_MODE3_AND, + NL802154_ATTR_CCA_OPT, NL802154_ATTR_CCA_ED_LEVEL, NL802154_ATTR_MAX_FRAME_RETRIES, diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index 1efbe4250024..a25b9bbd077b 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -209,7 +209,8 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = { [NL802154_ATTR_TX_POWER] = { .type = NLA_S8, }, - [NL802154_ATTR_CCA_MODE] = { .type = NLA_U8, }, + [NL802154_ATTR_CCA_MODE] = { .type = NLA_U32, }, + [NL802154_ATTR_CCA_OPT] = { .type = NLA_U32, }, [NL802154_ATTR_SUPPORTED_CHANNEL] = { .type = NLA_U32, }, @@ -290,10 +291,16 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, goto nla_put_failure; /* cca mode */ - if (nla_put_u8(msg, NL802154_ATTR_CCA_MODE, - rdev->wpan_phy.cca.mode)) + if (nla_put_u32(msg, NL802154_ATTR_CCA_MODE, + rdev->wpan_phy.cca.mode)) goto nla_put_failure; + if (rdev->wpan_phy.cca.mode == NL802154_CCA_ENERGY_CARRIER) { + if (nla_put_u32(msg, NL802154_ATTR_CCA_OPT, + rdev->wpan_phy.cca.opt)) + goto nla_put_failure; + } + if (nla_put_s8(msg, NL802154_ATTR_TX_POWER, rdev->wpan_phy.transmit_power)) goto nla_put_failure; @@ -622,6 +629,31 @@ static int nl802154_set_channel(struct sk_buff *skb, struct genl_info *info) return rdev_set_channel(rdev, page, channel); } +static int nl802154_set_cca_mode(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg802154_registered_device *rdev = info->user_ptr[0]; + struct wpan_phy_cca cca; + + if (!info->attrs[NL802154_ATTR_CCA_MODE]) + return -EINVAL; + + cca.mode = nla_get_u32(info->attrs[NL802154_ATTR_CCA_MODE]); + /* checking 802.15.4 constraints */ + if (cca.mode < NL802154_CCA_ENERGY || cca.mode > NL802154_CCA_ATTR_MAX) + return -EINVAL; + + if (cca.mode == NL802154_CCA_ENERGY_CARRIER) { + if (!info->attrs[NL802154_ATTR_CCA_OPT]) + return -EINVAL; + + cca.opt = 
nla_get_u32(info->attrs[NL802154_ATTR_CCA_OPT]); + if (cca.opt > NL802154_CCA_OPT_ATTR_MAX) + return -EINVAL; + } + + return rdev_set_cca_mode(rdev, &cca); +} + static int nl802154_set_pan_id(struct sk_buff *skb, struct genl_info *info) { struct cfg802154_registered_device *rdev = info->user_ptr[0]; @@ -894,6 +926,14 @@ static const struct genl_ops nl802154_ops[] = { .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | NL802154_FLAG_NEED_RTNL, }, + { + .cmd = NL802154_CMD_SET_CCA_MODE, + .doit = nl802154_set_cca_mode, + .policy = nl802154_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL802154_FLAG_NEED_WPAN_PHY | + NL802154_FLAG_NEED_RTNL, + }, { .cmd = NL802154_CMD_SET_PAN_ID, .doit = nl802154_set_pan_id, diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h index aff54fbd9264..7c46732fad2b 100644 --- a/net/ieee802154/rdev-ops.h +++ b/net/ieee802154/rdev-ops.h @@ -41,6 +41,13 @@ rdev_set_channel(struct cfg802154_registered_device *rdev, u8 page, u8 channel) return rdev->ops->set_channel(&rdev->wpan_phy, page, channel); } +static inline int +rdev_set_cca_mode(struct cfg802154_registered_device *rdev, + const struct wpan_phy_cca *cca) +{ + return rdev->ops->set_cca_mode(&rdev->wpan_phy, cca); +} + static inline int rdev_set_pan_id(struct cfg802154_registered_device *rdev, struct wpan_dev *wpan_dev, __le16 pan_id) diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c index c035708ada16..7d31da503dcf 100644 --- a/net/mac802154/cfg.c +++ b/net/mac802154/cfg.c @@ -86,6 +86,26 @@ ieee802154_set_channel(struct wpan_phy *wpan_phy, u8 page, u8 channel) return ret; } +static int +ieee802154_set_cca_mode(struct wpan_phy *wpan_phy, + const struct wpan_phy_cca *cca) +{ + struct ieee802154_local *local = wpan_phy_priv(wpan_phy); + int ret; + + ASSERT_RTNL(); + + /* check if phy support this setting */ + if (!(local->hw.flags & IEEE802154_HW_CCA_MODE)) + return -EOPNOTSUPP; + + ret = drv_set_cca_mode(local, cca); + if (!ret) + wpan_phy->cca = *cca; + + return ret; +} + static int ieee802154_set_pan_id(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev, __le16 pan_id) @@ -201,6 +221,7 @@ const struct cfg802154_ops mac802154_config_ops = { .add_virtual_intf = ieee802154_add_iface, .del_virtual_intf = ieee802154_del_iface, .set_channel = ieee802154_set_channel, + .set_cca_mode = ieee802154_set_cca_mode, .set_pan_id = ieee802154_set_pan_id, .set_short_addr = ieee802154_set_short_addr, .set_backoff_exponent = ieee802154_set_backoff_exponent, -- cgit v1.2.3-70-g09d2 From cab9e3a0559c039b4e13b569fcf393618c661902 Mon Sep 17 00:00:00 2001 From: Jukka Rissanen Date: Fri, 19 Dec 2014 10:39:08 +0200 Subject: Bluetooth: 6lowpan: Add IPSP PSM value The Internet Protocol Support Profile a.k.a BT 6LoWPAN specification is ready so PSM value for it is now known. 
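[Editorial userspace sketch, not from the patch: opening an LE L2CAP connection-oriented channel to the newly assigned IPSP PSM 0x0023. It assumes the running kernel permits LE CoC PSM connections from AF_BLUETOOTH sockets; the peer address is a placeholder.]

#include <string.h>
#include <unistd.h>
#include <sys/socket.h>
#include <bluetooth/bluetooth.h>
#include <bluetooth/l2cap.h>

static int ipsp_connect(const char *peer)
{
	struct sockaddr_l2 addr;
	int sk;

	sk = socket(AF_BLUETOOTH, SOCK_SEQPACKET, BTPROTO_L2CAP);
	if (sk < 0)
		return -1;

	memset(&addr, 0, sizeof(addr));
	addr.l2_family = AF_BLUETOOTH;
	addr.l2_psm = htobs(0x0023);		/* L2CAP_PSM_IPSP */
	addr.l2_bdaddr_type = BDADDR_LE_PUBLIC;
	str2ba(peer, &addr.l2_bdaddr);

	if (connect(sk, (struct sockaddr *)&addr, sizeof(addr)) < 0) {
		close(sk);
		return -1;
	}

	return sk;	/* 6LoWPAN/IPSP traffic now flows over this channel */
}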
Signed-off-by: Jukka Rissanen Signed-off-by: Marcel Holtmann --- include/net/bluetooth/l2cap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index d1bb342d083f..2239a3753092 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -248,6 +248,7 @@ struct l2cap_conn_rsp { #define L2CAP_PSM_SDP 0x0001 #define L2CAP_PSM_RFCOMM 0x0003 #define L2CAP_PSM_3DSP 0x0021 +#define L2CAP_PSM_IPSP 0x0023 /* 6LoWPAN */ /* channel identifier */ #define L2CAP_CID_SIGNALING 0x0001 -- cgit v1.2.3-70-g09d2 From 1d2dc5b7b32393bb2d818e0de82a66b1a654d329 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 19 Dec 2014 13:40:19 +0200 Subject: Bluetooth: Split hci_update_page_scan into two functions To keep the parameter list and its semantics clear it makes sense to split the hci_update_page_scan function into two separate functions: one taking a hci_dev and another taking a hci_request. The one taking a hci_dev constructs its own hci_request and then calls the other function. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 3 ++- net/bluetooth/hci_core.c | 17 ++++++++++++----- net/bluetooth/hci_event.c | 4 ++-- net/bluetooth/mgmt.c | 16 ++++++++-------- 4 files changed, 24 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3c7827005c25..664451a64d72 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1306,7 +1306,8 @@ bool hci_req_pending(struct hci_dev *hdev); void hci_req_add_le_scan_disable(struct hci_request *req); void hci_req_add_le_passive_scan(struct hci_request *req); -void hci_update_page_scan(struct hci_dev *hdev, struct hci_request *req); +void hci_update_page_scan(struct hci_dev *hdev); +void __hci_update_page_scan(struct hci_request *req); struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 5dcacf9607e4..3aa9015a8858 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -5787,8 +5787,9 @@ static bool disconnected_whitelist_entries(struct hci_dev *hdev) return false; } -void hci_update_page_scan(struct hci_dev *hdev, struct hci_request *req) +void __hci_update_page_scan(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; u8 scan; if (!test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) @@ -5812,8 +5813,14 @@ void hci_update_page_scan(struct hci_dev *hdev, struct hci_request *req) if (test_bit(HCI_DISCOVERABLE, &hdev->dev_flags)) scan |= SCAN_INQUIRY; - if (req) - hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); - else - hci_send_cmd(hdev, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); + hci_req_add(req, HCI_OP_WRITE_SCAN_ENABLE, 1, &scan); +} + +void hci_update_page_scan(struct hci_dev *hdev) +{ + struct hci_request req; + + hci_req_init(&req, hdev); + __hci_update_page_scan(&req); + hci_req_run(&req, NULL); } diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 39a5c8a01726..65f4ec8945a4 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -2127,7 +2127,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_send_cmd(hdev, HCI_OP_READ_REMOTE_FEATURES, sizeof(cp), &cp); - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); } /* Set packet type for incoming connection */ @@ -2308,7 
+2308,7 @@ static void hci_disconn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) if (test_bit(HCI_CONN_FLUSH_KEY, &conn->flags)) hci_remove_link_key(hdev, &conn->dst); - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); } params = hci_conn_params_lookup(hdev, &conn->dst, conn->dst_type); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 693ce8bcd06e..f86f3ec684ba 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1566,7 +1566,7 @@ static void set_discoverable_complete(struct hci_dev *hdev, u8 status) * entries. */ hci_req_init(&req, hdev); - hci_update_page_scan(hdev, &req); + __hci_update_page_scan(&req); update_class(&req); hci_req_run(&req, NULL); @@ -1813,7 +1813,7 @@ static void set_connectable_complete(struct hci_dev *hdev, u8 status) if (conn_changed || discov_changed) { new_settings(hdev, cmd->sk); - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); if (discov_changed) mgmt_update_adv_data(hdev); hci_update_background_scan(hdev); @@ -1847,7 +1847,7 @@ static int set_connectable_update_settings(struct hci_dev *hdev, return err; if (changed) { - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); hci_update_background_scan(hdev); return new_settings(hdev, sk); } @@ -4697,7 +4697,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) hci_req_init(&req, hdev); write_fast_connectable(&req, false); - hci_update_page_scan(hdev, &req); + __hci_update_page_scan(&req); /* Since only the advertising data flags will change, there * is no need to update the scan response data. @@ -5473,7 +5473,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (err) goto unlock; - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); goto added; } @@ -5556,7 +5556,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, goto unlock; } - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); device_removed(sk, hdev, &cp->addr.bdaddr, cp->addr.type); @@ -5607,7 +5607,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev, kfree(b); } - hci_update_page_scan(hdev, NULL); + hci_update_page_scan(hdev); list_for_each_entry_safe(p, tmp, &hdev->le_conn_params, list) { if (p->auto_connect == HCI_AUTO_CONN_DISABLED) @@ -6139,7 +6139,7 @@ static int powered_update_hci(struct hci_dev *hdev) if (lmp_bredr_capable(hdev)) { write_fast_connectable(&req, false); - hci_update_page_scan(hdev, &req); + __hci_update_page_scan(&req); update_class(&req); update_name(&req); update_eir(&req); -- cgit v1.2.3-70-g09d2 From 0857dd3bed62d8f905f61a399d1ed76464b5270f Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 19 Dec 2014 13:40:20 +0200 Subject: Bluetooth: Split hci_request helpers to hci_request.[ch] None of the hci_request related things in net/bluetooth/hci_core.h are needed anywhere outside of the core bluetooth module. This patch creates a new net/bluetooth/hci_request.c file with its corresponding h-file and moves the functionality there from hci_core.c and hci_core.h. 
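[Editorial sketch of the usage pattern the relocated helpers keep intact: build a request, queue commands on it, run it with a completion callback. The hci_req_*() calls are the ones moved by this patch; the example_*() names are made up.]

#include "hci_request.h"

static void example_complete(struct hci_dev *hdev, u8 status)
{
	if (status)
		BT_DBG("example request failed: status 0x%2.2x", status);
}

static int example_stop_le_scan(struct hci_dev *hdev)
{
	struct hci_request req;

	/* Collect one or more HCI commands on a local request... */
	hci_req_init(&req, hdev);
	hci_req_add_le_scan_disable(&req);

	/* ...then splice them onto hdev->cmd_q in one go; the callback
	 * runs once the last queued command has completed.
	 */
	return hci_req_run(&req, example_complete);
}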
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 25 --- net/bluetooth/Makefile | 2 +- net/bluetooth/hci_conn.c | 1 + net/bluetooth/hci_core.c | 370 +----------------------------------- net/bluetooth/hci_event.c | 1 + net/bluetooth/hci_request.c | 397 +++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 51 +++++ net/bluetooth/mgmt.c | 1 + 8 files changed, 453 insertions(+), 395 deletions(-) create mode 100644 net/bluetooth/hci_request.c create mode 100644 net/bluetooth/hci_request.h (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 664451a64d72..93066f70f8ab 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1284,31 +1284,8 @@ static inline int hci_check_conn_params(u16 min, u16 max, u16 latency, int hci_register_cb(struct hci_cb *hcb); int hci_unregister_cb(struct hci_cb *hcb); -struct hci_request { - struct hci_dev *hdev; - struct sk_buff_head cmd_q; - - /* If something goes wrong when building the HCI request, the error - * value is stored in this field. - */ - int err; -}; - -void hci_req_init(struct hci_request *req, struct hci_dev *hdev); -int hci_req_run(struct hci_request *req, hci_req_complete_t complete); -void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, - const void *param); -void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, - const void *param, u8 event); -void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); bool hci_req_pending(struct hci_dev *hdev); -void hci_req_add_le_scan_disable(struct hci_request *req); -void hci_req_add_le_passive_scan(struct hci_request *req); - -void hci_update_page_scan(struct hci_dev *hdev); -void __hci_update_page_scan(struct hci_request *req); - struct sk_buff *__hci_cmd_sync(struct hci_dev *hdev, u16 opcode, u32 plen, const void *param, u32 timeout); struct sk_buff *__hci_cmd_sync_ev(struct hci_dev *hdev, u16 opcode, u32 plen, @@ -1418,8 +1395,6 @@ u8 hci_le_conn_update(struct hci_conn *conn, u16 min, u16 max, u16 latency, void hci_le_start_enc(struct hci_conn *conn, __le16 ediv, __le64 rand, __u8 ltk[16]); -int hci_update_random_address(struct hci_request *req, bool require_privacy, - u8 *own_addr_type); void hci_copy_identity_address(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 *bdaddr_type); diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile index a5432a6a0ae6..0a176fc9e291 100644 --- a/net/bluetooth/Makefile +++ b/net/bluetooth/Makefile @@ -13,6 +13,6 @@ bluetooth_6lowpan-y := 6lowpan.o bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \ hci_sock.o hci_sysfs.o l2cap_core.o l2cap_sock.o smp.o sco.o lib.o \ - a2mp.o amp.o ecc.o + a2mp.o amp.o ecc.o hci_request.o subdir-ccflags-y += -D__CHECK_ENDIAN__ diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index fe18825cc8a4..4405fb352c70 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -30,6 +30,7 @@ #include #include +#include "hci_request.h" #include "smp.h" #include "a2mp.h" diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 3aa9015a8858..2cfaaa6acb04 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -37,6 +37,7 @@ #include #include +#include "hci_request.h" #include "smp.h" static void hci_rx_work(struct work_struct *work); @@ -3901,112 +3902,6 @@ static void le_scan_disable_work(struct work_struct *work) BT_ERR("Disable LE scanning request failed: err %d", err); } -static void 
set_random_addr(struct hci_request *req, bdaddr_t *rpa) -{ - struct hci_dev *hdev = req->hdev; - - /* If we're advertising or initiating an LE connection we can't - * go ahead and change the random address at this time. This is - * because the eventual initiator address used for the - * subsequently created connection will be undefined (some - * controllers use the new address and others the one we had - * when the operation started). - * - * In this kind of scenario skip the update and let the random - * address be updated at the next cycle. - */ - if (test_bit(HCI_LE_ADV, &hdev->dev_flags) || - hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { - BT_DBG("Deferring random address update"); - set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); - return; - } - - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); -} - -int hci_update_random_address(struct hci_request *req, bool require_privacy, - u8 *own_addr_type) -{ - struct hci_dev *hdev = req->hdev; - int err; - - /* If privacy is enabled use a resolvable private address. If - * current RPA has expired or there is something else than - * the current RPA in use, then generate a new one. - */ - if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) { - int to; - - *own_addr_type = ADDR_LE_DEV_RANDOM; - - if (!test_and_clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags) && - !bacmp(&hdev->random_addr, &hdev->rpa)) - return 0; - - err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); - if (err < 0) { - BT_ERR("%s failed to generate new RPA", hdev->name); - return err; - } - - set_random_addr(req, &hdev->rpa); - - to = msecs_to_jiffies(hdev->rpa_timeout * 1000); - queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to); - - return 0; - } - - /* In case of required privacy without resolvable private address, - * use an non-resolvable private address. This is useful for active - * scanning and non-connectable advertising. - */ - if (require_privacy) { - bdaddr_t nrpa; - - while (true) { - /* The non-resolvable private address is generated - * from random six bytes with the two most significant - * bits cleared. - */ - get_random_bytes(&nrpa, 6); - nrpa.b[5] &= 0x3f; - - /* The non-resolvable private address shall not be - * equal to the public address. - */ - if (bacmp(&hdev->bdaddr, &nrpa)) - break; - } - - *own_addr_type = ADDR_LE_DEV_RANDOM; - set_random_addr(req, &nrpa); - return 0; - } - - /* If forcing static address is in use or there is no public - * address use the static address as random address (but skip - * the HCI command if the current random address is already the - * static one. - */ - if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || - !bacmp(&hdev->bdaddr, BDADDR_ANY)) { - *own_addr_type = ADDR_LE_DEV_RANDOM; - if (bacmp(&hdev->static_addr, &hdev->random_addr)) - hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, - &hdev->static_addr); - return 0; - } - - /* Neither privacy nor static address is being used so use a - * public address. - */ - *own_addr_type = ADDR_LE_DEV_PUBLIC; - - return 0; -} - /* Copy the Identity Address of the controller. * * If the controller has a public BD_ADDR, then by default use that one. 
@@ -4539,76 +4434,11 @@ static void hci_send_frame(struct hci_dev *hdev, struct sk_buff *skb) } } -void hci_req_init(struct hci_request *req, struct hci_dev *hdev) -{ - skb_queue_head_init(&req->cmd_q); - req->hdev = hdev; - req->err = 0; -} - -int hci_req_run(struct hci_request *req, hci_req_complete_t complete) -{ - struct hci_dev *hdev = req->hdev; - struct sk_buff *skb; - unsigned long flags; - - BT_DBG("length %u", skb_queue_len(&req->cmd_q)); - - /* If an error occurred during request building, remove all HCI - * commands queued on the HCI request queue. - */ - if (req->err) { - skb_queue_purge(&req->cmd_q); - return req->err; - } - - /* Do not allow empty requests */ - if (skb_queue_empty(&req->cmd_q)) - return -ENODATA; - - skb = skb_peek_tail(&req->cmd_q); - bt_cb(skb)->req.complete = complete; - - spin_lock_irqsave(&hdev->cmd_q.lock, flags); - skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); - spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); - - queue_work(hdev->workqueue, &hdev->cmd_work); - - return 0; -} - bool hci_req_pending(struct hci_dev *hdev) { return (hdev->req_status == HCI_REQ_PEND); } -static struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, - u32 plen, const void *param) -{ - int len = HCI_COMMAND_HDR_SIZE + plen; - struct hci_command_hdr *hdr; - struct sk_buff *skb; - - skb = bt_skb_alloc(len, GFP_ATOMIC); - if (!skb) - return NULL; - - hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); - hdr->opcode = cpu_to_le16(opcode); - hdr->plen = plen; - - if (plen) - memcpy(skb_put(skb, plen), param, plen); - - BT_DBG("skb len %d", skb->len); - - bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; - bt_cb(skb)->opcode = opcode; - - return skb; -} - /* Send HCI command */ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, const void *param) @@ -4634,43 +4464,6 @@ int hci_send_cmd(struct hci_dev *hdev, __u16 opcode, __u32 plen, return 0; } -/* Queue a command to an asynchronous HCI request */ -void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, - const void *param, u8 event) -{ - struct hci_dev *hdev = req->hdev; - struct sk_buff *skb; - - BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); - - /* If an error occurred during request building, there is no point in - * queueing the HCI command. We can simply return. 
- */ - if (req->err) - return; - - skb = hci_prepare_cmd(hdev, opcode, plen, param); - if (!skb) { - BT_ERR("%s no memory for command (opcode 0x%4.4x)", - hdev->name, opcode); - req->err = -ENOMEM; - return; - } - - if (skb_queue_empty(&req->cmd_q)) - bt_cb(skb)->req.start = true; - - bt_cb(skb)->req.event = event; - - skb_queue_tail(&req->cmd_q, skb); -} - -void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, - const void *param) -{ - hci_req_add_ev(req, opcode, plen, param, 0); -} - /* Get data from the previously sent command */ void *hci_sent_cmd_data(struct hci_dev *hdev, __u16 opcode) { @@ -5519,167 +5312,6 @@ static void hci_cmd_work(struct work_struct *work) } } -void hci_req_add_le_scan_disable(struct hci_request *req) -{ - struct hci_cp_le_set_scan_enable cp; - - memset(&cp, 0, sizeof(cp)); - cp.enable = LE_SCAN_DISABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); -} - -static void add_to_white_list(struct hci_request *req, - struct hci_conn_params *params) -{ - struct hci_cp_le_add_to_white_list cp; - - cp.bdaddr_type = params->addr_type; - bacpy(&cp.bdaddr, ¶ms->addr); - - hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); -} - -static u8 update_white_list(struct hci_request *req) -{ - struct hci_dev *hdev = req->hdev; - struct hci_conn_params *params; - struct bdaddr_list *b; - uint8_t white_list_entries = 0; - - /* Go through the current white list programmed into the - * controller one by one and check if that address is still - * in the list of pending connections or list of devices to - * report. If not present in either list, then queue the - * command to remove it from the controller. - */ - list_for_each_entry(b, &hdev->le_white_list, list) { - struct hci_cp_le_del_from_white_list cp; - - if (hci_pend_le_action_lookup(&hdev->pend_le_conns, - &b->bdaddr, b->bdaddr_type) || - hci_pend_le_action_lookup(&hdev->pend_le_reports, - &b->bdaddr, b->bdaddr_type)) { - white_list_entries++; - continue; - } - - cp.bdaddr_type = b->bdaddr_type; - bacpy(&cp.bdaddr, &b->bdaddr); - - hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, - sizeof(cp), &cp); - } - - /* Since all no longer valid white list entries have been - * removed, walk through the list of pending connections - * and ensure that any new device gets programmed into - * the controller. - * - * If the list of the devices is larger than the list of - * available white list entries in the controller, then - * just abort and return filer policy value to not use the - * white list. - */ - list_for_each_entry(params, &hdev->pend_le_conns, action) { - if (hci_bdaddr_list_lookup(&hdev->le_white_list, - ¶ms->addr, params->addr_type)) - continue; - - if (white_list_entries >= hdev->le_white_list_size) { - /* Select filter policy to accept all advertising */ - return 0x00; - } - - if (hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type)) { - /* White list can not be used with RPAs */ - return 0x00; - } - - white_list_entries++; - add_to_white_list(req, params); - } - - /* After adding all new pending connections, walk through - * the list of pending reports and also add these to the - * white list if there is still space. 
- */ - list_for_each_entry(params, &hdev->pend_le_reports, action) { - if (hci_bdaddr_list_lookup(&hdev->le_white_list, - ¶ms->addr, params->addr_type)) - continue; - - if (white_list_entries >= hdev->le_white_list_size) { - /* Select filter policy to accept all advertising */ - return 0x00; - } - - if (hci_find_irk_by_addr(hdev, ¶ms->addr, - params->addr_type)) { - /* White list can not be used with RPAs */ - return 0x00; - } - - white_list_entries++; - add_to_white_list(req, params); - } - - /* Select filter policy to use white list */ - return 0x01; -} - -void hci_req_add_le_passive_scan(struct hci_request *req) -{ - struct hci_cp_le_set_scan_param param_cp; - struct hci_cp_le_set_scan_enable enable_cp; - struct hci_dev *hdev = req->hdev; - u8 own_addr_type; - u8 filter_policy; - - /* Set require_privacy to false since no SCAN_REQ are send - * during passive scanning. Not using an non-resolvable address - * here is important so that peer devices using direct - * advertising with our address will be correctly reported - * by the controller. - */ - if (hci_update_random_address(req, false, &own_addr_type)) - return; - - /* Adding or removing entries from the white list must - * happen before enabling scanning. The controller does - * not allow white list modification while scanning. - */ - filter_policy = update_white_list(req); - - /* When the controller is using random resolvable addresses and - * with that having LE privacy enabled, then controllers with - * Extended Scanner Filter Policies support can now enable support - * for handling directed advertising. - * - * So instead of using filter polices 0x00 (no whitelist) - * and 0x01 (whitelist enabled) use the new filter policies - * 0x02 (no whitelist) and 0x03 (whitelist enabled). - */ - if (test_bit(HCI_PRIVACY, &hdev->dev_flags) && - (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) - filter_policy |= 0x02; - - memset(¶m_cp, 0, sizeof(param_cp)); - param_cp.type = LE_SCAN_PASSIVE; - param_cp.interval = cpu_to_le16(hdev->le_scan_interval); - param_cp.window = cpu_to_le16(hdev->le_scan_window); - param_cp.own_address_type = own_addr_type; - param_cp.filter_policy = filter_policy; - hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), - ¶m_cp); - - memset(&enable_cp, 0, sizeof(enable_cp)); - enable_cp.enable = LE_SCAN_ENABLE; - enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; - hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), - &enable_cp); -} - static void update_background_scan_complete(struct hci_dev *hdev, u8 status) { if (status) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 65f4ec8945a4..a412eb1e1f61 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -30,6 +30,7 @@ #include #include +#include "hci_request.h" #include "a2mp.h" #include "amp.h" #include "smp.h" diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c new file mode 100644 index 000000000000..eba83a2a6556 --- /dev/null +++ b/net/bluetooth/hci_request.c @@ -0,0 +1,397 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. 
+ IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +#include +#include + +#include "smp.h" +#include "hci_request.h" + +void hci_req_init(struct hci_request *req, struct hci_dev *hdev) +{ + skb_queue_head_init(&req->cmd_q); + req->hdev = hdev; + req->err = 0; +} + +int hci_req_run(struct hci_request *req, hci_req_complete_t complete) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + unsigned long flags; + + BT_DBG("length %u", skb_queue_len(&req->cmd_q)); + + /* If an error occurred during request building, remove all HCI + * commands queued on the HCI request queue. + */ + if (req->err) { + skb_queue_purge(&req->cmd_q); + return req->err; + } + + /* Do not allow empty requests */ + if (skb_queue_empty(&req->cmd_q)) + return -ENODATA; + + skb = skb_peek_tail(&req->cmd_q); + bt_cb(skb)->req.complete = complete; + + spin_lock_irqsave(&hdev->cmd_q.lock, flags); + skb_queue_splice_tail(&req->cmd_q, &hdev->cmd_q); + spin_unlock_irqrestore(&hdev->cmd_q.lock, flags); + + queue_work(hdev->workqueue, &hdev->cmd_work); + + return 0; +} + +struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param) +{ + int len = HCI_COMMAND_HDR_SIZE + plen; + struct hci_command_hdr *hdr; + struct sk_buff *skb; + + skb = bt_skb_alloc(len, GFP_ATOMIC); + if (!skb) + return NULL; + + hdr = (struct hci_command_hdr *) skb_put(skb, HCI_COMMAND_HDR_SIZE); + hdr->opcode = cpu_to_le16(opcode); + hdr->plen = plen; + + if (plen) + memcpy(skb_put(skb, plen), param, plen); + + BT_DBG("skb len %d", skb->len); + + bt_cb(skb)->pkt_type = HCI_COMMAND_PKT; + bt_cb(skb)->opcode = opcode; + + return skb; +} + +/* Queue a command to an asynchronous HCI request */ +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, + const void *param, u8 event) +{ + struct hci_dev *hdev = req->hdev; + struct sk_buff *skb; + + BT_DBG("%s opcode 0x%4.4x plen %d", hdev->name, opcode, plen); + + /* If an error occurred during request building, there is no point in + * queueing the HCI command. We can simply return. 
+ */ + if (req->err) + return; + + skb = hci_prepare_cmd(hdev, opcode, plen, param); + if (!skb) { + BT_ERR("%s no memory for command (opcode 0x%4.4x)", + hdev->name, opcode); + req->err = -ENOMEM; + return; + } + + if (skb_queue_empty(&req->cmd_q)) + bt_cb(skb)->req.start = true; + + bt_cb(skb)->req.event = event; + + skb_queue_tail(&req->cmd_q, skb); +} + +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, + const void *param) +{ + hci_req_add_ev(req, opcode, plen, param, 0); +} + +void hci_req_add_le_scan_disable(struct hci_request *req) +{ + struct hci_cp_le_set_scan_enable cp; + + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_DISABLE; + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); +} + +static void add_to_white_list(struct hci_request *req, + struct hci_conn_params *params) +{ + struct hci_cp_le_add_to_white_list cp; + + cp.bdaddr_type = params->addr_type; + bacpy(&cp.bdaddr, ¶ms->addr); + + hci_req_add(req, HCI_OP_LE_ADD_TO_WHITE_LIST, sizeof(cp), &cp); +} + +static u8 update_white_list(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn_params *params; + struct bdaddr_list *b; + uint8_t white_list_entries = 0; + + /* Go through the current white list programmed into the + * controller one by one and check if that address is still + * in the list of pending connections or list of devices to + * report. If not present in either list, then queue the + * command to remove it from the controller. + */ + list_for_each_entry(b, &hdev->le_white_list, list) { + struct hci_cp_le_del_from_white_list cp; + + if (hci_pend_le_action_lookup(&hdev->pend_le_conns, + &b->bdaddr, b->bdaddr_type) || + hci_pend_le_action_lookup(&hdev->pend_le_reports, + &b->bdaddr, b->bdaddr_type)) { + white_list_entries++; + continue; + } + + cp.bdaddr_type = b->bdaddr_type; + bacpy(&cp.bdaddr, &b->bdaddr); + + hci_req_add(req, HCI_OP_LE_DEL_FROM_WHITE_LIST, + sizeof(cp), &cp); + } + + /* Since all no longer valid white list entries have been + * removed, walk through the list of pending connections + * and ensure that any new device gets programmed into + * the controller. + * + * If the list of the devices is larger than the list of + * available white list entries in the controller, then + * just abort and return filer policy value to not use the + * white list. + */ + list_for_each_entry(params, &hdev->pend_le_conns, action) { + if (hci_bdaddr_list_lookup(&hdev->le_white_list, + ¶ms->addr, params->addr_type)) + continue; + + if (white_list_entries >= hdev->le_white_list_size) { + /* Select filter policy to accept all advertising */ + return 0x00; + } + + if (hci_find_irk_by_addr(hdev, ¶ms->addr, + params->addr_type)) { + /* White list can not be used with RPAs */ + return 0x00; + } + + white_list_entries++; + add_to_white_list(req, params); + } + + /* After adding all new pending connections, walk through + * the list of pending reports and also add these to the + * white list if there is still space. 
+ */ + list_for_each_entry(params, &hdev->pend_le_reports, action) { + if (hci_bdaddr_list_lookup(&hdev->le_white_list, + ¶ms->addr, params->addr_type)) + continue; + + if (white_list_entries >= hdev->le_white_list_size) { + /* Select filter policy to accept all advertising */ + return 0x00; + } + + if (hci_find_irk_by_addr(hdev, ¶ms->addr, + params->addr_type)) { + /* White list can not be used with RPAs */ + return 0x00; + } + + white_list_entries++; + add_to_white_list(req, params); + } + + /* Select filter policy to use white list */ + return 0x01; +} + +void hci_req_add_le_passive_scan(struct hci_request *req) +{ + struct hci_cp_le_set_scan_param param_cp; + struct hci_cp_le_set_scan_enable enable_cp; + struct hci_dev *hdev = req->hdev; + u8 own_addr_type; + u8 filter_policy; + + /* Set require_privacy to false since no SCAN_REQ are send + * during passive scanning. Not using an non-resolvable address + * here is important so that peer devices using direct + * advertising with our address will be correctly reported + * by the controller. + */ + if (hci_update_random_address(req, false, &own_addr_type)) + return; + + /* Adding or removing entries from the white list must + * happen before enabling scanning. The controller does + * not allow white list modification while scanning. + */ + filter_policy = update_white_list(req); + + /* When the controller is using random resolvable addresses and + * with that having LE privacy enabled, then controllers with + * Extended Scanner Filter Policies support can now enable support + * for handling directed advertising. + * + * So instead of using filter polices 0x00 (no whitelist) + * and 0x01 (whitelist enabled) use the new filter policies + * 0x02 (no whitelist) and 0x03 (whitelist enabled). + */ + if (test_bit(HCI_PRIVACY, &hdev->dev_flags) && + (hdev->le_features[0] & HCI_LE_EXT_SCAN_POLICY)) + filter_policy |= 0x02; + + memset(¶m_cp, 0, sizeof(param_cp)); + param_cp.type = LE_SCAN_PASSIVE; + param_cp.interval = cpu_to_le16(hdev->le_scan_interval); + param_cp.window = cpu_to_le16(hdev->le_scan_window); + param_cp.own_address_type = own_addr_type; + param_cp.filter_policy = filter_policy; + hci_req_add(req, HCI_OP_LE_SET_SCAN_PARAM, sizeof(param_cp), + ¶m_cp); + + memset(&enable_cp, 0, sizeof(enable_cp)); + enable_cp.enable = LE_SCAN_ENABLE; + enable_cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(enable_cp), + &enable_cp); +} + +static void set_random_addr(struct hci_request *req, bdaddr_t *rpa) +{ + struct hci_dev *hdev = req->hdev; + + /* If we're advertising or initiating an LE connection we can't + * go ahead and change the random address at this time. This is + * because the eventual initiator address used for the + * subsequently created connection will be undefined (some + * controllers use the new address and others the one we had + * when the operation started). + * + * In this kind of scenario skip the update and let the random + * address be updated at the next cycle. + */ + if (test_bit(HCI_LE_ADV, &hdev->dev_flags) || + hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT)) { + BT_DBG("Deferring random address update"); + set_bit(HCI_RPA_EXPIRED, &hdev->dev_flags); + return; + } + + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, rpa); +} + +int hci_update_random_address(struct hci_request *req, bool require_privacy, + u8 *own_addr_type) +{ + struct hci_dev *hdev = req->hdev; + int err; + + /* If privacy is enabled use a resolvable private address. 
If + * current RPA has expired or there is something else than + * the current RPA in use, then generate a new one. + */ + if (test_bit(HCI_PRIVACY, &hdev->dev_flags)) { + int to; + + *own_addr_type = ADDR_LE_DEV_RANDOM; + + if (!test_and_clear_bit(HCI_RPA_EXPIRED, &hdev->dev_flags) && + !bacmp(&hdev->random_addr, &hdev->rpa)) + return 0; + + err = smp_generate_rpa(hdev, hdev->irk, &hdev->rpa); + if (err < 0) { + BT_ERR("%s failed to generate new RPA", hdev->name); + return err; + } + + set_random_addr(req, &hdev->rpa); + + to = msecs_to_jiffies(hdev->rpa_timeout * 1000); + queue_delayed_work(hdev->workqueue, &hdev->rpa_expired, to); + + return 0; + } + + /* In case of required privacy without resolvable private address, + * use an non-resolvable private address. This is useful for active + * scanning and non-connectable advertising. + */ + if (require_privacy) { + bdaddr_t nrpa; + + while (true) { + /* The non-resolvable private address is generated + * from random six bytes with the two most significant + * bits cleared. + */ + get_random_bytes(&nrpa, 6); + nrpa.b[5] &= 0x3f; + + /* The non-resolvable private address shall not be + * equal to the public address. + */ + if (bacmp(&hdev->bdaddr, &nrpa)) + break; + } + + *own_addr_type = ADDR_LE_DEV_RANDOM; + set_random_addr(req, &nrpa); + return 0; + } + + /* If forcing static address is in use or there is no public + * address use the static address as random address (but skip + * the HCI command if the current random address is already the + * static one. + */ + if (test_bit(HCI_FORCE_STATIC_ADDR, &hdev->dbg_flags) || + !bacmp(&hdev->bdaddr, BDADDR_ANY)) { + *own_addr_type = ADDR_LE_DEV_RANDOM; + if (bacmp(&hdev->static_addr, &hdev->random_addr)) + hci_req_add(req, HCI_OP_LE_SET_RANDOM_ADDR, 6, + &hdev->static_addr); + return 0; + } + + /* Neither privacy nor static address is being used so use a + * public address. + */ + *own_addr_type = ADDR_LE_DEV_PUBLIC; + + return 0; +} diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h new file mode 100644 index 000000000000..1793a46fea65 --- /dev/null +++ b/net/bluetooth/hci_request.h @@ -0,0 +1,51 @@ +/* + BlueZ - Bluetooth protocol stack for Linux + Copyright (C) 2014 Intel Corporation + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License version 2 as + published by the Free Software Foundation; + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT OF THIRD PARTY RIGHTS. + IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) AND AUTHOR(S) BE LIABLE FOR ANY + CLAIM, OR ANY SPECIAL INDIRECT OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + + ALL LIABILITY, INCLUDING LIABILITY FOR INFRINGEMENT OF ANY PATENTS, + COPYRIGHTS, TRADEMARKS OR OTHER RIGHTS, RELATING TO USE OF THIS + SOFTWARE IS DISCLAIMED. +*/ + +struct hci_request { + struct hci_dev *hdev; + struct sk_buff_head cmd_q; + + /* If something goes wrong when building the HCI request, the error + * value is stored in this field. 
+ */ + int err; +}; + +void hci_req_init(struct hci_request *req, struct hci_dev *hdev); +int hci_req_run(struct hci_request *req, hci_req_complete_t complete); +void hci_req_add(struct hci_request *req, u16 opcode, u32 plen, + const void *param); +void hci_req_add_ev(struct hci_request *req, u16 opcode, u32 plen, + const void *param, u8 event); +void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status); + +struct sk_buff *hci_prepare_cmd(struct hci_dev *hdev, u16 opcode, u32 plen, + const void *param); + +void hci_req_add_le_scan_disable(struct hci_request *req); +void hci_req_add_le_passive_scan(struct hci_request *req); + +void hci_update_page_scan(struct hci_dev *hdev); +void __hci_update_page_scan(struct hci_request *req); + +int hci_update_random_address(struct hci_request *req, bool require_privacy, + u8 *own_addr_type); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index f86f3ec684ba..95473e966703 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -32,6 +32,7 @@ #include #include +#include "hci_request.h" #include "smp.h" #define MGMT_VERSION 1 -- cgit v1.2.3-70-g09d2 From 2cf22218b00f46f93b39a9355b830e9e8e4fd077 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 19 Dec 2014 22:26:00 +0200 Subject: Bluetooth: Add hci_request support for hci_update_background_scan Many places using hci_update_background_scan() try to synchronize whatever they're doing with the help of hci_request callbacks. However, since the hci_update_background_scan() function hasn't so far accepted a hci_request pointer any commands triggered by it have been left out by the synchronization. This patch modifies the API in a similar way as was done for hci_update_page_scan, i.e. there's a variant that takes a hci_request and another one that takes a hci_dev. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 - net/bluetooth/hci_core.c | 89 ------------------------------------- net/bluetooth/hci_request.c | 96 ++++++++++++++++++++++++++++++++++++++++ net/bluetooth/hci_request.h | 3 ++ net/bluetooth/mgmt.c | 12 ++--- 5 files changed, 105 insertions(+), 97 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 93066f70f8ab..8eccdf029500 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -930,8 +930,6 @@ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, bdaddr_t *addr, u8 addr_type); -void hci_update_background_scan(struct hci_dev *hdev); - void hci_uuids_clear(struct hci_dev *hdev); void hci_link_keys_clear(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 2cfaaa6acb04..def6fba01b45 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -5312,95 +5312,6 @@ static void hci_cmd_work(struct work_struct *work) } } -static void update_background_scan_complete(struct hci_dev *hdev, u8 status) -{ - if (status) - BT_DBG("HCI request failed to update background scanning: " - "status 0x%2.2x", status); -} - -/* This function controls the background scanning based on hdev->pend_le_conns - * list. If there are pending LE connection we start the background scanning, - * otherwise we stop it. - * - * This function requires the caller holds hdev->lock. 
- */ -void hci_update_background_scan(struct hci_dev *hdev) -{ - struct hci_request req; - struct hci_conn *conn; - int err; - - if (!test_bit(HCI_UP, &hdev->flags) || - test_bit(HCI_INIT, &hdev->flags) || - test_bit(HCI_SETUP, &hdev->dev_flags) || - test_bit(HCI_CONFIG, &hdev->dev_flags) || - test_bit(HCI_AUTO_OFF, &hdev->dev_flags) || - test_bit(HCI_UNREGISTER, &hdev->dev_flags)) - return; - - /* No point in doing scanning if LE support hasn't been enabled */ - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) - return; - - /* If discovery is active don't interfere with it */ - if (hdev->discovery.state != DISCOVERY_STOPPED) - return; - - /* Reset RSSI and UUID filters when starting background scanning - * since these filters are meant for service discovery only. - * - * The Start Discovery and Start Service Discovery operations - * ensure to set proper values for RSSI threshold and UUID - * filter list. So it is safe to just reset them here. - */ - hci_discovery_filter_clear(hdev); - - hci_req_init(&req, hdev); - - if (list_empty(&hdev->pend_le_conns) && - list_empty(&hdev->pend_le_reports)) { - /* If there is no pending LE connections or devices - * to be scanned for, we should stop the background - * scanning. - */ - - /* If controller is not scanning we are done. */ - if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - return; - - hci_req_add_le_scan_disable(&req); - - BT_DBG("%s stopping background scanning", hdev->name); - } else { - /* If there is at least one pending LE connection, we should - * keep the background scan running. - */ - - /* If controller is connecting, we should not start scanning - * since some controllers are not able to scan and connect at - * the same time. - */ - conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); - if (conn) - return; - - /* If controller is currently scanning, we stop it to ensure we - * don't miss any advertising (due to duplicates filter). - */ - if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) - hci_req_add_le_scan_disable(&req); - - hci_req_add_le_passive_scan(&req); - - BT_DBG("%s starting background scanning", hdev->name); - } - - err = hci_req_run(&req, update_background_scan_complete); - if (err) - BT_ERR("Failed to run HCI request: err %d", err); -} - static bool disconnected_whitelist_entries(struct hci_dev *hdev) { struct bdaddr_list *b; diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index eba83a2a6556..e49f682f1550 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -395,3 +395,99 @@ int hci_update_random_address(struct hci_request *req, bool require_privacy, return 0; } + +/* This function controls the background scanning based on hdev->pend_le_conns + * list. If there are pending LE connection we start the background scanning, + * otherwise we stop it. + * + * This function requires the caller holds hdev->lock. 
+ */ +void __hci_update_background_scan(struct hci_request *req) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn *conn; + + if (!test_bit(HCI_UP, &hdev->flags) || + test_bit(HCI_INIT, &hdev->flags) || + test_bit(HCI_SETUP, &hdev->dev_flags) || + test_bit(HCI_CONFIG, &hdev->dev_flags) || + test_bit(HCI_AUTO_OFF, &hdev->dev_flags) || + test_bit(HCI_UNREGISTER, &hdev->dev_flags)) + return; + + /* No point in doing scanning if LE support hasn't been enabled */ + if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) + return; + + /* If discovery is active don't interfere with it */ + if (hdev->discovery.state != DISCOVERY_STOPPED) + return; + + /* Reset RSSI and UUID filters when starting background scanning + * since these filters are meant for service discovery only. + * + * The Start Discovery and Start Service Discovery operations + * ensure to set proper values for RSSI threshold and UUID + * filter list. So it is safe to just reset them here. + */ + hci_discovery_filter_clear(hdev); + + if (list_empty(&hdev->pend_le_conns) && + list_empty(&hdev->pend_le_reports)) { + /* If there is no pending LE connections or devices + * to be scanned for, we should stop the background + * scanning. + */ + + /* If controller is not scanning we are done. */ + if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + return; + + hci_req_add_le_scan_disable(req); + + BT_DBG("%s stopping background scanning", hdev->name); + } else { + /* If there is at least one pending LE connection, we should + * keep the background scan running. + */ + + /* If controller is connecting, we should not start scanning + * since some controllers are not able to scan and connect at + * the same time. + */ + conn = hci_conn_hash_lookup_state(hdev, LE_LINK, BT_CONNECT); + if (conn) + return; + + /* If controller is currently scanning, we stop it to ensure we + * don't miss any advertising (due to duplicates filter). 
+ */ + if (test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + hci_req_add_le_scan_disable(req); + + hci_req_add_le_passive_scan(req); + + BT_DBG("%s starting background scanning", hdev->name); + } +} + +static void update_background_scan_complete(struct hci_dev *hdev, u8 status) +{ + if (status) + BT_DBG("HCI request failed to update background scanning: " + "status 0x%2.2x", status); +} + +void hci_update_background_scan(struct hci_dev *hdev) +{ + int err; + struct hci_request req; + + hci_req_init(&req, hdev); + + __hci_update_background_scan(&req); + + err = hci_req_run(&req, update_background_scan_complete); + if (err && err != -ENODATA) + BT_ERR("Failed to run HCI request: err %d", err); +} diff --git a/net/bluetooth/hci_request.h b/net/bluetooth/hci_request.h index 1793a46fea65..adf074d33544 100644 --- a/net/bluetooth/hci_request.h +++ b/net/bluetooth/hci_request.h @@ -49,3 +49,6 @@ void __hci_update_page_scan(struct hci_request *req); int hci_update_random_address(struct hci_request *req, bool require_privacy, u8 *own_addr_type); + +void hci_update_background_scan(struct hci_dev *hdev); +void __hci_update_background_scan(struct hci_request *req); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 95473e966703..3afe1e175eb8 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -2228,9 +2228,8 @@ static void le_enable_complete(struct hci_dev *hdev, u8 status) hci_req_init(&req, hdev); update_adv_data(&req); update_scan_rsp_data(&req); + __hci_update_background_scan(&req); hci_req_run(&req, NULL); - - hci_update_background_scan(hdev); } unlock: @@ -6038,8 +6037,9 @@ void mgmt_index_removed(struct hci_dev *hdev) } /* This function requires the caller holds hdev->lock */ -static void restart_le_actions(struct hci_dev *hdev) +static void restart_le_actions(struct hci_request *req) { + struct hci_dev *hdev = req->hdev; struct hci_conn_params *p; list_for_each_entry(p, &hdev->le_conn_params, list) { @@ -6061,7 +6061,7 @@ static void restart_le_actions(struct hci_dev *hdev) } } - hci_update_background_scan(hdev); + __hci_update_background_scan(req); } static void powered_complete(struct hci_dev *hdev, u8 status) @@ -6072,8 +6072,6 @@ static void powered_complete(struct hci_dev *hdev, u8 status) hci_dev_lock(hdev); - restart_le_actions(hdev); - mgmt_pending_foreach(MGMT_OP_SET_POWERED, hdev, settings_rsp, &match); new_settings(hdev, match.sk); @@ -6131,6 +6129,8 @@ static int powered_update_hci(struct hci_dev *hdev) if (test_bit(HCI_ADVERTISING, &hdev->dev_flags)) enable_advertising(&req); + + restart_le_actions(&req); } link_sec = test_bit(HCI_LINK_SECURITY, &hdev->dev_flags); -- cgit v1.2.3-70-g09d2 From 5a154e6f71dfd41c7b5cf96a13c83fca91e7df7f Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 19 Dec 2014 22:26:02 +0200 Subject: Bluetooth: Fix Add Device to wait for HCI before sending cmd_complete This patch updates the Add Device mgmt command handler to use a hci_request to wait for HCI command completion before notifying user space of the mgmt command completion. To do this we need to add an extra hci_request parameter to the hci_conn_params_set function. Since this function has no other users besides mgmt.c it's moved there as a static function. 
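[Editorial sketch of the deferral pattern this commit describes, not the actual mgmt.c code: __hci_update_background_scan() and hci_req_run() are the real helpers from the preceding patches, while the add_device_*() names and the reply placeholder are illustrative.]

static void add_device_complete(struct hci_dev *hdev, u8 status)
{
	/* Runs only after the controller has processed the queued
	 * commands; this is where the pending MGMT_OP_ADD_DEVICE
	 * command would be answered to user space.
	 */
}

static int add_device_sketch(struct hci_dev *hdev)
{
	struct hci_request req;
	int err;

	hci_req_init(&req, hdev);

	/* hci_conn_params_set() now takes a request and queues its scan
	 * updates on it instead of firing them off immediately.
	 */
	__hci_update_background_scan(&req);

	err = hci_req_run(&req, add_device_complete);
	if (err == -ENODATA)
		return 0;	/* nothing was queued; reply right away */

	return err;
}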
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 2 - net/bluetooth/hci_core.c | 58 ------------------- net/bluetooth/mgmt.c | 119 +++++++++++++++++++++++++++++++++++---- 3 files changed, 109 insertions(+), 70 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 8eccdf029500..79724c87ab00 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -920,8 +920,6 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); -int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type, - u8 auto_connect); void hci_conn_params_del(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type); void hci_conn_params_clear_all(struct hci_dev *hdev); void hci_conn_params_clear_disabled(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index def6fba01b45..ee2096c7ec2c 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -3660,23 +3660,6 @@ struct hci_conn_params *hci_conn_params_lookup(struct hci_dev *hdev, return NULL; } -static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) -{ - struct hci_conn *conn; - - conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); - if (!conn) - return false; - - if (conn->dst_type != type) - return false; - - if (conn->state != BT_CONNECTED) - return false; - - return true; -} - /* This function requires the caller holds hdev->lock */ struct hci_conn_params *hci_pend_le_action_lookup(struct list_head *list, bdaddr_t *addr, u8 addr_type) @@ -3732,47 +3715,6 @@ struct hci_conn_params *hci_conn_params_add(struct hci_dev *hdev, return params; } -/* This function requires the caller holds hdev->lock */ -int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr, u8 addr_type, - u8 auto_connect) -{ - struct hci_conn_params *params; - - params = hci_conn_params_add(hdev, addr, addr_type); - if (!params) - return -EIO; - - if (params->auto_connect == auto_connect) - return 0; - - list_del_init(¶ms->action); - - switch (auto_connect) { - case HCI_AUTO_CONN_DISABLED: - case HCI_AUTO_CONN_LINK_LOSS: - hci_update_background_scan(hdev); - break; - case HCI_AUTO_CONN_REPORT: - list_add(¶ms->action, &hdev->pend_le_reports); - hci_update_background_scan(hdev); - break; - case HCI_AUTO_CONN_DIRECT: - case HCI_AUTO_CONN_ALWAYS: - if (!is_connected(hdev, addr, addr_type)) { - list_add(¶ms->action, &hdev->pend_le_conns); - hci_update_background_scan(hdev); - } - break; - } - - params->auto_connect = auto_connect; - - BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type, - auto_connect); - - return 0; -} - static void hci_conn_params_free(struct hci_conn_params *params) { if (params->conn) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6b925733c6f8..ec7c0ec3d8d3 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -5425,6 +5425,65 @@ unlock: return err; } +static bool is_connected(struct hci_dev *hdev, bdaddr_t *addr, u8 type) +{ + struct hci_conn *conn; + + conn = hci_conn_hash_lookup_ba(hdev, LE_LINK, addr); + if (!conn) + return false; + + if (conn->dst_type != type) + return false; + + if (conn->state != BT_CONNECTED) + return false; + + return true; +} + +/* This function requires the caller holds hdev->lock */ +static int hci_conn_params_set(struct hci_request *req, bdaddr_t *addr, + u8 
addr_type, u8 auto_connect) +{ + struct hci_dev *hdev = req->hdev; + struct hci_conn_params *params; + + params = hci_conn_params_add(hdev, addr, addr_type); + if (!params) + return -EIO; + + if (params->auto_connect == auto_connect) + return 0; + + list_del_init(¶ms->action); + + switch (auto_connect) { + case HCI_AUTO_CONN_DISABLED: + case HCI_AUTO_CONN_LINK_LOSS: + __hci_update_background_scan(req); + break; + case HCI_AUTO_CONN_REPORT: + list_add(¶ms->action, &hdev->pend_le_reports); + __hci_update_background_scan(req); + break; + case HCI_AUTO_CONN_DIRECT: + case HCI_AUTO_CONN_ALWAYS: + if (!is_connected(hdev, addr, addr_type)) { + list_add(¶ms->action, &hdev->pend_le_conns); + __hci_update_background_scan(req); + } + break; + } + + params->auto_connect = auto_connect; + + BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type, + auto_connect); + + return 0; +} + static void device_added(struct sock *sk, struct hci_dev *hdev, bdaddr_t *bdaddr, u8 type, u8 action) { @@ -5437,10 +5496,31 @@ static void device_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_ADDED, hdev, &ev, sizeof(ev), sk); } +static void add_device_complete(struct hci_dev *hdev, u8 status) +{ + struct pending_cmd *cmd; + + BT_DBG("status 0x%02x", status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_ADD_DEVICE, hdev); + if (!cmd) + goto unlock; + + cmd->cmd_complete(cmd, mgmt_status(status)); + mgmt_pending_remove(cmd); + +unlock: + hci_dev_unlock(hdev); +} + static int add_device(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_cp_add_device *cp = data; + struct pending_cmd *cmd; + struct hci_request req; u8 auto_conn, addr_type; int err; @@ -5457,14 +5537,24 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, MGMT_STATUS_INVALID_PARAMS, &cp->addr, sizeof(cp->addr)); + hci_req_init(&req, hdev); + hci_dev_lock(hdev); + cmd = mgmt_pending_add(sk, MGMT_OP_ADD_DEVICE, hdev, data, len); + if (!cmd) { + err = -ENOMEM; + goto unlock; + } + + cmd->cmd_complete = addr_cmd_complete; + if (cp->addr.type == BDADDR_BREDR) { /* Only incoming connections action is supported for now */ if (cp->action != 0x01) { - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_INVALID_PARAMS, - &cp->addr, sizeof(cp->addr)); + err = 0; + cmd->cmd_complete(cmd, MGMT_STATUS_INVALID_PARAMS); + mgmt_pending_remove(cmd); goto unlock; } @@ -5473,7 +5563,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, if (err) goto unlock; - hci_update_page_scan(hdev); + __hci_update_page_scan(&req); goto added; } @@ -5493,19 +5583,28 @@ static int add_device(struct sock *sk, struct hci_dev *hdev, /* If the connection parameters don't exist for this device, * they will be created and configured with defaults. */ - if (hci_conn_params_set(hdev, &cp->addr.bdaddr, addr_type, + if (hci_conn_params_set(&req, &cp->addr.bdaddr, addr_type, auto_conn) < 0) { - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_FAILED, - &cp->addr, sizeof(cp->addr)); + err = 0; + cmd->cmd_complete(cmd, MGMT_STATUS_FAILED); + mgmt_pending_remove(cmd); goto unlock; } added: device_added(sk, hdev, &cp->addr.bdaddr, cp->addr.type, cp->action); - err = cmd_complete(sk, hdev->id, MGMT_OP_ADD_DEVICE, - MGMT_STATUS_SUCCESS, &cp->addr, sizeof(cp->addr)); + err = hci_req_run(&req, add_device_complete); + if (err < 0) { + /* ENODATA means no HCI commands were needed (e.g. if + * the adapter is powered off). 
+ */ + if (err == -ENODATA) { + cmd->cmd_complete(cmd, MGMT_STATUS_SUCCESS); + err = 0; + } + mgmt_pending_remove(cmd); + } unlock: hci_dev_unlock(hdev); -- cgit v1.2.3-70-g09d2 From 94a3bd02a6b79b98236d9805a1f95a2857e7696a Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 20 Dec 2014 16:28:38 +0100 Subject: Bluetooth: Add structures for LE Data Length Extension feature This patch adds the structures for HCI commands and events of the LE Data Length Extension feature from Bluetooth 4.2 specification. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 40129b3838b2..f39926ee06f7 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -343,6 +343,7 @@ enum { #define HCI_LE_ENCRYPTION 0x01 #define HCI_LE_CONN_PARAM_REQ_PROC 0x02 #define HCI_LE_PING 0x10 +#define HCI_LE_DATA_LEN_EXT 0x20 #define HCI_LE_EXT_SCAN_POLICY 0x80 /* Connection modes */ @@ -1371,6 +1372,39 @@ struct hci_cp_le_conn_param_req_neg_reply { __u8 reason; } __packed; +#define HCI_OP_LE_SET_DATA_LEN 0x2022 +struct hci_cp_le_set_data_len { + __le16 handle; + __le16 tx_len; + __le16 tx_time; +} __packed; +struct hci_rp_le_set_data_len { + __u8 status; + __le16 handle; +} __packed; + +#define HCI_OP_LE_READ_DEF_DATA_LEN 0x2023 +struct hci_rp_le_read_def_data_len { + __u8 status; + __le16 tx_len; + __le16 tx_time; +} __packed; + +#define HCI_OP_LE_WRITE_DEF_DATA_LEN 0x2024 +struct hci_cp_le_write_def_data_len { + __le16 tx_len; + __le16 tx_time; +} __packed; + +#define HCI_OP_LE_READ_MAX_DATA_LEN 0x202f +struct hci_rp_le_read_max_data_len { + __u8 status; + __le16 tx_len; + __le16 tx_time; + __le16 rx_len; + __le16 rx_time; +} __packed; + /* ---- HCI Events ---- */ #define HCI_EV_INQUIRY_COMPLETE 0x01 @@ -1796,6 +1830,15 @@ struct hci_ev_le_remote_conn_param_req { __le16 timeout; } __packed; +#define HCI_EV_LE_DATA_LEN_CHANGE 0x07 +struct hci_ev_le_data_len_change { + __le16 handle; + __le16 tx_len; + __le16 tx_time; + __le16 rx_len; + __le16 rx_time; +} __packed; + #define HCI_EV_LE_DIRECT_ADV_REPORT 0x0B struct hci_ev_le_direct_adv_info { __u8 evt_type; -- cgit v1.2.3-70-g09d2 From a8e1bfaa55cf8ac4e419a09bdda5bb45bcd8f985 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 20 Dec 2014 16:28:40 +0100 Subject: Bluetooth: Store default and maximum LE data length settings When the controller supports the LE Data Length Extension feature, the default and maximum data length are read and now stored. For backwards compatibility all values are initialized to the data length values from Bluetooth 4.1 and earlier specifications. 
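As a rough illustration of how these stored values can be refreshed from the controller, the sketch below queues the read commands defined in the previous patch from a request built at init time. The helper name is hypothetical and the le_features[0] test mirrors how other LE feature bits are checked elsewhere in the stack; only the opcodes, the HCI_LE_DATA_LEN_EXT bit and hci_req_add() come from this series.

static void le_read_data_len_sketch(struct hci_request *req)
{
	struct hci_dev *hdev = req->hdev;

	/* Only controllers advertising LE Data Length Extension
	 * implement these commands.
	 */
	if (!(hdev->le_features[0] & HCI_LE_DATA_LEN_EXT))
		return;

	/* Replies land in hci_cc_le_read_def_data_len() and
	 * hci_cc_le_read_max_data_len() below, overwriting the
	 * Bluetooth 4.1 defaults (tx_len 0x001b, tx_time 0x0148).
	 */
	hci_req_add(req, HCI_OP_LE_READ_DEF_DATA_LEN, 0, NULL);
	hci_req_add(req, HCI_OP_LE_READ_MAX_DATA_LEN, 0, NULL);
}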
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 6 ++++ net/bluetooth/hci_core.c | 6 ++++ net/bluetooth/hci_event.c | 61 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 73 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 79724c87ab00..f20f6bd668bd 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -220,6 +220,12 @@ struct hci_dev { __u16 le_conn_max_interval; __u16 le_conn_latency; __u16 le_supv_timeout; + __u16 le_def_tx_len; + __u16 le_def_tx_time; + __u16 le_max_tx_len; + __u16 le_max_tx_time; + __u16 le_max_rx_len; + __u16 le_max_rx_time; __u16 discov_interleaved_timeout; __u16 conn_info_min_age; __u16 conn_info_max_age; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 01e35ef6d201..47f0311d1006 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2896,6 +2896,12 @@ struct hci_dev *hci_alloc_dev(void) hdev->le_conn_max_interval = 0x0038; hdev->le_conn_latency = 0x0000; hdev->le_supv_timeout = 0x002a; + hdev->le_def_tx_len = 0x001b; + hdev->le_def_tx_time = 0x0148; + hdev->le_max_tx_len = 0x001b; + hdev->le_max_tx_time = 0x0148; + hdev->le_max_rx_len = 0x001b; + hdev->le_max_rx_time = 0x0148; hdev->rpa_timeout = HCI_DEFAULT_RPA_TIMEOUT; hdev->discov_interleaved_timeout = DISCOV_INTERLEAVED_TIMEOUT; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a412eb1e1f61..a3055e90a5bb 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -1280,6 +1280,55 @@ static void hci_cc_le_read_supported_states(struct hci_dev *hdev, memcpy(hdev->le_states, rp->le_states, 8); } +static void hci_cc_le_read_def_data_len(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_def_data_len *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->le_def_tx_len = le16_to_cpu(rp->tx_len); + hdev->le_def_tx_time = le16_to_cpu(rp->tx_time); +} + +static void hci_cc_le_write_def_data_len(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_cp_le_write_def_data_len *sent; + __u8 status = *((__u8 *) skb->data); + + BT_DBG("%s status 0x%2.2x", hdev->name, status); + + if (status) + return; + + sent = hci_sent_cmd_data(hdev, HCI_OP_LE_WRITE_DEF_DATA_LEN); + if (!sent) + return; + + hdev->le_def_tx_len = le16_to_cpu(sent->tx_len); + hdev->le_def_tx_time = le16_to_cpu(sent->tx_time); +} + +static void hci_cc_le_read_max_data_len(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_le_read_max_data_len *rp = (void *) skb->data; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + if (rp->status) + return; + + hdev->le_max_tx_len = le16_to_cpu(rp->tx_len); + hdev->le_max_tx_time = le16_to_cpu(rp->tx_time); + hdev->le_max_rx_len = le16_to_cpu(rp->rx_len); + hdev->le_max_rx_time = le16_to_cpu(rp->rx_time); +} + static void hci_cc_write_le_host_supported(struct hci_dev *hdev, struct sk_buff *skb) { @@ -2847,6 +2896,18 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_le_read_supported_states(hdev, skb); break; + case HCI_OP_LE_READ_DEF_DATA_LEN: + hci_cc_le_read_def_data_len(hdev, skb); + break; + + case HCI_OP_LE_WRITE_DEF_DATA_LEN: + hci_cc_le_write_def_data_len(hdev, skb); + break; + + case HCI_OP_LE_READ_MAX_DATA_LEN: + hci_cc_le_read_max_data_len(hdev, skb); + break; + case HCI_OP_WRITE_LE_HOST_SUPPORTED: 
hci_cc_write_le_host_supported(hdev, skb); break; -- cgit v1.2.3-70-g09d2 From 23b9ceb74f8e46bddd61a1e2afd9317221be74b7 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 20 Dec 2014 17:13:41 +0100 Subject: Bluetooth: Create debugfs directory for each connection handle For every internal representation of a Bluetooth connection which is identified by hci_conn, create a debugfs directory with the handle number as directory name. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_conn.c | 3 +++ net/bluetooth/hci_debugfs.c | 12 ++++++++++++ net/bluetooth/hci_debugfs.h | 1 + net/bluetooth/hci_event.c | 5 +++++ 5 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index f20f6bd668bd..3e7e5110f298 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -440,6 +440,7 @@ struct hci_conn { struct delayed_work le_conn_timeout; struct device dev; + struct dentry *debugfs; struct hci_dev *hdev; void *l2cap_data; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 4405fb352c70..75240aaca101 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -25,6 +25,7 @@ /* Bluetooth HCI connection handling. */ #include +#include #include #include @@ -547,6 +548,8 @@ int hci_conn_del(struct hci_conn *conn) hci_conn_del_sysfs(conn); + debugfs_remove_recursive(conn->debugfs); + if (test_bit(HCI_CONN_PARAM_REMOVAL_PEND, &conn->flags)) hci_conn_params_del(conn->hdev, &conn->dst, conn->dst_type); diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index a7a0db03b0b8..ee33ce88d3d8 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -1062,3 +1062,15 @@ void hci_debugfs_create_le(struct hci_dev *hdev) debugfs_create_u16("discov_interleaved_timeout", 0644, hdev->debugfs, &hdev->discov_interleaved_timeout); } + +void hci_debugfs_create_conn(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + char name[6]; + + if (IS_ERR_OR_NULL(hdev->debugfs)) + return; + + snprintf(name, sizeof(name), "%u", conn->handle); + conn->debugfs = debugfs_create_dir(name, hdev->debugfs); +} diff --git a/net/bluetooth/hci_debugfs.h b/net/bluetooth/hci_debugfs.h index f191100b50c5..fb68efe083c5 100644 --- a/net/bluetooth/hci_debugfs.h +++ b/net/bluetooth/hci_debugfs.h @@ -23,3 +23,4 @@ void hci_debugfs_create_common(struct hci_dev *hdev); void hci_debugfs_create_bredr(struct hci_dev *hdev); void hci_debugfs_create_le(struct hci_dev *hdev); +void hci_debugfs_create_conn(struct hci_conn *conn); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a3055e90a5bb..eed44c643c0c 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -31,6 +31,7 @@ #include #include "hci_request.h" +#include "hci_debugfs.h" #include "a2mp.h" #include "amp.h" #include "smp.h" @@ -2162,6 +2163,7 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) } else conn->state = BT_CONNECTED; + hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); if (test_bit(HCI_AUTH, &hdev->flags)) @@ -3638,6 +3640,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev, conn->handle = __le16_to_cpu(ev->handle); conn->state = BT_CONNECTED; + hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); break; @@ -4178,6 +4181,7 @@ static void hci_phy_link_complete_evt(struct hci_dev *hdev, hcon->disc_timeout = HCI_DISCONN_TIMEOUT; 
hci_conn_drop(hcon); + hci_debugfs_create_conn(hcon); hci_conn_add_sysfs(hcon); amp_physical_cfm(bredr_hcon, hcon); @@ -4384,6 +4388,7 @@ static void hci_le_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) conn->le_conn_latency = le16_to_cpu(ev->latency); conn->le_supv_timeout = le16_to_cpu(ev->supervision_timeout); + hci_debugfs_create_conn(conn); hci_conn_add_sysfs(conn); hci_proto_connect_cfm(conn, ev->status); -- cgit v1.2.3-70-g09d2 From 72e4a6bd02204eed0464d6139439d7e89b94266e Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 20 Dec 2014 18:00:41 +0100 Subject: Bluetooth: Remove duplicate constant for RFCOMM PSM The RFCOMM_PSM constant is actually a duplicate. So remove it and use the L2CAP_PSM_RFCOMM constant instead. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/rfcomm.h | 2 -- net/bluetooth/rfcomm/core.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index 578b83127af1..4190af53a46a 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -24,8 +24,6 @@ #ifndef __RFCOMM_H #define __RFCOMM_H -#define RFCOMM_PSM 3 - #define RFCOMM_CONN_TIMEOUT (HZ * 30) #define RFCOMM_DISC_TIMEOUT (HZ * 20) #define RFCOMM_AUTH_TIMEOUT (HZ * 25) diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 73f8c75abe6e..4fea24275b17 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -771,7 +771,7 @@ static struct rfcomm_session *rfcomm_session_create(bdaddr_t *src, bacpy(&addr.l2_bdaddr, dst); addr.l2_family = AF_BLUETOOTH; - addr.l2_psm = cpu_to_le16(RFCOMM_PSM); + addr.l2_psm = cpu_to_le16(L2CAP_PSM_RFCOMM); addr.l2_cid = 0; addr.l2_bdaddr_type = BDADDR_BREDR; *err = kernel_connect(sock, (struct sockaddr *) &addr, sizeof(addr), O_NONBLOCK); @@ -2038,7 +2038,7 @@ static int rfcomm_add_listener(bdaddr_t *ba) /* Bind socket */ bacpy(&addr.l2_bdaddr, ba); addr.l2_family = AF_BLUETOOTH; - addr.l2_psm = cpu_to_le16(RFCOMM_PSM); + addr.l2_psm = cpu_to_le16(L2CAP_PSM_RFCOMM); addr.l2_cid = 0; addr.l2_bdaddr_type = BDADDR_BREDR; err = kernel_bind(sock, (struct sockaddr *) &addr, sizeof(addr)); -- cgit v1.2.3-70-g09d2 From 711ffa78f4b23e25a6c91cb80be436a48b5e725d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 26 Dec 2014 04:42:31 +0100 Subject: Bluetooth: Introduce HCI_QUIRK_BROKEN_LOCAL_COMMANDS constant Some controllers advertise support for Bluetooth 1.2 specification, but they do not support the HCI Read Local Supported Commands command. If that is the case, then the driver can quirk the behavior and force the core to skip this command. This will allow removing vendor specific checks out of the core. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index f39926ee06f7..1849a437f6e1 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -102,6 +102,16 @@ enum { */ HCI_QUIRK_FIXUP_BUFFER_SIZE, + /* When this quirk is set, then the HCI Read Local Supported + * Commands command is not supported. In general Bluetooth 1.2 + * and later controllers should support this command. However + * some controllers indicate Bluetooth 1.2 support, but do + * not support this command. + * + * This quirk must be set before hci_register_dev is called. 
+ */ + HCI_QUIRK_BROKEN_LOCAL_COMMANDS, + /* When this quirk is set, then no stored link key handling * is performed. This is mainly due to the fact that the * HCI Delete Stored Link Key command is advertised, but -- cgit v1.2.3-70-g09d2 From 74d23cc704d19732e70ef1579a669f7d5f09dd9a Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sun, 21 Dec 2014 19:46:56 +0100 Subject: time: move the timecounter/cyclecounter code into its own file. The timecounter code has almost nothing to do with the clocksource code. Let it live in its own file. This will help isolate the timecounter users from the clocksource users in the source tree. Signed-off-by: Richard Cochran Acked-by: Jeff Kirsher Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe.h | 2 +- drivers/net/ethernet/broadcom/bnx2x/bnx2x.h | 2 +- drivers/net/ethernet/freescale/fec.h | 1 + drivers/net/ethernet/intel/e1000e/e1000.h | 2 +- drivers/net/ethernet/intel/igb/igb.h | 2 +- drivers/net/ethernet/intel/ixgbe/ixgbe.h | 2 +- drivers/net/ethernet/ti/cpts.h | 1 + include/clocksource/arm_arch_timer.h | 2 +- include/linux/clocksource.h | 102 ----------------------- include/linux/mlx4/device.h | 2 +- include/linux/timecounter.h | 122 ++++++++++++++++++++++++++++ include/linux/types.h | 3 + kernel/time/Makefile | 2 +- kernel/time/clocksource.c | 76 ----------------- kernel/time/timecounter.c | 95 ++++++++++++++++++++++ sound/pci/hda/hda_priv.h | 2 +- 16 files changed, 231 insertions(+), 187 deletions(-) create mode 100644 include/linux/timecounter.h create mode 100644 kernel/time/timecounter.c (limited to 'include') diff --git a/drivers/net/ethernet/amd/xgbe/xgbe.h b/drivers/net/ethernet/amd/xgbe/xgbe.h index f9ec762ac3f0..2af6affc35a7 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe.h +++ b/drivers/net/ethernet/amd/xgbe/xgbe.h @@ -124,7 +124,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h index c3a6072134f5..792ba72fb5c8 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x.h @@ -22,7 +22,7 @@ #include #include -#include +#include /* compilation time flags */ diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index 469691ad4a1e..df8bbddaeb37 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -16,6 +16,7 @@ #include #include #include +#include #if defined(CONFIG_M523x) || defined(CONFIG_M527x) || defined(CONFIG_M528x) || \ defined(CONFIG_M520x) || defined(CONFIG_M532x) || \ diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index 7785240a0da1..9416e5a7e0c8 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 82d891e183b1..ee22da391474 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -29,7 +29,7 @@ #include "e1000_mac.h" #include "e1000_82575.h" -#include +#include #include #include #include diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index b6137be43920..38fc64cf5dca 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -38,7 +38,7 @@ #include #include 
-#include +#include #include #include diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h index 1a581ef7eee8..69a46b92c7d6 100644 --- a/drivers/net/ethernet/ti/cpts.h +++ b/drivers/net/ethernet/ti/cpts.h @@ -27,6 +27,7 @@ #include #include #include +#include struct cpsw_cpts { u32 idver; /* Identification and version */ diff --git a/include/clocksource/arm_arch_timer.h b/include/clocksource/arm_arch_timer.h index 6d26b40cbf5d..9916d0e4eff5 100644 --- a/include/clocksource/arm_arch_timer.h +++ b/include/clocksource/arm_arch_timer.h @@ -16,7 +16,7 @@ #ifndef __CLKSOURCE_ARM_ARCH_TIMER_H #define __CLKSOURCE_ARM_ARCH_TIMER_H -#include +#include #include #define ARCH_TIMER_CTRL_ENABLE (1 << 0) diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index abcafaa20b86..9c78d15d33e4 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -18,8 +18,6 @@ #include #include -/* clocksource cycle base type */ -typedef u64 cycle_t; struct clocksource; struct module; @@ -27,106 +25,6 @@ struct module; #include #endif -/** - * struct cyclecounter - hardware abstraction for a free running counter - * Provides completely state-free accessors to the underlying hardware. - * Depending on which hardware it reads, the cycle counter may wrap - * around quickly. Locking rules (if necessary) have to be defined - * by the implementor and user of specific instances of this API. - * - * @read: returns the current cycle value - * @mask: bitmask for two's complement - * subtraction of non 64 bit counters, - * see CLOCKSOURCE_MASK() helper macro - * @mult: cycle to nanosecond multiplier - * @shift: cycle to nanosecond divisor (power of two) - */ -struct cyclecounter { - cycle_t (*read)(const struct cyclecounter *cc); - cycle_t mask; - u32 mult; - u32 shift; -}; - -/** - * struct timecounter - layer above a %struct cyclecounter which counts nanoseconds - * Contains the state needed by timecounter_read() to detect - * cycle counter wrap around. Initialize with - * timecounter_init(). Also used to convert cycle counts into the - * corresponding nanosecond counts with timecounter_cyc2time(). Users - * of this code are responsible for initializing the underlying - * cycle counter hardware, locking issues and reading the time - * more often than the cycle counter wraps around. The nanosecond - * counter will only wrap around after ~585 years. - * - * @cc: the cycle counter used by this instance - * @cycle_last: most recent cycle counter value seen by - * timecounter_read() - * @nsec: continuously increasing count - */ -struct timecounter { - const struct cyclecounter *cc; - cycle_t cycle_last; - u64 nsec; -}; - -/** - * cyclecounter_cyc2ns - converts cycle counter cycles to nanoseconds - * @cc: Pointer to cycle counter. - * @cycles: Cycles - * - * XXX - This could use some mult_lxl_ll() asm optimization. Same code - * as in cyc2ns, but with unsigned result. - */ -static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, - cycle_t cycles) -{ - u64 ret = (u64)cycles; - ret = (ret * cc->mult) >> cc->shift; - return ret; -} - -/** - * timecounter_init - initialize a time counter - * @tc: Pointer to time counter which is to be initialized/reset - * @cc: A cycle counter, ready to be used. - * @start_tstamp: Arbitrary initial time stamp. - * - * After this call the current cycle register (roughly) corresponds to - * the initial time stamp. Every call to timecounter_read() increments - * the time stamp counter by the number of elapsed nanoseconds. 
- */ -extern void timecounter_init(struct timecounter *tc, - const struct cyclecounter *cc, - u64 start_tstamp); - -/** - * timecounter_read - return nanoseconds elapsed since timecounter_init() - * plus the initial time stamp - * @tc: Pointer to time counter. - * - * In other words, keeps track of time since the same epoch as - * the function which generated the initial time stamp. - */ -extern u64 timecounter_read(struct timecounter *tc); - -/** - * timecounter_cyc2time - convert a cycle counter to same - * time base as values returned by - * timecounter_read() - * @tc: Pointer to time counter. - * @cycle_tstamp: a value returned by tc->cc->read() - * - * Cycle counts that are converted correctly as long as they - * fall into the interval [-1/2 max cycle count, +1/2 max cycle count], - * with "max cycle count" == cs->mask+1. - * - * This allows conversion of cycle counter values which were generated - * in the past. - */ -extern u64 timecounter_cyc2time(struct timecounter *tc, - cycle_t cycle_tstamp); - /** * struct clocksource - hardware abstraction for a free running counter * Provides mostly state-free accessors to the underlying hardware. diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 25c791e295fd..f1e41b33462f 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -42,7 +42,7 @@ #include -#include +#include #define MAX_MSIX_P_PORT 17 #define MAX_MSIX 64 diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h new file mode 100644 index 000000000000..146f07a6651b --- /dev/null +++ b/include/linux/timecounter.h @@ -0,0 +1,122 @@ +/* + * linux/include/linux/timecounter.h + * + * based on code that migrated away from + * linux/include/linux/clocksource.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#ifndef _LINUX_TIMECOUNTER_H +#define _LINUX_TIMECOUNTER_H + +#include + +/** + * struct cyclecounter - hardware abstraction for a free running counter + * Provides completely state-free accessors to the underlying hardware. + * Depending on which hardware it reads, the cycle counter may wrap + * around quickly. Locking rules (if necessary) have to be defined + * by the implementor and user of specific instances of this API. + * + * @read: returns the current cycle value + * @mask: bitmask for two's complement + * subtraction of non 64 bit counters, + * see CLOCKSOURCE_MASK() helper macro + * @mult: cycle to nanosecond multiplier + * @shift: cycle to nanosecond divisor (power of two) + */ +struct cyclecounter { + cycle_t (*read)(const struct cyclecounter *cc); + cycle_t mask; + u32 mult; + u32 shift; +}; + +/** + * struct timecounter - layer above a %struct cyclecounter which counts nanoseconds + * Contains the state needed by timecounter_read() to detect + * cycle counter wrap around. Initialize with + * timecounter_init(). Also used to convert cycle counts into the + * corresponding nanosecond counts with timecounter_cyc2time(). 
Users + * of this code are responsible for initializing the underlying + * cycle counter hardware, locking issues and reading the time + * more often than the cycle counter wraps around. The nanosecond + * counter will only wrap around after ~585 years. + * + * @cc: the cycle counter used by this instance + * @cycle_last: most recent cycle counter value seen by + * timecounter_read() + * @nsec: continuously increasing count + */ +struct timecounter { + const struct cyclecounter *cc; + cycle_t cycle_last; + u64 nsec; +}; + +/** + * cyclecounter_cyc2ns - converts cycle counter cycles to nanoseconds + * @cc: Pointer to cycle counter. + * @cycles: Cycles + * + * XXX - This could use some mult_lxl_ll() asm optimization. Same code + * as in cyc2ns, but with unsigned result. + */ +static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, + cycle_t cycles) +{ + u64 ret = (u64)cycles; + ret = (ret * cc->mult) >> cc->shift; + return ret; +} + +/** + * timecounter_init - initialize a time counter + * @tc: Pointer to time counter which is to be initialized/reset + * @cc: A cycle counter, ready to be used. + * @start_tstamp: Arbitrary initial time stamp. + * + * After this call the current cycle register (roughly) corresponds to + * the initial time stamp. Every call to timecounter_read() increments + * the time stamp counter by the number of elapsed nanoseconds. + */ +extern void timecounter_init(struct timecounter *tc, + const struct cyclecounter *cc, + u64 start_tstamp); + +/** + * timecounter_read - return nanoseconds elapsed since timecounter_init() + * plus the initial time stamp + * @tc: Pointer to time counter. + * + * In other words, keeps track of time since the same epoch as + * the function which generated the initial time stamp. + */ +extern u64 timecounter_read(struct timecounter *tc); + +/** + * timecounter_cyc2time - convert a cycle counter to same + * time base as values returned by + * timecounter_read() + * @tc: Pointer to time counter. + * @cycle_tstamp: a value returned by tc->cc->read() + * + * Cycle counts that are converted correctly as long as they + * fall into the interval [-1/2 max cycle count, +1/2 max cycle count], + * with "max cycle count" == cs->mask+1. + * + * This allows conversion of cycle counter values which were generated + * in the past. 
+ */ +extern u64 timecounter_cyc2time(struct timecounter *tc, + cycle_t cycle_tstamp); + +#endif diff --git a/include/linux/types.h b/include/linux/types.h index a0bb7048687f..62323825cff9 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -213,5 +213,8 @@ struct callback_head { }; #define rcu_head callback_head +/* clocksource cycle base type */ +typedef u64 cycle_t; + #endif /* __ASSEMBLY__ */ #endif /* _LINUX_TYPES_H */ diff --git a/kernel/time/Makefile b/kernel/time/Makefile index f622cf28628a..c09c07817d7a 100644 --- a/kernel/time/Makefile +++ b/kernel/time/Makefile @@ -1,6 +1,6 @@ obj-y += time.o timer.o hrtimer.o itimer.o posix-timers.o posix-cpu-timers.o obj-y += timekeeping.o ntp.o clocksource.o jiffies.o timer_list.o -obj-y += timeconv.o posix-clock.o alarmtimer.o +obj-y += timeconv.o timecounter.o posix-clock.o alarmtimer.o obj-$(CONFIG_GENERIC_CLOCKEVENTS_BUILD) += clockevents.o obj-$(CONFIG_GENERIC_CLOCKEVENTS) += tick-common.o diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index b79f39bda7e1..4892352f0e49 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -34,82 +34,6 @@ #include "tick-internal.h" #include "timekeeping_internal.h" -void timecounter_init(struct timecounter *tc, - const struct cyclecounter *cc, - u64 start_tstamp) -{ - tc->cc = cc; - tc->cycle_last = cc->read(cc); - tc->nsec = start_tstamp; -} -EXPORT_SYMBOL_GPL(timecounter_init); - -/** - * timecounter_read_delta - get nanoseconds since last call of this function - * @tc: Pointer to time counter - * - * When the underlying cycle counter runs over, this will be handled - * correctly as long as it does not run over more than once between - * calls. - * - * The first call to this function for a new time counter initializes - * the time tracking and returns an undefined result. - */ -static u64 timecounter_read_delta(struct timecounter *tc) -{ - cycle_t cycle_now, cycle_delta; - u64 ns_offset; - - /* read cycle counter: */ - cycle_now = tc->cc->read(tc->cc); - - /* calculate the delta since the last timecounter_read_delta(): */ - cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask; - - /* convert to nanoseconds: */ - ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta); - - /* update time stamp of timecounter_read_delta() call: */ - tc->cycle_last = cycle_now; - - return ns_offset; -} - -u64 timecounter_read(struct timecounter *tc) -{ - u64 nsec; - - /* increment time by nanoseconds since last call */ - nsec = timecounter_read_delta(tc); - nsec += tc->nsec; - tc->nsec = nsec; - - return nsec; -} -EXPORT_SYMBOL_GPL(timecounter_read); - -u64 timecounter_cyc2time(struct timecounter *tc, - cycle_t cycle_tstamp) -{ - u64 cycle_delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask; - u64 nsec; - - /* - * Instead of always treating cycle_tstamp as more recent - * than tc->cycle_last, detect when it is too far in the - * future and treat it as old time stamp instead. 
- */ - if (cycle_delta > tc->cc->mask / 2) { - cycle_delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask; - nsec = tc->nsec - cyclecounter_cyc2ns(tc->cc, cycle_delta); - } else { - nsec = cyclecounter_cyc2ns(tc->cc, cycle_delta) + tc->nsec; - } - - return nsec; -} -EXPORT_SYMBOL_GPL(timecounter_cyc2time); - /** * clocks_calc_mult_shift - calculate mult/shift factors for scaled math of clocks * @mult: pointer to mult variable diff --git a/kernel/time/timecounter.c b/kernel/time/timecounter.c new file mode 100644 index 000000000000..59a1ec3a57cb --- /dev/null +++ b/kernel/time/timecounter.c @@ -0,0 +1,95 @@ +/* + * linux/kernel/time/timecounter.c + * + * based on code that migrated away from + * linux/kernel/time/clocksource.c + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include +#include + +void timecounter_init(struct timecounter *tc, + const struct cyclecounter *cc, + u64 start_tstamp) +{ + tc->cc = cc; + tc->cycle_last = cc->read(cc); + tc->nsec = start_tstamp; +} +EXPORT_SYMBOL_GPL(timecounter_init); + +/** + * timecounter_read_delta - get nanoseconds since last call of this function + * @tc: Pointer to time counter + * + * When the underlying cycle counter runs over, this will be handled + * correctly as long as it does not run over more than once between + * calls. + * + * The first call to this function for a new time counter initializes + * the time tracking and returns an undefined result. + */ +static u64 timecounter_read_delta(struct timecounter *tc) +{ + cycle_t cycle_now, cycle_delta; + u64 ns_offset; + + /* read cycle counter: */ + cycle_now = tc->cc->read(tc->cc); + + /* calculate the delta since the last timecounter_read_delta(): */ + cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask; + + /* convert to nanoseconds: */ + ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta); + + /* update time stamp of timecounter_read_delta() call: */ + tc->cycle_last = cycle_now; + + return ns_offset; +} + +u64 timecounter_read(struct timecounter *tc) +{ + u64 nsec; + + /* increment time by nanoseconds since last call */ + nsec = timecounter_read_delta(tc); + nsec += tc->nsec; + tc->nsec = nsec; + + return nsec; +} +EXPORT_SYMBOL_GPL(timecounter_read); + +u64 timecounter_cyc2time(struct timecounter *tc, + cycle_t cycle_tstamp) +{ + u64 cycle_delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask; + u64 nsec; + + /* + * Instead of always treating cycle_tstamp as more recent + * than tc->cycle_last, detect when it is too far in the + * future and treat it as old time stamp instead. 
+ */ + if (cycle_delta > tc->cc->mask / 2) { + cycle_delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask; + nsec = tc->nsec - cyclecounter_cyc2ns(tc->cc, cycle_delta); + } else { + nsec = cyclecounter_cyc2ns(tc->cc, cycle_delta) + tc->nsec; + } + + return nsec; +} +EXPORT_SYMBOL_GPL(timecounter_cyc2time); diff --git a/sound/pci/hda/hda_priv.h b/sound/pci/hda/hda_priv.h index 166e3e84b963..daf458299753 100644 --- a/sound/pci/hda/hda_priv.h +++ b/sound/pci/hda/hda_priv.h @@ -15,7 +15,7 @@ #ifndef __SOUND_HDA_PRIV_H #define __SOUND_HDA_PRIV_H -#include +#include #include #include -- cgit v1.2.3-70-g09d2 From 796c1efd6fa0ed696d550b68f4410ab1a1749d01 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sun, 21 Dec 2014 19:46:57 +0100 Subject: timecounter: provide a helper function to shift the time. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some PTP Hardware Clock drivers use a struct timecounter to represent their clock. To adjust the time by a given offset, these drivers all perform a two step read/write of their timecounter. However, it is better and simpler just to adjust the offset in one step. This patch introduces a little routine to help drivers implement the adjtime method. Suggested-by: Janusz Użycki Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/timecounter.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h index 146f07a6651b..af3dfa4e90f0 100644 --- a/include/linux/timecounter.h +++ b/include/linux/timecounter.h @@ -78,6 +78,15 @@ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, return ret; } +/** + * timecounter_adjtime - Shifts the time of the clock. + * @delta: Desired change in nanoseconds. + */ +static inline void timecounter_adjtime(struct timecounter *tc, s64 delta) +{ + tc->nsec += delta; +} + /** * timecounter_init - initialize a time counter * @tc: Pointer to time counter which is to be initialized/reset -- cgit v1.2.3-70-g09d2 From 2eebdde6528a722fbf8e2cffcf7aa52cbb4c2de0 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Sun, 21 Dec 2014 19:47:06 +0100 Subject: timecounter: keep track of accumulated fractional nanoseconds MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current timecounter implementation will drop a variable amount of resolution, depending on the magnitude of the time delta. In other words, reading the clock too often or too close to a time stamp conversion will introduce errors into the time values. This patch fixes the issue by introducing a fractional nanosecond field that accumulates the low order bits. Reported-by: Janusz Użycki Signed-off-by: Richard Cochran Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/en_clock.c | 4 ++-- include/linux/timecounter.h | 19 ++++++++++------ kernel/time/timecounter.c | 31 +++++++++++++++++++++------ virt/kvm/arm/arch_timer.c | 3 ++- 4 files changed, 40 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_clock.c b/drivers/net/ethernet/mellanox/mlx4/en_clock.c index df35d0e1b899..e9cce4f72b24 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_clock.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_clock.c @@ -240,7 +240,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) { struct mlx4_dev *dev = mdev->dev; unsigned long flags; - u64 ns; + u64 ns, zero = 0; rwlock_init(&mdev->clock_lock); @@ -265,7 +265,7 @@ void mlx4_en_init_timestamp(struct mlx4_en_dev *mdev) /* Calculate period in seconds to call the overflow watchdog - to make * sure counter is checked at least once every wrap around. */ - ns = cyclecounter_cyc2ns(&mdev->cycles, mdev->cycles.mask); + ns = cyclecounter_cyc2ns(&mdev->cycles, mdev->cycles.mask, zero, &zero); do_div(ns, NSEC_PER_SEC / 2 / HZ); mdev->overflow_period = ns; diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h index af3dfa4e90f0..74f45496e6d1 100644 --- a/include/linux/timecounter.h +++ b/include/linux/timecounter.h @@ -55,27 +55,32 @@ struct cyclecounter { * @cycle_last: most recent cycle counter value seen by * timecounter_read() * @nsec: continuously increasing count + * @mask: bit mask for maintaining the 'frac' field + * @frac: accumulated fractional nanoseconds */ struct timecounter { const struct cyclecounter *cc; cycle_t cycle_last; u64 nsec; + u64 mask; + u64 frac; }; /** * cyclecounter_cyc2ns - converts cycle counter cycles to nanoseconds * @cc: Pointer to cycle counter. * @cycles: Cycles - * - * XXX - This could use some mult_lxl_ll() asm optimization. Same code - * as in cyc2ns, but with unsigned result. + * @mask: bit mask for maintaining the 'frac' field + * @frac: pointer to storage for the fractional nanoseconds. */ static inline u64 cyclecounter_cyc2ns(const struct cyclecounter *cc, - cycle_t cycles) + cycle_t cycles, u64 mask, u64 *frac) { - u64 ret = (u64)cycles; - ret = (ret * cc->mult) >> cc->shift; - return ret; + u64 ns = (u64) cycles; + + ns = (ns * cc->mult) + *frac; + *frac = ns & mask; + return ns >> cc->shift; } /** diff --git a/kernel/time/timecounter.c b/kernel/time/timecounter.c index 59a1ec3a57cb..4687b3104bae 100644 --- a/kernel/time/timecounter.c +++ b/kernel/time/timecounter.c @@ -25,6 +25,8 @@ void timecounter_init(struct timecounter *tc, tc->cc = cc; tc->cycle_last = cc->read(cc); tc->nsec = start_tstamp; + tc->mask = (1ULL << cc->shift) - 1; + tc->frac = 0; } EXPORT_SYMBOL_GPL(timecounter_init); @@ -51,7 +53,8 @@ static u64 timecounter_read_delta(struct timecounter *tc) cycle_delta = (cycle_now - tc->cycle_last) & tc->cc->mask; /* convert to nanoseconds: */ - ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta); + ns_offset = cyclecounter_cyc2ns(tc->cc, cycle_delta, + tc->mask, &tc->frac); /* update time stamp of timecounter_read_delta() call: */ tc->cycle_last = cycle_now; @@ -72,22 +75,36 @@ u64 timecounter_read(struct timecounter *tc) } EXPORT_SYMBOL_GPL(timecounter_read); +/* + * This is like cyclecounter_cyc2ns(), but it is used for computing a + * time previous to the time stored in the cycle counter. 
+ */ +static u64 cc_cyc2ns_backwards(const struct cyclecounter *cc, + cycle_t cycles, u64 mask, u64 frac) +{ + u64 ns = (u64) cycles; + + ns = ((ns * cc->mult) - frac) >> cc->shift; + + return ns; +} + u64 timecounter_cyc2time(struct timecounter *tc, cycle_t cycle_tstamp) { - u64 cycle_delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask; - u64 nsec; + u64 delta = (cycle_tstamp - tc->cycle_last) & tc->cc->mask; + u64 nsec = tc->nsec, frac = tc->frac; /* * Instead of always treating cycle_tstamp as more recent * than tc->cycle_last, detect when it is too far in the * future and treat it as old time stamp instead. */ - if (cycle_delta > tc->cc->mask / 2) { - cycle_delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask; - nsec = tc->nsec - cyclecounter_cyc2ns(tc->cc, cycle_delta); + if (delta > tc->cc->mask / 2) { + delta = (tc->cycle_last - cycle_tstamp) & tc->cc->mask; + nsec -= cc_cyc2ns_backwards(tc->cc, delta, tc->mask, frac); } else { - nsec = cyclecounter_cyc2ns(tc->cc, cycle_delta) + tc->nsec; + nsec += cyclecounter_cyc2ns(tc->cc, delta, tc->mask, &frac); } return nsec; diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 1c0772b340d8..6e54f3542126 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -152,7 +152,8 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) return; } - ns = cyclecounter_cyc2ns(timecounter->cc, cval - now); + ns = cyclecounter_cyc2ns(timecounter->cc, cval - now, timecounter->mask, + &timecounter->frac); timer_arm(timer, ns); } -- cgit v1.2.3-70-g09d2 From dd450777990baae668c1143064f2f234dbab1b9b Mon Sep 17 00:00:00 2001 From: Dmitry Eremin-Solenikov Date: Wed, 24 Dec 2014 01:14:14 +0300 Subject: arm: sa1100: move irda header to linux/platform_data In the end asm/mach/irda.h header is not used by anybody except sa1100. Move the header to the platform data includes dir and rename it to irda-sa11x0.h. Signed-off-by: Dmitry Eremin-Solenikov Signed-off-by: David S. Miller --- arch/arm/include/asm/mach/irda.h | 20 -------------------- arch/arm/mach-sa1100/assabet.c | 2 +- arch/arm/mach-sa1100/collie.c | 2 +- arch/arm/mach-sa1100/h3100.c | 2 +- arch/arm/mach-sa1100/h3600.c | 2 +- drivers/net/irda/sa1100_ir.c | 2 +- include/linux/platform_data/irda-sa11x0.h | 20 ++++++++++++++++++++ 7 files changed, 25 insertions(+), 25 deletions(-) delete mode 100644 arch/arm/include/asm/mach/irda.h create mode 100644 include/linux/platform_data/irda-sa11x0.h (limited to 'include') diff --git a/arch/arm/include/asm/mach/irda.h b/arch/arm/include/asm/mach/irda.h deleted file mode 100644 index 38f77b5e56cf..000000000000 --- a/arch/arm/include/asm/mach/irda.h +++ /dev/null @@ -1,20 +0,0 @@ -/* - * arch/arm/include/asm/mach/irda.h - * - * Copyright (C) 2004 Russell King. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. 
- */ -#ifndef __ASM_ARM_MACH_IRDA_H -#define __ASM_ARM_MACH_IRDA_H - -struct irda_platform_data { - int (*startup)(struct device *); - void (*shutdown)(struct device *); - int (*set_power)(struct device *, unsigned int state); - void (*set_speed)(struct device *, unsigned int speed); -}; - -#endif diff --git a/arch/arm/mach-sa1100/assabet.c b/arch/arm/mach-sa1100/assabet.c index 7dd894ece9ae..d28ecb9ef172 100644 --- a/arch/arm/mach-sa1100/assabet.c +++ b/arch/arm/mach-sa1100/assabet.c @@ -37,7 +37,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/mach-sa1100/collie.c b/arch/arm/mach-sa1100/collie.c index b90c7d828391..7fcbe3d119c7 100644 --- a/arch/arm/mach-sa1100/collie.c +++ b/arch/arm/mach-sa1100/collie.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mach-sa1100/h3100.c b/arch/arm/mach-sa1100/h3100.c index 3c43219bc881..c6b412054a3c 100644 --- a/arch/arm/mach-sa1100/h3100.c +++ b/arch/arm/mach-sa1100/h3100.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include diff --git a/arch/arm/mach-sa1100/h3600.c b/arch/arm/mach-sa1100/h3600.c index 5be54c214c7c..118338efd790 100644 --- a/arch/arm/mach-sa1100/h3600.c +++ b/arch/arm/mach-sa1100/h3600.c @@ -18,7 +18,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/net/irda/sa1100_ir.c b/drivers/net/irda/sa1100_ir.c index 7b17fa2114e1..b6e44ff4e373 100644 --- a/drivers/net/irda/sa1100_ir.c +++ b/drivers/net/irda/sa1100_ir.c @@ -38,7 +38,7 @@ #include #include -#include +#include static int power_level = 3; static int tx_lpm; diff --git a/include/linux/platform_data/irda-sa11x0.h b/include/linux/platform_data/irda-sa11x0.h new file mode 100644 index 000000000000..38f77b5e56cf --- /dev/null +++ b/include/linux/platform_data/irda-sa11x0.h @@ -0,0 +1,20 @@ +/* + * arch/arm/include/asm/mach/irda.h + * + * Copyright (C) 2004 Russell King. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef __ASM_ARM_MACH_IRDA_H +#define __ASM_ARM_MACH_IRDA_H + +struct irda_platform_data { + int (*startup)(struct device *); + void (*shutdown)(struct device *); + int (*set_power)(struct device *, unsigned int state); + void (*set_speed)(struct device *, unsigned int speed); +}; + +#endif -- cgit v1.2.3-70-g09d2 From de40ed31b3c577cefd7b54972365a272ecbe9dd6 Mon Sep 17 00:00:00 2001 From: Nimrod Andy Date: Wed, 24 Dec 2014 17:30:39 +0800 Subject: net: fec: add Wake-on-LAN support Add support for Wake-on-LAN using a Magic Packet. The ENET IP supports a low-power sleep mode: when the system enters suspend, a Magic Packet can wake it up even if all SoC clocks are gated. The patch does the following things: - flag the device as a wakeup source for the system, as well as its Wake-on-LAN interrupt - prepare the hardware for entering WoL mode - add the standard ethtool WOL interface - enable the ENET interrupt to wake us Tested on i.MX6q/dl sabresd and sabreauto boards, and i.MX6SX arm2 boards. Signed-off-by: Fugang Duan Signed-off-by: David S.
Miller --- Documentation/devicetree/bindings/net/fsl-fec.txt | 2 + drivers/net/ethernet/freescale/fec.h | 2 + drivers/net/ethernet/freescale/fec_main.c | 104 +++++++++++++++++++--- include/linux/fec.h | 1 + 4 files changed, 99 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt index 0c8775c45798..a9eb611bee68 100644 --- a/Documentation/devicetree/bindings/net/fsl-fec.txt +++ b/Documentation/devicetree/bindings/net/fsl-fec.txt @@ -22,6 +22,8 @@ Optional properties: - fsl,num-rx-queues : The property is valid for enet-avb IP, which supports hw multi queues. Should specify the rx queue number, otherwise set rx queue number to 1. +- fsl,magic-packet : If present, indicates that the hardware supports waking + up via magic packet. Optional subnodes: - mdio : specifies the mdio bus in the FEC, used as a container for phy nodes diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h index df8bbddaeb37..d77a96fdf1dd 100644 --- a/drivers/net/ethernet/freescale/fec.h +++ b/drivers/net/ethernet/freescale/fec.h @@ -357,6 +357,7 @@ struct bufdesc_ex { #define FEC_ENET_RXB ((uint)0x01000000) /* A buffer was received */ #define FEC_ENET_MII ((uint)0x00800000) /* MII interrupt */ #define FEC_ENET_EBERR ((uint)0x00400000) /* SDMA bus error */ +#define FEC_ENET_WAKEUP ((uint)0x00020000) /* Wakeup request */ #define FEC_ENET_TXF (FEC_ENET_TXF_0 | FEC_ENET_TXF_1 | FEC_ENET_TXF_2) #define FEC_ENET_RXF (FEC_ENET_RXF_0 | FEC_ENET_RXF_1 | FEC_ENET_RXF_2) #define FEC_ENET_TS_AVAIL ((uint)0x00010000) @@ -512,6 +513,7 @@ struct fec_enet_private { int irq[FEC_IRQ_NUM]; bool bufdesc_ex; int pause_flag; + int wol_flag; u32 quirks; struct napi_struct napi; diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 5ebdf8dc8a31..49cd358c30fa 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -187,6 +187,9 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); #define FEC_MMFR_RA(v) ((v & 0x1f) << 18) #define FEC_MMFR_TA (2 << 16) #define FEC_MMFR_DATA(v) (v & 0xffff) +/* FEC ECR bits definition */ +#define FEC_ECR_MAGICEN (1 << 2) +#define FEC_ECR_SLEEP (1 << 3) #define FEC_MII_TIMEOUT 30000 /* us */ @@ -195,6 +198,9 @@ MODULE_PARM_DESC(macaddr, "FEC Ethernet MAC address"); #define FEC_PAUSE_FLAG_AUTONEG 0x1 #define FEC_PAUSE_FLAG_ENABLE 0x2 +#define FEC_WOL_HAS_MAGIC_PACKET (0x1 << 0) +#define FEC_WOL_FLAG_ENABLE (0x1 << 1) +#define FEC_WOL_FLAG_SLEEP_ON (0x1 << 2) #define COPYBREAK_DEFAULT 256 @@ -1089,7 +1095,9 @@ static void fec_stop(struct net_device *ndev) { struct fec_enet_private *fep = netdev_priv(ndev); + struct fec_platform_data *pdata = fep->pdev->dev.platform_data; u32 rmii_mode = readl(fep->hwp + FEC_R_CNTRL) & (1 << 8); + u32 val; /* We cannot expect a graceful transmit stop without link !!! */ if (fep->link) { @@ -1103,17 +1111,28 @@ fec_stop(struct net_device *ndev) * For i.MX6SX SOC, enet use AXI bus, we use disable MAC * instead of reset MAC itself. 
*/ - if (fep->quirks & FEC_QUIRK_HAS_AVB) { - writel(0, fep->hwp + FEC_ECNTRL); + if (!(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) { + if (fep->quirks & FEC_QUIRK_HAS_AVB) { + writel(0, fep->hwp + FEC_ECNTRL); + } else { + writel(1, fep->hwp + FEC_ECNTRL); + udelay(10); + } + writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); } else { - writel(1, fep->hwp + FEC_ECNTRL); - udelay(10); + writel(FEC_DEFAULT_IMASK | FEC_ENET_WAKEUP, fep->hwp + FEC_IMASK); + val = readl(fep->hwp + FEC_ECNTRL); + val |= (FEC_ECR_MAGICEN | FEC_ECR_SLEEP); + writel(val, fep->hwp + FEC_ECNTRL); + + if (pdata && pdata->sleep_mode_enable) + pdata->sleep_mode_enable(true); } writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED); - writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK); /* We have to keep ENET enabled to have MII interrupt stay working */ - if (fep->quirks & FEC_QUIRK_ENET_MAC) { + if (fep->quirks & FEC_QUIRK_ENET_MAC && + !(fep->wol_flag & FEC_WOL_FLAG_SLEEP_ON)) { writel(2, fep->hwp + FEC_ECNTRL); writel(rmii_mode, fep->hwp + FEC_R_CNTRL); } @@ -2427,6 +2446,44 @@ static int fec_enet_set_tunable(struct net_device *netdev, return ret; } +static void +fec_enet_get_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + + if (fep->wol_flag & FEC_WOL_HAS_MAGIC_PACKET) { + wol->supported = WAKE_MAGIC; + wol->wolopts = fep->wol_flag & FEC_WOL_FLAG_ENABLE ? WAKE_MAGIC : 0; + } else { + wol->supported = wol->wolopts = 0; + } +} + +static int +fec_enet_set_wol(struct net_device *ndev, struct ethtool_wolinfo *wol) +{ + struct fec_enet_private *fep = netdev_priv(ndev); + + if (!(fep->wol_flag & FEC_WOL_HAS_MAGIC_PACKET)) + return -EINVAL; + + if (wol->wolopts & ~WAKE_MAGIC) + return -EINVAL; + + device_set_wakeup_enable(&ndev->dev, wol->wolopts & WAKE_MAGIC); + if (device_may_wakeup(&ndev->dev)) { + fep->wol_flag |= FEC_WOL_FLAG_ENABLE; + if (fep->irq[0] > 0) + enable_irq_wake(fep->irq[0]); + } else { + fep->wol_flag &= (~FEC_WOL_FLAG_ENABLE); + if (fep->irq[0] > 0) + disable_irq_wake(fep->irq[0]); + } + + return 0; +} + static const struct ethtool_ops fec_enet_ethtool_ops = { .get_settings = fec_enet_get_settings, .set_settings = fec_enet_set_settings, @@ -2445,6 +2502,8 @@ static const struct ethtool_ops fec_enet_ethtool_ops = { .get_ts_info = fec_enet_get_ts_info, .get_tunable = fec_enet_get_tunable, .set_tunable = fec_enet_set_tunable, + .get_wol = fec_enet_get_wol, + .set_wol = fec_enet_set_wol, }; static int fec_enet_ioctl(struct net_device *ndev, struct ifreq *rq, int cmd) @@ -2705,6 +2764,9 @@ fec_enet_open(struct net_device *ndev) phy_start(fep->phy_dev); netif_tx_start_all_queues(ndev); + device_set_wakeup_enable(&ndev->dev, fep->wol_flag & + FEC_WOL_FLAG_ENABLE); + return 0; err_enet_mii_probe: @@ -3153,6 +3215,9 @@ fec_probe(struct platform_device *pdev) platform_set_drvdata(pdev, ndev); + if (of_get_property(np, "fsl,magic-packet", NULL)) + fep->wol_flag |= FEC_WOL_HAS_MAGIC_PACKET; + phy_node = of_parse_phandle(np, "phy-handle", 0); if (!phy_node && of_phy_is_fixed_link(np)) { ret = of_phy_register_fixed_link(np); @@ -3247,6 +3312,8 @@ fec_probe(struct platform_device *pdev) 0, pdev->name, ndev); if (ret) goto failed_irq; + + fep->irq[i] = irq; } init_completion(&fep->mdio_done); @@ -3263,6 +3330,9 @@ fec_probe(struct platform_device *pdev) if (ret) goto failed_register; + device_init_wakeup(&ndev->dev, fep->wol_flag & + FEC_WOL_HAS_MAGIC_PACKET); + if (fep->bufdesc_ex && fep->ptp_clock) netdev_info(ndev, "registered PHC device %d\n", fep->dev_id); @@ 
-3316,6 +3386,8 @@ static int __maybe_unused fec_suspend(struct device *dev) rtnl_lock(); if (netif_running(ndev)) { + if (fep->wol_flag & FEC_WOL_FLAG_ENABLE) + fep->wol_flag |= FEC_WOL_FLAG_SLEEP_ON; phy_stop(fep->phy_dev); napi_disable(&fep->napi); netif_tx_lock_bh(ndev); @@ -3323,11 +3395,12 @@ static int __maybe_unused fec_suspend(struct device *dev) netif_tx_unlock_bh(ndev); fec_stop(ndev); fec_enet_clk_enable(ndev, false); - pinctrl_pm_select_sleep_state(&fep->pdev->dev); + if (!(fep->wol_flag & FEC_WOL_FLAG_ENABLE)) + pinctrl_pm_select_sleep_state(&fep->pdev->dev); } rtnl_unlock(); - if (fep->reg_phy) + if (fep->reg_phy && !(fep->wol_flag & FEC_WOL_FLAG_ENABLE)) regulator_disable(fep->reg_phy); /* SOC supply clock to phy, when clock is disabled, phy link down @@ -3343,9 +3416,11 @@ static int __maybe_unused fec_resume(struct device *dev) { struct net_device *ndev = dev_get_drvdata(dev); struct fec_enet_private *fep = netdev_priv(ndev); + struct fec_platform_data *pdata = fep->pdev->dev.platform_data; int ret; + int val; - if (fep->reg_phy) { + if (fep->reg_phy && !(fep->wol_flag & FEC_WOL_FLAG_ENABLE)) { ret = regulator_enable(fep->reg_phy); if (ret) return ret; @@ -3353,12 +3428,21 @@ static int __maybe_unused fec_resume(struct device *dev) rtnl_lock(); if (netif_running(ndev)) { - pinctrl_pm_select_default_state(&fep->pdev->dev); ret = fec_enet_clk_enable(ndev, true); if (ret) { rtnl_unlock(); goto failed_clk; } + if (fep->wol_flag & FEC_WOL_FLAG_ENABLE) { + if (pdata && pdata->sleep_mode_enable) + pdata->sleep_mode_enable(false); + val = readl(fep->hwp + FEC_ECNTRL); + val &= ~(FEC_ECR_MAGICEN | FEC_ECR_SLEEP); + writel(val, fep->hwp + FEC_ECNTRL); + fep->wol_flag &= ~FEC_WOL_FLAG_SLEEP_ON; + } else { + pinctrl_pm_select_default_state(&fep->pdev->dev); + } fec_restart(ndev); netif_tx_lock_bh(ndev); netif_device_attach(ndev); diff --git a/include/linux/fec.h b/include/linux/fec.h index bcff455d1d53..1454a503622d 100644 --- a/include/linux/fec.h +++ b/include/linux/fec.h @@ -19,6 +19,7 @@ struct fec_platform_data { phy_interface_t phy; unsigned char mac[ETH_ALEN]; + void (*sleep_mode_enable)(int enabled); }; #endif -- cgit v1.2.3-70-g09d2 From 33f72e6f0c67f673fd0c63a8182dbd9ffb8cf50b Mon Sep 17 00:00:00 2001 From: Bill Hong Date: Sat, 27 Dec 2014 10:12:39 -0800 Subject: l2tp : multicast notification to the registered listeners Previously l2tp module did not provide any means for the user space to get notified when tunnels/sessions are added/modified/deleted. This change contains the following - create a multicast group for the listeners to register. - notify the registered listeners when the tunnels/sessions are created/modified/deleted. Signed-off-by: Bill Hong Reviewed-by: Stephen Hemminger Reviewed-by: Sven-Thorsten Dietrich Signed-off-by: David S. 
Miller --- include/uapi/linux/l2tp.h | 1 + net/l2tp/l2tp_netlink.c | 101 +++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 92 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/l2tp.h b/include/uapi/linux/l2tp.h index 21caa2631c20..347ef22a964e 100644 --- a/include/uapi/linux/l2tp.h +++ b/include/uapi/linux/l2tp.h @@ -178,5 +178,6 @@ enum l2tp_seqmode { */ #define L2TP_GENL_NAME "l2tp" #define L2TP_GENL_VERSION 0x1 +#define L2TP_GENL_MCGROUP "l2tp" #endif /* _UAPI_LINUX_L2TP_H_ */ diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 0ac907adb2f4..6b16598f31d5 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -40,6 +40,18 @@ static struct genl_family l2tp_nl_family = { .netnsok = true, }; +static const struct genl_multicast_group l2tp_multicast_group[] = { + { + .name = L2TP_GENL_MCGROUP, + }, +}; + +static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, + int flags, struct l2tp_tunnel *tunnel, u8 cmd); +static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, + int flags, struct l2tp_session *session, + u8 cmd); + /* Accessed under genl lock */ static const struct l2tp_nl_cmd_ops *l2tp_nl_cmd_ops[__L2TP_PWTYPE_MAX]; @@ -97,6 +109,52 @@ out: return ret; } +static int l2tp_tunnel_notify(struct genl_family *family, + struct genl_info *info, + struct l2tp_tunnel *tunnel, + u8 cmd) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, + NLM_F_ACK, tunnel, cmd); + + if (ret >= 0) + return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + + nlmsg_free(msg); + + return ret; +} + +static int l2tp_session_notify(struct genl_family *family, + struct genl_info *info, + struct l2tp_session *session, + u8 cmd) +{ + struct sk_buff *msg; + int ret; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, + NLM_F_ACK, session, cmd); + + if (ret >= 0) + return genlmsg_multicast_allns(family, msg, 0, 0, GFP_ATOMIC); + + nlmsg_free(msg); + + return ret; +} + static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info) { u32 tunnel_id; @@ -188,6 +246,9 @@ static int l2tp_nl_cmd_tunnel_create(struct sk_buff *skb, struct genl_info *info break; } + if (ret >= 0) + ret = l2tp_tunnel_notify(&l2tp_nl_family, info, + tunnel, L2TP_CMD_TUNNEL_CREATE); out: return ret; } @@ -211,6 +272,9 @@ static int l2tp_nl_cmd_tunnel_delete(struct sk_buff *skb, struct genl_info *info goto out; } + l2tp_tunnel_notify(&l2tp_nl_family, info, + tunnel, L2TP_CMD_TUNNEL_DELETE); + (void) l2tp_tunnel_delete(tunnel); out: @@ -239,12 +303,15 @@ static int l2tp_nl_cmd_tunnel_modify(struct sk_buff *skb, struct genl_info *info if (info->attrs[L2TP_ATTR_DEBUG]) tunnel->debug = nla_get_u32(info->attrs[L2TP_ATTR_DEBUG]); + ret = l2tp_tunnel_notify(&l2tp_nl_family, info, + tunnel, L2TP_CMD_TUNNEL_MODIFY); + out: return ret; } static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, - struct l2tp_tunnel *tunnel) + struct l2tp_tunnel *tunnel, u8 cmd) { void *hdr; struct nlattr *nest; @@ -254,8 +321,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla struct ipv6_pinfo *np = NULL; #endif - hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, - L2TP_CMD_TUNNEL_GET); + hdr = genlmsg_put(skb, portid, seq, 
&l2tp_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; @@ -359,7 +425,7 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info) } ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, - NLM_F_ACK, tunnel); + NLM_F_ACK, tunnel, L2TP_CMD_TUNNEL_GET); if (ret < 0) goto err_out; @@ -385,7 +451,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - tunnel) <= 0) + tunnel, L2TP_CMD_TUNNEL_GET) <= 0) goto out; ti++; @@ -539,6 +605,13 @@ static int l2tp_nl_cmd_session_create(struct sk_buff *skb, struct genl_info *inf ret = (*l2tp_nl_cmd_ops[cfg.pw_type]->session_create)(net, tunnel_id, session_id, peer_session_id, &cfg); + if (ret >= 0) { + session = l2tp_session_find(net, tunnel, session_id); + if (session) + ret = l2tp_session_notify(&l2tp_nl_family, info, session, + L2TP_CMD_SESSION_CREATE); + } + out: return ret; } @@ -555,6 +628,9 @@ static int l2tp_nl_cmd_session_delete(struct sk_buff *skb, struct genl_info *inf goto out; } + l2tp_session_notify(&l2tp_nl_family, info, + session, L2TP_CMD_SESSION_DELETE); + pw_type = session->pwtype; if (pw_type < __L2TP_PWTYPE_MAX) if (l2tp_nl_cmd_ops[pw_type] && l2tp_nl_cmd_ops[pw_type]->session_delete) @@ -601,12 +677,15 @@ static int l2tp_nl_cmd_session_modify(struct sk_buff *skb, struct genl_info *inf if (info->attrs[L2TP_ATTR_MRU]) session->mru = nla_get_u16(info->attrs[L2TP_ATTR_MRU]); + ret = l2tp_session_notify(&l2tp_nl_family, info, + session, L2TP_CMD_SESSION_MODIFY); + out: return ret; } static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, - struct l2tp_session *session) + struct l2tp_session *session, u8 cmd) { void *hdr; struct nlattr *nest; @@ -615,7 +694,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl sk = tunnel->sock; - hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, cmd); if (!hdr) return -EMSGSIZE; @@ -699,7 +778,7 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) } ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, - 0, session); + 0, session, L2TP_CMD_SESSION_GET); if (ret < 0) goto err_out; @@ -737,7 +816,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - session) <= 0) + session, L2TP_CMD_SESSION_GET) <= 0) break; si++; @@ -896,7 +975,9 @@ EXPORT_SYMBOL_GPL(l2tp_nl_unregister_ops); static int l2tp_nl_init(void) { pr_info("L2TP netlink interface\n"); - return genl_register_family_with_ops(&l2tp_nl_family, l2tp_nl_ops); + return genl_register_family_with_ops_groups(&l2tp_nl_family, + l2tp_nl_ops, + l2tp_multicast_group); } static void l2tp_nl_cleanup(void) -- cgit v1.2.3-70-g09d2 From 345e9b54268ae065520a7252c182d22ef4591718 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Wed, 31 Dec 2014 10:56:24 -0800 Subject: fib_trie: Push rcu_read_lock/unlock to callers This change is to start cleaning up some of the rcu_read_lock/unlock handling. I realized while reviewing the code there are several spots that I don't believe are being handled correctly or are masking warnings by locally calling rcu_read_lock/unlock instead of calling them at the correct level. A common example is a call to fib_get_table followed by fib_table_lookup. 
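For illustration only (this sketch is not part of the patch and the local variables are hypothetical), the wrapped form of that pattern looks roughly like:

	rcu_read_lock();		/* the read lock spans both calls */
	tb = fib_get_table(net, RT_TABLE_MAIN);
	if (tb)
		err = fib_table_lookup(tb, flp, res, FIB_LOOKUP_NOREF);
	rcu_read_unlock();
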
The rcu_read_lock/unlock ought to wrap both but there are several spots where they were not wrapped. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/net/ip_fib.h | 50 ++++++++++-------- net/ipv4/fib_frontend.c | 27 +++++----- net/ipv4/fib_rules.c | 22 ++++---- net/ipv4/fib_trie.c | 137 +++++++++++++++++++++--------------------------- 4 files changed, 114 insertions(+), 122 deletions(-) (limited to 'include') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 09a819ee2151..5bd120e4bc0a 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -222,16 +222,19 @@ static inline struct fib_table *fib_new_table(struct net *net, u32 id) static inline int fib_lookup(struct net *net, const struct flowi4 *flp, struct fib_result *res) { - struct fib_table *table; + int err = -ENETUNREACH; + + rcu_read_lock(); + + if (!fib_table_lookup(fib_get_table(net, RT_TABLE_LOCAL), flp, res, + FIB_LOOKUP_NOREF) || + !fib_table_lookup(fib_get_table(net, RT_TABLE_MAIN), flp, res, + FIB_LOOKUP_NOREF)) + err = 0; - table = fib_get_table(net, RT_TABLE_LOCAL); - if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF)) - return 0; + rcu_read_unlock(); - table = fib_get_table(net, RT_TABLE_MAIN); - if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF)) - return 0; - return -ENETUNREACH; + return err; } #else /* CONFIG_IP_MULTIPLE_TABLES */ @@ -247,20 +250,25 @@ static inline int fib_lookup(struct net *net, struct flowi4 *flp, struct fib_result *res) { if (!net->ipv4.fib_has_custom_rules) { + int err = -ENETUNREACH; + + rcu_read_lock(); + res->tclassid = 0; - if (net->ipv4.fib_local && - !fib_table_lookup(net->ipv4.fib_local, flp, res, - FIB_LOOKUP_NOREF)) - return 0; - if (net->ipv4.fib_main && - !fib_table_lookup(net->ipv4.fib_main, flp, res, - FIB_LOOKUP_NOREF)) - return 0; - if (net->ipv4.fib_default && - !fib_table_lookup(net->ipv4.fib_default, flp, res, - FIB_LOOKUP_NOREF)) - return 0; - return -ENETUNREACH; + if ((net->ipv4.fib_local && + !fib_table_lookup(net->ipv4.fib_local, flp, res, + FIB_LOOKUP_NOREF)) || + (net->ipv4.fib_main && + !fib_table_lookup(net->ipv4.fib_main, flp, res, + FIB_LOOKUP_NOREF)) || + (net->ipv4.fib_default && + !fib_table_lookup(net->ipv4.fib_default, flp, res, + FIB_LOOKUP_NOREF))) + err = 0; + + rcu_read_unlock(); + + return err; } return __fib_lookup(net, flp, res); } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 66890209208f..57be71dd6a9e 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -109,6 +109,7 @@ struct fib_table *fib_new_table(struct net *net, u32 id) return tb; } +/* caller must hold either rtnl or rcu read lock */ struct fib_table *fib_get_table(struct net *net, u32 id) { struct fib_table *tb; @@ -119,15 +120,11 @@ struct fib_table *fib_get_table(struct net *net, u32 id) id = RT_TABLE_MAIN; h = id & (FIB_TABLE_HASHSZ - 1); - rcu_read_lock(); head = &net->ipv4.fib_table_hash[h]; hlist_for_each_entry_rcu(tb, head, tb_hlist) { - if (tb->tb_id == id) { - rcu_read_unlock(); + if (tb->tb_id == id) return tb; - } } - rcu_read_unlock(); return NULL; } #endif /* CONFIG_IP_MULTIPLE_TABLES */ @@ -167,16 +164,18 @@ static inline unsigned int __inet_dev_addr_type(struct net *net, if (ipv4_is_multicast(addr)) return RTN_MULTICAST; + rcu_read_lock(); + local_table = fib_get_table(net, RT_TABLE_LOCAL); if (local_table) { ret = RTN_UNICAST; - rcu_read_lock(); if (!fib_table_lookup(local_table, &fl4, &res, FIB_LOOKUP_NOREF)) { if (!dev || dev == res.fi->fib_dev) ret = res.type; } - rcu_read_unlock(); } 
+ + rcu_read_unlock(); return ret; } @@ -919,7 +918,7 @@ void fib_del_ifaddr(struct in_ifaddr *ifa, struct in_ifaddr *iprim) #undef BRD1_OK } -static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) +static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn) { struct fib_result res; @@ -929,6 +928,11 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) .flowi4_tos = frn->fl_tos, .flowi4_scope = frn->fl_scope, }; + struct fib_table *tb; + + rcu_read_lock(); + + tb = fib_get_table(net, frn->tb_id_in); frn->err = -ENOENT; if (tb) { @@ -945,6 +949,8 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) } local_bh_enable(); } + + rcu_read_unlock(); } static void nl_fib_input(struct sk_buff *skb) @@ -952,7 +958,6 @@ static void nl_fib_input(struct sk_buff *skb) struct net *net; struct fib_result_nl *frn; struct nlmsghdr *nlh; - struct fib_table *tb; u32 portid; net = sock_net(skb->sk); @@ -967,9 +972,7 @@ static void nl_fib_input(struct sk_buff *skb) nlh = nlmsg_hdr(skb); frn = (struct fib_result_nl *) nlmsg_data(nlh); - tb = fib_get_table(net, frn->tb_id_in); - - nl_fib_lookup(frn, tb); + nl_fib_lookup(net, frn); portid = NETLINK_CB(skb).portid; /* netlink portid */ NETLINK_CB(skb).portid = 0; /* from kernel */ diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 8f7bd56955b0..d3db718be51d 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -81,27 +81,25 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, break; case FR_ACT_UNREACHABLE: - err = -ENETUNREACH; - goto errout; + return -ENETUNREACH; case FR_ACT_PROHIBIT: - err = -EACCES; - goto errout; + return -EACCES; case FR_ACT_BLACKHOLE: default: - err = -EINVAL; - goto errout; + return -EINVAL; } + rcu_read_lock(); + tbl = fib_get_table(rule->fr_net, rule->table); - if (!tbl) - goto errout; + if (tbl) + err = fib_table_lookup(tbl, &flp->u.ip4, + (struct fib_result *)arg->result, + arg->flags); - err = fib_table_lookup(tbl, &flp->u.ip4, (struct fib_result *) arg->result, arg->flags); - if (err > 0) - err = -EAGAIN; -errout: + rcu_read_unlock(); return err; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 28a3065470bc..987b06d1effe 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1181,72 +1181,6 @@ err: return err; } -/* should be called with rcu_read_lock */ -static int check_leaf(struct fib_table *tb, struct trie *t, struct tnode *l, - t_key key, const struct flowi4 *flp, - struct fib_result *res, int fib_flags) -{ - struct leaf_info *li; - struct hlist_head *hhead = &l->list; - - hlist_for_each_entry_rcu(li, hhead, hlist) { - struct fib_alias *fa; - - if (l->key != (key & li->mask_plen)) - continue; - - list_for_each_entry_rcu(fa, &li->falh, fa_list) { - struct fib_info *fi = fa->fa_info; - int nhsel, err; - - if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) - continue; - if (fi->fib_dead) - continue; - if (fa->fa_info->fib_scope < flp->flowi4_scope) - continue; - fib_alias_accessed(fa); - err = fib_props[fa->fa_type].error; - if (unlikely(err < 0)) { -#ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(t->stats->semantic_match_passed); -#endif - return err; - } - if (fi->fib_flags & RTNH_F_DEAD) - continue; - for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { - const struct fib_nh *nh = &fi->fib_nh[nhsel]; - - if (nh->nh_flags & RTNH_F_DEAD) - continue; - if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) - continue; - -#ifdef CONFIG_IP_FIB_TRIE_STATS - 
this_cpu_inc(t->stats->semantic_match_passed); -#endif - res->prefixlen = li->plen; - res->nh_sel = nhsel; - res->type = fa->fa_type; - res->scope = fi->fib_scope; - res->fi = fi; - res->table = tb; - res->fa_head = &li->falh; - if (!(fib_flags & FIB_LOOKUP_NOREF)) - atomic_inc(&fi->fib_clntref); - return 0; - } - } - -#ifdef CONFIG_IP_FIB_TRIE_STATS - this_cpu_inc(t->stats->semantic_match_miss); -#endif - } - - return 1; -} - static inline t_key prefix_mismatch(t_key key, struct tnode *n) { t_key prefix = n->key; @@ -1254,6 +1188,7 @@ static inline t_key prefix_mismatch(t_key key, struct tnode *n) return (key ^ prefix) & (prefix | -prefix); } +/* should be called with rcu_read_lock */ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, struct fib_result *res, int fib_flags) { @@ -1263,14 +1198,12 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi4 *flp, #endif const t_key key = ntohl(flp->daddr); struct tnode *n, *pn; + struct leaf_info *li; t_key cindex; - int ret = 1; - - rcu_read_lock(); n = rcu_dereference(t->trie); if (!n) - goto failed; + return -EAGAIN; #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->gets); @@ -1350,7 +1283,7 @@ backtrace: pn = node_parent_rcu(pn); if (unlikely(!pn)) - goto failed; + return -EAGAIN; #ifdef CONFIG_IP_FIB_TRIE_STATS this_cpu_inc(stats->backtrack); #endif @@ -1368,12 +1301,62 @@ backtrace: found: /* Step 3: Process the leaf, if that fails fall back to backtracing */ - ret = check_leaf(tb, t, n, key, flp, res, fib_flags); - if (unlikely(ret > 0)) - goto backtrace; -failed: - rcu_read_unlock(); - return ret; + hlist_for_each_entry_rcu(li, &n->list, hlist) { + struct fib_alias *fa; + + if ((key ^ n->key) & li->mask_plen) + continue; + + list_for_each_entry_rcu(fa, &li->falh, fa_list) { + struct fib_info *fi = fa->fa_info; + int nhsel, err; + + if (fa->fa_tos && fa->fa_tos != flp->flowi4_tos) + continue; + if (fi->fib_dead) + continue; + if (fa->fa_info->fib_scope < flp->flowi4_scope) + continue; + fib_alias_accessed(fa); + err = fib_props[fa->fa_type].error; + if (unlikely(err < 0)) { +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(stats->semantic_match_passed); +#endif + return err; + } + if (fi->fib_flags & RTNH_F_DEAD) + continue; + for (nhsel = 0; nhsel < fi->fib_nhs; nhsel++) { + const struct fib_nh *nh = &fi->fib_nh[nhsel]; + + if (nh->nh_flags & RTNH_F_DEAD) + continue; + if (flp->flowi4_oif && flp->flowi4_oif != nh->nh_oif) + continue; + + if (!(fib_flags & FIB_LOOKUP_NOREF)) + atomic_inc(&fi->fib_clntref); + + res->prefixlen = li->plen; + res->nh_sel = nhsel; + res->type = fa->fa_type; + res->scope = fi->fib_scope; + res->fi = fi; + res->table = tb; + res->fa_head = &li->falh; +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(stats->semantic_match_passed); +#endif + return err; + } + } + +#ifdef CONFIG_IP_FIB_TRIE_STATS + this_cpu_inc(stats->semantic_match_miss); +#endif + } + goto backtrace; } EXPORT_SYMBOL_GPL(fib_table_lookup); -- cgit v1.2.3-70-g09d2 From 3cf8e53a48f67ccdbc527860e852eef135971d98 Mon Sep 17 00:00:00 2001 From: Roger Chen Date: Mon, 29 Dec 2014 17:43:55 +0800 Subject: GMAC: define clock ID used for GMAC changes since v2: 1. remove SCLK_MAC_PLL Signed-off-by: Roger Chen Signed-off-by: David S. 
Miller --- include/dt-bindings/clock/rk3288-cru.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/dt-bindings/clock/rk3288-cru.h b/include/dt-bindings/clock/rk3288-cru.h index f60ce72a2b2c..1c34c24efe08 100644 --- a/include/dt-bindings/clock/rk3288-cru.h +++ b/include/dt-bindings/clock/rk3288-cru.h @@ -81,6 +81,9 @@ #define SCLK_SDIO1_SAMPLE 120 #define SCLK_EMMC_SAMPLE 121 +#define SCLK_MAC 151 +#define SCLK_MACREF_OUT 152 + #define DCLK_VOP0 190 #define DCLK_VOP1 191 -- cgit v1.2.3-70-g09d2 From 9b174d88c257150562b0101fcc6cb6c3cb74275c Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Tue, 30 Dec 2014 19:10:15 -0800 Subject: net: Add Transparent Ethernet Bridging GRO support. Currently the only tunnel protocol that supports GRO with encapsulated Ethernet is VXLAN. This pulls out the Ethernet code into a proper layer so that it can be used by other tunnel protocols such as GRE and Geneve. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 53 +++----------------------- include/linux/etherdevice.h | 4 ++ net/ethernet/eth.c | 92 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 102 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 7fbd89fbe107..2ab0922af0b4 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -549,10 +549,7 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff { struct sk_buff *p, **pp = NULL; struct vxlanhdr *vh, *vh2; - struct ethhdr *eh, *eh2; - unsigned int hlen, off_vx, off_eth; - const struct packet_offload *ptype; - __be16 type; + unsigned int hlen, off_vx; int flush = 1; off_vx = skb_gro_offset(skb); @@ -563,17 +560,6 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff if (unlikely(!vh)) goto out; } - skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */ - skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr)); - - off_eth = skb_gro_offset(skb); - hlen = off_eth + sizeof(*eh); - eh = skb_gro_header_fast(skb, off_eth); - if (skb_gro_header_hard(skb, hlen)) { - eh = skb_gro_header_slow(skb, hlen, off_eth); - if (unlikely(!eh)) - goto out; - } flush = 0; @@ -582,28 +568,16 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff continue; vh2 = (struct vxlanhdr *)(p->data + off_vx); - eh2 = (struct ethhdr *)(p->data + off_eth); - if (vh->vx_vni != vh2->vx_vni || compare_ether_header(eh, eh2)) { + if (vh->vx_vni != vh2->vx_vni) { NAPI_GRO_CB(p)->same_flow = 0; continue; } } - type = eh->h_proto; - - rcu_read_lock(); - ptype = gro_find_receive_by_type(type); - if (ptype == NULL) { - flush = 1; - goto out_unlock; - } - - skb_gro_pull(skb, sizeof(*eh)); /* pull inner eth header */ - skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); - pp = ptype->callbacks.gro_receive(head, skb); + skb_gro_pull(skb, sizeof(struct vxlanhdr)); + skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr)); + pp = eth_gro_receive(head, skb); -out_unlock: - rcu_read_unlock(); out: NAPI_GRO_CB(skb)->flush |= flush; @@ -612,24 +586,9 @@ out: static int vxlan_gro_complete(struct sk_buff *skb, int nhoff) { - struct ethhdr *eh; - struct packet_offload *ptype; - __be16 type; - int vxlan_len = sizeof(struct vxlanhdr) + sizeof(struct ethhdr); - int err = -ENOSYS; - udp_tunnel_gro_complete(skb, nhoff); - eh = (struct ethhdr *)(skb->data + nhoff + sizeof(struct vxlanhdr)); - type = eh->h_proto; - - rcu_read_lock(); - ptype = gro_find_complete_by_type(type); - 
if (ptype != NULL) - err = ptype->callbacks.gro_complete(skb, nhoff + vxlan_len); - - rcu_read_unlock(); - return err; + return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr)); } /* Notify netdevs that UDP port started listening */ diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 41c891d05f04..1d869d185a0d 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -52,6 +52,10 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs, #define alloc_etherdev(sizeof_priv) alloc_etherdev_mq(sizeof_priv, 1) #define alloc_etherdev_mq(sizeof_priv, count) alloc_etherdev_mqs(sizeof_priv, count, count) +struct sk_buff **eth_gro_receive(struct sk_buff **head, + struct sk_buff *skb); +int eth_gro_complete(struct sk_buff *skb, int nhoff); + /* Reserved Ethernet Addresses per IEEE 802.1Q */ static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 33a140e15834..238f38d21641 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -424,3 +424,95 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr); } EXPORT_SYMBOL(sysfs_format_mac); + +struct sk_buff **eth_gro_receive(struct sk_buff **head, + struct sk_buff *skb) +{ + struct sk_buff *p, **pp = NULL; + struct ethhdr *eh, *eh2; + unsigned int hlen, off_eth; + const struct packet_offload *ptype; + __be16 type; + int flush = 1; + + off_eth = skb_gro_offset(skb); + hlen = off_eth + sizeof(*eh); + eh = skb_gro_header_fast(skb, off_eth); + if (skb_gro_header_hard(skb, hlen)) { + eh = skb_gro_header_slow(skb, hlen, off_eth); + if (unlikely(!eh)) + goto out; + } + + flush = 0; + + for (p = *head; p; p = p->next) { + if (!NAPI_GRO_CB(p)->same_flow) + continue; + + eh2 = (struct ethhdr *)(p->data + off_eth); + if (compare_ether_header(eh, eh2)) { + NAPI_GRO_CB(p)->same_flow = 0; + continue; + } + } + + type = eh->h_proto; + + rcu_read_lock(); + ptype = gro_find_receive_by_type(type); + if (ptype == NULL) { + flush = 1; + goto out_unlock; + } + + skb_gro_pull(skb, sizeof(*eh)); + skb_gro_postpull_rcsum(skb, eh, sizeof(*eh)); + pp = ptype->callbacks.gro_receive(head, skb); + +out_unlock: + rcu_read_unlock(); +out: + NAPI_GRO_CB(skb)->flush |= flush; + + return pp; +} +EXPORT_SYMBOL(eth_gro_receive); + +int eth_gro_complete(struct sk_buff *skb, int nhoff) +{ + struct ethhdr *eh = (struct ethhdr *)(skb->data + nhoff); + __be16 type = eh->h_proto; + struct packet_offload *ptype; + int err = -ENOSYS; + + if (skb->encapsulation) + skb_set_inner_mac_header(skb, nhoff); + + rcu_read_lock(); + ptype = gro_find_complete_by_type(type); + if (ptype != NULL) + err = ptype->callbacks.gro_complete(skb, nhoff + + sizeof(struct ethhdr)); + + rcu_read_unlock(); + return err; +} +EXPORT_SYMBOL(eth_gro_complete); + +static struct packet_offload eth_packet_offload __read_mostly = { + .type = cpu_to_be16(ETH_P_TEB), + .callbacks = { + .gro_receive = eth_gro_receive, + .gro_complete = eth_gro_complete, + }, +}; + +static int __init eth_offload_init(void) +{ + dev_add_offload(ð_packet_offload); + + return 0; +} + +fs_initcall(eth_offload_init); -- cgit v1.2.3-70-g09d2 From 300acfdec916be696373864226267b11302d3b84 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 31 Dec 2014 14:43:16 -0800 Subject: Bluetooth: Introduce force_bredr_smp debugfs option for testing Testing cross-transport pairing that starts on BR/EDR is only valid 
when using a controller with BR/EDR Secure Connections. Devices will indicate this by providing BR/EDR SMP fixed channel over L2CAP. To allow testing of this feature on Bluetooth 4.0 controller or controllers without the BR/EDR Secure Connections features, introduce a force_bredr_smp debugfs option that allows faking the required AES connection. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 1 + net/bluetooth/l2cap_core.c | 2 +- net/bluetooth/smp.c | 78 ++++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 76 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 1849a437f6e1..fddb93f168b8 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -174,6 +174,7 @@ enum { HCI_DUT_MODE, HCI_FORCE_SC, HCI_FORCE_LESC, + HCI_FORCE_BREDR_SMP, HCI_FORCE_STATIC_ADDR, }; diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c index d04dc0095736..11029b212874 100644 --- a/net/bluetooth/l2cap_core.c +++ b/net/bluetooth/l2cap_core.c @@ -6968,7 +6968,7 @@ static struct l2cap_conn *l2cap_conn_add(struct hci_conn *hcon) if (test_bit(HCI_LE_ENABLED, &hcon->hdev->dev_flags) && (bredr_sc_enabled(hcon->hdev) || - test_bit(HCI_FORCE_LESC, &hcon->hdev->dbg_flags))) + test_bit(HCI_FORCE_BREDR_SMP, &hcon->hdev->dbg_flags))) conn->local_fixed_chan |= L2CAP_FC_SMP_BREDR; mutex_init(&conn->ident_lock); diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c index 358264c0e785..73643a6ca3cf 100644 --- a/net/bluetooth/smp.c +++ b/net/bluetooth/smp.c @@ -20,6 +20,7 @@ SOFTWARE IS DISCLAIMED. */ +#include #include #include #include @@ -1675,7 +1676,7 @@ static u8 smp_cmd_pairing_req(struct l2cap_conn *conn, struct sk_buff *skb) if (conn->hcon->type == ACL_LINK) { /* We must have a BR/EDR SC link */ if (!test_bit(HCI_CONN_AES_CCM, &conn->hcon->flags) && - !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) + !test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) return SMP_CROSS_TRANSP_NOT_ALLOWED; set_bit(SMP_FLAG_SC, &smp->flags); @@ -2738,7 +2739,7 @@ static void bredr_pairing(struct l2cap_chan *chan) /* BR/EDR must use Secure Connections for SMP */ if (!test_bit(HCI_CONN_AES_CCM, &hcon->flags) && - !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) + !test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) return; /* If our LE support is not enabled don't do anything */ @@ -2976,6 +2977,66 @@ static void smp_del_chan(struct l2cap_chan *chan) l2cap_chan_put(chan); } +static ssize_t force_bredr_smp_read(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[3]; + + buf[0] = test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags) ? 
'Y': 'N'; + buf[1] = '\n'; + buf[2] = '\0'; + return simple_read_from_buffer(user_buf, count, ppos, buf, 2); +} + +static ssize_t force_bredr_smp_write(struct file *file, + const char __user *user_buf, + size_t count, loff_t *ppos) +{ + struct hci_dev *hdev = file->private_data; + char buf[32]; + size_t buf_size = min(count, (sizeof(buf)-1)); + bool enable; + + if (copy_from_user(buf, user_buf, buf_size)) + return -EFAULT; + + buf[buf_size] = '\0'; + if (strtobool(buf, &enable)) + return -EINVAL; + + if (enable == test_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags)) + return -EALREADY; + + if (enable) { + struct l2cap_chan *chan; + + chan = smp_add_cid(hdev, L2CAP_CID_SMP_BREDR); + if (IS_ERR(chan)) + return PTR_ERR(chan); + + hdev->smp_bredr_data = chan; + } else { + struct l2cap_chan *chan; + + chan = hdev->smp_bredr_data; + hdev->smp_bredr_data = NULL; + smp_del_chan(chan); + } + + change_bit(HCI_FORCE_BREDR_SMP, &hdev->dbg_flags); + + return count; +} + +static const struct file_operations force_bredr_smp_fops = { + .open = simple_open, + .read = force_bredr_smp_read, + .write = force_bredr_smp_write, + .llseek = default_llseek, +}; + int smp_register(struct hci_dev *hdev) { struct l2cap_chan *chan; @@ -2988,9 +3049,18 @@ int smp_register(struct hci_dev *hdev) hdev->smp_data = chan; - if (!lmp_sc_capable(hdev) && - !test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) + /* If the controller does not support BR/EDR Secure Connections + * feature, then the BR/EDR SMP channel shall not be present. + * + * To test this with Bluetooth 4.0 controllers, create a debugfs + * switch that allows forcing BR/EDR SMP support and accepting + * cross-transport pairing on non-AES encrypted connections. + */ + if (!lmp_sc_capable(hdev)) { + debugfs_create_file("force_bredr_smp", 0644, hdev->debugfs, + hdev, &force_bredr_smp_fops); return 0; + } chan = smp_add_cid(hdev, L2CAP_CID_SMP_BREDR); if (IS_ERR(chan)) { -- cgit v1.2.3-70-g09d2 From 91389af67c3a8d8f9eca5f51deda58fae4d9326e Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 31 Dec 2014 14:43:17 -0800 Subject: Bluetooth: Remove broken force_lesc_support debugfs option The force_lesc_support debugfs option never really worked. It has a race condition between creating the debugfs entry and registering the L2CAP fixed channel for BR/EDR SMP support. Also this has been replaced with a working force_bredr_smp debugfs switch that developers can use now. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 1 - net/bluetooth/hci_debugfs.c | 49 --------------------------------------------- 2 files changed, 50 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index fddb93f168b8..884ba004237e 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -173,7 +173,6 @@ enum { enum { HCI_DUT_MODE, HCI_FORCE_SC, - HCI_FORCE_LESC, HCI_FORCE_BREDR_SMP, HCI_FORCE_STATIC_ADDR, }; diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index ee33ce88d3d8..dc8f994a957b 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -421,50 +421,6 @@ static const struct file_operations force_sc_support_fops = { .llseek = default_llseek, }; -static ssize_t force_lesc_support_read(struct file *file, - char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_FORCE_LESC, &hdev->dbg_flags) ? 
'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_lesc_support_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); - bool enable; - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - if (strtobool(buf, &enable)) - return -EINVAL; - - if (enable == test_bit(HCI_FORCE_LESC, &hdev->dbg_flags)) - return -EALREADY; - - change_bit(HCI_FORCE_LESC, &hdev->dbg_flags); - - return count; -} - -static const struct file_operations force_lesc_support_fops = { - .open = simple_open, - .read = force_lesc_support_read, - .write = force_lesc_support_write, - .llseek = default_llseek, -}; - static int idle_timeout_set(void *data, u64 val) { struct hci_dev *hdev = data; @@ -568,11 +524,6 @@ void hci_debugfs_create_bredr(struct hci_dev *hdev) debugfs_create_file("force_sc_support", 0644, hdev->debugfs, hdev, &force_sc_support_fops); - - if (lmp_le_capable(hdev)) - debugfs_create_file("force_lesc_support", 0644, - hdev->debugfs, hdev, - &force_lesc_support_fops); } if (lmp_sniff_capable(hdev)) { -- cgit v1.2.3-70-g09d2 From 05b3c3e7905d00a1fe2e9184fdd9b5eac427c736 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 31 Dec 2014 14:43:18 -0800 Subject: Bluetooth: Remove no longer needed force_sc_support debugfs option The force_sc_support debugfs option was introduced to easily work with pre-production Bluetooth 4.1 silicon. This option is no longer needed since controllers supporting BR/EDR Secure Connections feature are now available. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 1 - include/net/bluetooth/hci_core.h | 3 +-- net/bluetooth/hci_debugfs.c | 49 ---------------------------------------- net/bluetooth/mgmt.c | 11 ++++----- 4 files changed, 5 insertions(+), 59 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 884ba004237e..aee16bf5d34f 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -172,7 +172,6 @@ enum { */ enum { HCI_DUT_MODE, - HCI_FORCE_SC, HCI_FORCE_BREDR_SMP, HCI_FORCE_STATIC_ADDR, }; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 3e7e5110f298..89f4e3c8a097 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1017,8 +1017,7 @@ void hci_conn_del_sysfs(struct hci_conn *conn); #define hdev_is_powered(hdev) (test_bit(HCI_UP, &hdev->flags) && \ !test_bit(HCI_AUTO_OFF, &hdev->dev_flags)) -#define bredr_sc_enabled(dev) ((lmp_sc_capable(dev) || \ - test_bit(HCI_FORCE_SC, &(dev)->dbg_flags)) && \ +#define bredr_sc_enabled(dev) (lmp_sc_capable(dev) && \ test_bit(HCI_SC_ENABLED, &(dev)->dev_flags)) /* ----- HCI protocols ----- */ diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c index dc8f994a957b..d72ebc2b11fa 100644 --- a/net/bluetooth/hci_debugfs.c +++ b/net/bluetooth/hci_debugfs.c @@ -375,52 +375,6 @@ static const struct file_operations sc_only_mode_fops = { .llseek = default_llseek, }; -static ssize_t force_sc_support_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[3]; - - buf[0] = test_bit(HCI_FORCE_SC, &hdev->dbg_flags) ? 
'Y': 'N'; - buf[1] = '\n'; - buf[2] = '\0'; - return simple_read_from_buffer(user_buf, count, ppos, buf, 2); -} - -static ssize_t force_sc_support_write(struct file *file, - const char __user *user_buf, - size_t count, loff_t *ppos) -{ - struct hci_dev *hdev = file->private_data; - char buf[32]; - size_t buf_size = min(count, (sizeof(buf)-1)); - bool enable; - - if (test_bit(HCI_UP, &hdev->flags)) - return -EBUSY; - - if (copy_from_user(buf, user_buf, buf_size)) - return -EFAULT; - - buf[buf_size] = '\0'; - if (strtobool(buf, &enable)) - return -EINVAL; - - if (enable == test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) - return -EALREADY; - - change_bit(HCI_FORCE_SC, &hdev->dbg_flags); - - return count; -} - -static const struct file_operations force_sc_support_fops = { - .open = simple_open, - .read = force_sc_support_read, - .write = force_sc_support_write, - .llseek = default_llseek, -}; - static int idle_timeout_set(void *data, u64 val) { struct hci_dev *hdev = data; @@ -521,9 +475,6 @@ void hci_debugfs_create_bredr(struct hci_dev *hdev) hdev, &auto_accept_delay_fops); debugfs_create_file("sc_only_mode", 0444, hdev->debugfs, hdev, &sc_only_mode_fops); - - debugfs_create_file("force_sc_support", 0644, hdev->debugfs, - hdev, &force_sc_support_fops); } if (lmp_sniff_capable(hdev)) { diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 3d2f7ad1e655..6b3f5537e441 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -570,8 +570,7 @@ static u32 get_supported_settings(struct hci_dev *hdev) settings |= MGMT_SETTING_HS; } - if (lmp_sc_capable(hdev) || - test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) + if (lmp_sc_capable(hdev)) settings |= MGMT_SETTING_SECURE_CONN; } @@ -4727,8 +4726,8 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, BT_DBG("request for %s", hdev->name); - if (!test_bit(HCI_LE_ENABLED, &hdev->dev_flags) && - !lmp_sc_capable(hdev) && !test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) + if (!lmp_sc_capable(hdev) && + !test_bit(HCI_LE_ENABLED, &hdev->dev_flags)) return cmd_status(sk, hdev->id, MGMT_OP_SET_SECURE_CONN, MGMT_STATUS_NOT_SUPPORTED); @@ -4738,9 +4737,7 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, hci_dev_lock(hdev); - if (!hdev_is_powered(hdev) || - (!lmp_sc_capable(hdev) && - !test_bit(HCI_FORCE_SC, &hdev->dbg_flags)) || + if (!hdev_is_powered(hdev) || !lmp_sc_capable(hdev) || !test_bit(HCI_BREDR_ENABLED, &hdev->dev_flags)) { bool changed; -- cgit v1.2.3-70-g09d2 From 1891172aa5c32f08ad9931b794edd71e91a4a527 Mon Sep 17 00:00:00 2001 From: Richard Cochran Date: Fri, 2 Jan 2015 20:22:03 +0100 Subject: timecounter: provide a macro to initialize the cyclecounter mask field. There is no need for users of the timecounter/cyclecounter code to include clocksource.h just for a single macro. Signed-off-by: Richard Cochran Signed-off-by: David S. Miller --- include/linux/timecounter.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/timecounter.h b/include/linux/timecounter.h index 74f45496e6d1..4382035a75bb 100644 --- a/include/linux/timecounter.h +++ b/include/linux/timecounter.h @@ -19,6 +19,9 @@ #include +/* simplify initialization of mask field */ +#define CYCLECOUNTER_MASK(bits) (cycle_t)((bits) < 64 ? ((1ULL<<(bits))-1) : -1) + /** * struct cyclecounter - hardware abstraction for a free running counter * Provides completely state-free accessors to the underlying hardware. 
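/*
 * Illustrative aside, not part of this patch: a minimal sketch of how a
 * clock driver might fill in the mask field with the new
 * CYCLECOUNTER_MASK() helper, assuming a hypothetical 48-bit free-running
 * counter.  my_hw_read_counter() is an assumed register accessor, not a
 * real kernel API.
 */
static cycle_t my_counter_read(const struct cyclecounter *cc)
{
	return (cycle_t)my_hw_read_counter();
}

static struct cyclecounter my_cc = {
	.read  = my_counter_read,
	.mask  = CYCLECOUNTER_MASK(48),
	.mult  = 1,
	.shift = 0,
};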
@@ -29,7 +32,7 @@ * @read: returns the current cycle value * @mask: bitmask for two's complement * subtraction of non 64 bit counters, - * see CLOCKSOURCE_MASK() helper macro + * see CYCLECOUNTER_MASK() helper macro * @mult: cycle to nanosecond multiplier * @shift: cycle to nanosecond divisor (power of two) */ -- cgit v1.2.3-70-g09d2 From 8d24c0b43125ec26cc80e04588477a9a2afc025c Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:14 +0100 Subject: rhashtable: Do hashing inside of rhashtable_lookup_compare() Hash the key inside of rhashtable_lookup_compare() like rhashtable_lookup() does. This allows to simplify the hashing functions and keep them private. Signed-off-by: Thomas Graf Cc: netfilter-devel@vger.kernel.org Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 5 +-- lib/rhashtable.c | 91 +++++++++++++++------------------------------- net/netfilter/nft_hash.c | 46 ++++++++++++++--------- net/netlink/af_netlink.c | 5 +-- 4 files changed, 61 insertions(+), 86 deletions(-) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index b93fd89b2e5e..1b51221c6bbd 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -96,9 +96,6 @@ static inline int lockdep_rht_mutex_is_held(const struct rhashtable *ht) int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); -u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len); -u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr); - void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, @@ -111,7 +108,7 @@ int rhashtable_expand(struct rhashtable *ht); int rhashtable_shrink(struct rhashtable *ht); void *rhashtable_lookup(const struct rhashtable *ht, const void *key); -void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, +void *rhashtable_lookup_compare(const struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg); void rhashtable_destroy(const struct rhashtable *ht); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 6c3c723e902b..1ee0eb636ca3 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -42,69 +42,39 @@ static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) return (void *) he - ht->p.head_offset; } -static u32 __hashfn(const struct rhashtable *ht, const void *key, - u32 len, u32 hsize) +static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash) { - u32 h; - - h = ht->p.hashfn(key, len, ht->p.hash_rnd); - - return h & (hsize - 1); -} - -/** - * rhashtable_hashfn - compute hash for key of given length - * @ht: hash table to compute for - * @key: pointer to key - * @len: length of key - * - * Computes the hash value using the hash function provided in the 'hashfn' - * of struct rhashtable_params. The returned value is guaranteed to be - * smaller than the number of buckets in the hash table. 
- */ -u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len) -{ - struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - - return __hashfn(ht, key, len, tbl->size); + return hash & (tbl->size - 1); } -EXPORT_SYMBOL_GPL(rhashtable_hashfn); -static u32 obj_hashfn(const struct rhashtable *ht, const void *ptr, u32 hsize) +static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr) { - if (unlikely(!ht->p.key_len)) { - u32 h; - - h = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); + u32 hash; - return h & (hsize - 1); - } + if (unlikely(!ht->p.key_len)) + hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); + else + hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len, + ht->p.hash_rnd); - return __hashfn(ht, ptr + ht->p.key_offset, ht->p.key_len, hsize); + return hash; } -/** - * rhashtable_obj_hashfn - compute hash for hashed object - * @ht: hash table to compute for - * @ptr: pointer to hashed object - * - * Computes the hash value using the hash function `hashfn` respectively - * 'obj_hashfn' depending on whether the hash table is set up to work with - * a fixed length key. The returned value is guaranteed to be smaller than - * the number of buckets in the hash table. - */ -u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr) +static u32 key_hashfn(const struct rhashtable *ht, const void *key, u32 len) { struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + u32 hash; + + hash = ht->p.hashfn(key, len, ht->p.hash_rnd); - return obj_hashfn(ht, ptr, tbl->size); + return rht_bucket_index(tbl, hash); } -EXPORT_SYMBOL_GPL(rhashtable_obj_hashfn); static u32 head_hashfn(const struct rhashtable *ht, - const struct rhash_head *he, u32 hsize) + const struct bucket_table *tbl, + const struct rhash_head *he) { - return obj_hashfn(ht, rht_obj(ht, he), hsize); + return rht_bucket_index(tbl, obj_raw_hashfn(ht, rht_obj(ht, he))); } static struct bucket_table *bucket_table_alloc(size_t nbuckets) @@ -170,9 +140,9 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, * reaches a node that doesn't hash to the same bucket as the * previous node p. Call the previous node p; */ - h = head_hashfn(ht, p, new_tbl->size); + h = head_hashfn(ht, new_tbl, p); rht_for_each(he, p->next, ht) { - if (head_hashfn(ht, he, new_tbl->size) != h) + if (head_hashfn(ht, new_tbl, he) != h) break; p = he; } @@ -184,7 +154,7 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, next = NULL; if (he) { rht_for_each(he, he->next, ht) { - if (head_hashfn(ht, he, new_tbl->size) == h) { + if (head_hashfn(ht, new_tbl, he) == h) { next = he; break; } @@ -237,9 +207,9 @@ int rhashtable_expand(struct rhashtable *ht) * single imprecise chain. 
*/ for (i = 0; i < new_tbl->size; i++) { - h = i & (old_tbl->size - 1); + h = rht_bucket_index(old_tbl, i); rht_for_each(he, old_tbl->buckets[h], ht) { - if (head_hashfn(ht, he, new_tbl->size) == i) { + if (head_hashfn(ht, new_tbl, he) == i) { RCU_INIT_POINTER(new_tbl->buckets[i], he); break; } @@ -353,7 +323,7 @@ void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) ASSERT_RHT_MUTEX(ht); - hash = head_hashfn(ht, obj, tbl->size); + hash = head_hashfn(ht, tbl, obj); RCU_INIT_POINTER(obj->next, tbl->buckets[hash]); rcu_assign_pointer(tbl->buckets[hash], obj); ht->nelems++; @@ -413,7 +383,7 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) ASSERT_RHT_MUTEX(ht); - h = head_hashfn(ht, obj, tbl->size); + h = head_hashfn(ht, tbl, obj); pprev = &tbl->buckets[h]; rht_for_each(he, tbl->buckets[h], ht) { @@ -452,7 +422,7 @@ void *rhashtable_lookup(const struct rhashtable *ht, const void *key) BUG_ON(!ht->p.key_len); - h = __hashfn(ht, key, ht->p.key_len, tbl->size); + h = key_hashfn(ht, key, ht->p.key_len); rht_for_each_rcu(he, tbl->buckets[h], ht) { if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key, ht->p.key_len)) @@ -467,7 +437,7 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup); /** * rhashtable_lookup_compare - search hash table with compare function * @ht: hash table - * @hash: hash value of desired entry + * @key: the pointer to the key * @compare: compare function, must return true on match * @arg: argument passed on to compare function * @@ -479,15 +449,14 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup); * * Returns the first entry on which the compare function returned true. */ -void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, +void *rhashtable_lookup_compare(const struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg) { const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); struct rhash_head *he; + u32 hash; - if (unlikely(hash >= tbl->size)) - return NULL; - + hash = key_hashfn(ht, key, ht->p.key_len); rht_for_each_rcu(he, tbl->buckets[hash], ht) { if (!compare(rht_obj(ht, he), arg)) continue; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 1e316ce4cb5d..614ee099ba36 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -94,28 +94,40 @@ static void nft_hash_remove(const struct nft_set *set, kfree(he); } +struct nft_compare_arg { + const struct nft_set *set; + struct nft_set_elem *elem; +}; + +static bool nft_hash_compare(void *ptr, void *arg) +{ + struct nft_hash_elem *he = ptr; + struct nft_compare_arg *x = arg; + + if (!nft_data_cmp(&he->key, &x->elem->key, x->set->klen)) { + x->elem->cookie = &he->node; + x->elem->flags = 0; + if (x->set->flags & NFT_SET_MAP) + nft_data_copy(&x->elem->data, he->data); + + return true; + } + + return false; +} + static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) { const struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl = rht_dereference_rcu(priv->tbl, priv); - struct rhash_head __rcu * const *pprev; - struct nft_hash_elem *he; - u32 h; - - h = rhashtable_hashfn(priv, &elem->key, set->klen); - pprev = &tbl->buckets[h]; - rht_for_each_entry_rcu(he, tbl->buckets[h], node) { - if (nft_data_cmp(&he->key, &elem->key, set->klen)) { - pprev = &he->node.next; - continue; - } + struct nft_compare_arg arg = { + .set = set, + .elem = elem, + }; - elem->cookie = (void *)pprev; - elem->flags = 0; - if (set->flags & NFT_SET_MAP) - nft_data_copy(&elem->data, he->data); + if 
(rhashtable_lookup_compare(priv, &elem->key, + &nft_hash_compare, &arg)) return 0; - } + return -ENOENT; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 84ea76ca3f1f..a5d7ed627563 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1002,11 +1002,8 @@ static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid, .net = net, .portid = portid, }; - u32 hash; - hash = rhashtable_hashfn(&table->hash, &portid, sizeof(portid)); - - return rhashtable_lookup_compare(&table->hash, hash, + return rhashtable_lookup_compare(&table->hash, &portid, &netlink_compare, &arg); } -- cgit v1.2.3-70-g09d2 From 88d6ed15acff1cb44b1d1f3c0a393b7f7744957a Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:16 +0100 Subject: rhashtable: Convert bucket iterators to take table and index This patch is in preparation to introduce per bucket spinlocks. It extends all iterator macros to take the bucket table and bucket index. It also introduces a new rht_dereference_bucket() to handle protected accesses to buckets. It introduces a barrier() to the RCU iterators to the prevent the compiler from caching the first element. The lockdep verifier is introduced as stub which always succeeds and properly implement in the next patch when the locks are introduced. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 173 ++++++++++++++++++++++++++++++--------------- lib/rhashtable.c | 30 +++++--- net/netfilter/nft_hash.c | 12 ++-- net/netlink/af_netlink.c | 12 ++-- net/netlink/diag.c | 4 +- 5 files changed, 152 insertions(+), 79 deletions(-) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 1b51221c6bbd..b54e24a08806 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -87,11 +87,18 @@ struct rhashtable { #ifdef CONFIG_PROVE_LOCKING int lockdep_rht_mutex_is_held(const struct rhashtable *ht); +int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); #else static inline int lockdep_rht_mutex_is_held(const struct rhashtable *ht) { return 1; } + +static inline int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, + u32 hash) +{ + return 1; +} #endif /* CONFIG_PROVE_LOCKING */ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); @@ -119,92 +126,144 @@ void rhashtable_destroy(const struct rhashtable *ht); #define rht_dereference_rcu(p, ht) \ rcu_dereference_check(p, lockdep_rht_mutex_is_held(ht)) -#define rht_entry(ptr, type, member) container_of(ptr, type, member) -#define rht_entry_safe(ptr, type, member) \ -({ \ - typeof(ptr) __ptr = (ptr); \ - __ptr ? rht_entry(__ptr, type, member) : NULL; \ -}) +#define rht_dereference_bucket(p, tbl, hash) \ + rcu_dereference_protected(p, lockdep_rht_bucket_is_held(tbl, hash)) -#define rht_next_entry_safe(pos, ht, member) \ -({ \ - pos ? rht_entry_safe(rht_dereference((pos)->member.next, ht), \ - typeof(*(pos)), member) : NULL; \ -}) +#define rht_dereference_bucket_rcu(p, tbl, hash) \ + rcu_dereference_check(p, lockdep_rht_bucket_is_held(tbl, hash)) + +#define rht_entry(tpos, pos, member) \ + ({ tpos = container_of(pos, typeof(*tpos), member); 1; }) /** - * rht_for_each - iterate over hash chain - * @pos: &struct rhash_head to use as a loop cursor. - * @head: head of the hash chain (struct rhash_head *) - * @ht: pointer to your struct rhashtable + * rht_for_each_continue - continue iterating over hash chain + * @pos: the &struct rhash_head to use as a loop cursor. 
+ * @head: the previous &struct rhash_head to continue from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index */ -#define rht_for_each(pos, head, ht) \ - for (pos = rht_dereference(head, ht); \ +#define rht_for_each_continue(pos, head, tbl, hash) \ + for (pos = rht_dereference_bucket(head, tbl, hash); \ pos; \ - pos = rht_dereference((pos)->next, ht)) + pos = rht_dereference_bucket((pos)->next, tbl, hash)) + +/** + * rht_for_each - iterate over hash chain + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + */ +#define rht_for_each(pos, tbl, hash) \ + rht_for_each_continue(pos, (tbl)->buckets[hash], tbl, hash) + +/** + * rht_for_each_entry_continue - continue iterating over hash chain + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the previous &struct rhash_head to continue from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. + */ +#define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member) \ + for (pos = rht_dereference_bucket(head, tbl, hash); \ + pos && rht_entry(tpos, pos, member); \ + pos = rht_dereference_bucket((pos)->next, tbl, hash)) /** * rht_for_each_entry - iterate over hash chain of given type - * @pos: type * to use as a loop cursor. - * @head: head of the hash chain (struct rhash_head *) - * @ht: pointer to your struct rhashtable - * @member: name of the rhash_head within the hashable struct. + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. */ -#define rht_for_each_entry(pos, head, ht, member) \ - for (pos = rht_entry_safe(rht_dereference(head, ht), \ - typeof(*(pos)), member); \ - pos; \ - pos = rht_next_entry_safe(pos, ht, member)) +#define rht_for_each_entry(tpos, pos, tbl, hash, member) \ + rht_for_each_entry_continue(tpos, pos, (tbl)->buckets[hash], \ + tbl, hash, member) /** * rht_for_each_entry_safe - safely iterate over hash chain of given type - * @pos: type * to use as a loop cursor. - * @n: type * to use for temporary next object storage - * @head: head of the hash chain (struct rhash_head *) - * @ht: pointer to your struct rhashtable - * @member: name of the rhash_head within the hashable struct. + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @next: the &struct rhash_head to use as next in loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. * * This hash chain list-traversal primitive allows for the looped code to * remove the loop cursor from the list. */ -#define rht_for_each_entry_safe(pos, n, head, ht, member) \ - for (pos = rht_entry_safe(rht_dereference(head, ht), \ - typeof(*(pos)), member), \ - n = rht_next_entry_safe(pos, ht, member); \ - pos; \ - pos = n, \ - n = rht_next_entry_safe(pos, ht, member)) +#define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ + for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \ + next = pos ? 
rht_dereference_bucket(pos->next, tbl, hash) \ + : NULL; \ + pos && rht_entry(tpos, pos, member); \ + pos = next) + +/** + * rht_for_each_rcu_continue - continue iterating over rcu hash chain + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the previous &struct rhash_head to continue from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu mutation primitives such as rhashtable_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_rcu_continue(pos, head, tbl, hash) \ + for (({barrier(); }), \ + pos = rht_dereference_bucket_rcu(head, tbl, hash); \ + pos; \ + pos = rcu_dereference_raw(pos->next)) /** * rht_for_each_rcu - iterate over rcu hash chain - * @pos: &struct rhash_head to use as a loop cursor. - * @head: head of the hash chain (struct rhash_head *) - * @ht: pointer to your struct rhashtable + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index * * This hash chain list-traversal primitive may safely run concurrently with - * the _rcu fkht mutation primitives such as rht_insert() as long as the + * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). */ -#define rht_for_each_rcu(pos, head, ht) \ - for (pos = rht_dereference_rcu(head, ht); \ - pos; \ - pos = rht_dereference_rcu((pos)->next, ht)) +#define rht_for_each_rcu(pos, tbl, hash) \ + rht_for_each_rcu_continue(pos, (tbl)->buckets[hash], tbl, hash) + +/** + * rht_for_each_entry_rcu_continue - continue iterating over rcu hash chain + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @head: the previous &struct rhash_head to continue from + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. + * + * This hash chain list-traversal primitive may safely run concurrently with + * the _rcu mutation primitives such as rhashtable_insert() as long as the + * traversal is guarded by rcu_read_lock(). + */ +#define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \ + for (({barrier(); }), \ + pos = rht_dereference_bucket_rcu(head, tbl, hash); \ + pos && rht_entry(tpos, pos, member); \ + pos = rht_dereference_bucket_rcu(pos->next, tbl, hash)) /** * rht_for_each_entry_rcu - iterate over rcu hash chain of given type - * @pos: type * to use as a loop cursor. - * @head: head of the hash chain (struct rhash_head *) - * @member: name of the rhash_head within the hashable struct. + * @tpos: the type * to use as a loop cursor. + * @pos: the &struct rhash_head to use as a loop cursor. + * @tbl: the &struct bucket_table + * @hash: the hash value / bucket index + * @member: name of the &struct rhash_head within the hashable struct. * * This hash chain list-traversal primitive may safely run concurrently with - * the _rcu fkht mutation primitives such as rht_insert() as long as the + * the _rcu mutation primitives such as rhashtable_insert() as long as the * traversal is guarded by rcu_read_lock(). 
*/ -#define rht_for_each_entry_rcu(pos, head, member) \ - for (pos = rht_entry_safe(rcu_dereference_raw(head), \ - typeof(*(pos)), member); \ - pos; \ - pos = rht_entry_safe(rcu_dereference_raw((pos)->member.next), \ - typeof(*(pos)), member)) +#define rht_for_each_entry_rcu(tpos, pos, tbl, hash, member) \ + rht_for_each_entry_rcu_continue(tpos, pos, (tbl)->buckets[hash],\ + tbl, hash, member) #endif /* _LINUX_RHASHTABLE_H */ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index b658245826a1..ce450d095fdf 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -35,6 +35,12 @@ int lockdep_rht_mutex_is_held(const struct rhashtable *ht) return ht->p.mutex_is_held(ht->p.parent); } EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); + +int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) +{ + return 1; +} +EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); #endif static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) @@ -141,7 +147,7 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, * previous node p. Call the previous node p; */ h = head_hashfn(ht, new_tbl, p); - rht_for_each(he, p->next, ht) { + rht_for_each_continue(he, p->next, old_tbl, n) { if (head_hashfn(ht, new_tbl, he) != h) break; p = he; @@ -153,7 +159,7 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, */ next = NULL; if (he) { - rht_for_each(he, he->next, ht) { + rht_for_each_continue(he, he->next, old_tbl, n) { if (head_hashfn(ht, new_tbl, he) == h) { next = he; break; @@ -208,7 +214,7 @@ int rhashtable_expand(struct rhashtable *ht) */ for (i = 0; i < new_tbl->size; i++) { h = rht_bucket_index(old_tbl, i); - rht_for_each(he, old_tbl->buckets[h], ht) { + rht_for_each(he, old_tbl, h) { if (head_hashfn(ht, new_tbl, he) == i) { RCU_INIT_POINTER(new_tbl->buckets[i], he); break; @@ -286,7 +292,7 @@ int rhashtable_shrink(struct rhashtable *ht) * to the new bucket. 
*/ for (pprev = &ntbl->buckets[i]; *pprev != NULL; - pprev = &rht_dereference(*pprev, ht)->next) + pprev = &rht_dereference_bucket(*pprev, ntbl, i)->next) ; RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); } @@ -386,7 +392,7 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) h = head_hashfn(ht, tbl, obj); pprev = &tbl->buckets[h]; - rht_for_each(he, tbl->buckets[h], ht) { + rht_for_each(he, tbl, h) { if (he != obj) { pprev = &he->next; continue; @@ -423,7 +429,7 @@ void *rhashtable_lookup(const struct rhashtable *ht, const void *key) BUG_ON(!ht->p.key_len); h = key_hashfn(ht, key, ht->p.key_len); - rht_for_each_rcu(he, tbl->buckets[h], ht) { + rht_for_each_rcu(he, tbl, h) { if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key, ht->p.key_len)) continue; @@ -457,7 +463,7 @@ void *rhashtable_lookup_compare(const struct rhashtable *ht, const void *key, u32 hash; hash = key_hashfn(ht, key, ht->p.key_len); - rht_for_each_rcu(he, tbl->buckets[hash], ht) { + rht_for_each_rcu(he, tbl, hash) { if (!compare(rht_obj(ht, he), arg)) continue; return rht_obj(ht, he); @@ -625,6 +631,7 @@ static int __init test_rht_lookup(struct rhashtable *ht) static void test_bucket_stats(struct rhashtable *ht, bool quiet) { unsigned int cnt, rcu_cnt, i, total = 0; + struct rhash_head *pos; struct test_obj *obj; struct bucket_table *tbl; @@ -635,14 +642,14 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet) if (!quiet) pr_info(" [%#4x/%zu]", i, tbl->size); - rht_for_each_entry_rcu(obj, tbl->buckets[i], node) { + rht_for_each_entry_rcu(obj, pos, tbl, i, node) { cnt++; total++; if (!quiet) pr_cont(" [%p],", obj); } - rht_for_each_entry_rcu(obj, tbl->buckets[i], node) + rht_for_each_entry_rcu(obj, pos, tbl, i, node) rcu_cnt++; if (rcu_cnt != cnt) @@ -664,7 +671,8 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet) static int __init test_rhashtable(struct rhashtable *ht) { struct bucket_table *tbl; - struct test_obj *obj, *next; + struct test_obj *obj; + struct rhash_head *pos, *next; int err; unsigned int i; @@ -733,7 +741,7 @@ static int __init test_rhashtable(struct rhashtable *ht) error: tbl = rht_dereference_rcu(ht->tbl, ht); for (i = 0; i < tbl->size; i++) - rht_for_each_entry_safe(obj, next, tbl->buckets[i], ht, node) + rht_for_each_entry_safe(obj, pos, next, tbl, i, node) kfree(obj); return err; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 614ee099ba36..d93f1f4c22a9 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -142,7 +142,9 @@ static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, tbl = rht_dereference_rcu(priv->tbl, priv); for (i = 0; i < tbl->size; i++) { - rht_for_each_entry_rcu(he, tbl->buckets[i], node) { + struct rhash_head *pos; + + rht_for_each_entry_rcu(he, pos, tbl, i, node) { if (iter->count < iter->skip) goto cont; @@ -197,15 +199,13 @@ static void nft_hash_destroy(const struct nft_set *set) { const struct rhashtable *priv = nft_set_priv(set); const struct bucket_table *tbl = priv->tbl; - struct nft_hash_elem *he, *next; + struct nft_hash_elem *he; + struct rhash_head *pos, *next; unsigned int i; for (i = 0; i < tbl->size; i++) { - for (he = rht_entry(tbl->buckets[i], struct nft_hash_elem, node); - he != NULL; he = next) { - next = rht_entry(he->node.next, struct nft_hash_elem, node); + rht_for_each_entry_safe(he, pos, next, tbl, i, node) nft_hash_elem_destroy(set, he); - } } rhashtable_destroy(priv); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c 
index a5d7ed627563..57449b6089c2 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2898,7 +2898,9 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); for (j = 0; j < tbl->size; j++) { - rht_for_each_entry_rcu(nlk, tbl->buckets[j], node) { + struct rhash_head *node; + + rht_for_each_entry_rcu(nlk, node, tbl, j, node) { s = (struct sock *)nlk; if (sock_net(s) != seq_file_net(seq)) @@ -2926,6 +2928,8 @@ static void *netlink_seq_start(struct seq_file *seq, loff_t *pos) static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct rhashtable *ht; + const struct bucket_table *tbl; + struct rhash_head *node; struct netlink_sock *nlk; struct nl_seq_iter *iter; struct net *net; @@ -2942,17 +2946,17 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) i = iter->link; ht = &nl_table[i].hash; - rht_for_each_entry(nlk, nlk->node.next, ht, node) + tbl = rht_dereference_rcu(ht->tbl, ht); + rht_for_each_entry_rcu_continue(nlk, node, nlk->node.next, tbl, iter->hash_idx, node) if (net_eq(sock_net((struct sock *)nlk), net)) return nlk; j = iter->hash_idx + 1; do { - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); for (; j < tbl->size; j++) { - rht_for_each_entry(nlk, tbl->buckets[j], ht, node) { + rht_for_each_entry_rcu(nlk, node, tbl, j, node) { if (net_eq(sock_net((struct sock *)nlk), net)) { iter->link = i; iter->hash_idx = j; diff --git a/net/netlink/diag.c b/net/netlink/diag.c index de8c74a3c061..fcca36d81a62 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -113,7 +113,9 @@ static int __netlink_diag_dump(struct sk_buff *skb, struct netlink_callback *cb, req = nlmsg_data(cb->nlh); for (i = 0; i < htbl->size; i++) { - rht_for_each_entry(nlsk, htbl->buckets[i], ht, node) { + struct rhash_head *pos; + + rht_for_each_entry(nlsk, pos, htbl, i, node) { sk = (struct sock *)nlsk; if (!net_eq(sock_net(sk), net)) -- cgit v1.2.3-70-g09d2 From 897362e446436d245972e72c6bc5b33bd7a5c659 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:18 +0100 Subject: nft_hash: Remove rhashtable_remove_pprev() The removal function of nft_hash currently stores a reference to the previous element during lookup which is used to optimize removal later on. This was possible because a lock is held throughout calling rhashtable_lookup() and rhashtable_remove(). With the introduction of deferred table resizing in parallel to lookups and insertions, the nftables lock will no longer synchronize all table mutations and the stored pprev may become invalid. Removing this optimization makes removal slightly more expensive on average but allows taking the resize cost out of the insert and remove path. Signed-off-by: Thomas Graf Cc: netfilter-devel@vger.kernel.org Signed-off-by: David S.
Miller --- include/linux/rhashtable.h | 2 -- lib/rhashtable.c | 34 +++++++--------------------------- net/netfilter/nft_hash.c | 11 +++-------- 3 files changed, 10 insertions(+), 37 deletions(-) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index b54e24a08806..f624d4b5045f 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -105,8 +105,6 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params); void rhashtable_insert(struct rhashtable *ht, struct rhash_head *node); bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *node); -void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, - struct rhash_head __rcu **pprev); bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size); bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 0bd29c178910..e6b85c4a5828 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -344,32 +344,6 @@ void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) } EXPORT_SYMBOL_GPL(rhashtable_insert); -/** - * rhashtable_remove_pprev - remove object from hash table given previous element - * @ht: hash table - * @obj: pointer to hash head inside object - * @pprev: pointer to previous element - * - * Identical to rhashtable_remove() but caller is alreayd aware of the element - * in front of the element to be deleted. This is in particular useful for - * deletion when combined with walking or lookup. - */ -void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, - struct rhash_head __rcu **pprev) -{ - struct bucket_table *tbl = rht_dereference(ht->tbl, ht); - - ASSERT_RHT_MUTEX(ht); - - RCU_INIT_POINTER(*pprev, obj->next); - ht->nelems--; - - if (ht->p.shrink_decision && - ht->p.shrink_decision(ht, tbl->size)) - rhashtable_shrink(ht); -} -EXPORT_SYMBOL_GPL(rhashtable_remove_pprev); - /** * rhashtable_remove - remove object from hash table * @ht: hash table @@ -403,7 +377,13 @@ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) continue; } - rhashtable_remove_pprev(ht, he, pprev); + RCU_INIT_POINTER(*pprev, he->next); + ht->nelems--; + + if (ht->p.shrink_decision && + ht->p.shrink_decision(ht, tbl->size)) + rhashtable_shrink(ht); + return true; } diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index d93f1f4c22a9..7f903cf9a1b9 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -83,15 +83,10 @@ static void nft_hash_remove(const struct nft_set *set, const struct nft_set_elem *elem) { struct rhashtable *priv = nft_set_priv(set); - struct rhash_head *he, __rcu **pprev; - - pprev = elem->cookie; - he = rht_dereference((*pprev), priv); - - rhashtable_remove_pprev(priv, he, pprev); + rhashtable_remove(priv, elem->cookie); synchronize_rcu(); - kfree(he); + kfree(elem->cookie); } struct nft_compare_arg { @@ -105,7 +100,7 @@ static bool nft_hash_compare(void *ptr, void *arg) struct nft_compare_arg *x = arg; if (!nft_data_cmp(&he->key, &x->elem->key, x->set->klen)) { - x->elem->cookie = &he->node; + x->elem->cookie = he; x->elem->flags = 0; if (x->set->flags & NFT_SET_MAP) nft_data_copy(&x->elem->data, he->data); -- cgit v1.2.3-70-g09d2 From 113948d841e8d78039e5dbbb5248f5b73e99eafa Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:19 +0100 Subject: spinlock: Add spin_lock_bh_nested() Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/linux/spinlock.h | 8 ++++++++ include/linux/spinlock_api_smp.h | 2 ++ include/linux/spinlock_api_up.h | 1 + kernel/locking/spinlock.c | 8 ++++++++ 4 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 262ba4ef9a8e..3e18379dfa6f 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -190,6 +190,8 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) #ifdef CONFIG_DEBUG_LOCK_ALLOC # define raw_spin_lock_nested(lock, subclass) \ _raw_spin_lock_nested(lock, subclass) +# define raw_spin_lock_bh_nested(lock, subclass) \ + _raw_spin_lock_bh_nested(lock, subclass) # define raw_spin_lock_nest_lock(lock, nest_lock) \ do { \ @@ -205,6 +207,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) # define raw_spin_lock_nested(lock, subclass) \ _raw_spin_lock(((void)(subclass), (lock))) # define raw_spin_lock_nest_lock(lock, nest_lock) _raw_spin_lock(lock) +# define raw_spin_lock_bh_nested(lock, subclass) _raw_spin_lock_bh(lock) #endif #if defined(CONFIG_SMP) || defined(CONFIG_DEBUG_SPINLOCK) @@ -324,6 +327,11 @@ do { \ raw_spin_lock_nested(spinlock_check(lock), subclass); \ } while (0) +#define spin_lock_bh_nested(lock, subclass) \ +do { \ + raw_spin_lock_bh_nested(spinlock_check(lock), subclass);\ +} while (0) + #define spin_lock_nest_lock(lock, nest_lock) \ do { \ raw_spin_lock_nest_lock(spinlock_check(lock), nest_lock); \ diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 42dfab89e740..5344268e6e62 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -22,6 +22,8 @@ int in_lock_functions(unsigned long addr); void __lockfunc _raw_spin_lock(raw_spinlock_t *lock) __acquires(lock); void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) __acquires(lock); +void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass) + __acquires(lock); void __lockfunc _raw_spin_lock_nest_lock(raw_spinlock_t *lock, struct lockdep_map *map) __acquires(lock); diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h index d0d188861ad6..d3afef9d8dbe 100644 --- a/include/linux/spinlock_api_up.h +++ b/include/linux/spinlock_api_up.h @@ -57,6 +57,7 @@ #define _raw_spin_lock(lock) __LOCK(lock) #define _raw_spin_lock_nested(lock, subclass) __LOCK(lock) +#define _raw_spin_lock_bh_nested(lock, subclass) __LOCK(lock) #define _raw_read_lock(lock) __LOCK(lock) #define _raw_write_lock(lock) __LOCK(lock) #define _raw_spin_lock_bh(lock) __LOCK_BH(lock) diff --git a/kernel/locking/spinlock.c b/kernel/locking/spinlock.c index 4b082b5cac9e..db3ccb1dd614 100644 --- a/kernel/locking/spinlock.c +++ b/kernel/locking/spinlock.c @@ -363,6 +363,14 @@ void __lockfunc _raw_spin_lock_nested(raw_spinlock_t *lock, int subclass) } EXPORT_SYMBOL(_raw_spin_lock_nested); +void __lockfunc _raw_spin_lock_bh_nested(raw_spinlock_t *lock, int subclass) +{ + __local_bh_disable_ip(_RET_IP_, SOFTIRQ_LOCK_OFFSET); + spin_acquire(&lock->dep_map, subclass, 0, _RET_IP_); + LOCK_CONTENDED(lock, do_raw_spin_trylock, do_raw_spin_lock); +} +EXPORT_SYMBOL(_raw_spin_lock_bh_nested); + unsigned long __lockfunc _raw_spin_lock_irqsave_nested(raw_spinlock_t *lock, int subclass) { -- cgit v1.2.3-70-g09d2 From 97defe1ecf868b8127f8e62395499d6a06e4c4b1 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:20 +0100 Subject: rhashtable: Per bucket locks & deferred expansion/shrinking 
Introduces an array of spinlocks to protect bucket mutations. The number of spinlocks per CPU is configurable and selected based on the hash of the bucket. This allows for parallel insertions and removals of entries which do not share a lock. The patch also defers expansion and shrinking to a worker queue which allows insertion and removal from atomic context. Insertions and deletions may occur in parallel to it and are only held up briefly while the particular bucket is linked or unzipped. Mutations of the bucket table pointer are protected by a new mutex; read access is RCU protected. In the event of an expansion or shrinking, the new bucket table allocated is exposed as a so-called future table as soon as the resize process starts. Lookups, deletions, and insertions will briefly use both tables. The future table becomes the main table after an RCU grace period and initial linking of the old to the new table has been performed. Optimization of the chains to make use of the new number of buckets follows only once the new table is in use. The side effect of this is that during that RCU grace period, a bucket traversal using any rht_for_each() variant on the main table will not see any insertions performed during the RCU grace period which would at that point land in the future table. The lookup will see them as it searches both tables if needed. Having multiple insertions and removals occur in parallel requires nelems to become an atomic counter. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 37 ++-- lib/rhashtable.c | 458 ++++++++++++++++++++++++++++++++++----------- net/netfilter/nft_hash.c | 27 ++- net/netlink/af_netlink.c | 15 +- 4 files changed, 384 insertions(+), 153 deletions(-) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f624d4b5045f..a1688f0a6193 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -19,6 +19,7 @@ #define _LINUX_RHASHTABLE_H #include +#include struct rhash_head { struct rhash_head __rcu *next; @@ -26,8 +27,17 @@ struct rhash_head { #define INIT_HASH_HEAD(ptr) ((ptr)->next = NULL) +/** + * struct bucket_table - Table of hash buckets + * @size: Number of hash buckets + * @locks_mask: Mask to apply before accessing locks[] + * @locks: Array of spinlocks protecting individual buckets + * @buckets: size * hash buckets + */ struct bucket_table { size_t size; + unsigned int locks_mask; + spinlock_t *locks; struct rhash_head __rcu *buckets[]; }; @@ -45,11 +55,11 @@ struct rhashtable; * @hash_rnd: Seed to use while hashing * @max_shift: Maximum number of shifts while expanding * @min_shift: Minimum number of shifts while shrinking + * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Function to hash key * @obj_hashfn: Function to hash object * @grow_decision: If defined, may return true if table should expand * @shrink_decision: If defined, may return true if table should shrink - * @mutex_is_held: Must return true if protecting mutex is held */ struct rhashtable_params { size_t nelem_hint; @@ -59,37 +69,42 @@ struct rhashtable_params { u32 hash_rnd; size_t max_shift; size_t min_shift; + size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; bool (*grow_decision)(const struct rhashtable *ht, size_t new_size); bool (*shrink_decision)(const struct rhashtable *ht, size_t new_size); -#ifdef CONFIG_PROVE_LOCKING - int (*mutex_is_held)(void *parent); - void *parent; -#endif }; /** * struct rhashtable - Hash table handle * @tbl:
Bucket table + * @future_tbl: Table under construction during expansion/shrinking * @nelems: Number of elements in table * @shift: Current size (1 << shift) * @p: Configuration parameters + * @run_work: Deferred worker to expand/shrink asynchronously + * @mutex: Mutex to protect current/future table swapping + * @being_destroyed: True if table is set up for destruction */ struct rhashtable { struct bucket_table __rcu *tbl; - size_t nelems; + struct bucket_table __rcu *future_tbl; + atomic_t nelems; size_t shift; struct rhashtable_params p; + struct delayed_work run_work; + struct mutex mutex; + bool being_destroyed; }; #ifdef CONFIG_PROVE_LOCKING -int lockdep_rht_mutex_is_held(const struct rhashtable *ht); +int lockdep_rht_mutex_is_held(struct rhashtable *ht); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); #else -static inline int lockdep_rht_mutex_is_held(const struct rhashtable *ht) +static inline int lockdep_rht_mutex_is_held(struct rhashtable *ht) { return 1; } @@ -112,11 +127,11 @@ bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size); int rhashtable_expand(struct rhashtable *ht); int rhashtable_shrink(struct rhashtable *ht); -void *rhashtable_lookup(const struct rhashtable *ht, const void *key); -void *rhashtable_lookup_compare(const struct rhashtable *ht, const void *key, +void *rhashtable_lookup(struct rhashtable *ht, const void *key); +void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg); -void rhashtable_destroy(const struct rhashtable *ht); +void rhashtable_destroy(struct rhashtable *ht); #define rht_dereference(p, ht) \ rcu_dereference_protected(p, lockdep_rht_mutex_is_held(ht)) diff --git a/lib/rhashtable.c b/lib/rhashtable.c index e6b85c4a5828..312e3437c7bc 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -26,19 +26,42 @@ #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4UL +#define BUCKET_LOCKS_PER_CPU 128UL + +enum { + RHT_LOCK_NORMAL, + RHT_LOCK_NESTED, + RHT_LOCK_NESTED2, +}; + +/* The bucket lock is selected based on the hash and protects mutations + * on a group of hash buckets. + * + * IMPORTANT: When holding the bucket lock of both the old and new table + * during expansions and shrinking, the old bucket lock must always be + * acquired first. + */ +static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash) +{ + return &tbl->locks[hash & tbl->locks_mask]; +} #define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) +#define ASSERT_BUCKET_LOCK(TBL, HASH) \ + BUG_ON(!lockdep_rht_bucket_is_held(TBL, HASH)) #ifdef CONFIG_PROVE_LOCKING -int lockdep_rht_mutex_is_held(const struct rhashtable *ht) +int lockdep_rht_mutex_is_held(struct rhashtable *ht) { - return ht->p.mutex_is_held(ht->p.parent); + return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1; } EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) { - return 1; + spinlock_t *lock = bucket_lock(tbl, hash); + + return (debug_locks) ? 
lockdep_is_held(lock) : 1; } EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); #endif @@ -66,7 +89,7 @@ static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr) return hash; } -static u32 key_hashfn(const struct rhashtable *ht, const void *key, u32 len) +static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len) { struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); u32 hash; @@ -95,7 +118,49 @@ static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n) return pprev; } -static struct bucket_table *bucket_table_alloc(size_t nbuckets) +static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl) +{ + unsigned int i, size; +#if defined(CONFIG_PROVE_LOCKING) + unsigned int nr_pcpus = 2; +#else + unsigned int nr_pcpus = num_possible_cpus(); +#endif + + nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL); + size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul); + + /* Never allocate more than one lock per bucket */ + size = min_t(unsigned int, size, tbl->size); + + if (sizeof(spinlock_t) != 0) { +#ifdef CONFIG_NUMA + if (size * sizeof(spinlock_t) > PAGE_SIZE) + tbl->locks = vmalloc(size * sizeof(spinlock_t)); + else +#endif + tbl->locks = kmalloc_array(size, sizeof(spinlock_t), + GFP_KERNEL); + if (!tbl->locks) + return -ENOMEM; + for (i = 0; i < size; i++) + spin_lock_init(&tbl->locks[i]); + } + tbl->locks_mask = size - 1; + + return 0; +} + +static void bucket_table_free(const struct bucket_table *tbl) +{ + if (tbl) + kvfree(tbl->locks); + + kvfree(tbl); +} + +static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, + size_t nbuckets) { struct bucket_table *tbl; size_t size; @@ -110,12 +175,12 @@ static struct bucket_table *bucket_table_alloc(size_t nbuckets) tbl->size = nbuckets; - return tbl; -} + if (alloc_bucket_locks(ht, tbl) < 0) { + bucket_table_free(tbl); + return NULL; + } -static void bucket_table_free(const struct bucket_table *tbl) -{ - kvfree(tbl); + return tbl; } /** @@ -126,7 +191,7 @@ static void bucket_table_free(const struct bucket_table *tbl) bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) { /* Expand table when exceeding 75% load */ - return ht->nelems > (new_size / 4 * 3); + return atomic_read(&ht->nelems) > (new_size / 4 * 3); } EXPORT_SYMBOL_GPL(rht_grow_above_75); @@ -138,41 +203,59 @@ EXPORT_SYMBOL_GPL(rht_grow_above_75); bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) { /* Shrink table beneath 30% load */ - return ht->nelems < (new_size * 3 / 10); + return atomic_read(&ht->nelems) < (new_size * 3 / 10); } EXPORT_SYMBOL_GPL(rht_shrink_below_30); static void hashtable_chain_unzip(const struct rhashtable *ht, const struct bucket_table *new_tbl, - struct bucket_table *old_tbl, size_t n) + struct bucket_table *old_tbl, + size_t old_hash) { struct rhash_head *he, *p, *next; - unsigned int h; + spinlock_t *new_bucket_lock, *new_bucket_lock2 = NULL; + unsigned int new_hash, new_hash2; + + ASSERT_BUCKET_LOCK(old_tbl, old_hash); /* Old bucket empty, no work needed. */ - p = rht_dereference(old_tbl->buckets[n], ht); + p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl, + old_hash); if (!p) return; + new_hash = new_hash2 = head_hashfn(ht, new_tbl, p); + new_bucket_lock = bucket_lock(new_tbl, new_hash); + /* Advance the old bucket pointer one or more times until it * reaches a node that doesn't hash to the same bucket as the * previous node p. 
Call the previous node p; */ - h = head_hashfn(ht, new_tbl, p); - rht_for_each_continue(he, p->next, old_tbl, n) { - if (head_hashfn(ht, new_tbl, he) != h) + rht_for_each_continue(he, p->next, old_tbl, old_hash) { + new_hash2 = head_hashfn(ht, new_tbl, he); + if (new_hash != new_hash2) break; p = he; } - RCU_INIT_POINTER(old_tbl->buckets[n], p->next); + rcu_assign_pointer(old_tbl->buckets[old_hash], p->next); + + spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED); + + /* If we have encountered an entry that maps to a different bucket in + * the new table, lock down that bucket as well as we might cut off + * the end of the chain. + */ + new_bucket_lock2 = bucket_lock(new_tbl, new_hash); + if (new_bucket_lock != new_bucket_lock2) + spin_lock_bh_nested(new_bucket_lock2, RHT_LOCK_NESTED2); /* Find the subsequent node which does hash to the same * bucket as node P, or NULL if no such node exists. */ next = NULL; if (he) { - rht_for_each_continue(he, he->next, old_tbl, n) { - if (head_hashfn(ht, new_tbl, he) == h) { + rht_for_each_continue(he, he->next, old_tbl, old_hash) { + if (head_hashfn(ht, new_tbl, he) == new_hash) { next = he; break; } @@ -182,7 +265,23 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, /* Set p's next pointer to that subsequent node pointer, * bypassing the nodes which do not hash to p's bucket */ - RCU_INIT_POINTER(p->next, next); + rcu_assign_pointer(p->next, next); + + if (new_bucket_lock != new_bucket_lock2) + spin_unlock_bh(new_bucket_lock2); + spin_unlock_bh(new_bucket_lock); +} + +static void link_old_to_new(struct bucket_table *new_tbl, + unsigned int new_hash, struct rhash_head *entry) +{ + spinlock_t *new_bucket_lock; + + new_bucket_lock = bucket_lock(new_tbl, new_hash); + + spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED); + rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry); + spin_unlock_bh(new_bucket_lock); } /** @@ -195,43 +294,59 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, * This function may only be called in a context where it is safe to call * synchronize_rcu(), e.g. not within a rcu_read_lock() section. * - * The caller must ensure that no concurrent table mutations take place. - * It is however valid to have concurrent lookups if they are RCU protected. + * The caller must ensure that no concurrent resizing occurs by holding + * ht->mutex. + * + * It is valid to have concurrent insertions and deletions protected by per + * bucket locks or concurrent RCU protected lookups and traversals. */ int rhashtable_expand(struct rhashtable *ht) { struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); struct rhash_head *he; - unsigned int i, h; - bool complete; + spinlock_t *old_bucket_lock; + unsigned int new_hash, old_hash; + bool complete = false; ASSERT_RHT_MUTEX(ht); if (ht->p.max_shift && ht->shift >= ht->p.max_shift) return 0; - new_tbl = bucket_table_alloc(old_tbl->size * 2); + new_tbl = bucket_table_alloc(ht, old_tbl->size * 2); if (new_tbl == NULL) return -ENOMEM; ht->shift++; - /* For each new bucket, search the corresponding old bucket - * for the first entry that hashes to the new bucket, and - * link the new bucket to that entry. Since all the entries - * which will end up in the new bucket appear in the same - * old bucket, this constructs an entirely valid new hash - * table, but with multiple buckets "zipped" together into a - * single imprecise chain. + /* Make insertions go into the new, empty table right away. 
Deletions + * and lookups will be attempted in both tables until we synchronize. + * The synchronize_rcu() guarantees for the new table to be picked up + * so no new additions go into the old table while we relink. + */ + rcu_assign_pointer(ht->future_tbl, new_tbl); + synchronize_rcu(); + + /* For each new bucket, search the corresponding old bucket for the + * first entry that hashes to the new bucket, and link the end of + * newly formed bucket chain (containing entries added to future + * table) to that entry. Since all the entries which will end up in + * the new bucket appear in the same old bucket, this constructs an + * entirely valid new hash table, but with multiple buckets + * "zipped" together into a single imprecise chain. */ - for (i = 0; i < new_tbl->size; i++) { - h = rht_bucket_index(old_tbl, i); - rht_for_each(he, old_tbl, h) { - if (head_hashfn(ht, new_tbl, he) == i) { - RCU_INIT_POINTER(new_tbl->buckets[i], he); + for (new_hash = 0; new_hash < new_tbl->size; new_hash++) { + old_hash = rht_bucket_index(old_tbl, new_hash); + old_bucket_lock = bucket_lock(old_tbl, old_hash); + + spin_lock_bh(old_bucket_lock); + rht_for_each(he, old_tbl, old_hash) { + if (head_hashfn(ht, new_tbl, he) == new_hash) { + link_old_to_new(new_tbl, new_hash, he); break; } } + spin_unlock_bh(old_bucket_lock); } /* Publish the new table pointer. Lookups may now traverse @@ -241,7 +356,7 @@ int rhashtable_expand(struct rhashtable *ht) rcu_assign_pointer(ht->tbl, new_tbl); /* Unzip interleaved hash chains */ - do { + while (!complete && !ht->being_destroyed) { /* Wait for readers. All new readers will see the new * table, and thus no references to the old table will * remain. @@ -253,12 +368,17 @@ int rhashtable_expand(struct rhashtable *ht) * table): ... */ complete = true; - for (i = 0; i < old_tbl->size; i++) { - hashtable_chain_unzip(ht, new_tbl, old_tbl, i); - if (old_tbl->buckets[i] != NULL) + for (old_hash = 0; old_hash < old_tbl->size; old_hash++) { + old_bucket_lock = bucket_lock(old_tbl, old_hash); + spin_lock_bh(old_bucket_lock); + + hashtable_chain_unzip(ht, new_tbl, old_tbl, old_hash); + if (old_tbl->buckets[old_hash] != NULL) complete = false; + + spin_unlock_bh(old_bucket_lock); } - } while (!complete); + } bucket_table_free(old_tbl); return 0; @@ -272,38 +392,65 @@ EXPORT_SYMBOL_GPL(rhashtable_expand); * This function may only be called in a context where it is safe to call * synchronize_rcu(), e.g. not within a rcu_read_lock() section. * + * The caller must ensure that no concurrent resizing occurs by holding + * ht->mutex. + * * The caller must ensure that no concurrent table mutations take place. * It is however valid to have concurrent lookups if they are RCU protected. + * + * It is valid to have concurrent insertions and deletions protected by per + * bucket locks or concurrent RCU protected lookups and traversals. 
*/ int rhashtable_shrink(struct rhashtable *ht) { - struct bucket_table *ntbl, *tbl = rht_dereference(ht->tbl, ht); - unsigned int i; + struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht); + spinlock_t *new_bucket_lock, *old_bucket_lock1, *old_bucket_lock2; + unsigned int new_hash; ASSERT_RHT_MUTEX(ht); if (ht->shift <= ht->p.min_shift) return 0; - ntbl = bucket_table_alloc(tbl->size / 2); - if (ntbl == NULL) + new_tbl = bucket_table_alloc(ht, tbl->size / 2); + if (new_tbl == NULL) return -ENOMEM; - ht->shift--; + rcu_assign_pointer(ht->future_tbl, new_tbl); + synchronize_rcu(); - /* Link each bucket in the new table to the first bucket - * in the old table that contains entries which will hash - * to the new bucket. + /* Link the first entry in the old bucket to the end of the + * bucket in the new table. As entries are concurrently being + * added to the new table, lock down the new bucket. As we + * always divide the size in half when shrinking, each bucket + * in the new table maps to exactly two buckets in the old + * table. + * + * As removals can occur concurrently on the old table, we need + * to lock down both matching buckets in the old table. */ - for (i = 0; i < ntbl->size; i++) { - ntbl->buckets[i] = tbl->buckets[i]; - RCU_INIT_POINTER(*bucket_tail(ntbl, i), - tbl->buckets[i + ntbl->size]); - + for (new_hash = 0; new_hash < new_tbl->size; new_hash++) { + old_bucket_lock1 = bucket_lock(tbl, new_hash); + old_bucket_lock2 = bucket_lock(tbl, new_hash + new_tbl->size); + new_bucket_lock = bucket_lock(new_tbl, new_hash); + + spin_lock_bh(old_bucket_lock1); + spin_lock_bh_nested(old_bucket_lock2, RHT_LOCK_NESTED); + spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED2); + + rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), + tbl->buckets[new_hash]); + rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), + tbl->buckets[new_hash + new_tbl->size]); + + spin_unlock_bh(new_bucket_lock); + spin_unlock_bh(old_bucket_lock2); + spin_unlock_bh(old_bucket_lock1); } /* Publish the new, valid hash table */ - rcu_assign_pointer(ht->tbl, ntbl); + rcu_assign_pointer(ht->tbl, new_tbl); + ht->shift--; /* Wait for readers. No new readers will have references to the * old hash table. @@ -316,31 +463,63 @@ int rhashtable_shrink(struct rhashtable *ht) } EXPORT_SYMBOL_GPL(rhashtable_shrink); +static void rht_deferred_worker(struct work_struct *work) +{ + struct rhashtable *ht; + struct bucket_table *tbl; + + ht = container_of(work, struct rhashtable, run_work.work); + mutex_lock(&ht->mutex); + tbl = rht_dereference(ht->tbl, ht); + + if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) + rhashtable_expand(ht); + else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size)) + rhashtable_shrink(ht); + + mutex_unlock(&ht->mutex); +} + /** * rhashtable_insert - insert object into hash hash table * @ht: hash table * @obj: pointer to hash head inside object * - * Will automatically grow the table via rhashtable_expand() if the the - * grow_decision function specified at rhashtable_init() returns true. + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. * - * The caller must ensure that no concurrent table mutations occur. It is - * however valid to have concurrent lookups if they are RCU protected. + * It is safe to call this function from atomic context. 
+ * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). */ void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) { - struct bucket_table *tbl = rht_dereference(ht->tbl, ht); - u32 hash; + struct bucket_table *tbl; + spinlock_t *lock; + unsigned hash; - ASSERT_RHT_MUTEX(ht); + rcu_read_lock(); + tbl = rht_dereference_rcu(ht->future_tbl, ht); hash = head_hashfn(ht, tbl, obj); + lock = bucket_lock(tbl, hash); + + spin_lock_bh(lock); RCU_INIT_POINTER(obj->next, tbl->buckets[hash]); rcu_assign_pointer(tbl->buckets[hash], obj); - ht->nelems++; + spin_unlock_bh(lock); - if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) - rhashtable_expand(ht); + atomic_inc(&ht->nelems); + + /* Only grow the table if no resizing is currently in progress. */ + if (ht->tbl != ht->future_tbl && + ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) + schedule_delayed_work(&ht->run_work, 0); + + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rhashtable_insert); @@ -361,32 +540,56 @@ EXPORT_SYMBOL_GPL(rhashtable_insert); */ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) { - struct bucket_table *tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *tbl; struct rhash_head __rcu **pprev; struct rhash_head *he; - u32 h; + spinlock_t *lock; + unsigned int hash; - ASSERT_RHT_MUTEX(ht); + rcu_read_lock(); + tbl = rht_dereference_rcu(ht->tbl, ht); + hash = head_hashfn(ht, tbl, obj); - h = head_hashfn(ht, tbl, obj); + lock = bucket_lock(tbl, hash); + spin_lock_bh(lock); - pprev = &tbl->buckets[h]; - rht_for_each(he, tbl, h) { +restart: + pprev = &tbl->buckets[hash]; + rht_for_each(he, tbl, hash) { if (he != obj) { pprev = &he->next; continue; } - RCU_INIT_POINTER(*pprev, he->next); - ht->nelems--; + rcu_assign_pointer(*pprev, obj->next); + atomic_dec(&ht->nelems); - if (ht->p.shrink_decision && + spin_unlock_bh(lock); + + if (ht->tbl != ht->future_tbl && + ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size)) - rhashtable_shrink(ht); + schedule_delayed_work(&ht->run_work, 0); + + rcu_read_unlock(); return true; } + if (tbl != rht_dereference_rcu(ht->tbl, ht)) { + spin_unlock_bh(lock); + + tbl = rht_dereference_rcu(ht->tbl, ht); + hash = head_hashfn(ht, tbl, obj); + + lock = bucket_lock(tbl, hash); + spin_lock_bh(lock); + goto restart; + } + + spin_unlock_bh(lock); + rcu_read_unlock(); + return false; } EXPORT_SYMBOL_GPL(rhashtable_remove); @@ -402,25 +605,35 @@ EXPORT_SYMBOL_GPL(rhashtable_remove); * This lookup function may only be used for fixed key hash table (key_len * paramter set). It will BUG() if used inappropriately. * - * Lookups may occur in parallel with hash mutations as long as the lookup is - * guarded by rcu_read_lock(). The caller must take care of this. + * Lookups may occur in parallel with hashtable mutations and resizing. 
*/ -void *rhashtable_lookup(const struct rhashtable *ht, const void *key) +void *rhashtable_lookup(struct rhashtable *ht, const void *key) { - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + const struct bucket_table *tbl, *old_tbl; struct rhash_head *he; - u32 h; + u32 hash; BUG_ON(!ht->p.key_len); - h = key_hashfn(ht, key, ht->p.key_len); - rht_for_each_rcu(he, tbl, h) { + rcu_read_lock(); + old_tbl = rht_dereference_rcu(ht->tbl, ht); + tbl = rht_dereference_rcu(ht->future_tbl, ht); + hash = key_hashfn(ht, key, ht->p.key_len); +restart: + rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) { if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key, ht->p.key_len)) continue; + rcu_read_unlock(); return rht_obj(ht, he); } + if (unlikely(tbl != old_tbl)) { + tbl = old_tbl; + goto restart; + } + + rcu_read_unlock(); return NULL; } EXPORT_SYMBOL_GPL(rhashtable_lookup); @@ -435,25 +648,36 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup); * Traverses the bucket chain behind the provided hash value and calls the * specified compare function for each entry. * - * Lookups may occur in parallel with hash mutations as long as the lookup is - * guarded by rcu_read_lock(). The caller must take care of this. + * Lookups may occur in parallel with hashtable mutations and resizing. * * Returns the first entry on which the compare function returned true. */ -void *rhashtable_lookup_compare(const struct rhashtable *ht, const void *key, +void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg) { - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + const struct bucket_table *tbl, *old_tbl; struct rhash_head *he; u32 hash; + rcu_read_lock(); + + old_tbl = rht_dereference_rcu(ht->tbl, ht); + tbl = rht_dereference_rcu(ht->future_tbl, ht); hash = key_hashfn(ht, key, ht->p.key_len); - rht_for_each_rcu(he, tbl, hash) { +restart: + rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) { if (!compare(rht_obj(ht, he), arg)) continue; + rcu_read_unlock(); return rht_obj(ht, he); } + if (unlikely(tbl != old_tbl)) { + tbl = old_tbl; + goto restart; + } + rcu_read_unlock(); + return NULL; } EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); @@ -485,9 +709,6 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params) * .key_offset = offsetof(struct test_obj, key), * .key_len = sizeof(int), * .hashfn = jhash, - * #ifdef CONFIG_PROVE_LOCKING - * .mutex_is_held = &my_mutex_is_held, - * #endif * }; * * Configuration Example 2: Variable length keys @@ -507,9 +728,6 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params) * .head_offset = offsetof(struct test_obj, node), * .hashfn = jhash, * .obj_hashfn = my_hash_fn, - * #ifdef CONFIG_PROVE_LOCKING - * .mutex_is_held = &my_mutex_is_held, - * #endif * }; */ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) @@ -529,18 +747,29 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) if (params->nelem_hint) size = rounded_hashtable_size(params); - tbl = bucket_table_alloc(size); + memset(ht, 0, sizeof(*ht)); + mutex_init(&ht->mutex); + memcpy(&ht->p, params, sizeof(*params)); + + if (params->locks_mul) + ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); + else + ht->p.locks_mul = BUCKET_LOCKS_PER_CPU; + + tbl = bucket_table_alloc(ht, size); if (tbl == NULL) return -ENOMEM; - memset(ht, 0, sizeof(*ht)); ht->shift = ilog2(tbl->size); - memcpy(&ht->p, params, sizeof(*params)); RCU_INIT_POINTER(ht->tbl, tbl); + 
RCU_INIT_POINTER(ht->future_tbl, tbl); if (!ht->p.hash_rnd) get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); + if (ht->p.grow_decision || ht->p.shrink_decision) + INIT_DEFERRABLE_WORK(&ht->run_work, rht_deferred_worker); + return 0; } EXPORT_SYMBOL_GPL(rhashtable_init); @@ -553,9 +782,16 @@ EXPORT_SYMBOL_GPL(rhashtable_init); * has to make sure that no resizing may happen by unpublishing the hashtable * and waiting for the quiescent cycle before releasing the bucket array. */ -void rhashtable_destroy(const struct rhashtable *ht) +void rhashtable_destroy(struct rhashtable *ht) { - bucket_table_free(ht->tbl); + ht->being_destroyed = true; + + mutex_lock(&ht->mutex); + + cancel_delayed_work(&ht->run_work); + bucket_table_free(rht_dereference(ht->tbl, ht)); + + mutex_unlock(&ht->mutex); } EXPORT_SYMBOL_GPL(rhashtable_destroy); @@ -570,13 +806,6 @@ EXPORT_SYMBOL_GPL(rhashtable_destroy); #define TEST_PTR ((void *) 0xdeadbeef) #define TEST_NEXPANDS 4 -#ifdef CONFIG_PROVE_LOCKING -static int test_mutex_is_held(void *parent) -{ - return 1; -} -#endif - struct test_obj { void *ptr; int value; @@ -646,10 +875,10 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet) i, tbl->buckets[i], cnt); } - pr_info(" Traversal complete: counted=%u, nelems=%zu, entries=%d\n", - total, ht->nelems, TEST_ENTRIES); + pr_info(" Traversal complete: counted=%u, nelems=%u, entries=%d\n", + total, atomic_read(&ht->nelems), TEST_ENTRIES); - if (total != ht->nelems || total != TEST_ENTRIES) + if (total != atomic_read(&ht->nelems) || total != TEST_ENTRIES) pr_warn("Test failed: Total count mismatch ^^^"); } @@ -688,7 +917,9 @@ static int __init test_rhashtable(struct rhashtable *ht) for (i = 0; i < TEST_NEXPANDS; i++) { pr_info(" Table expansion iteration %u...\n", i); + mutex_lock(&ht->mutex); rhashtable_expand(ht); + mutex_unlock(&ht->mutex); rcu_read_lock(); pr_info(" Verifying lookups...\n"); @@ -698,7 +929,9 @@ static int __init test_rhashtable(struct rhashtable *ht) for (i = 0; i < TEST_NEXPANDS; i++) { pr_info(" Table shrinkage iteration %u...\n", i); + mutex_lock(&ht->mutex); rhashtable_shrink(ht); + mutex_unlock(&ht->mutex); rcu_read_lock(); pr_info(" Verifying lookups...\n"); @@ -741,9 +974,6 @@ static int __init test_rht_init(void) .key_offset = offsetof(struct test_obj, value), .key_len = sizeof(int), .hashfn = jhash, -#ifdef CONFIG_PROVE_LOCKING - .mutex_is_held = &test_mutex_is_held, -#endif .grow_decision = rht_grow_above_75, .shrink_decision = rht_shrink_below_30, }; diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c index 7f903cf9a1b9..75887d7d2c6a 100644 --- a/net/netfilter/nft_hash.c +++ b/net/netfilter/nft_hash.c @@ -33,7 +33,7 @@ static bool nft_hash_lookup(const struct nft_set *set, const struct nft_data *key, struct nft_data *data) { - const struct rhashtable *priv = nft_set_priv(set); + struct rhashtable *priv = nft_set_priv(set); const struct nft_hash_elem *he; he = rhashtable_lookup(priv, key); @@ -113,7 +113,7 @@ static bool nft_hash_compare(void *ptr, void *arg) static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) { - const struct rhashtable *priv = nft_set_priv(set); + struct rhashtable *priv = nft_set_priv(set); struct nft_compare_arg arg = { .set = set, .elem = elem, @@ -129,7 +129,7 @@ static int nft_hash_get(const struct nft_set *set, struct nft_set_elem *elem) static void nft_hash_walk(const struct nft_ctx *ctx, const struct nft_set *set, struct nft_set_iter *iter) { - const struct rhashtable *priv = nft_set_priv(set); + struct 
rhashtable *priv = nft_set_priv(set); const struct bucket_table *tbl; const struct nft_hash_elem *he; struct nft_set_elem elem; @@ -162,13 +162,6 @@ static unsigned int nft_hash_privsize(const struct nlattr * const nla[]) return sizeof(struct rhashtable); } -#ifdef CONFIG_PROVE_LOCKING -static int lockdep_nfnl_lock_is_held(void *parent) -{ - return lockdep_nfnl_is_held(NFNL_SUBSYS_NFTABLES); -} -#endif - static int nft_hash_init(const struct nft_set *set, const struct nft_set_desc *desc, const struct nlattr * const tb[]) @@ -182,9 +175,6 @@ static int nft_hash_init(const struct nft_set *set, .hashfn = jhash, .grow_decision = rht_grow_above_75, .shrink_decision = rht_shrink_below_30, -#ifdef CONFIG_PROVE_LOCKING - .mutex_is_held = lockdep_nfnl_lock_is_held, -#endif }; return rhashtable_init(priv, ¶ms); @@ -192,16 +182,23 @@ static int nft_hash_init(const struct nft_set *set, static void nft_hash_destroy(const struct nft_set *set) { - const struct rhashtable *priv = nft_set_priv(set); - const struct bucket_table *tbl = priv->tbl; + struct rhashtable *priv = nft_set_priv(set); + const struct bucket_table *tbl; struct nft_hash_elem *he; struct rhash_head *pos, *next; unsigned int i; + /* Stop an eventual async resizing */ + priv->being_destroyed = true; + mutex_lock(&priv->mutex); + + tbl = rht_dereference(priv->tbl, priv); for (i = 0; i < tbl->size; i++) { rht_for_each_entry_safe(he, pos, next, tbl, i, node) nft_hash_elem_destroy(set, he); } + mutex_unlock(&priv->mutex); + rhashtable_destroy(priv); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 57449b6089c2..738c3bfaa564 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -114,15 +114,6 @@ static atomic_t nl_table_users = ATOMIC_INIT(0); DEFINE_MUTEX(nl_sk_hash_lock); EXPORT_SYMBOL_GPL(nl_sk_hash_lock); -#ifdef CONFIG_PROVE_LOCKING -static int lockdep_nl_sk_hash_is_held(void *parent) -{ - if (debug_locks) - return lockdep_is_held(&nl_sk_hash_lock) || lockdep_is_held(&nl_table_lock); - return 1; -} -#endif - static ATOMIC_NOTIFIER_HEAD(netlink_chain); static DEFINE_SPINLOCK(netlink_tap_lock); @@ -1063,7 +1054,8 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 portid) goto err; err = -ENOMEM; - if (BITS_PER_LONG > 32 && unlikely(table->hash.nelems >= UINT_MAX)) + if (BITS_PER_LONG > 32 && + unlikely(atomic_read(&table->hash.nelems) >= UINT_MAX)) goto err; nlk_sk(sk)->portid = portid; @@ -3122,9 +3114,6 @@ static int __init netlink_proto_init(void) .max_shift = 16, /* 64K */ .grow_decision = rht_grow_above_75, .shrink_decision = rht_shrink_below_30, -#ifdef CONFIG_PROVE_LOCKING - .mutex_is_held = lockdep_nl_sk_hash_is_held, -#endif }; if (err != 0) -- cgit v1.2.3-70-g09d2 From f89bd6f87a53ce5a7d60662429591ebac2745c10 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 2 Jan 2015 23:00:21 +0100 Subject: rhashtable: Supports for nulls marker In order to allow for wider usage of rhashtable, use a special nulls marker to terminate each chain. The reason for not using the existing nulls_list is that the prev pointer usage would not be valid as entries can be linked in two different buckets at the same time. The 4 nulls base bits can be set through the rhashtable_params structure like this: struct rhashtable_params params = { [...] .nulls_base = (1U << RHT_BASE_SHIFT), }; This reduces the hash length from 32 bits to 27 bits. Signed-off-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/linux/list_nulls.h | 3 ++- include/linux/rhashtable.h | 57 ++++++++++++++++++++++++++++++++++++++-------- lib/rhashtable.c | 37 ++++++++++++++++++++++++------ 3 files changed, 79 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index 5d10ae364b5e..e8c300e06438 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -21,8 +21,9 @@ struct hlist_nulls_head { struct hlist_nulls_node { struct hlist_nulls_node *next, **pprev; }; +#define NULLS_MARKER(value) (1UL | (((long)value) << 1)) #define INIT_HLIST_NULLS_HEAD(ptr, nulls) \ - ((ptr)->first = (struct hlist_nulls_node *) (1UL | (((long)nulls) << 1))) + ((ptr)->first = (struct hlist_nulls_node *) NULLS_MARKER(nulls)) #define hlist_nulls_entry(ptr, type, member) container_of(ptr,type,member) /** diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index a1688f0a6193..de7cac753b09 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -18,15 +18,32 @@ #ifndef _LINUX_RHASHTABLE_H #define _LINUX_RHASHTABLE_H -#include +#include #include +/* + * The end of the chain is marked with a special nulls marks which has + * the following format: + * + * +-------+-----------------------------------------------------+-+ + * | Base | Hash |1| + * +-------+-----------------------------------------------------+-+ + * + * Base (4 bits) : Reserved to distinguish between multiple tables. + * Specified via &struct rhashtable_params.nulls_base. + * Hash (27 bits): Full hash (unmasked) of first element added to bucket + * 1 (1 bit) : Nulls marker (always set) + * + * The remaining bits of the next pointer remain unused for now. + */ +#define RHT_BASE_BITS 4 +#define RHT_HASH_BITS 27 +#define RHT_BASE_SHIFT RHT_HASH_BITS + struct rhash_head { struct rhash_head __rcu *next; }; -#define INIT_HASH_HEAD(ptr) ((ptr)->next = NULL) - /** * struct bucket_table - Table of hash buckets * @size: Number of hash buckets @@ -55,6 +72,7 @@ struct rhashtable; * @hash_rnd: Seed to use while hashing * @max_shift: Maximum number of shifts while expanding * @min_shift: Minimum number of shifts while shrinking + * @nulls_base: Base value to generate nulls marker * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Function to hash key * @obj_hashfn: Function to hash object @@ -69,6 +87,7 @@ struct rhashtable_params { u32 hash_rnd; size_t max_shift; size_t min_shift; + u32 nulls_base; size_t locks_mul; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; @@ -100,6 +119,24 @@ struct rhashtable { bool being_destroyed; }; +static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash) +{ + return NULLS_MARKER(ht->p.nulls_base + hash); +} + +#define INIT_RHT_NULLS_HEAD(ptr, ht, hash) \ + ((ptr) = (typeof(ptr)) rht_marker(ht, hash)) + +static inline bool rht_is_a_nulls(const struct rhash_head *ptr) +{ + return ((unsigned long) ptr & 1); +} + +static inline unsigned long rht_get_nulls_value(const struct rhash_head *ptr) +{ + return ((unsigned long) ptr) >> 1; +} + #ifdef CONFIG_PROVE_LOCKING int lockdep_rht_mutex_is_held(struct rhashtable *ht); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash); @@ -157,7 +194,7 @@ void rhashtable_destroy(struct rhashtable *ht); */ #define rht_for_each_continue(pos, head, tbl, hash) \ for (pos = rht_dereference_bucket(head, tbl, hash); \ - pos; \ + !rht_is_a_nulls(pos); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) /** @@ -180,7 +217,7 @@ 
void rhashtable_destroy(struct rhashtable *ht); */ #define rht_for_each_entry_continue(tpos, pos, head, tbl, hash, member) \ for (pos = rht_dereference_bucket(head, tbl, hash); \ - pos && rht_entry(tpos, pos, member); \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket((pos)->next, tbl, hash)) /** @@ -209,9 +246,9 @@ void rhashtable_destroy(struct rhashtable *ht); */ #define rht_for_each_entry_safe(tpos, pos, next, tbl, hash, member) \ for (pos = rht_dereference_bucket((tbl)->buckets[hash], tbl, hash), \ - next = pos ? rht_dereference_bucket(pos->next, tbl, hash) \ - : NULL; \ - pos && rht_entry(tpos, pos, member); \ + next = !rht_is_a_nulls(pos) ? \ + rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = next) /** @@ -228,7 +265,7 @@ void rhashtable_destroy(struct rhashtable *ht); #define rht_for_each_rcu_continue(pos, head, tbl, hash) \ for (({barrier(); }), \ pos = rht_dereference_bucket_rcu(head, tbl, hash); \ - pos; \ + !rht_is_a_nulls(pos); \ pos = rcu_dereference_raw(pos->next)) /** @@ -260,7 +297,7 @@ void rhashtable_destroy(struct rhashtable *ht); #define rht_for_each_entry_rcu_continue(tpos, pos, head, tbl, hash, member) \ for (({barrier(); }), \ pos = rht_dereference_bucket_rcu(head, tbl, hash); \ - pos && rht_entry(tpos, pos, member); \ + (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ pos = rht_dereference_bucket_rcu(pos->next, tbl, hash)) /** diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 312e3437c7bc..cbad192d3b3d 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -28,6 +28,9 @@ #define HASH_MIN_SIZE 4UL #define BUCKET_LOCKS_PER_CPU 128UL +/* Base bits plus 1 bit for nulls marker */ +#define HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) + enum { RHT_LOCK_NORMAL, RHT_LOCK_NESTED, @@ -86,7 +89,7 @@ static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr) hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len, ht->p.hash_rnd); - return hash; + return hash >> HASH_RESERVED_SPACE; } static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len) @@ -95,6 +98,7 @@ static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len) u32 hash; hash = ht->p.hashfn(key, len, ht->p.hash_rnd); + hash >>= HASH_RESERVED_SPACE; return rht_bucket_index(tbl, hash); } @@ -111,7 +115,7 @@ static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n) struct rhash_head __rcu **pprev; for (pprev = &tbl->buckets[n]; - rht_dereference_bucket(*pprev, tbl, n); + !rht_is_a_nulls(rht_dereference_bucket(*pprev, tbl, n)); pprev = &rht_dereference_bucket(*pprev, tbl, n)->next) ; @@ -164,6 +168,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, { struct bucket_table *tbl; size_t size; + int i; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); @@ -180,6 +185,9 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, return NULL; } + for (i = 0; i < nbuckets; i++) + INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i); + return tbl; } @@ -221,7 +229,7 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, /* Old bucket empty, no work needed. 
*/ p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl, old_hash); - if (!p) + if (rht_is_a_nulls(p)) return; new_hash = new_hash2 = head_hashfn(ht, new_tbl, p); @@ -252,8 +260,8 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, /* Find the subsequent node which does hash to the same * bucket as node P, or NULL if no such node exists. */ - next = NULL; - if (he) { + INIT_RHT_NULLS_HEAD(next, ht, old_hash); + if (!rht_is_a_nulls(he)) { rht_for_each_continue(he, he->next, old_tbl, old_hash) { if (head_hashfn(ht, new_tbl, he) == new_hash) { next = he; @@ -369,11 +377,15 @@ int rhashtable_expand(struct rhashtable *ht) */ complete = true; for (old_hash = 0; old_hash < old_tbl->size; old_hash++) { + struct rhash_head *head; + old_bucket_lock = bucket_lock(old_tbl, old_hash); spin_lock_bh(old_bucket_lock); hashtable_chain_unzip(ht, new_tbl, old_tbl, old_hash); - if (old_tbl->buckets[old_hash] != NULL) + head = rht_dereference_bucket(old_tbl->buckets[old_hash], + old_tbl, old_hash); + if (!rht_is_a_nulls(head)) complete = false; spin_unlock_bh(old_bucket_lock); @@ -498,6 +510,7 @@ static void rht_deferred_worker(struct work_struct *work) void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) { struct bucket_table *tbl; + struct rhash_head *head; spinlock_t *lock; unsigned hash; @@ -508,7 +521,12 @@ void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) lock = bucket_lock(tbl, hash); spin_lock_bh(lock); - RCU_INIT_POINTER(obj->next, tbl->buckets[hash]); + head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); + if (rht_is_a_nulls(head)) + INIT_RHT_NULLS_HEAD(obj->next, ht, hash); + else + RCU_INIT_POINTER(obj->next, head); + rcu_assign_pointer(tbl->buckets[hash], obj); spin_unlock_bh(lock); @@ -709,6 +727,7 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params) * .key_offset = offsetof(struct test_obj, key), * .key_len = sizeof(int), * .hashfn = jhash, + * .nulls_base = (1U << RHT_BASE_SHIFT), * }; * * Configuration Example 2: Variable length keys @@ -741,6 +760,9 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) (!params->key_len && !params->obj_hashfn)) return -EINVAL; + if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT)) + return -EINVAL; + params->min_shift = max_t(size_t, params->min_shift, ilog2(HASH_MIN_SIZE)); @@ -974,6 +996,7 @@ static int __init test_rht_init(void) .key_offset = offsetof(struct test_obj, value), .key_len = sizeof(int), .hashfn = jhash, + .nulls_base = (3U << RHT_BASE_SHIFT), .grow_decision = rht_grow_above_75, .shrink_decision = rht_shrink_below_30, }; -- cgit v1.2.3-70-g09d2 From 043ec9bf7b9d7cdce84d2e8d3df9b9eb520d929e Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 2 Jan 2015 23:35:19 -0800 Subject: Bluetooth: Introduce HCI_QUIRK_FIXUP_INQUIRY_MODE option The HCI_QUIRK_FIXUP_INQUIRY_MODE option allows to force Inquiry Result with RSSI setting on controllers that do not indicate support for it, but where it is known to be fully functional. 
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 12 ++++++++++++ net/bluetooth/hci_core.c | 3 ++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index aee16bf5d34f..d0bca316b43b 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -102,6 +102,18 @@ enum { */ HCI_QUIRK_FIXUP_BUFFER_SIZE, + /* When this quirk is set, then a controller that does not + * indicate support for Inquiry Result with RSSI is assumed to + * support it anyway. Some early Bluetooth 1.2 controllers had + * wrongly configured local features that will require forcing + * them to enable this mode. Getting RSSI information with the + * inquiry responses is preferred since it allows for a better + * user experience. + * + * This quirk must be set before hci_register_dev is called. + */ + HCI_QUIRK_FIXUP_INQUIRY_MODE, + /* When this quirk is set, then the HCI Read Local Supported * Commands command is not supported. In general Bluetooth 1.2 * and later controllers should support this command. However diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index ebac859e1258..bc5486ea5411 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -621,7 +621,8 @@ static void hci_init2_req(struct hci_request *req, unsigned long opt) } } - if (lmp_inq_rssi_capable(hdev)) { + if (lmp_inq_rssi_capable(hdev) || + test_bit(HCI_QUIRK_FIXUP_INQUIRY_MODE, &hdev->quirks)) { u8 mode; /* If Extended Inquiry Result events are supported, then -- cgit v1.2.3-70-g09d2 From 61f3cade763dca46127146a52d829e30b8f48921 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 2 Jan 2015 18:26:02 -0800 Subject: geneve: Remove workqueue. The work queue is used only to free the UDP socket upon destruction. This is not necessary with Geneve and generally makes the code more difficult to reason about. It also introduces nondeterministic behavior such as when a socket is rapidly deleted and recreated, which could fail as the deletion happens asynchronously. Signed-off-by: Jesse Gross Signed-off-by: David S.
Miller --- include/net/geneve.h | 1 - net/ipv4/geneve.c | 21 ++------------------- 2 files changed, 2 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/geneve.h b/include/net/geneve.h index 112132cf8e2e..56c7e1ac216a 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -71,7 +71,6 @@ struct geneve_sock { struct hlist_node hlist; geneve_rcv_t *rcv; void *rcv_data; - struct work_struct del_work; struct socket *sock; struct rcu_head rcu; atomic_t refcnt; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 19e256e1dd92..136a829e8746 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -61,8 +61,6 @@ struct geneve_net { static int geneve_net_id; -static struct workqueue_struct *geneve_wq; - static inline struct genevehdr *geneve_hdr(const struct sk_buff *skb) { return (struct genevehdr *)(udp_hdr(skb) + 1); @@ -307,15 +305,6 @@ error: return 1; } -static void geneve_del_work(struct work_struct *work) -{ - struct geneve_sock *gs = container_of(work, struct geneve_sock, - del_work); - - udp_tunnel_sock_release(gs->sock); - kfree_rcu(gs, rcu); -} - static struct socket *geneve_create_sock(struct net *net, bool ipv6, __be16 port) { @@ -356,8 +345,6 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, if (!gs) return ERR_PTR(-ENOMEM); - INIT_WORK(&gs->del_work, geneve_del_work); - sock = geneve_create_sock(net, ipv6, port); if (IS_ERR(sock)) { kfree(gs); @@ -430,7 +417,8 @@ void geneve_sock_release(struct geneve_sock *gs) geneve_notify_del_rx_port(gs); spin_unlock(&gn->sock_lock); - queue_work(geneve_wq, &gs->del_work); + udp_tunnel_sock_release(gs->sock); + kfree_rcu(gs, rcu); } EXPORT_SYMBOL_GPL(geneve_sock_release); @@ -458,10 +446,6 @@ static int __init geneve_init_module(void) { int rc; - geneve_wq = alloc_workqueue("geneve", 0, 0); - if (!geneve_wq) - return -ENOMEM; - rc = register_pernet_subsys(&geneve_net_ops); if (rc) return rc; @@ -474,7 +458,6 @@ late_initcall(geneve_init_module); static void __exit geneve_cleanup_module(void) { - destroy_workqueue(geneve_wq); unregister_pernet_subsys(&geneve_net_ops); } module_exit(geneve_cleanup_module); -- cgit v1.2.3-70-g09d2 From 829a3ada9cc7d4c30fa61f8033403fb6c8f8092a Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 2 Jan 2015 18:26:03 -0800 Subject: geneve: Simplify locking. The existing Geneve locking scheme was pulled over directly from VXLAN. However, VXLAN has a number of built in mechanisms which make the locking more complex and are unlikely to be necessary with Geneve. This simplifies the locking to use a basic scheme of a mutex when doing updates plus RCU on receive. In addition to making the code easier to read, this also avoids the possibility of a race when creating or destroying sockets since UDP sockets and the list of Geneve sockets are protected by different locks. After this change, the entire operation is atomic. Signed-off-by: Jesse Gross Signed-off-by: David S. 
Miller --- include/net/geneve.h | 2 +- net/ipv4/geneve.c | 59 +++++++++++++++++++++++----------------------------- 2 files changed, 27 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/net/geneve.h b/include/net/geneve.h index 56c7e1ac216a..b40f4affc4cb 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -73,7 +73,7 @@ struct geneve_sock { void *rcv_data; struct socket *sock; struct rcu_head rcu; - atomic_t refcnt; + int refcnt; struct udp_offload udp_offloads; }; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 136a829e8746..ad8dbae11d01 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -50,13 +51,15 @@ #include #endif +/* Protects sock_list and refcounts. */ +static DEFINE_MUTEX(geneve_mutex); + #define PORT_HASH_BITS 8 #define PORT_HASH_SIZE (1<sock->sk)->inet_sport == port) return gs; } @@ -336,7 +339,6 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, geneve_rcv_t *rcv, void *data, bool ipv6) { - struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; struct socket *sock; struct udp_tunnel_sock_cfg tunnel_cfg; @@ -352,7 +354,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, } gs->sock = sock; - atomic_set(&gs->refcnt, 1); + gs->refcnt = 1; gs->rcv = rcv; gs->rcv_data = data; @@ -360,11 +362,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, gs->udp_offloads.port = port; gs->udp_offloads.callbacks.gro_receive = geneve_gro_receive; gs->udp_offloads.callbacks.gro_complete = geneve_gro_complete; - - spin_lock(&gn->sock_lock); - hlist_add_head_rcu(&gs->hlist, gs_head(net, port)); geneve_notify_add_rx_port(gs); - spin_unlock(&gn->sock_lock); /* Mark socket as an encapsulation socket */ tunnel_cfg.sk_user_data = gs; @@ -373,6 +371,8 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); + hlist_add_head(&gs->hlist, gs_head(net, port)); + return gs; } @@ -380,25 +380,21 @@ struct geneve_sock *geneve_sock_add(struct net *net, __be16 port, geneve_rcv_t *rcv, void *data, bool no_share, bool ipv6) { - struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; - gs = geneve_socket_create(net, port, rcv, data, ipv6); - if (!IS_ERR(gs)) - return gs; - - if (no_share) /* Return error if sharing is not allowed. 
*/ - return ERR_PTR(-EINVAL); + mutex_lock(&geneve_mutex); - spin_lock(&gn->sock_lock); gs = geneve_find_sock(net, port); - if (gs && ((gs->rcv != rcv) || - !atomic_add_unless(&gs->refcnt, 1, 0))) + if (gs) { + if (!no_share && gs->rcv == rcv) + gs->refcnt++; + else gs = ERR_PTR(-EBUSY); - spin_unlock(&gn->sock_lock); + } else { + gs = geneve_socket_create(net, port, rcv, data, ipv6); + } - if (!gs) - gs = ERR_PTR(-EINVAL); + mutex_unlock(&geneve_mutex); return gs; } @@ -406,19 +402,18 @@ EXPORT_SYMBOL_GPL(geneve_sock_add); void geneve_sock_release(struct geneve_sock *gs) { - struct net *net = sock_net(gs->sock->sk); - struct geneve_net *gn = net_generic(net, geneve_net_id); + mutex_lock(&geneve_mutex); - if (!atomic_dec_and_test(&gs->refcnt)) - return; + if (--gs->refcnt) + goto unlock; - spin_lock(&gn->sock_lock); - hlist_del_rcu(&gs->hlist); + hlist_del(&gs->hlist); geneve_notify_del_rx_port(gs); - spin_unlock(&gn->sock_lock); - udp_tunnel_sock_release(gs->sock); kfree_rcu(gs, rcu); + +unlock: + mutex_unlock(&geneve_mutex); } EXPORT_SYMBOL_GPL(geneve_sock_release); @@ -427,8 +422,6 @@ static __net_init int geneve_init_net(struct net *net) struct geneve_net *gn = net_generic(net, geneve_net_id); unsigned int h; - spin_lock_init(&gn->sock_lock); - for (h = 0; h < PORT_HASH_SIZE; ++h) INIT_HLIST_HEAD(&gn->sock_list[h]); @@ -454,7 +447,7 @@ static int __init geneve_init_module(void) return 0; } -late_initcall(geneve_init_module); +module_init(geneve_init_module); static void __exit geneve_cleanup_module(void) { -- cgit v1.2.3-70-g09d2 From df5dba8e52be50e615e03ef73b34611d82587f42 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Fri, 2 Jan 2015 18:26:04 -0800 Subject: geneve: Remove socket hash table. The hash table for open Geneve ports is used only on creation and deletion time. It is not performance critical and is not likely to grow to a large number of items. Therefore, this can be changed to use a simple linked list. Signed-off-by: Jesse Gross Signed-off-by: David S. Miller --- include/net/geneve.h | 2 +- net/ipv4/geneve.c | 26 +++++++------------------- 2 files changed, 8 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/geneve.h b/include/net/geneve.h index b40f4affc4cb..03aa2adb5bab 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -68,7 +68,7 @@ struct geneve_sock; typedef void (geneve_rcv_t)(struct geneve_sock *gs, struct sk_buff *skb); struct geneve_sock { - struct hlist_node hlist; + struct list_head list; geneve_rcv_t *rcv; void *rcv_data; struct socket *sock; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index ad8dbae11d01..4fe5a592821c 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -26,7 +26,6 @@ #include #include #include -#include #include #include #include @@ -54,12 +53,9 @@ /* Protects sock_list and refcounts. 
*/ static DEFINE_MUTEX(geneve_mutex); -#define PORT_HASH_BITS 8 -#define PORT_HASH_SIZE (1<sock_list[hash_32(ntohs(port), PORT_HASH_BITS)]; -} - /* Find geneve socket based on network namespace and UDP port */ static struct geneve_sock *geneve_find_sock(struct net *net, __be16 port) { + struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; - hlist_for_each_entry(gs, gs_head(net, port), hlist) { + list_for_each_entry(gs, &gn->sock_list, list) { if (inet_sk(gs->sock->sk)->inet_sport == port) return gs; } @@ -339,6 +329,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, geneve_rcv_t *rcv, void *data, bool ipv6) { + struct geneve_net *gn = net_generic(net, geneve_net_id); struct geneve_sock *gs; struct socket *sock; struct udp_tunnel_sock_cfg tunnel_cfg; @@ -371,7 +362,7 @@ static struct geneve_sock *geneve_socket_create(struct net *net, __be16 port, tunnel_cfg.encap_destroy = NULL; setup_udp_tunnel_sock(net, sock, &tunnel_cfg); - hlist_add_head(&gs->hlist, gs_head(net, port)); + list_add(&gs->list, &gn->sock_list); return gs; } @@ -407,7 +398,7 @@ void geneve_sock_release(struct geneve_sock *gs) if (--gs->refcnt) goto unlock; - hlist_del(&gs->hlist); + list_del(&gs->list); geneve_notify_del_rx_port(gs); udp_tunnel_sock_release(gs->sock); kfree_rcu(gs, rcu); @@ -420,17 +411,14 @@ EXPORT_SYMBOL_GPL(geneve_sock_release); static __net_init int geneve_init_net(struct net *net) { struct geneve_net *gn = net_generic(net, geneve_net_id); - unsigned int h; - for (h = 0; h < PORT_HASH_SIZE; ++h) - INIT_HLIST_HEAD(&gn->sock_list[h]); + INIT_LIST_HEAD(&gn->sock_list); return 0; } static struct pernet_operations geneve_net_ops = { .init = geneve_init_net, - .exit = NULL, .id = &geneve_net_id, .size = sizeof(struct geneve_net), }; -- cgit v1.2.3-70-g09d2 From 86b35b64ed7b6b38305dee67a0f2ddff2ca5455d Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Sun, 4 Jan 2015 15:25:09 +0800 Subject: rhashtable: fix missing header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixup below build error: include/linux/rhashtable.h: At top level: include/linux/rhashtable.h:118:34: error: field ‘mutex’ has incomplete type Signed-off-by: Ying Xue Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index de7cac753b09..de1459c74c4d 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -20,6 +20,7 @@ #include #include +#include /* * The end of the chain is marked with a special nulls marks which has -- cgit v1.2.3-70-g09d2 From a3449ded128d037e6b2bd7a59b0741de56506066 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Sun, 4 Jan 2015 15:24:35 +0800 Subject: list_nulls: fix missing header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixup below build error: include/linux/list_nulls.h: In function ‘hlist_nulls_del’: include/linux/list_nulls.h:84:13: error: ‘LIST_POISON2’ undeclared (first use in this function) Signed-off-by: Ying Xue Signed-off-by: David S. 
Miller --- include/linux/list_nulls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index e8c300e06438..f266661d2666 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -1,6 +1,9 @@ #ifndef _LINUX_LIST_NULLS_H #define _LINUX_LIST_NULLS_H +#include +#include + /* * Special version of lists, where end of list is not a NULL pointer, * but a 'nulls' marker, which can have many different values. -- cgit v1.2.3-70-g09d2 From 1803f594cbf9bb2e662ac945038113d0d0cc5e89 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 5 Jan 2015 11:16:42 +0100 Subject: nl80211: document NL80211_BSS_STATUS_AUTHENTICATED isn't used The flag is no longer used (and hasn't been for a long time) since trying to track authentication (and make decisions based on state) was just causing issues all over - see commit 95de817b9034d50860319f6033ec85d25024694c. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 18cb0aa06351..54f391141351 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -3265,6 +3265,9 @@ enum nl80211_bss { /** * enum nl80211_bss_status - BSS "status" * @NL80211_BSS_STATUS_AUTHENTICATED: Authenticated with this BSS. + * Note that this is no longer used since cfg80211 no longer + * keeps track of whether or not authentication was done with + * a given BSS. * @NL80211_BSS_STATUS_ASSOCIATED: Associated with this BSS. * @NL80211_BSS_STATUS_IBSS_JOINED: Joined to this IBSS. * -- cgit v1.2.3-70-g09d2 From 149118d89355fb0e1a898f47977f8ae9be8e14e7 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 6 Jan 2015 01:04:21 +0100 Subject: netlink: Warn on unordered or illegal nla_nest_cancel() or nlmsg_cancel() Calling nla_nest_cancel() in a different order as the nesting was built up can lead to negative offsets being calculated which results in skb_trim() being called with an underflowed unsigned int. Warn if mark < skb->data as it's definitely a bug. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/netlink.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/netlink.h b/include/net/netlink.h index 64158353ecb2..d5869b90bfbb 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -520,8 +520,10 @@ static inline void *nlmsg_get_pos(struct sk_buff *skb) */ static inline void nlmsg_trim(struct sk_buff *skb, const void *mark) { - if (mark) + if (mark) { + WARN_ON((unsigned char *) mark < skb->data); skb_trim(skb, (unsigned char *) mark - skb->data); + } } /** -- cgit v1.2.3-70-g09d2 From 224d019c4fbba242041e9b25a926ba873b7da1e2 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 5 Jan 2015 13:56:14 -0800 Subject: ip: Move checksum convert defines to inet Move convert_csum from udp_sock to inet_sock. This allows the possibility that we can use convert checksum for different types of sockets and also allows convert checksum to be enabled from inet layer (what we'll want to do when enabling IP_CHECKSUM cmsg). Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/linux/udp.h | 16 +--------------- include/net/inet_sock.h | 17 +++++++++++++++++ net/ipv4/fou.c | 2 +- net/ipv4/udp.c | 2 +- net/ipv4/udp_tunnel.c | 2 +- net/ipv6/udp.c | 2 +- 6 files changed, 22 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/udp.h b/include/linux/udp.h index ee3277593222..247cfdcc4b08 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -49,11 +49,7 @@ struct udp_sock { unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ - no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ - convert_csum:1;/* On receive, convert checksum - * unnecessary to checksum complete - * if possible. - */ + no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -102,16 +98,6 @@ static inline bool udp_get_no_check6_rx(struct sock *sk) return udp_sk(sk)->no_check6_rx; } -static inline void udp_set_convert_csum(struct sock *sk, bool val) -{ - udp_sk(sk)->convert_csum = val; -} - -static inline bool udp_get_convert_csum(struct sock *sk) -{ - return udp_sk(sk)->convert_csum; -} - #define udp_portaddr_for_each_entry(__sk, node, list) \ hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node) diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index a829b77523cf..360b110b3e36 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -184,6 +184,7 @@ struct inet_sock { mc_all:1, nodefrag:1; __u8 rcv_tos; + __u8 convert_csum; int uc_index; int mc_index; __be32 mc_addr; @@ -250,4 +251,20 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk) return flags; } +static inline void inet_inc_convert_csum(struct sock *sk) +{ + inet_sk(sk)->convert_csum++; +} + +static inline void inet_dec_convert_csum(struct sock *sk) +{ + if (inet_sk(sk)->convert_csum > 0) + inet_sk(sk)->convert_csum--; +} + +static inline bool inet_get_convert_csum(struct sock *sk) +{ + return !!inet_sk(sk)->convert_csum; +} + #endif /* _INET_SOCK_H */ diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index b986298a7ba3..2197c36f722f 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -490,7 +490,7 @@ static int fou_create(struct net *net, struct fou_cfg *cfg, sk->sk_user_data = fou; fou->sock = sock; - udp_set_convert_csum(sk, true); + inet_inc_convert_csum(sk); sk->sk_allocation = GFP_ATOMIC; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 13b4dcf86ef6..53358d88f110 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1806,7 +1806,7 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; - if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, inet_compute_pseudo); diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 1671263e5fa0..9996e63ed304 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -63,7 +63,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, inet_sk(sk)->mc_loop = 0; /* Enable CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion */ - udp_set_convert_csum(sk, true); + inet_inc_convert_csum(sk); rcu_assign_sk_user_data(sk, cfg->sk_user_data); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 189dc4ae3eca..e41f017cd479 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -909,7 +909,7 @@ int 
__udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, goto csum_error; } - if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + if (inet_get_convert_csum(sk) && uh->check && !IS_UDPLITE(sk)) skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, ip6_compute_pseudo); -- cgit v1.2.3-70-g09d2 From c44d13d6f341ca59f3d6646f2337d4d3c8a814a6 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 5 Jan 2015 13:56:15 -0800 Subject: ip: IP cmsg cleanup Move the IP_CMSG_* constants from ip_sockglue.c to inet_sock.h so that they can be referenced in other source files. Restructure ip_cmsg_recv to not go through flags using shift, check for flags by 'and'. This eliminates both the shift and a conditional per flag check. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/inet_sock.h | 11 ++++++++- net/ipv4/ip_sockglue.c | 64 ++++++++++++++++++++++++++++--------------------- 2 files changed, 47 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 360b110b3e36..605ca421d5ab 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -16,7 +16,7 @@ #ifndef _INET_SOCK_H #define _INET_SOCK_H - +#include #include #include #include @@ -195,6 +195,15 @@ struct inet_sock { #define IPCORK_OPT 1 /* ip-options has been held in ipcork.opt */ #define IPCORK_ALLFRAG 2 /* always fragment (for ipv6 for now) */ +/* cmsg flags for inet */ +#define IP_CMSG_PKTINFO BIT(0) +#define IP_CMSG_TTL BIT(1) +#define IP_CMSG_TOS BIT(2) +#define IP_CMSG_RECVOPTS BIT(3) +#define IP_CMSG_RETOPTS BIT(4) +#define IP_CMSG_PASSSEC BIT(5) +#define IP_CMSG_ORIGDSTADDR BIT(6) + static inline struct inet_sock *inet_sk(const struct sock *sk) { return (struct inet_sock *)sk; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 8a89c738b7a3..80f78565b41b 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -45,14 +45,6 @@ #include #include -#define IP_CMSG_PKTINFO 1 -#define IP_CMSG_TTL 2 -#define IP_CMSG_TOS 4 -#define IP_CMSG_RECVOPTS 8 -#define IP_CMSG_RETOPTS 16 -#define IP_CMSG_PASSSEC 32 -#define IP_CMSG_ORIGDSTADDR 64 - /* * SOL_IP control messages. 
*/ @@ -150,37 +142,55 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) unsigned int flags = inet->cmsg_flags; /* Ordered by supposed usage frequency */ - if (flags & 1) + if (flags & IP_CMSG_PKTINFO) { ip_cmsg_recv_pktinfo(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_PKTINFO; + if (!flags) + return; + } + + if (flags & IP_CMSG_TTL) { ip_cmsg_recv_ttl(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_TTL; + if (!flags) + return; + } + + if (flags & IP_CMSG_TOS) { ip_cmsg_recv_tos(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_TOS; + if (!flags) + return; + } + + if (flags & IP_CMSG_RECVOPTS) { ip_cmsg_recv_opts(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_RECVOPTS; + if (!flags) + return; + } + + if (flags & IP_CMSG_RETOPTS) { ip_cmsg_recv_retopts(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_RETOPTS; + if (!flags) + return; + } + + if (flags & IP_CMSG_PASSSEC) { ip_cmsg_recv_security(msg, skb); - if ((flags >>= 1) == 0) - return; - if (flags & 1) + flags &= ~IP_CMSG_PASSSEC; + if (!flags) + return; + } + + if (flags & IP_CMSG_ORIGDSTADDR) ip_cmsg_recv_dstaddr(msg, skb); } -- cgit v1.2.3-70-g09d2 From 5961de9f199bef6ff437d7d85fe69b6a1964739b Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 5 Jan 2015 13:56:16 -0800 Subject: ip: Add offset parameter to ip_cmsg_recv Add ip_cmsg_recv_offset function which takes an offset argument that indicates the starting offset in skb where data is being received from. This will be useful in the case of UDP and provided checksum to user space. ip_cmsg_recv is an inline call to ip_cmsg_recv_offset with offset of zero. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/net/ip.h | 7 ++++++- net/ipv4/ip_sockglue.c | 5 +++-- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 0bb620702929..0e5a0bae187f 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -537,7 +537,7 @@ int ip_options_rcv_srr(struct sk_buff *skb); */ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb); -void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb); +void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, int offset); int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6); int ip_setsockopt(struct sock *sk, int level, int optname, char __user *optval, @@ -557,6 +557,11 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, u32 info); +static inline void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) +{ + ip_cmsg_recv_offset(msg, skb, 0); +} + bool icmp_global_allow(void); extern int sysctl_icmp_msgs_per_sec; extern int sysctl_icmp_msgs_burst; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 80f78565b41b..513d506ffebb 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -136,7 +136,8 @@ static void ip_cmsg_recv_dstaddr(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_ORIGDSTADDR, sizeof(sin), &sin); } -void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) +void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, + int offset) { struct inet_sock *inet = inet_sk(skb->sk); unsigned int flags = inet->cmsg_flags; @@ -194,7 +195,7 @@ void ip_cmsg_recv(struct msghdr *msg, struct sk_buff *skb) ip_cmsg_recv_dstaddr(msg, skb); } -EXPORT_SYMBOL(ip_cmsg_recv); +EXPORT_SYMBOL(ip_cmsg_recv_offset); int ip_cmsg_send(struct net *net, struct msghdr *msg, struct ipcm_cookie *ipc, bool allow_ipv6) -- cgit v1.2.3-70-g09d2 From ad6f939ab193750cc94a265f58e007fb598c97b7 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 5 Jan 2015 13:56:17 -0800 Subject: ip: Add offset parameter to ip_cmsg_recv Add ip_cmsg_recv_offset function which takes an offset argument that indicates the starting offset in skb where data is being received from. This will be useful in the case of UDP and provided checksum to user space. ip_cmsg_recv is an inline call to ip_cmsg_recv_offset with offset of zero. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/net/inet_sock.h | 1 + include/uapi/linux/in.h | 1 + net/ipv4/ip_sockglue.c | 41 ++++++++++++++++++++++++++++++++++++++++- net/ipv4/udp.c | 2 +- 4 files changed, 43 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 605ca421d5ab..eb16c7beed1e 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -203,6 +203,7 @@ struct inet_sock { #define IP_CMSG_RETOPTS BIT(4) #define IP_CMSG_PASSSEC BIT(5) #define IP_CMSG_ORIGDSTADDR BIT(6) +#define IP_CMSG_CHECKSUM BIT(7) static inline struct inet_sock *inet_sk(const struct sock *sk) { diff --git a/include/uapi/linux/in.h b/include/uapi/linux/in.h index c33a65e3d62c..589ced069e8a 100644 --- a/include/uapi/linux/in.h +++ b/include/uapi/linux/in.h @@ -109,6 +109,7 @@ struct in_addr { #define IP_MINTTL 21 #define IP_NODEFRAG 22 +#define IP_CHECKSUM 23 /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 513d506ffebb..a317797b3cd0 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -37,6 +37,7 @@ #include #include #include +#include #if IS_ENABLED(CONFIG_IPV6) #include #endif @@ -96,6 +97,20 @@ static void ip_cmsg_recv_retopts(struct msghdr *msg, struct sk_buff *skb) put_cmsg(msg, SOL_IP, IP_RETOPTS, opt->optlen, opt->__data); } +static void ip_cmsg_recv_checksum(struct msghdr *msg, struct sk_buff *skb, + int offset) +{ + __wsum csum = skb->csum; + + if (skb->ip_summed != CHECKSUM_COMPLETE) + return; + + if (offset != 0) + csum = csum_sub(csum, csum_partial(skb->data, offset, 0)); + + put_cmsg(msg, SOL_IP, IP_CHECKSUM, sizeof(__wsum), &csum); +} + static void ip_cmsg_recv_security(struct msghdr *msg, struct sk_buff *skb) { char *secdata; @@ -191,9 +206,16 @@ void ip_cmsg_recv_offset(struct msghdr *msg, struct sk_buff *skb, return; } - if (flags & IP_CMSG_ORIGDSTADDR) + if (flags & IP_CMSG_ORIGDSTADDR) { ip_cmsg_recv_dstaddr(msg, skb); + flags &= ~IP_CMSG_ORIGDSTADDR; + if (!flags) + return; + } + + if (flags & IP_CMSG_CHECKSUM) + ip_cmsg_recv_checksum(msg, skb, offset); } EXPORT_SYMBOL(ip_cmsg_recv_offset); @@ -533,6 +555,7 @@ static int do_ip_setsockopt(struct sock *sk, int level, case IP_MULTICAST_ALL: case IP_MULTICAST_LOOP: case IP_RECVORIGDSTADDR: + case IP_CHECKSUM: if (optlen >= sizeof(int)) { if (get_user(val, (int __user *) optval)) return -EFAULT; @@ -630,6 +653,19 @@ static int do_ip_setsockopt(struct sock *sk, int level, else inet->cmsg_flags &= ~IP_CMSG_ORIGDSTADDR; break; + case IP_CHECKSUM: + if (val) { + if (!(inet->cmsg_flags & IP_CMSG_CHECKSUM)) { + inet_inc_convert_csum(sk); + inet->cmsg_flags |= IP_CMSG_CHECKSUM; + } + } else { + if (inet->cmsg_flags & IP_CMSG_CHECKSUM) { + inet_dec_convert_csum(sk); + inet->cmsg_flags &= ~IP_CMSG_CHECKSUM; + } + } + break; case IP_TOS: /* This sets both TOS and Precedence */ if (sk->sk_type == SOCK_STREAM) { val &= ~INET_ECN_MASK; @@ -1233,6 +1269,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_RECVORIGDSTADDR: val = (inet->cmsg_flags & IP_CMSG_ORIGDSTADDR) != 0; break; + case IP_CHECKSUM: + val = (inet->cmsg_flags & IP_CMSG_CHECKSUM) != 0; + break; case IP_TOS: val = inet->tos; break; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 53358d88f110..97ef1f8b7be8 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1329,7 +1329,7 @@ try_again: *addr_len = sizeof(*sin); } if (inet->cmsg_flags) - ip_cmsg_recv(msg, skb); + ip_cmsg_recv_offset(msg, skb, 
sizeof(struct udphdr)); err = copied; if (flags & MSG_TRUNC) -- cgit v1.2.3-70-g09d2 From e715b6d3a5ef55834778d49224e60e8ccb5bf45f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 5 Jan 2015 23:57:44 +0100 Subject: net: fib6: convert cfg metric to u32 outside of table write lock Do the nla validation earlier, outside the write lock. This is needed by followup patch which needs to be able to call request_module (which can sleep) if needed. Joint work with Daniel Borkmann. Signed-off-by: Daniel Borkmann Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 10 +++++--- net/ipv6/ip6_fib.c | 69 +++++++++++++++++++++++++++------------------------ net/ipv6/route.c | 57 ++++++++++++++++++++++++++++++++++-------- 3 files changed, 90 insertions(+), 46 deletions(-) (limited to 'include') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 8eea35d32a75..20e80fa7bbdd 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -74,6 +74,11 @@ struct fib6_node { #define FIB6_SUBTREE(fn) ((fn)->subtree) #endif +struct mx6_config { + const u32 *mx; + DECLARE_BITMAP(mx_valid, RTAX_MAX); +}; + /* * routing information * @@ -291,9 +296,8 @@ struct fib6_node *fib6_locate(struct fib6_node *root, void fib6_clean_all(struct net *net, int (*func)(struct rt6_info *, void *arg), void *arg); -int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, - struct nlattr *mx, int mx_len); - +int fib6_add(struct fib6_node *root, struct rt6_info *rt, + struct nl_info *info, struct mx6_config *mxc); int fib6_del(struct rt6_info *rt, struct nl_info *info); void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index db4984e13f2f..03c520a4ebeb 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -630,31 +630,35 @@ static bool rt6_qualify_for_ecmp(struct rt6_info *rt) RTF_GATEWAY; } -static int fib6_commit_metrics(struct dst_entry *dst, - struct nlattr *mx, int mx_len) +static void fib6_copy_metrics(u32 *mp, const struct mx6_config *mxc) { - bool dst_host = dst->flags & DST_HOST; - struct nlattr *nla; - int remaining; - u32 *mp; + int i; - mp = dst_host ? dst_metrics_write_ptr(dst) : - kzalloc(sizeof(u32) * RTAX_MAX, GFP_ATOMIC); - if (unlikely(!mp)) - return -ENOMEM; - if (!dst_host) - dst_init_metrics(dst, mp, 0); + for (i = 0; i < RTAX_MAX; i++) { + if (test_bit(i, mxc->mx_valid)) + mp[i] = mxc->mx[i]; + } +} + +static int fib6_commit_metrics(struct dst_entry *dst, struct mx6_config *mxc) +{ + if (!mxc->mx) + return 0; - nla_for_each_attr(nla, mx, mx_len, remaining) { - int type = nla_type(nla); + if (dst->flags & DST_HOST) { + u32 *mp = dst_metrics_write_ptr(dst); - if (type) { - if (type > RTAX_MAX) - return -EINVAL; + if (unlikely(!mp)) + return -ENOMEM; - mp[type - 1] = nla_get_u32(nla); - } + fib6_copy_metrics(mp, mxc); + } else { + dst_init_metrics(dst, mxc->mx, false); + + /* We've stolen mx now. 
*/ + mxc->mx = NULL; } + return 0; } @@ -663,7 +667,7 @@ static int fib6_commit_metrics(struct dst_entry *dst, */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nl_info *info, struct nlattr *mx, int mx_len) + struct nl_info *info, struct mx6_config *mxc) { struct rt6_info *iter = NULL; struct rt6_info **ins; @@ -772,11 +776,10 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, pr_warn("NLM_F_CREATE should be set when creating new route\n"); add: - if (mx) { - err = fib6_commit_metrics(&rt->dst, mx, mx_len); - if (err) - return err; - } + err = fib6_commit_metrics(&rt->dst, mxc); + if (err) + return err; + rt->dst.rt6_next = iter; *ins = rt; rt->rt6i_node = fn; @@ -796,11 +799,11 @@ add: pr_warn("NLM_F_REPLACE set, but no existing node found!\n"); return -ENOENT; } - if (mx) { - err = fib6_commit_metrics(&rt->dst, mx, mx_len); - if (err) - return err; - } + + err = fib6_commit_metrics(&rt->dst, mxc); + if (err) + return err; + *ins = rt; rt->rt6i_node = fn; rt->dst.rt6_next = iter->dst.rt6_next; @@ -837,8 +840,8 @@ void fib6_force_start_gc(struct net *net) * with source addr info in sub-trees */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, - struct nlattr *mx, int mx_len) +int fib6_add(struct fib6_node *root, struct rt6_info *rt, + struct nl_info *info, struct mx6_config *mxc) { struct fib6_node *fn, *pn = NULL; int err = -ENOMEM; @@ -933,7 +936,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info, } #endif - err = fib6_add_rt2node(fn, rt, info, mx, mx_len); + err = fib6_add_rt2node(fn, rt, info, mxc); if (!err) { fib6_start_gc(info->nl_net, rt); if (!(rt->rt6i_flags & RTF_CACHE)) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index c91083156edb..454771d20b21 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -853,14 +853,14 @@ EXPORT_SYMBOL(rt6_lookup); */ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, - struct nlattr *mx, int mx_len) + struct mx6_config *mxc) { int err; struct fib6_table *table; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, info, mx, mx_len); + err = fib6_add(&table->tb6_root, rt, info, mxc); write_unlock_bh(&table->tb6_lock); return err; @@ -868,10 +868,10 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info, int ip6_ins_rt(struct rt6_info *rt) { - struct nl_info info = { - .nl_net = dev_net(rt->dst.dev), - }; - return __ip6_ins_rt(rt, &info, NULL, 0); + struct nl_info info = { .nl_net = dev_net(rt->dst.dev), }; + struct mx6_config mxc = { .mx = NULL, }; + + return __ip6_ins_rt(rt, &info, &mxc); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, @@ -1470,9 +1470,39 @@ out: return entries > rt_max_size; } -/* - * - */ +static int ip6_convert_metrics(struct mx6_config *mxc, + const struct fib6_config *cfg) +{ + struct nlattr *nla; + int remaining; + u32 *mp; + + if (cfg->fc_mx == NULL) + return 0; + + mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); + if (unlikely(!mp)) + return -ENOMEM; + + nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { + int type = nla_type(nla); + + if (type) { + if (unlikely(type > RTAX_MAX)) + goto err; + + mp[type - 1] = nla_get_u32(nla); + __set_bit(type - 1, mxc->mx_valid); + } + } + + mxc->mx = mp; + + return 0; + err: + kfree(mp); + return -EINVAL; +} int ip6_route_add(struct fib6_config *cfg) { @@ -1482,6 +1512,7 @@ int ip6_route_add(struct fib6_config *cfg) struct net_device *dev = NULL; struct inet6_dev 
*idev = NULL; struct fib6_table *table; + struct mx6_config mxc = { .mx = NULL, }; int addr_type; if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) @@ -1677,8 +1708,14 @@ install_route: cfg->fc_nlinfo.nl_net = dev_net(dev); - return __ip6_ins_rt(rt, &cfg->fc_nlinfo, cfg->fc_mx, cfg->fc_mx_len); + err = ip6_convert_metrics(&mxc, cfg); + if (err) + goto out; + + err = __ip6_ins_rt(rt, &cfg->fc_nlinfo, &mxc); + kfree(mxc.mx); + return err; out: if (dev) dev_put(dev); -- cgit v1.2.3-70-g09d2 From c5c6a8ab45ec0f18733afb4aaade0d4a139d80b3 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 5 Jan 2015 23:57:46 +0100 Subject: net: tcp: add key management to congestion control This patch adds necessary infrastructure to the congestion control framework for later per route congestion control support. For a per route congestion control possibility, our aim is to store a unique u32 key identifier into dst metrics, which can then be mapped into a tcp_congestion_ops struct. We argue that having a RTAX key entry is the most simple, generic and easy way to manage, and also keeps the memory footprint of dst entries lower on 64 bit than with storing a pointer directly, for example. Having a unique key id also allows for decoupling actual TCP congestion control module management from the FIB layer, i.e. we don't have to care about expensive module refcounting inside the FIB at this point. We first thought of using an IDR store for the realization, which takes over dynamic assignment of unused key space and also performs the key to pointer mapping in RCU. While doing so, we stumbled upon the issue that due to the nature of dynamic key distribution, it just so happens, arguably in very rare occasions, that excessive module loads and unloads can lead to a possible reuse of previously used key space. Thus, previously stale keys in the dst metric are now being reassigned to a different congestion control algorithm, which might lead to unexpected behaviour. One way to resolve this would have been to walk FIBs on the actually rare occasion of a module unload and reset the metric keys for each FIB in each netns, but that's just very costly. Therefore, we argue a better solution is to reuse the unique congestion control algorithm name member and map that into u32 key space through jhash. For that, we split the flags attribute (as it currently uses 2 bits only anyway) into two u32 attributes, flags and key, so that we can keep the cacheline boundary of 2 cachelines on x86_64 and cache the precalculated key at registration time for the fast path. On average we might expect 2 - 4 modules being loaded worst case perhaps 15, so a key collision possibility is extremely low, and guaranteed collision-free on LE/BE for all in-tree modules. Overall this results in much simpler code, and all without the overhead of an IDR. Due to the deterministic nature, modules can now be unloaded, the congestion control algorithm for a specific but unloaded key will fall back to the default one, and on module reload time it will switch back to the expected algorithm transparently. Joint work with Florian Westphal. Signed-off-by: Florian Westphal Signed-off-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/net/inet_connection_sock.h | 3 +- include/net/tcp.h | 9 +++- net/ipv4/tcp_cong.c | 97 +++++++++++++++++++++++++++++++------- 3 files changed, 91 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 848e85cb5c61..5976bdecf58b 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -98,7 +98,8 @@ struct inet_connection_sock { const struct tcp_congestion_ops *icsk_ca_ops; const struct inet_connection_sock_af_ops *icsk_af_ops; unsigned int (*icsk_sync_mss)(struct sock *sk, u32 pmtu); - __u8 icsk_ca_state; + __u8 icsk_ca_state:7, + icsk_ca_dst_locked:1; __u8 icsk_retransmits; __u8 icsk_pending; __u8 icsk_backoff; diff --git a/include/net/tcp.h b/include/net/tcp.h index f50f29faf76f..135b70c9a734 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -787,6 +787,8 @@ enum tcp_ca_ack_event_flags { #define TCP_CA_MAX 128 #define TCP_CA_BUF_MAX (TCP_CA_NAME_MAX*TCP_CA_MAX) +#define TCP_CA_UNSPEC 0 + /* Algorithm can be set on socket without CAP_NET_ADMIN privileges */ #define TCP_CONG_NON_RESTRICTED 0x1 /* Requires ECN/ECT set on all packets */ @@ -794,7 +796,8 @@ enum tcp_ca_ack_event_flags { struct tcp_congestion_ops { struct list_head list; - unsigned long flags; + u32 key; + u32 flags; /* initialize private data (optional) */ void (*init)(struct sock *sk); @@ -841,6 +844,10 @@ u32 tcp_reno_ssthresh(struct sock *sk); void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked); extern struct tcp_congestion_ops tcp_reno; +struct tcp_congestion_ops *tcp_ca_find_key(u32 key); +u32 tcp_ca_get_key_by_name(const char *name); +char *tcp_ca_get_name_by_key(u32 key, char *buffer); + static inline bool tcp_ca_needs_ecn(const struct sock *sk) { const struct inet_connection_sock *icsk = inet_csk(sk); diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c index 38f2f8aa4ceb..63c29dba68a8 100644 --- a/net/ipv4/tcp_cong.c +++ b/net/ipv4/tcp_cong.c @@ -13,6 +13,7 @@ #include #include #include +#include #include static DEFINE_SPINLOCK(tcp_cong_list_lock); @@ -31,6 +32,34 @@ static struct tcp_congestion_ops *tcp_ca_find(const char *name) return NULL; } +/* Must be called with rcu lock held */ +static const struct tcp_congestion_ops *__tcp_ca_find_autoload(const char *name) +{ + const struct tcp_congestion_ops *ca = tcp_ca_find(name); +#ifdef CONFIG_MODULES + if (!ca && capable(CAP_NET_ADMIN)) { + rcu_read_unlock(); + request_module("tcp_%s", name); + rcu_read_lock(); + ca = tcp_ca_find(name); + } +#endif + return ca; +} + +/* Simple linear search, not much in here. */ +struct tcp_congestion_ops *tcp_ca_find_key(u32 key) +{ + struct tcp_congestion_ops *e; + + list_for_each_entry_rcu(e, &tcp_cong_list, list) { + if (e->key == key) + return e; + } + + return NULL; +} + /* * Attach new congestion control algorithm to the list * of available options. 
@@ -45,9 +74,12 @@ int tcp_register_congestion_control(struct tcp_congestion_ops *ca) return -EINVAL; } + ca->key = jhash(ca->name, sizeof(ca->name), strlen(ca->name)); + spin_lock(&tcp_cong_list_lock); - if (tcp_ca_find(ca->name)) { - pr_notice("%s already registered\n", ca->name); + if (ca->key == TCP_CA_UNSPEC || tcp_ca_find_key(ca->key)) { + pr_notice("%s already registered or non-unique key\n", + ca->name); ret = -EEXIST; } else { list_add_tail_rcu(&ca->list, &tcp_cong_list); @@ -70,9 +102,50 @@ void tcp_unregister_congestion_control(struct tcp_congestion_ops *ca) spin_lock(&tcp_cong_list_lock); list_del_rcu(&ca->list); spin_unlock(&tcp_cong_list_lock); + + /* Wait for outstanding readers to complete before the + * module gets removed entirely. + * + * A try_module_get() should fail by now as our module is + * in "going" state since no refs are held anymore and + * module_exit() handler being called. + */ + synchronize_rcu(); } EXPORT_SYMBOL_GPL(tcp_unregister_congestion_control); +u32 tcp_ca_get_key_by_name(const char *name) +{ + const struct tcp_congestion_ops *ca; + u32 key; + + might_sleep(); + + rcu_read_lock(); + ca = __tcp_ca_find_autoload(name); + key = ca ? ca->key : TCP_CA_UNSPEC; + rcu_read_unlock(); + + return key; +} +EXPORT_SYMBOL_GPL(tcp_ca_get_key_by_name); + +char *tcp_ca_get_name_by_key(u32 key, char *buffer) +{ + const struct tcp_congestion_ops *ca; + char *ret = NULL; + + rcu_read_lock(); + ca = tcp_ca_find_key(key); + if (ca) + ret = strncpy(buffer, ca->name, + TCP_CA_NAME_MAX); + rcu_read_unlock(); + + return ret; +} +EXPORT_SYMBOL_GPL(tcp_ca_get_name_by_key); + /* Assign choice of congestion control. */ void tcp_assign_congestion_control(struct sock *sk) { @@ -253,25 +326,17 @@ out: int tcp_set_congestion_control(struct sock *sk, const char *name) { struct inet_connection_sock *icsk = inet_csk(sk); - struct tcp_congestion_ops *ca; + const struct tcp_congestion_ops *ca; int err = 0; - rcu_read_lock(); - ca = tcp_ca_find(name); + if (icsk->icsk_ca_dst_locked) + return -EPERM; - /* no change asking for existing value */ + rcu_read_lock(); + ca = __tcp_ca_find_autoload(name); + /* No change asking for existing value */ if (ca == icsk->icsk_ca_ops) goto out; - -#ifdef CONFIG_MODULES - /* not found attempt to autoload module */ - if (!ca && capable(CAP_NET_ADMIN)) { - rcu_read_unlock(); - request_module("tcp_%s", name); - rcu_read_lock(); - ca = tcp_ca_find(name); - } -#endif if (!ca) err = -ENOENT; else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) || -- cgit v1.2.3-70-g09d2 From ea697639992d96da98016b8934e68a73876a2264 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 5 Jan 2015 23:57:47 +0100 Subject: net: tcp: add RTAX_CC_ALGO fib handling This patch adds the minimum necessary for the RTAX_CC_ALGO congestion control metric to be set up and dumped back to user space. While the internal representation of RTAX_CC_ALGO is handled as a u32 key, we avoided to expose this implementation detail to user space, thus instead, we chose the netlink attribute that is being exchanged between user space to be the actual congestion control algorithm name, similarly as in the setsockopt(2) API in order to allow for maximum flexibility, even for 3rd party modules. It is a bit unfortunate that RTAX_QUICKACK used up a whole RTAX slot as it should have been stored in RTAX_FEATURES instead, we first thought about reusing it for the congestion control key, but it brings more complications and/or confusion than worth it. Joint work with Florian Westphal. 
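As a minimal sketch of the name/key mapping described above (illustration only, not part of the patch; it assumes the tcp_ca_get_key_by_name()/tcp_ca_get_name_by_key() helpers added by the previous patch, and the debug print is made up):

	char buf[TCP_CA_NAME_MAX];
	u32 key = tcp_ca_get_key_by_name("cubic");	/* jhash of the name, or TCP_CA_UNSPEC */

	if (key != TCP_CA_UNSPEC && tcp_ca_get_name_by_key(key, buf))
		pr_debug("RTAX_CC_ALGO key %u maps back to %s\n", key, buf);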
Signed-off-by: Florian Westphal Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/tcp.h | 7 +++++++ include/uapi/linux/rtnetlink.h | 2 ++ net/core/rtnetlink.c | 15 +++++++++++++-- net/decnet/dn_fib.c | 3 ++- net/decnet/dn_table.c | 4 +++- net/ipv4/fib_semantics.c | 14 ++++++++++++-- net/ipv6/route.c | 17 +++++++++++++++-- 7 files changed, 54 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 135b70c9a734..95bb237152e0 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -846,7 +846,14 @@ extern struct tcp_congestion_ops tcp_reno; struct tcp_congestion_ops *tcp_ca_find_key(u32 key); u32 tcp_ca_get_key_by_name(const char *name); +#ifdef CONFIG_INET char *tcp_ca_get_name_by_key(u32 key, char *buffer); +#else +static inline char *tcp_ca_get_name_by_key(u32 key, char *buffer) +{ + return NULL; +} +#endif static inline bool tcp_ca_needs_ecn(const struct sock *sk) { diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index 9c9b8b4480cd..d81f22d5b390 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -389,6 +389,8 @@ enum { #define RTAX_INITRWND RTAX_INITRWND RTAX_QUICKACK, #define RTAX_QUICKACK RTAX_QUICKACK + RTAX_CC_ALGO, +#define RTAX_CC_ALGO RTAX_CC_ALGO __RTAX_MAX }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index da983d4bac02..6a6cdade1676 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -669,9 +670,19 @@ int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) for (i = 0; i < RTAX_MAX; i++) { if (metrics[i]) { + if (i == RTAX_CC_ALGO - 1) { + char tmp[TCP_CA_NAME_MAX], *name; + + name = tcp_ca_get_name_by_key(metrics[i], tmp); + if (!name) + continue; + if (nla_put_string(skb, i + 1, name)) + goto nla_put_failure; + } else { + if (nla_put_u32(skb, i + 1, metrics[i])) + goto nla_put_failure; + } valid++; - if (nla_put_u32(skb, i+1, metrics[i])) - goto nla_put_failure; } } diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index d332aefb0846..df4803437888 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -298,7 +298,8 @@ struct dn_fib_info *dn_fib_create_info(const struct rtmsg *r, struct nlattr *att int type = nla_type(attr); if (type) { - if (type > RTAX_MAX || nla_len(attr) < 4) + if (type > RTAX_MAX || type == RTAX_CC_ALGO || + nla_len(attr) < 4) goto err_inval; fi->fib_metrics[type-1] = nla_get_u32(attr); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 86e3807052e9..3f19fcbf126d 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -29,6 +29,7 @@ #include /* RTF_xxx */ #include #include +#include #include #include #include @@ -273,7 +274,8 @@ static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi) size_t payload = NLMSG_ALIGN(sizeof(struct rtmsg)) + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(2) /* RTA_DST */ - + nla_total_size(4); /* RTA_PRIORITY */ + + nla_total_size(4) /* RTA_PRIORITY */ + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ /* space for nested metrics */ payload += nla_total_size((RTAX_MAX * nla_total_size(4))); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f99f41bd15b8..d2b7b5521b1b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -360,7 +360,8 @@ static inline size_t fib_nlmsg_size(struct fib_info *fi) + nla_total_size(4) /* RTA_TABLE */ + nla_total_size(4) /* RTA_DST */ + nla_total_size(4) /* 
RTA_PRIORITY */ - + nla_total_size(4); /* RTA_PREFSRC */ + + nla_total_size(4) /* RTA_PREFSRC */ + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ /* space for nested metrics */ payload += nla_total_size((RTAX_MAX * nla_total_size(4))); @@ -859,7 +860,16 @@ struct fib_info *fib_create_info(struct fib_config *cfg) if (type > RTAX_MAX) goto err_inval; - val = nla_get_u32(nla); + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(tmp); + if (val == TCP_CA_UNSPEC) + goto err_inval; + } else { + val = nla_get_u32(nla); + } if (type == RTAX_ADVMSS && val > 65535 - 40) val = 65535 - 40; if (type == RTAX_MTU && val > 65535 - 15) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 454771d20b21..34dcbb59df75 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1488,10 +1488,22 @@ static int ip6_convert_metrics(struct mx6_config *mxc, int type = nla_type(nla); if (type) { + u32 val; + if (unlikely(type > RTAX_MAX)) goto err; + if (type == RTAX_CC_ALGO) { + char tmp[TCP_CA_NAME_MAX]; + + nla_strlcpy(tmp, nla, sizeof(tmp)); + val = tcp_ca_get_key_by_name(tmp); + if (val == TCP_CA_UNSPEC) + goto err; + } else { + val = nla_get_u32(nla); + } - mp[type - 1] = nla_get_u32(nla); + mp[type - 1] = val; __set_bit(type - 1, mxc->mx_valid); } } @@ -2571,7 +2583,8 @@ static inline size_t rt6_nlmsg_size(void) + nla_total_size(4) /* RTA_OIF */ + nla_total_size(4) /* RTA_PRIORITY */ + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ - + nla_total_size(sizeof(struct rta_cacheinfo)); + + nla_total_size(sizeof(struct rta_cacheinfo)) + + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ } static int rt6_fill_node(struct net *net, -- cgit v1.2.3-70-g09d2 From 81164413ad096bafe8ad1068f3f095a7dd081d8b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 5 Jan 2015 23:57:48 +0100 Subject: net: tcp: add per route congestion control This work adds the possibility to define a per route/destination congestion control algorithm. Generally, this opens up the possibility for a machine with different links to enforce specific congestion control algorithms with optimal strategies for each of them based on their network characteristics, even transparently for a single application listening on all links. For our specific use case, this additionally facilitates deployment of DCTCP, for example, applications can easily serve internal traffic/dsts in DCTCP and external one with CUBIC. Other scenarios would also allow for utilizing e.g. long living, low priority background flows for certain destinations/routes while still being able for normal traffic to utilize the default congestion control algorithm. We also thought about a per netns setting (where different defaults are possible), but given it's actually a link specific property, we argue that a per route/destination setting is the most natural and flexible. The administrator can utilize this through ip-route(8) by appending "congctl [lock] <name>", where <name> denotes the name of a congestion control algorithm and the optional lock parameter allows to enforce the given algorithm so that applications in user space would not be allowed to overwrite that algorithm for that destination. The dst metric lookups are being done when a dst entry is already available in order to avoid a costly lookup and still before the algorithms are being initialized, thus overhead is very low when the feature is not being used.
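A hedged usage sketch of the ip-route(8) syntax just described (not part of the commit; the prefixes, device and an iproute2 build with congctl support are assumed):

	# serve this subnet with DCTCP, still overridable by applications
	ip route add 192.0.2.0/24 dev eth0 congctl dctcp

	# enforce CUBIC for this destination; setsockopt(TCP_CONGESTION) then fails with EPERM
	ip route add 198.51.100.0/24 via 192.0.2.1 congctl lock cubic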
While the client side would need to drop the current reference on the module, on server side this can actually even be avoided as we just got a flat-copied socket clone. Joint work with Florian Westphal. Suggested-by: Hannes Frederic Sowa Signed-off-by: Florian Westphal Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/tcp.h | 6 ++++++ net/ipv4/tcp_ipv4.c | 2 ++ net/ipv4/tcp_minisocks.c | 30 ++++++++++++++++++++++++++---- net/ipv4/tcp_output.c | 21 +++++++++++++++++++++ net/ipv6/tcp_ipv6.c | 2 ++ 5 files changed, 57 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index 95bb237152e0..b8fdc6bab3f3 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -448,6 +448,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb); struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct sk_buff *skb); +void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst); struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, struct request_sock *req, struct dst_entry *dst); @@ -636,6 +637,11 @@ static inline u32 tcp_rto_min_us(struct sock *sk) return jiffies_to_usecs(tcp_rto_min(sk)); } +static inline bool tcp_ca_dst_locked(const struct dst_entry *dst) +{ + return dst_metric_locked(dst, RTAX_CC_ALGO); +} + /* Compute the actual receive window we are currently advertising. * Rcv_nxt can be after the window if our peer push more data * than the offered window. diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a3f72d7fc06c..ad3e65bdd368 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1340,6 +1340,8 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } sk_setup_caps(newsk, dst); + tcp_ca_openreq_child(newsk, dst); + tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 63d2680b65db..bc9216dc9de1 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -399,6 +399,32 @@ static void tcp_ecn_openreq_child(struct tcp_sock *tp, tp->ecn_flags = inet_rsk(req)->ecn_ok ? TCP_ECN_OK : 0; } +void tcp_ca_openreq_child(struct sock *sk, const struct dst_entry *dst) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); + bool ca_got_dst = false; + + if (ca_key != TCP_CA_UNSPEC) { + const struct tcp_congestion_ops *ca; + + rcu_read_lock(); + ca = tcp_ca_find_key(ca_key); + if (likely(ca && try_module_get(ca->owner))) { + icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); + icsk->icsk_ca_ops = ca; + ca_got_dst = true; + } + rcu_read_unlock(); + } + + if (!ca_got_dst && !try_module_get(icsk->icsk_ca_ops->owner)) + tcp_assign_congestion_control(sk); + + tcp_set_ca_state(sk, TCP_CA_Open); +} +EXPORT_SYMBOL_GPL(tcp_ca_openreq_child); + /* This is not only more efficient than what we used to do, it eliminates * a lot of code duplication between IPv4/IPv6 SYN recv processing. 
-DaveM * @@ -451,10 +477,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newtp->snd_cwnd = TCP_INIT_CWND; newtp->snd_cwnd_cnt = 0; - if (!try_module_get(newicsk->icsk_ca_ops->owner)) - tcp_assign_congestion_control(newsk); - - tcp_set_ca_state(newsk, TCP_CA_Open); tcp_init_xmit_timers(newsk); __skb_queue_head_init(&newtp->out_of_order_queue); newtp->write_seq = newtp->pushed_seq = treq->snt_isn + 1; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 7f18262e2326..dc30cb563e4f 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2939,6 +2939,25 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, } EXPORT_SYMBOL(tcp_make_synack); +static void tcp_ca_dst_init(struct sock *sk, const struct dst_entry *dst) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_congestion_ops *ca; + u32 ca_key = dst_metric(dst, RTAX_CC_ALGO); + + if (ca_key == TCP_CA_UNSPEC) + return; + + rcu_read_lock(); + ca = tcp_ca_find_key(ca_key); + if (likely(ca && try_module_get(ca->owner))) { + module_put(icsk->icsk_ca_ops->owner); + icsk->icsk_ca_dst_locked = tcp_ca_dst_locked(dst); + icsk->icsk_ca_ops = ca; + } + rcu_read_unlock(); +} + /* Do all connect socket setups that can be done AF independent. */ static void tcp_connect_init(struct sock *sk) { @@ -2964,6 +2983,8 @@ static void tcp_connect_init(struct sock *sk) tcp_mtup_init(sk); tcp_sync_mss(sk, dst_mtu(dst)); + tcp_ca_dst_init(sk, dst); + if (!tp->window_clamp) tp->window_clamp = dst_metric(dst, RTAX_WINDOW); tp->advmss = dst_metric_advmss(dst); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 9c0b54e87b47..5d46832c6f72 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1199,6 +1199,8 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen + newnp->opt->opt_flen); + tcp_ca_openreq_child(newsk, dst); + tcp_sync_mss(newsk, dst_mtu(dst)); newtp->advmss = dst_metric_advmss(dst); if (tcp_sk(sk)->rx_opt.user_mss && -- cgit v1.2.3-70-g09d2 From d75bb06b61cb69ee6223d791d3bb230e68623b20 Mon Sep 17 00:00:00 2001 From: Gautam Kumar Shukla Date: Tue, 23 Dec 2014 16:55:19 +0100 Subject: cfg80211: add extensible feature flag attribute With the wiphy::features flag being used up this patch adds a new field wiphy::ext_features. Considering extensibility this new field is declared as a byte array. This extensible flag is exposed to user-space by NL80211_ATTR_EXT_FEATURES. Cc: Avinash Patil Signed-off-by: Gautam (Gautam Kumar) Shukla Signed-off-by: Arend van Spriel Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 39 +++++++++++++++++++++++++++++++++++++++ include/uapi/linux/nl80211.h | 22 ++++++++++++++++++++++ net/wireless/nl80211.c | 5 +++++ 3 files changed, 66 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index bd672ea08c9a..f38645fb83b9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3016,6 +3016,8 @@ struct wiphy_vendor_command { * @regulatory_flags: wiphy regulatory flags, see * &enum ieee80211_regulatory_flags * @features: features advertised to nl80211, see &enum nl80211_feature_flags. + * @ext_features: extended features advertised to nl80211, see + * &enum nl80211_ext_feature_index. 
* @bss_priv_size: each BSS struct has private data allocated with it, * this variable determines its size * @max_scan_ssids: maximum number of SSIDs the device can scan for in @@ -3125,6 +3127,7 @@ struct wiphy { u16 max_acl_mac_addrs; u32 flags, regulatory_flags, features; + u8 ext_features[DIV_ROUND_UP(NUM_NL80211_EXT_FEATURES, 8)]; u32 ap_sme_capa; @@ -5052,6 +5055,42 @@ void cfg80211_stop_iface(struct wiphy *wiphy, struct wireless_dev *wdev, */ void cfg80211_shutdown_all_interfaces(struct wiphy *wiphy); +/** + * wiphy_ext_feature_set - set the extended feature flag + * + * @wiphy: the wiphy to modify. + * @ftidx: extended feature bit index. + * + * The extended features are flagged in multiple bytes (see + * &struct wiphy.@ext_features) + */ +static inline void wiphy_ext_feature_set(struct wiphy *wiphy, + enum nl80211_ext_feature_index ftidx) +{ + u8 *ft_byte; + + ft_byte = &wiphy->ext_features[ftidx / 8]; + *ft_byte |= BIT(ftidx % 8); +} + +/** + * wiphy_ext_feature_isset - check the extended feature flag + * + * @wiphy: the wiphy to modify. + * @ftidx: extended feature bit index. + * + * The extended features are flagged in multiple bytes (see + * &struct wiphy.@ext_features) + */ +static inline bool +wiphy_ext_feature_isset(struct wiphy *wiphy, + enum nl80211_ext_feature_index ftidx) +{ + u8 ft_byte; + + ft_byte = wiphy->ext_features[ftidx / 8]; + return (ft_byte & BIT(ftidx % 8)) != 0; +} /* ethtool helper */ void cfg80211_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 54f391141351..f95d35483086 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1713,6 +1713,13 @@ enum nl80211_commands { * obtained from it is coming from the device's wiphy and not the global * cfg80211 regdomain. * + * @NL80211_ATTR_EXT_FEATURES: extended feature flags contained in a byte + * array. The feature flags are identified by their bit index (see &enum + * nl80211_ext_feature_index). The bit index is ordered starting at the + * least-significant bit of the first byte in the array, ie. bit index 0 + * is located at bit 0 of byte 0. bit index 25 would be located at bit 1 + * of byte 3 (u8 array). + * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2072,6 +2079,8 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_SELF_MANAGED_REG, + NL80211_ATTR_EXT_FEATURES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -4223,6 +4232,19 @@ enum nl80211_feature_flags { NL80211_FEATURE_ND_RANDOM_MAC_ADDR = 1 << 31, }; +/** + * enum nl80211_ext_feature_index - bit index of extended features. + * + * @NUM_NL80211_EXT_FEATURES: number of extended features. + * @MAX_NL80211_EXT_FEATURES: highest extended feature index. + */ +enum nl80211_ext_feature_index { + + /* add new features before the definition below */ + NUM_NL80211_EXT_FEATURES, + MAX_NL80211_EXT_FEATURES = NUM_NL80211_EXT_FEATURES - 1 +}; + /** * enum nl80211_probe_resp_offload_support_attr - optional supported * protocols for probe-response offloading by the driver/FW. 
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 702920134b34..689e1a8fd60a 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1706,6 +1706,11 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) goto nla_put_failure; + if (nla_put(msg, NL80211_ATTR_EXT_FEATURES, + sizeof(rdev->wiphy.ext_features), + rdev->wiphy.ext_features)) + goto nla_put_failure; + /* done */ state->split_start = 0; break; -- cgit v1.2.3-70-g09d2 From 71b836eca7f380fbd4c025f8c4371f9a071bc909 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 23 Dec 2014 17:17:38 +0100 Subject: nl80211: define multicast group names in header Put the group names into the userspace API header file so that userspace clients can use symbolic names from there instead of hardcoding the actual names. This doesn't really change much, but seems somewhat cleaner. Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 7 +++++++ net/wireless/nl80211.c | 12 ++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f95d35483086..7ba9404b290d 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -29,6 +29,13 @@ #define NL80211_GENL_NAME "nl80211" +#define NL80211_MULTICAST_GROUP_CONFIG "config" +#define NL80211_MULTICAST_GROUP_SCAN "scan" +#define NL80211_MULTICAST_GROUP_REG "regulatory" +#define NL80211_MULTICAST_GROUP_MLME "mlme" +#define NL80211_MULTICAST_GROUP_VENDOR "vendor" +#define NL80211_MULTICAST_GROUP_TESTMODE "testmode" + /** * DOC: Station handling * diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 689e1a8fd60a..049f505e5660 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -59,13 +59,13 @@ enum nl80211_multicast_groups { }; static const struct genl_multicast_group nl80211_mcgrps[] = { - [NL80211_MCGRP_CONFIG] = { .name = "config", }, - [NL80211_MCGRP_SCAN] = { .name = "scan", }, - [NL80211_MCGRP_REGULATORY] = { .name = "regulatory", }, - [NL80211_MCGRP_MLME] = { .name = "mlme", }, - [NL80211_MCGRP_VENDOR] = { .name = "vendor", }, + [NL80211_MCGRP_CONFIG] = { .name = NL80211_MULTICAST_GROUP_CONFIG }, + [NL80211_MCGRP_SCAN] = { .name = NL80211_MULTICAST_GROUP_SCAN }, + [NL80211_MCGRP_REGULATORY] = { .name = NL80211_MULTICAST_GROUP_REG }, + [NL80211_MCGRP_MLME] = { .name = NL80211_MULTICAST_GROUP_MLME }, + [NL80211_MCGRP_VENDOR] = { .name = NL80211_MULTICAST_GROUP_VENDOR }, #ifdef CONFIG_NL80211_TESTMODE - [NL80211_MCGRP_TESTMODE] = { .name = "testmode", } + [NL80211_MCGRP_TESTMODE] = { .name = NL80211_MULTICAST_GROUP_TESTMODE } #endif }; -- cgit v1.2.3-70-g09d2 From 8b3a38daff6f50027039d6979b9eb026907508eb Mon Sep 17 00:00:00 2001 From: Arend van Spriel Date: Tue, 23 Dec 2014 19:04:23 +0100 Subject: brcmfmac: Add support for bcm43340/1 wireless chipsets This patch adds support for the bcm43340 and bcm43341 wireless chipsets. These two chipsets are identical from wireless parts perspective. As such they use the same firmware image. 
Cc: Samuel Ortiz Cc: Rob Herring Signed-off-by: John Stultz [arend@broadcom.com: squash to single commit, remove 43341 chipid] Reviewed-by: Pieter-Paul Giesberts Reviewed-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c | 2 ++ drivers/net/wireless/brcm80211/brcmfmac/chip.c | 1 + drivers/net/wireless/brcm80211/brcmfmac/sdio.c | 5 +++++ drivers/net/wireless/brcm80211/include/brcm_hw_ids.h | 3 +++ include/linux/mmc/sdio_ids.h | 6 ++++-- 5 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c index 9880dae2a569..dffd9e44f5b6 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/bcmsdh.c @@ -1005,6 +1005,8 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = { BRCMF_SDIO_DEVICE(BRCM_SDIO_4329_DEVICE_ID), BRCMF_SDIO_DEVICE(BRCM_SDIO_4330_DEVICE_ID), BRCMF_SDIO_DEVICE(BRCM_SDIO_4334_DEVICE_ID), + BRCMF_SDIO_DEVICE(BRCM_SDIO_43340_DEVICE_ID), + BRCMF_SDIO_DEVICE(BRCM_SDIO_43341_DEVICE_ID), BRCMF_SDIO_DEVICE(BRCM_SDIO_43362_DEVICE_ID), BRCMF_SDIO_DEVICE(BRCM_SDIO_4335_4339_DEVICE_ID), BRCMF_SDIO_DEVICE(BRCM_SDIO_4354_DEVICE_ID), diff --git a/drivers/net/wireless/brcm80211/brcmfmac/chip.c b/drivers/net/wireless/brcm80211/brcmfmac/chip.c index 519b79ebaabd..04d2ca0d87d6 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/chip.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/chip.c @@ -481,6 +481,7 @@ static void brcmf_chip_get_raminfo(struct brcmf_chip_priv *ci) ci->pub.ramsize = 0x48000; break; case BRCM_CC_4334_CHIP_ID: + case BRCM_CC_43340_CHIP_ID: ci->pub.ramsize = 0x80000; break; case BRCM_CC_4335_CHIP_ID: diff --git a/drivers/net/wireless/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/brcm80211/brcmfmac/sdio.c index 0b0d51a61060..551da356a5bd 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/sdio.c @@ -608,6 +608,8 @@ static const struct sdiod_drive_str sdiod_drvstr_tab2_3v3[] = { #define BCM4330_NVRAM_NAME "brcm/brcmfmac4330-sdio.txt" #define BCM4334_FIRMWARE_NAME "brcm/brcmfmac4334-sdio.bin" #define BCM4334_NVRAM_NAME "brcm/brcmfmac4334-sdio.txt" +#define BCM43340_FIRMWARE_NAME "brcm/brcmfmac43340-sdio.bin" +#define BCM43340_NVRAM_NAME "brcm/brcmfmac43340-sdio.txt" #define BCM4335_FIRMWARE_NAME "brcm/brcmfmac4335-sdio.bin" #define BCM4335_NVRAM_NAME "brcm/brcmfmac4335-sdio.txt" #define BCM43362_FIRMWARE_NAME "brcm/brcmfmac43362-sdio.bin" @@ -629,6 +631,8 @@ MODULE_FIRMWARE(BCM4330_FIRMWARE_NAME); MODULE_FIRMWARE(BCM4330_NVRAM_NAME); MODULE_FIRMWARE(BCM4334_FIRMWARE_NAME); MODULE_FIRMWARE(BCM4334_NVRAM_NAME); +MODULE_FIRMWARE(BCM43340_FIRMWARE_NAME); +MODULE_FIRMWARE(BCM43340_NVRAM_NAME); MODULE_FIRMWARE(BCM4335_FIRMWARE_NAME); MODULE_FIRMWARE(BCM4335_NVRAM_NAME); MODULE_FIRMWARE(BCM43362_FIRMWARE_NAME); @@ -660,6 +664,7 @@ static const struct brcmf_firmware_names brcmf_fwname_data[] = { { BRCM_CC_4329_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4329) }, { BRCM_CC_4330_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4330) }, { BRCM_CC_4334_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4334) }, + { BRCM_CC_43340_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM43340) }, { BRCM_CC_4335_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4335) }, { BRCM_CC_43362_CHIP_ID, 0xFFFFFFFE, BRCMF_FIRMWARE_NVRAM(BCM43362) }, { BRCM_CC_4339_CHIP_ID, 0xFFFFFFFF, BRCMF_FIRMWARE_NVRAM(BCM4339) }, diff 
--git a/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h b/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h index 6996fcc144cf..00215efbc13b 100644 --- a/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h +++ b/drivers/net/wireless/brcm80211/include/brcm_hw_ids.h @@ -34,6 +34,7 @@ #define BRCM_CC_4329_CHIP_ID 0x4329 #define BRCM_CC_4330_CHIP_ID 0x4330 #define BRCM_CC_4334_CHIP_ID 0x4334 +#define BRCM_CC_43340_CHIP_ID 43340 #define BRCM_CC_43362_CHIP_ID 43362 #define BRCM_CC_4335_CHIP_ID 0x4335 #define BRCM_CC_4339_CHIP_ID 0x4339 @@ -51,6 +52,8 @@ #define BRCM_SDIO_4329_DEVICE_ID BRCM_CC_4329_CHIP_ID #define BRCM_SDIO_4330_DEVICE_ID BRCM_CC_4330_CHIP_ID #define BRCM_SDIO_4334_DEVICE_ID BRCM_CC_4334_CHIP_ID +#define BRCM_SDIO_43340_DEVICE_ID BRCM_CC_43340_CHIP_ID +#define BRCM_SDIO_43341_DEVICE_ID 43341 #define BRCM_SDIO_43362_DEVICE_ID BRCM_CC_43362_CHIP_ID #define BRCM_SDIO_4335_4339_DEVICE_ID BRCM_CC_4335_CHIP_ID #define BRCM_SDIO_4354_DEVICE_ID BRCM_CC_4354_CHIP_ID diff --git a/include/linux/mmc/sdio_ids.h b/include/linux/mmc/sdio_ids.h index 0f01fe065424..996807963716 100644 --- a/include/linux/mmc/sdio_ids.h +++ b/include/linux/mmc/sdio_ids.h @@ -24,13 +24,15 @@ * Vendors and devices. Sort key: vendor first, device next. */ #define SDIO_VENDOR_ID_BROADCOM 0x02d0 -#define SDIO_DEVICE_ID_BROADCOM_43143 43143 +#define SDIO_DEVICE_ID_BROADCOM_43143 0xa887 #define SDIO_DEVICE_ID_BROADCOM_43241 0x4324 #define SDIO_DEVICE_ID_BROADCOM_4329 0x4329 #define SDIO_DEVICE_ID_BROADCOM_4330 0x4330 #define SDIO_DEVICE_ID_BROADCOM_4334 0x4334 +#define SDIO_DEVICE_ID_BROADCOM_43340 0xa94c +#define SDIO_DEVICE_ID_BROADCOM_43341 0xa94d #define SDIO_DEVICE_ID_BROADCOM_4335_4339 0x4335 -#define SDIO_DEVICE_ID_BROADCOM_43362 43362 +#define SDIO_DEVICE_ID_BROADCOM_43362 0xa962 #define SDIO_DEVICE_ID_BROADCOM_4354 0x4354 #define SDIO_VENDOR_ID_INTEL 0x0089 -- cgit v1.2.3-70-g09d2 From 2f4383667d57d1c719070db86b14277277752841 Mon Sep 17 00:00:00 2001 From: Ed Swierk Date: Fri, 2 Jan 2015 17:27:56 -0800 Subject: ethtool: Extend ethtool plugin module eeprom API to phylib This patch extends the ethtool plugin module eeprom API to support cards whose phy support is delegated to a separate driver. The handlers for ETHTOOL_GMODULEINFO and ETHTOOL_GMODULEEEPROM call the module_info and module_eeprom functions if the phy driver provides them; otherwise the handlers call the equivalent ethtool_ops functions provided by network drivers with built-in phy support. Signed-off-by: Ed Swierk Signed-off-by: David S. 
Miller --- include/linux/phy.h | 9 +++++++++ net/core/ethtool.c | 45 ++++++++++++++++++++++++++++++++++----------- 2 files changed, 43 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 22af8f8f5802..9c189a1fa3a2 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -565,6 +565,15 @@ struct phy_driver { void (*write_mmd_indirect)(struct phy_device *dev, int ptrad, int devnum, int regnum, u32 val); + /* Get the size and type of the eeprom contained within a plug-in + * module */ + int (*module_info)(struct phy_device *dev, + struct ethtool_modinfo *modinfo); + + /* Get the eeprom information from the plug-in module */ + int (*module_eeprom)(struct phy_device *dev, + struct ethtool_eeprom *ee, u8 *data); + struct device_driver driver; }; #define to_phy_driver(d) container_of(d, struct phy_driver, driver) diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 550892cd6b3f..91f74f3eb204 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1597,20 +1597,31 @@ static int ethtool_get_ts_info(struct net_device *dev, void __user *useraddr) return err; } +static int __ethtool_get_module_info(struct net_device *dev, + struct ethtool_modinfo *modinfo) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct phy_device *phydev = dev->phydev; + + if (phydev && phydev->drv && phydev->drv->module_info) + return phydev->drv->module_info(phydev, modinfo); + + if (ops->get_module_info) + return ops->get_module_info(dev, modinfo); + + return -EOPNOTSUPP; +} + static int ethtool_get_module_info(struct net_device *dev, void __user *useraddr) { int ret; struct ethtool_modinfo modinfo; - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (!ops->get_module_info) - return -EOPNOTSUPP; if (copy_from_user(&modinfo, useraddr, sizeof(modinfo))) return -EFAULT; - ret = ops->get_module_info(dev, &modinfo); + ret = __ethtool_get_module_info(dev, &modinfo); if (ret) return ret; @@ -1620,21 +1631,33 @@ static int ethtool_get_module_info(struct net_device *dev, return 0; } +static int __ethtool_get_module_eeprom(struct net_device *dev, + struct ethtool_eeprom *ee, u8 *data) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct phy_device *phydev = dev->phydev; + + if (phydev && phydev->drv && phydev->drv->module_eeprom) + return phydev->drv->module_eeprom(phydev, ee, data); + + if (ops->get_module_eeprom) + return ops->get_module_eeprom(dev, ee, data); + + return -EOPNOTSUPP; +} + static int ethtool_get_module_eeprom(struct net_device *dev, void __user *useraddr) { int ret; struct ethtool_modinfo modinfo; - const struct ethtool_ops *ops = dev->ethtool_ops; - - if (!ops->get_module_info || !ops->get_module_eeprom) - return -EOPNOTSUPP; - ret = ops->get_module_info(dev, &modinfo); + ret = __ethtool_get_module_info(dev, &modinfo); if (ret) return ret; - return ethtool_get_any_eeprom(dev, useraddr, ops->get_module_eeprom, + return ethtool_get_any_eeprom(dev, useraddr, + __ethtool_get_module_eeprom, modinfo.eeprom_len); } -- cgit v1.2.3-70-g09d2 From db12847ca84b7a315a3ba77c939c9d08df17d54f Mon Sep 17 00:00:00 2001 From: Ido Yariv Date: Tue, 6 Jan 2015 08:39:02 -0500 Subject: mac80211: Re-fix accounting of the tailroom-needed counter When hw acceleration is enabled, the GENERATE_IV or PUT_IV_SPACE flags only require headroom space. Therefore, the tailroom-needed counter can safely be decremented for most drivers. The older incarnation of this patch (ca34e3b5) assumed that the above holds true for all drivers. 
As reported by Christopher Chavez and researched by Christian Lamparter and Larry Finger, this isn't a valid assumption for p54 and cw1200. Drivers that still require tailroom for ICV/MIC even when HW encryption is enabled can use IEEE80211_KEY_FLAG_RESERVE_TAILROOM to indicate it. Signed-off-by: Ido Yariv Cc: Christopher Chavez Cc: Christian Lamparter Cc: Larry Finger Cc: Solomon Peachy Signed-off-by: Johannes Berg --- drivers/net/wireless/cw1200/sta.c | 3 ++- drivers/net/wireless/p54/main.c | 2 ++ include/net/mac80211.h | 11 +++++++++-- net/mac80211/key.c | 9 +++------ 4 files changed, 16 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/cw1200/sta.c b/drivers/net/wireless/cw1200/sta.c index a1e3237c0be8..4a47c7f8a246 100644 --- a/drivers/net/wireless/cw1200/sta.c +++ b/drivers/net/wireless/cw1200/sta.c @@ -709,7 +709,8 @@ int cw1200_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, if (sta) peer_addr = sta->addr; - key->flags |= IEEE80211_KEY_FLAG_PUT_IV_SPACE; + key->flags |= IEEE80211_KEY_FLAG_PUT_IV_SPACE | + IEEE80211_KEY_FLAG_RESERVE_TAILROOM; switch (key->cipher) { case WLAN_CIPHER_SUITE_WEP40: diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index 97aeff0edb84..13a30c4a27f2 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -575,6 +575,8 @@ static int p54_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, key->hw_key_idx = 0xff; goto out_unlock; } + + key->flags |= IEEE80211_KEY_FLAG_RESERVE_TAILROOM; } else { slot = key->hw_key_idx; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ece1a546587e..555a845ad51e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1281,7 +1281,8 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); * * @IEEE80211_KEY_FLAG_GENERATE_IV: This flag should be set by the * driver to indicate that it requires IV generation for this - * particular key. + * particular key. Setting this flag does not necessarily mean that SKBs + * will have sufficient tailroom for ICV or MIC. * @IEEE80211_KEY_FLAG_GENERATE_MMIC: This flag should be set by * the driver for a TKIP key if it requires Michael MIC * generation in software. @@ -1293,7 +1294,9 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); * @IEEE80211_KEY_FLAG_PUT_IV_SPACE: This flag should be set by the driver * if space should be prepared for the IV, but the IV * itself should not be generated. Do not set together with - * @IEEE80211_KEY_FLAG_GENERATE_IV on the same key. + * @IEEE80211_KEY_FLAG_GENERATE_IV on the same key. Setting this flag does + * not necessarily mean that SKBs will have sufficient tailroom for ICV or + * MIC. * @IEEE80211_KEY_FLAG_RX_MGMT: This key will be used to decrypt received * management frames. The flag can help drivers that have a hardware * crypto implementation that doesn't deal with management frames @@ -1304,6 +1307,9 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); * @IEEE80211_KEY_FLAG_GENERATE_IV_MGMT: This flag should be set by the * driver for a CCMP key to indicate that is requires IV generation * only for managment frames (MFP). + * @IEEE80211_KEY_FLAG_RESERVE_TAILROOM: This flag should be set by the + * driver for a key to indicate that sufficient tailroom must always + * be reserved for ICV or MIC, even when HW encryption is enabled. 
*/ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_GENERATE_IV_MGMT = BIT(0), @@ -1313,6 +1319,7 @@ enum ieee80211_key_flags { IEEE80211_KEY_FLAG_SW_MGMT_TX = BIT(4), IEEE80211_KEY_FLAG_PUT_IV_SPACE = BIT(5), IEEE80211_KEY_FLAG_RX_MGMT = BIT(6), + IEEE80211_KEY_FLAG_RESERVE_TAILROOM = BIT(7), }; /** diff --git a/net/mac80211/key.c b/net/mac80211/key.c index bd4e46ec32bd..f8d9f0ee59bf 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -141,8 +141,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || - (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) || - (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE))) + (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) sdata->crypto_tx_tailroom_needed_cnt--; WARN_ON((key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && @@ -191,8 +190,7 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) sdata = key->sdata; if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || - (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) || - (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE))) + (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) increment_tailroom_need_count(sdata); ret = drv_set_key(key->local, DISABLE_KEY, sdata, @@ -889,8 +887,7 @@ void ieee80211_remove_key(struct ieee80211_key_conf *keyconf) key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; if (!((key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_MMIC) || - (key->conf.flags & IEEE80211_KEY_FLAG_GENERATE_IV) || - (key->conf.flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE))) + (key->conf.flags & IEEE80211_KEY_FLAG_RESERVE_TAILROOM))) increment_tailroom_need_count(key->sdata); } -- cgit v1.2.3-70-g09d2 From ae406bd0572be97a46d72e8a5e97c33c3168388c Mon Sep 17 00:00:00 2001 From: Kristian Evensen Date: Wed, 24 Dec 2014 09:57:11 +0100 Subject: netfilter: conntrack: Remove nf_ct_conntrack_flush_report The only user of nf_ct_conntrack_flush_report() was ctnetlink_del_conntrack(). After adding support for flushing connections with a given mark, this function is no longer called. 
Signed-off-by: Kristian Evensen Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_conntrack.h | 2 -- net/netfilter/nf_conntrack_core.c | 6 ------ 2 files changed, 8 deletions(-) (limited to 'include') diff --git a/include/net/netfilter/nf_conntrack.h b/include/net/netfilter/nf_conntrack.h index f0daed2b54d1..74f271a172dd 100644 --- a/include/net/netfilter/nf_conntrack.h +++ b/include/net/netfilter/nf_conntrack.h @@ -191,8 +191,6 @@ __nf_conntrack_find(struct net *net, u16 zone, int nf_conntrack_hash_check_insert(struct nf_conn *ct); bool nf_ct_delete(struct nf_conn *ct, u32 pid, int report); -void nf_conntrack_flush_report(struct net *net, u32 portid, int report); - bool nf_ct_get_tuplepr(const struct sk_buff *skb, unsigned int nhoff, u_int16_t l3num, struct nf_conntrack_tuple *tuple); bool nf_ct_invert_tuplepr(struct nf_conntrack_tuple *inverse, diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index da58cd4f2cb7..5a6990bc5a5b 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1426,12 +1426,6 @@ void nf_ct_free_hashtable(void *hash, unsigned int size) } EXPORT_SYMBOL_GPL(nf_ct_free_hashtable); -void nf_conntrack_flush_report(struct net *net, u32 portid, int report) -{ - nf_ct_iterate_cleanup(net, kill_all, NULL, portid, report); -} -EXPORT_SYMBOL_GPL(nf_conntrack_flush_report); - static int untrack_refs(void) { int cnt = 0, cpu; -- cgit v1.2.3-70-g09d2 From 4ed20bebf51578229a1986efcf46344075ec8447 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 14 Nov 2014 16:35:34 +0100 Subject: cfg80211: remove "channel" from survey names All of the survey data is (currently) per channel anyway, so having the word "channel" in the name does nothing. In the next patch I'll introduce global data to the survey, where the word "channel" is actually confusing. 
Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath10k/wmi.c | 8 ++--- drivers/net/wireless/ath/ath5k/mac80211-ops.c | 16 +++++----- drivers/net/wireless/ath/ath9k/link.c | 16 +++++----- drivers/net/wireless/ath/carl9170/cmd.c | 12 ++++---- drivers/net/wireless/ath/carl9170/main.c | 6 ++-- drivers/net/wireless/mwifiex/cfg80211.c | 7 +++-- drivers/net/wireless/mwl8k.c | 12 ++++---- drivers/net/wireless/p54/eeprom.c | 6 ++-- drivers/net/wireless/p54/main.c | 8 ++--- drivers/net/wireless/p54/txrx.c | 12 ++++---- drivers/net/wireless/rt2x00/rt2800lib.c | 12 ++++---- include/net/cfg80211.h | 44 +++++++++++++-------------- include/uapi/linux/nl80211.h | 27 ++++++++++------ net/mac80211/ethtool.c | 20 ++++++------ net/wireless/nl80211.c | 30 +++++++++--------- net/wireless/trace.h | 26 ++++++++-------- 16 files changed, 135 insertions(+), 127 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c index c0f3e4d09263..721631c3dd3e 100644 --- a/drivers/net/wireless/ath/ath10k/wmi.c +++ b/drivers/net/wireless/ath/ath10k/wmi.c @@ -1344,11 +1344,11 @@ static void ath10k_wmi_event_chan_info(struct ath10k *ar, struct sk_buff *skb) rx_clear_count -= ar->survey_last_rx_clear_count; survey = &ar->survey[idx]; - survey->channel_time = WMI_CHAN_INFO_MSEC(cycle_count); - survey->channel_time_rx = WMI_CHAN_INFO_MSEC(rx_clear_count); + survey->time = WMI_CHAN_INFO_MSEC(cycle_count); + survey->time_rx = WMI_CHAN_INFO_MSEC(rx_clear_count); survey->noise = noise_floor; - survey->filled = SURVEY_INFO_CHANNEL_TIME | - SURVEY_INFO_CHANNEL_TIME_RX | + survey->filled = SURVEY_INFO_TIME | + SURVEY_INFO_TIME_RX | SURVEY_INFO_NOISE_DBM; } diff --git a/drivers/net/wireless/ath/ath5k/mac80211-ops.c b/drivers/net/wireless/ath/ath5k/mac80211-ops.c index 19eab2a69ad5..3b4a6463d87a 100644 --- a/drivers/net/wireless/ath/ath5k/mac80211-ops.c +++ b/drivers/net/wireless/ath/ath5k/mac80211-ops.c @@ -672,10 +672,10 @@ ath5k_get_survey(struct ieee80211_hw *hw, int idx, struct survey_info *survey) spin_lock_bh(&common->cc_lock); ath_hw_cycle_counters_update(common); if (cc->cycles > 0) { - ah->survey.channel_time += cc->cycles / div; - ah->survey.channel_time_busy += cc->rx_busy / div; - ah->survey.channel_time_rx += cc->rx_frame / div; - ah->survey.channel_time_tx += cc->tx_frame / div; + ah->survey.time += cc->cycles / div; + ah->survey.time_busy += cc->rx_busy / div; + ah->survey.time_rx += cc->rx_frame / div; + ah->survey.time_tx += cc->tx_frame / div; } memset(cc, 0, sizeof(*cc)); spin_unlock_bh(&common->cc_lock); @@ -686,10 +686,10 @@ ath5k_get_survey(struct ieee80211_hw *hw, int idx, struct survey_info *survey) survey->noise = ah->ah_noise_floor; survey->filled = SURVEY_INFO_NOISE_DBM | SURVEY_INFO_IN_USE | - SURVEY_INFO_CHANNEL_TIME | - SURVEY_INFO_CHANNEL_TIME_BUSY | - SURVEY_INFO_CHANNEL_TIME_RX | - SURVEY_INFO_CHANNEL_TIME_TX; + SURVEY_INFO_TIME | + SURVEY_INFO_TIME_BUSY | + SURVEY_INFO_TIME_RX | + SURVEY_INFO_TIME_TX; return 0; } diff --git a/drivers/net/wireless/ath/ath9k/link.c b/drivers/net/wireless/ath/ath9k/link.c index b829263e3d0a..90631d768a60 100644 --- a/drivers/net/wireless/ath/ath9k/link.c +++ b/drivers/net/wireless/ath/ath9k/link.c @@ -516,14 +516,14 @@ int ath_update_survey_stats(struct ath_softc *sc) ath_hw_cycle_counters_update(common); if (cc->cycles > 0) { - survey->filled |= SURVEY_INFO_CHANNEL_TIME | - SURVEY_INFO_CHANNEL_TIME_BUSY | - SURVEY_INFO_CHANNEL_TIME_RX | - SURVEY_INFO_CHANNEL_TIME_TX; - survey->channel_time += 
cc->cycles / div; - survey->channel_time_busy += cc->rx_busy / div; - survey->channel_time_rx += cc->rx_frame / div; - survey->channel_time_tx += cc->tx_frame / div; + survey->filled |= SURVEY_INFO_TIME | + SURVEY_INFO_TIME_BUSY | + SURVEY_INFO_TIME_RX | + SURVEY_INFO_TIME_TX; + survey->time += cc->cycles / div; + survey->time_busy += cc->rx_busy / div; + survey->time_rx += cc->rx_frame / div; + survey->time_tx += cc->tx_frame / div; } if (cc->cycles < div) diff --git a/drivers/net/wireless/ath/carl9170/cmd.c b/drivers/net/wireless/ath/carl9170/cmd.c index 39a63874b275..f2b4f537e4c1 100644 --- a/drivers/net/wireless/ath/carl9170/cmd.c +++ b/drivers/net/wireless/ath/carl9170/cmd.c @@ -188,12 +188,12 @@ int carl9170_collect_tally(struct ar9170 *ar) if (ar->channel) { info = &ar->survey[ar->channel->hw_value]; - info->channel_time = ar->tally.active; - info->channel_time_busy = ar->tally.cca; - info->channel_time_tx = ar->tally.tx_time; - do_div(info->channel_time, 1000); - do_div(info->channel_time_busy, 1000); - do_div(info->channel_time_tx, 1000); + info->time = ar->tally.active; + info->time_busy = ar->tally.cca; + info->time_tx = ar->tally.tx_time; + do_div(info->time, 1000); + do_div(info->time_busy, 1000); + do_div(info->time_tx, 1000); } } return 0; diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c index ef5b6dc7b7f1..f1455a04cb62 100644 --- a/drivers/net/wireless/ath/carl9170/main.c +++ b/drivers/net/wireless/ath/carl9170/main.c @@ -1690,9 +1690,9 @@ found: survey->filled |= SURVEY_INFO_IN_USE; if (ar->fw.hw_counters) { - survey->filled |= SURVEY_INFO_CHANNEL_TIME | - SURVEY_INFO_CHANNEL_TIME_BUSY | - SURVEY_INFO_CHANNEL_TIME_TX; + survey->filled |= SURVEY_INFO_TIME | + SURVEY_INFO_TIME_BUSY | + SURVEY_INFO_TIME_TX; } return 0; diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 4a66a6555366..8bd446b69658 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -1037,10 +1037,11 @@ mwifiex_cfg80211_dump_survey(struct wiphy *wiphy, struct net_device *dev, survey->channel = ieee80211_get_channel(wiphy, ieee80211_channel_to_frequency(pchan_stats[idx].chan_num, band)); survey->filled = SURVEY_INFO_NOISE_DBM | - SURVEY_INFO_CHANNEL_TIME | SURVEY_INFO_CHANNEL_TIME_BUSY; + SURVEY_INFO_TIME | + SURVEY_INFO_TIME_BUSY; survey->noise = pchan_stats[idx].noise; - survey->channel_time = pchan_stats[idx].cca_scan_dur; - survey->channel_time_busy = pchan_stats[idx].cca_busy_dur; + survey->time = pchan_stats[idx].cca_scan_dur; + survey->time_busy = pchan_stats[idx].cca_busy_dur; return 0; } diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index b8d1e04aa9b9..f9b1218c761a 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -3098,14 +3098,14 @@ static void mwl8k_update_survey(struct mwl8k_priv *priv, cca_cnt = ioread32(priv->regs + NOK_CCA_CNT_REG); cca_cnt /= 1000; /* uSecs to mSecs */ - survey->channel_time_busy = (u64) cca_cnt; + survey->time_busy = (u64) cca_cnt; rx_rdy = ioread32(priv->regs + BBU_RXRDY_CNT_REG); rx_rdy /= 1000; /* uSecs to mSecs */ - survey->channel_time_rx = (u64) rx_rdy; + survey->time_rx = (u64) rx_rdy; priv->channel_time = jiffies - priv->channel_time; - survey->channel_time = jiffies_to_msecs(priv->channel_time); + survey->time = jiffies_to_msecs(priv->channel_time); survey->channel = channel; @@ -3115,9 +3115,9 @@ static void mwl8k_update_survey(struct mwl8k_priv *priv, survey->noise = 
nf * -1; survey->filled = SURVEY_INFO_NOISE_DBM | - SURVEY_INFO_CHANNEL_TIME | - SURVEY_INFO_CHANNEL_TIME_BUSY | - SURVEY_INFO_CHANNEL_TIME_RX; + SURVEY_INFO_TIME | + SURVEY_INFO_TIME_BUSY | + SURVEY_INFO_TIME_RX; } /* diff --git a/drivers/net/wireless/p54/eeprom.c b/drivers/net/wireless/p54/eeprom.c index 0fe67d2da208..2fe713eda7ad 100644 --- a/drivers/net/wireless/p54/eeprom.c +++ b/drivers/net/wireless/p54/eeprom.c @@ -196,9 +196,9 @@ static int p54_generate_band(struct ieee80211_hw *dev, dest->max_power = chan->max_power; priv->survey[*chan_num].channel = &tmp->channels[j]; priv->survey[*chan_num].filled = SURVEY_INFO_NOISE_DBM | - SURVEY_INFO_CHANNEL_TIME | - SURVEY_INFO_CHANNEL_TIME_BUSY | - SURVEY_INFO_CHANNEL_TIME_TX; + SURVEY_INFO_TIME | + SURVEY_INFO_TIME_BUSY | + SURVEY_INFO_TIME_TX; dest->hw_value = (*chan_num); j++; (*chan_num)++; diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index 13a30c4a27f2..b9250d75d253 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -305,9 +305,9 @@ static void p54_reset_stats(struct p54_common *priv) struct survey_info *info = &priv->survey[chan->hw_value]; /* only reset channel statistics, don't touch .filled, etc. */ - info->channel_time = 0; - info->channel_time_busy = 0; - info->channel_time_tx = 0; + info->time = 0; + info->time_busy = 0; + info->time_tx = 0; } priv->update_stats = true; @@ -636,7 +636,7 @@ static int p54_get_survey(struct ieee80211_hw *dev, int idx, if (in_use) { /* test if the reported statistics are valid. */ - if (survey->channel_time != 0) { + if (survey->time != 0) { survey->filled |= SURVEY_INFO_IN_USE; } else { /* diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c index 153c61539ec8..24e5ff9a9272 100644 --- a/drivers/net/wireless/p54/txrx.c +++ b/drivers/net/wireless/p54/txrx.c @@ -587,13 +587,13 @@ static void p54_rx_stats(struct p54_common *priv, struct sk_buff *skb) if (chan) { struct survey_info *survey = &priv->survey[chan->hw_value]; survey->noise = clamp(priv->noise, -128, 127); - survey->channel_time = priv->survey_raw.active; - survey->channel_time_tx = priv->survey_raw.tx; - survey->channel_time_busy = priv->survey_raw.tx + + survey->time = priv->survey_raw.active; + survey->time_tx = priv->survey_raw.tx; + survey->time_busy = priv->survey_raw.tx + priv->survey_raw.cca; - do_div(survey->channel_time, 1024); - do_div(survey->channel_time_tx, 1024); - do_div(survey->channel_time_busy, 1024); + do_div(survey->time, 1024); + do_div(survey->time_tx, 1024); + do_div(survey->time_busy, 1024); } tmp = p54_find_and_unlink_skb(priv, hdr->req_id); diff --git a/drivers/net/wireless/rt2x00/rt2800lib.c b/drivers/net/wireless/rt2x00/rt2800lib.c index 81ee481487cf..be2d54f257b1 100644 --- a/drivers/net/wireless/rt2x00/rt2800lib.c +++ b/drivers/net/wireless/rt2x00/rt2800lib.c @@ -8020,13 +8020,13 @@ int rt2800_get_survey(struct ieee80211_hw *hw, int idx, rt2800_register_read(rt2x00dev, CH_BUSY_STA_SEC, &busy_ext); if (idle || busy) { - survey->filled = SURVEY_INFO_CHANNEL_TIME | - SURVEY_INFO_CHANNEL_TIME_BUSY | - SURVEY_INFO_CHANNEL_TIME_EXT_BUSY; + survey->filled = SURVEY_INFO_TIME | + SURVEY_INFO_TIME_BUSY | + SURVEY_INFO_TIME_EXT_BUSY; - survey->channel_time = (idle + busy) / 1000; - survey->channel_time_busy = busy / 1000; - survey->channel_time_ext_busy = busy_ext / 1000; + survey->time = (idle + busy) / 1000; + survey->time_busy = busy / 1000; + survey->time_ext_busy = busy_ext / 1000; } if (!(hw->conf.flags & 
IEEE80211_CONF_OFFCHANNEL)) diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f38645fb83b9..3b489f8fc4cd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -520,23 +520,23 @@ ieee80211_chandef_max_power(struct cfg80211_chan_def *chandef) * * @SURVEY_INFO_NOISE_DBM: noise (in dBm) was filled in * @SURVEY_INFO_IN_USE: channel is currently being used - * @SURVEY_INFO_CHANNEL_TIME: channel active time (in ms) was filled in - * @SURVEY_INFO_CHANNEL_TIME_BUSY: channel busy time was filled in - * @SURVEY_INFO_CHANNEL_TIME_EXT_BUSY: extension channel busy time was filled in - * @SURVEY_INFO_CHANNEL_TIME_RX: channel receive time was filled in - * @SURVEY_INFO_CHANNEL_TIME_TX: channel transmit time was filled in + * @SURVEY_INFO_TIME: active time (in ms) was filled in + * @SURVEY_INFO_TIME_BUSY: busy time was filled in + * @SURVEY_INFO_TIME_EXT_BUSY: extension channel busy time was filled in + * @SURVEY_INFO_TIME_RX: receive time was filled in + * @SURVEY_INFO_TIME_TX: transmit time was filled in * * Used by the driver to indicate which info in &struct survey_info * it has filled in during the get_survey(). */ enum survey_info_flags { - SURVEY_INFO_NOISE_DBM = 1<<0, - SURVEY_INFO_IN_USE = 1<<1, - SURVEY_INFO_CHANNEL_TIME = 1<<2, - SURVEY_INFO_CHANNEL_TIME_BUSY = 1<<3, - SURVEY_INFO_CHANNEL_TIME_EXT_BUSY = 1<<4, - SURVEY_INFO_CHANNEL_TIME_RX = 1<<5, - SURVEY_INFO_CHANNEL_TIME_TX = 1<<6, + SURVEY_INFO_NOISE_DBM = BIT(0), + SURVEY_INFO_IN_USE = BIT(1), + SURVEY_INFO_TIME = BIT(2), + SURVEY_INFO_TIME_BUSY = BIT(3), + SURVEY_INFO_TIME_EXT_BUSY = BIT(4), + SURVEY_INFO_TIME_RX = BIT(5), + SURVEY_INFO_TIME_TX = BIT(6), }; /** @@ -546,11 +546,11 @@ enum survey_info_flags { * @filled: bitflag of flags from &enum survey_info_flags * @noise: channel noise in dBm. This and all following fields are * optional - * @channel_time: amount of time in ms the radio spent on the channel - * @channel_time_busy: amount of time the primary channel was sensed busy - * @channel_time_ext_busy: amount of time the extension channel was sensed busy - * @channel_time_rx: amount of time the radio spent receiving data - * @channel_time_tx: amount of time the radio spent transmitting data + * @time: amount of time in ms the radio was turn on (on the channel) + * @time_busy: amount of time the primary channel was sensed busy + * @time_ext_busy: amount of time the extension channel was sensed busy + * @time_rx: amount of time the radio spent receiving data + * @time_tx: amount of time the radio spent transmitting data * * Used by dump_survey() to report back per-channel survey information. 
* @@ -559,11 +559,11 @@ enum survey_info_flags { */ struct survey_info { struct ieee80211_channel *channel; - u64 channel_time; - u64 channel_time_busy; - u64 channel_time_ext_busy; - u64 channel_time_rx; - u64 channel_time_tx; + u64 time; + u64 time_busy; + u64 time_ext_busy; + u64 time_rx; + u64 time_tx; u32 filled; s8 noise; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 7ba9404b290d..1a5acc80ab88 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2815,15 +2815,15 @@ enum nl80211_user_reg_hint_type { * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm) * @NL80211_SURVEY_INFO_IN_USE: channel is currently being used - * @NL80211_SURVEY_INFO_CHANNEL_TIME: amount of time (in ms) that the radio + * @NL80211_SURVEY_INFO_TIME: amount of time (in ms) that the radio * spent on this channel - * @NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY: amount of the time the primary + * @NL80211_SURVEY_INFO_TIME_BUSY: amount of the time the primary * channel was sensed busy (either due to activity or energy detect) - * @NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY: amount of time the extension + * @NL80211_SURVEY_INFO_TIME_EXT_BUSY: amount of time the extension * channel was sensed busy - * @NL80211_SURVEY_INFO_CHANNEL_TIME_RX: amount of time the radio spent + * @NL80211_SURVEY_INFO_TIME_RX: amount of time the radio spent * receiving data - * @NL80211_SURVEY_INFO_CHANNEL_TIME_TX: amount of time the radio spent + * @NL80211_SURVEY_INFO_TIME_TX: amount of time the radio spent * transmitting data * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number * currently defined @@ -2834,17 +2834,24 @@ enum nl80211_survey_info { NL80211_SURVEY_INFO_FREQUENCY, NL80211_SURVEY_INFO_NOISE, NL80211_SURVEY_INFO_IN_USE, - NL80211_SURVEY_INFO_CHANNEL_TIME, - NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY, - NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY, - NL80211_SURVEY_INFO_CHANNEL_TIME_RX, - NL80211_SURVEY_INFO_CHANNEL_TIME_TX, + NL80211_SURVEY_INFO_TIME, + NL80211_SURVEY_INFO_TIME_BUSY, + NL80211_SURVEY_INFO_TIME_EXT_BUSY, + NL80211_SURVEY_INFO_TIME_RX, + NL80211_SURVEY_INFO_TIME_TX, /* keep last */ __NL80211_SURVEY_INFO_AFTER_LAST, NL80211_SURVEY_INFO_MAX = __NL80211_SURVEY_INFO_AFTER_LAST - 1 }; +/* keep old names for compatibility */ +#define NL80211_SURVEY_INFO_CHANNEL_TIME NL80211_SURVEY_INFO_TIME +#define NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY NL80211_SURVEY_INFO_TIME_BUSY +#define NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY NL80211_SURVEY_INFO_TIME_EXT_BUSY +#define NL80211_SURVEY_INFO_CHANNEL_TIME_RX NL80211_SURVEY_INFO_TIME_RX +#define NL80211_SURVEY_INFO_CHANNEL_TIME_TX NL80211_SURVEY_INFO_TIME_TX + /** * enum nl80211_mntr_flags - monitor configuration flags * diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index ebfc8091557b..eea742710c0a 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -175,24 +175,24 @@ do_survey: data[i++] = (u8)survey.noise; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME) - data[i++] = survey.channel_time; + if (survey.filled & SURVEY_INFO_TIME) + data[i++] = survey.time; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_BUSY) - data[i++] = survey.channel_time_busy; + if (survey.filled & SURVEY_INFO_TIME_BUSY) + data[i++] = survey.time_busy; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY) - data[i++] = survey.channel_time_ext_busy; + if (survey.filled & 
SURVEY_INFO_TIME_EXT_BUSY) + data[i++] = survey.time_ext_busy; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_RX) - data[i++] = survey.channel_time_rx; + if (survey.filled & SURVEY_INFO_TIME_RX) + data[i++] = survey.time_rx; else data[i++] = -1LL; - if (survey.filled & SURVEY_INFO_CHANNEL_TIME_TX) - data[i++] = survey.channel_time_tx; + if (survey.filled & SURVEY_INFO_TIME_TX) + data[i++] = survey.time_tx; else data[i++] = -1LL; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index ad3e294acabe..94ab2014fefe 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6641,25 +6641,25 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, if ((survey->filled & SURVEY_INFO_IN_USE) && nla_put_flag(msg, NL80211_SURVEY_INFO_IN_USE)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME, - survey->channel_time)) + if ((survey->filled & SURVEY_INFO_TIME) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME, + survey->time)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_BUSY) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY, - survey->channel_time_busy)) + if ((survey->filled & SURVEY_INFO_TIME_BUSY) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_BUSY, + survey->time_busy)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY, - survey->channel_time_ext_busy)) + if ((survey->filled & SURVEY_INFO_TIME_EXT_BUSY) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_EXT_BUSY, + survey->time_ext_busy)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_RX) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_RX, - survey->channel_time_rx)) + if ((survey->filled & SURVEY_INFO_TIME_RX) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_RX, + survey->time_rx)) goto nla_put_failure; - if ((survey->filled & SURVEY_INFO_CHANNEL_TIME_TX) && - nla_put_u64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_TX, - survey->channel_time_tx)) + if ((survey->filled & SURVEY_INFO_TIME_TX) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_TX, + survey->time_tx)) goto nla_put_failure; nla_nest_end(msg, infoattr); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index ad38910f7036..bbb7afc264af 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1604,11 +1604,11 @@ TRACE_EVENT(rdev_return_int_survey_info, WIPHY_ENTRY CHAN_ENTRY __field(int, ret) - __field(u64, channel_time) - __field(u64, channel_time_busy) - __field(u64, channel_time_ext_busy) - __field(u64, channel_time_rx) - __field(u64, channel_time_tx) + __field(u64, time) + __field(u64, time_busy) + __field(u64, time_ext_busy) + __field(u64, time_rx) + __field(u64, time_tx) __field(u32, filled) __field(s8, noise) ), @@ -1616,11 +1616,11 @@ TRACE_EVENT(rdev_return_int_survey_info, WIPHY_ASSIGN; CHAN_ASSIGN(info->channel); __entry->ret = ret; - __entry->channel_time = info->channel_time; - __entry->channel_time_busy = info->channel_time_busy; - __entry->channel_time_ext_busy = info->channel_time_ext_busy; - __entry->channel_time_rx = info->channel_time_rx; - __entry->channel_time_tx = info->channel_time_tx; + __entry->time = info->time; + __entry->time_busy = info->time_busy; + __entry->time_ext_busy = info->time_ext_busy; + __entry->time_rx = info->time_rx; + __entry->time_tx = info->time_tx; __entry->filled = info->filled; __entry->noise = info->noise; ), @@ -1629,9 +1629,9 @@ 
TRACE_EVENT(rdev_return_int_survey_info, "channel time extension busy: %llu, channel time rx: %llu, " "channel time tx: %llu, filled: %u, noise: %d", WIPHY_PR_ARG, __entry->ret, CHAN_PR_ARG, - __entry->channel_time, __entry->channel_time_busy, - __entry->channel_time_ext_busy, __entry->channel_time_rx, - __entry->channel_time_tx, __entry->filled, __entry->noise) + __entry->time, __entry->time_busy, + __entry->time_ext_busy, __entry->time_rx, + __entry->time_tx, __entry->filled, __entry->noise) ); TRACE_EVENT(rdev_tdls_oper, -- cgit v1.2.3-70-g09d2 From 11f78ac32b06648c1dde9371b70323168b51a83e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 14 Nov 2014 16:43:50 +0100 Subject: cfg80211: allow survey data to return global data Not all devices are able to report survey data (particularly time spent for various operations) per channel. As all these statistics already exist in survey data, allow such devices to report them (if userspace requested it) Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 3 ++- include/uapi/linux/nl80211.h | 16 +++++++++++++--- net/wireless/nl80211.c | 31 ++++++++++++++++++------------- 3 files changed, 33 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 3b489f8fc4cd..5a861440c122 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -542,7 +542,8 @@ enum survey_info_flags { /** * struct survey_info - channel survey response * - * @channel: the channel this survey record reports, mandatory + * @channel: the channel this survey record reports, may be %NULL for a single + * record to report global statistics * @filled: bitflag of flags from &enum survey_info_flags * @noise: channel noise in dBm. This and all following fields are * optional diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1a5acc80ab88..5e8b65f239a5 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -1727,6 +1727,14 @@ enum nl80211_commands { * is located at bit 0 of byte 0. bit index 25 would be located at bit 1 * of byte 3 (u8 array). * + * @NL80211_ATTR_SURVEY_RADIO_STATS: Request overall radio statistics to be + * returned along with other survey data. If set, @NL80211_CMD_GET_SURVEY + * may return a survey entry without a channel indicating global radio + * statistics (only some values are valid and make sense.) + * For devices that don't return such an entry even then, the information + * should be contained in the result as the sum of the respective counters + * over all channels. 
+ * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2088,6 +2096,8 @@ enum nl80211_attrs { NL80211_ATTR_EXT_FEATURES, + NL80211_ATTR_SURVEY_RADIO_STATS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -2816,15 +2826,15 @@ enum nl80211_user_reg_hint_type { * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm) * @NL80211_SURVEY_INFO_IN_USE: channel is currently being used * @NL80211_SURVEY_INFO_TIME: amount of time (in ms) that the radio - * spent on this channel + * was turned on (on channel or globally) * @NL80211_SURVEY_INFO_TIME_BUSY: amount of the time the primary * channel was sensed busy (either due to activity or energy detect) * @NL80211_SURVEY_INFO_TIME_EXT_BUSY: amount of time the extension * channel was sensed busy * @NL80211_SURVEY_INFO_TIME_RX: amount of time the radio spent - * receiving data + * receiving data (on channel or globally) * @NL80211_SURVEY_INFO_TIME_TX: amount of time the radio spent - * transmitting data + * transmitting data (on channel or globally) * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number * currently defined * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 94ab2014fefe..9555ef9fd99e 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6613,12 +6613,17 @@ static int nl80211_dump_scan(struct sk_buff *skb, struct netlink_callback *cb) } static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, - int flags, struct net_device *dev, - struct survey_info *survey) + int flags, struct net_device *dev, + bool allow_radio_stats, + struct survey_info *survey) { void *hdr; struct nlattr *infoattr; + /* skip radio stats if userspace didn't request them */ + if (!survey->channel && !allow_radio_stats) + return 0; + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_SURVEY_RESULTS); if (!hdr) @@ -6631,7 +6636,8 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, if (!infoattr) goto nla_put_failure; - if (nla_put_u32(msg, NL80211_SURVEY_INFO_FREQUENCY, + if (survey->channel && + nla_put_u32(msg, NL80211_SURVEY_INFO_FREQUENCY, survey->channel->center_freq)) goto nla_put_failure; @@ -6671,19 +6677,22 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, return -EMSGSIZE; } -static int nl80211_dump_survey(struct sk_buff *skb, - struct netlink_callback *cb) +static int nl80211_dump_survey(struct sk_buff *skb, struct netlink_callback *cb) { struct survey_info survey; struct cfg80211_registered_device *rdev; struct wireless_dev *wdev; int survey_idx = cb->args[2]; int res; + bool radio_stats; res = nl80211_prepare_wdev_dump(skb, cb, &rdev, &wdev); if (res) return res; + /* prepare_wdev_dump parsed the attributes */ + radio_stats = nl80211_fam.attrbuf[NL80211_ATTR_SURVEY_RADIO_STATS]; + if (!wdev->netdev) { res = -EINVAL; goto out_err; @@ -6701,13 +6710,9 @@ static int nl80211_dump_survey(struct sk_buff *skb, if (res) goto out_err; - /* Survey without a channel doesn't make sense */ - if (!survey.channel) { - res = -EINVAL; - goto out; - } - - if (survey.channel->flags & IEEE80211_CHAN_DISABLED) { + /* don't send disabled channels, but do send non-channel data */ + if (survey.channel && + survey.channel->flags & IEEE80211_CHAN_DISABLED) { survey_idx++; continue; } @@ -6715,7 +6720,7 @@ static int nl80211_dump_survey(struct sk_buff 
*skb, if (nl80211_send_survey(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - wdev->netdev, &survey) < 0) + wdev->netdev, radio_stats, &survey) < 0) goto out; survey_idx++; } -- cgit v1.2.3-70-g09d2 From 052536abfa9144566599a7fbe8cc89e1086fa9a7 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 14 Nov 2014 16:44:11 +0100 Subject: cfg80211: add scan time to survey data Add the time spent scanning to the survey data so it can be reported by drivers that collect such information. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++++ include/uapi/linux/nl80211.h | 3 +++ net/wireless/nl80211.c | 4 ++++ net/wireless/trace.h | 7 +++++-- 4 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5a861440c122..f94f0d486d13 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -525,6 +525,7 @@ ieee80211_chandef_max_power(struct cfg80211_chan_def *chandef) * @SURVEY_INFO_TIME_EXT_BUSY: extension channel busy time was filled in * @SURVEY_INFO_TIME_RX: receive time was filled in * @SURVEY_INFO_TIME_TX: transmit time was filled in + * @SURVEY_INFO_TIME_SCAN: scan time was filled in * * Used by the driver to indicate which info in &struct survey_info * it has filled in during the get_survey(). @@ -537,6 +538,7 @@ enum survey_info_flags { SURVEY_INFO_TIME_EXT_BUSY = BIT(4), SURVEY_INFO_TIME_RX = BIT(5), SURVEY_INFO_TIME_TX = BIT(6), + SURVEY_INFO_TIME_SCAN = BIT(7), }; /** @@ -552,6 +554,7 @@ enum survey_info_flags { * @time_ext_busy: amount of time the extension channel was sensed busy * @time_rx: amount of time the radio spent receiving data * @time_tx: amount of time the radio spent transmitting data + * @time_scan: amount of time the radio spent for scanning * * Used by dump_survey() to report back per-channel survey information. 
* @@ -565,6 +568,7 @@ struct survey_info { u64 time_ext_busy; u64 time_rx; u64 time_tx; + u64 time_scan; u32 filled; s8 noise; }; diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 5e8b65f239a5..2f549a253138 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2835,6 +2835,8 @@ enum nl80211_user_reg_hint_type { * receiving data (on channel or globally) * @NL80211_SURVEY_INFO_TIME_TX: amount of time the radio spent * transmitting data (on channel or globally) + * @NL80211_SURVEY_INFO_TIME_SCAN: time the radio spent for scan + * (on this channel or globally) * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number * currently defined * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use @@ -2849,6 +2851,7 @@ enum nl80211_survey_info { NL80211_SURVEY_INFO_TIME_EXT_BUSY, NL80211_SURVEY_INFO_TIME_RX, NL80211_SURVEY_INFO_TIME_TX, + NL80211_SURVEY_INFO_TIME_SCAN, /* keep last */ __NL80211_SURVEY_INFO_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9555ef9fd99e..f56309bd21bd 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -6667,6 +6667,10 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_TX, survey->time_tx)) goto nla_put_failure; + if ((survey->filled & SURVEY_INFO_TIME_SCAN) && + nla_put_u64(msg, NL80211_SURVEY_INFO_TIME_SCAN, + survey->time_scan)) + goto nla_put_failure; nla_nest_end(msg, infoattr); diff --git a/net/wireless/trace.h b/net/wireless/trace.h index bbb7afc264af..b17b3692f8c2 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1609,6 +1609,7 @@ TRACE_EVENT(rdev_return_int_survey_info, __field(u64, time_ext_busy) __field(u64, time_rx) __field(u64, time_tx) + __field(u64, time_scan) __field(u32, filled) __field(s8, noise) ), @@ -1621,17 +1622,19 @@ TRACE_EVENT(rdev_return_int_survey_info, __entry->time_ext_busy = info->time_ext_busy; __entry->time_rx = info->time_rx; __entry->time_tx = info->time_tx; + __entry->time_scan = info->time_scan; __entry->filled = info->filled; __entry->noise = info->noise; ), TP_printk(WIPHY_PR_FMT ", returned: %d, " CHAN_PR_FMT ", channel time: %llu, channel time busy: %llu, " "channel time extension busy: %llu, channel time rx: %llu, " - "channel time tx: %llu, filled: %u, noise: %d", + "channel time tx: %llu, scan time: %llu, filled: %u, noise: %d", WIPHY_PR_ARG, __entry->ret, CHAN_PR_ARG, __entry->time, __entry->time_busy, __entry->time_ext_busy, __entry->time_rx, - __entry->time_tx, __entry->filled, __entry->noise) + __entry->time_tx, __entry->time_scan, + __entry->filled, __entry->noise) ); TRACE_EVENT(rdev_tdls_oper, -- cgit v1.2.3-70-g09d2 From cf5ead822d5db2d276616ccca91f00eb3b855db2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 14 Nov 2014 17:14:00 +0100 Subject: cfg80211: allow including station info in delete event When a station is removed, its statistics may be interesting to userspace, for example for further aggregation of statistics of all stations that ever connected to an AP. Introduce a new cfg80211_del_sta_sinfo() function (and make the cfg80211_del_sta() a static inline calling it) to allow passing a struct station_info along with this, and send the data in the nl80211 event message. 
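As a minimal usage sketch (not part of this patch): a caller that tracks per-station counters could pass them along when the station goes away. The helper name and the particular counters below are illustrative only, and the STATION_INFO_* fill flags are assumed to match struct station_info as it exists in this tree; callers fill in whatever fields they actually maintain.

    static void example_report_sta_removal(struct net_device *dev, const u8 *mac,
                                           u32 tx_pkts, u32 rx_pkts)
    {
            struct station_info sinfo = {};

            /* mark which fields are valid, then fill them in */
            sinfo.filled = STATION_INFO_TX_PACKETS | STATION_INFO_RX_PACKETS;
            sinfo.tx_packets = tx_pkts;
            sinfo.rx_packets = rx_pkts;

            /* emits the NL80211_CMD_DEL_STATION event including the statistics */
            cfg80211_del_sta_sinfo(dev, mac, &sinfo, GFP_KERNEL);
    }

Callers with nothing to report can keep using cfg80211_del_sta(), which simply forwards a NULL sinfo to the new function.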
Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 16 +++++++++++++++- net/wireless/nl80211.c | 38 ++++++++++++++++---------------------- 2 files changed, 31 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f94f0d486d13..42e3d74f1906 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4591,6 +4591,16 @@ void cfg80211_remain_on_channel_expired(struct wireless_dev *wdev, u64 cookie, void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); +/** + * cfg80211_del_sta_sinfo - notify userspace about deletion of a station + * @dev: the netdev + * @mac_addr: the station's address + * @sinfo: the station information/statistics + * @gfp: allocation flags + */ +void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo, gfp_t gfp); + /** * cfg80211_del_sta - notify userspace about deletion of a station * @@ -4598,7 +4608,11 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, * @mac_addr: the station's address * @gfp: allocation flags */ -void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp); +static inline void cfg80211_del_sta(struct net_device *dev, + const u8 *mac_addr, gfp_t gfp) +{ + cfg80211_del_sta_sinfo(dev, mac_addr, NULL, gfp); +} /** * cfg80211_conn_failed - connection request failed notification diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index f56309bd21bd..a75dc91976d3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3650,8 +3650,8 @@ static bool nl80211_put_signal(struct sk_buff *msg, u8 mask, s8 *signal, return true; } -static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, - int flags, +static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, + u32 seq, int flags, struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo) @@ -3659,7 +3659,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, void *hdr; struct nlattr *sinfoattr, *bss_param; - hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; @@ -3854,7 +3854,7 @@ static int nl80211_dump_station(struct sk_buff *skb, if (err) goto out_err; - if (nl80211_send_station(skb, + if (nl80211_send_station(skb, NL80211_CMD_NEW_STATION, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev->netdev, mac_addr, @@ -3901,7 +3901,8 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_station(msg, info->snd_portid, info->snd_seq, 0, + if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, + info->snd_portid, info->snd_seq, 0, rdev, dev, mac_addr, &sinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -11687,7 +11688,7 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, if (!msg) return; - if (nl80211_send_station(msg, 0, 0, 0, + if (nl80211_send_station(msg, NL80211_CMD_NEW_STATION, 0, 0, 0, rdev, dev, mac_addr, sinfo) < 0) { nlmsg_free(msg); return; @@ -11698,12 +11699,16 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, } EXPORT_SYMBOL(cfg80211_new_sta); -void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) +void cfg80211_del_sta_sinfo(struct net_device *dev, const u8 *mac_addr, + struct station_info *sinfo, gfp_t gfp) { struct wiphy *wiphy 
= dev->ieee80211_ptr->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy); struct sk_buff *msg; - void *hdr; + struct station_info empty_sinfo = {}; + + if (!sinfo) + sinfo = &empty_sinfo; trace_cfg80211_del_sta(dev, mac_addr); @@ -11711,27 +11716,16 @@ void cfg80211_del_sta(struct net_device *dev, const u8 *mac_addr, gfp_t gfp) if (!msg) return; - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_DEL_STATION); - if (!hdr) { + if (nl80211_send_station(msg, NL80211_CMD_DEL_STATION, 0, 0, 0, + rdev, dev, mac_addr, sinfo)) { nlmsg_free(msg); return; } - if (nla_put_u32(msg, NL80211_ATTR_IFINDEX, dev->ifindex) || - nla_put(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr)) - goto nla_put_failure; - - genlmsg_end(msg, hdr); - genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0, NL80211_MCGRP_MLME, gfp); - return; - - nla_put_failure: - genlmsg_cancel(msg, hdr); - nlmsg_free(msg); } -EXPORT_SYMBOL(cfg80211_del_sta); +EXPORT_SYMBOL(cfg80211_del_sta_sinfo); void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr, enum nl80211_connect_failed_reason reason, -- cgit v1.2.3-70-g09d2 From 2b9a7e1bac24df8ddb0713ad1e5807a7243bcab0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 17 Nov 2014 11:35:23 +0100 Subject: mac80211: allow drivers to provide most station statistics In many cases, drivers can filter things like beacons that will skew statistics reported by mac80211. To get correct statistics in these cases, call drivers to obtain statistics and let them override all values, filling values from mac80211 if the driver didn't provide them. Not all of them make sense for the driver to fill, so some are still always done by mac80211. Note that this doesn't currently allow a driver to say "I know this value is wrong, don't report it at all", or to sum it up with a mac80211 value (as could be useful for "dropped misc"), that can be added if it turns out to be needed. This also gets rid of the get_rssi() method as is can now be implemented using sta_statistics(). 
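As a rough sketch (not taken from any in-tree driver), an implementation of the new callback has the following shape; the mydrv_* names are invented, and a driver should only fill the values it genuinely knows better than mac80211:

	static void mydrv_sta_statistics(struct ieee80211_hw *hw,
					 struct ieee80211_vif *vif,
					 struct ieee80211_sta *sta,
					 struct station_info *sinfo)
	{
		s8 rssi;

		/* hypothetical firmware query for the average RSSI */
		if (mydrv_read_avg_rssi(hw->priv, sta, &rssi))
			return;	/* report nothing, mac80211 fills its own values */

		sinfo->signal = rssi;
		sinfo->filled |= STATION_INFO_SIGNAL;
	}

mac80211's sta_set_sinfo() then fills only the fields whose bits the driver did not already set in sinfo->filled, as the sta_info.c changes below show.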
Signed-off-by: Johannes Berg --- drivers/net/wireless/ti/wlcore/main.c | 20 ++++---- include/net/mac80211.h | 17 ++++-- net/mac80211/driver-ops.h | 30 +++++------ net/mac80211/sta_info.c | 97 +++++++++++++++++++++++------------ net/mac80211/trace.h | 33 +++--------- 5 files changed, 110 insertions(+), 87 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 2a99456b6b8f..8d11b0ca412c 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -5376,14 +5376,15 @@ static void wlcore_op_sta_rc_update(struct ieee80211_hw *hw, wlcore_hw_sta_rc_update(wl, wlvif, sta, changed); } -static int wlcore_op_get_rssi(struct ieee80211_hw *hw, - struct ieee80211_vif *vif, - struct ieee80211_sta *sta, - s8 *rssi_dbm) +static void wlcore_op_sta_statistics(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct station_info *sinfo) { struct wl1271 *wl = hw->priv; struct wl12xx_vif *wlvif = wl12xx_vif_to_data(vif); - int ret = 0; + s8 rssi_dbm; + int ret; wl1271_debug(DEBUG_MAC80211, "mac80211 get_rssi"); @@ -5396,17 +5397,18 @@ static int wlcore_op_get_rssi(struct ieee80211_hw *hw, if (ret < 0) goto out_sleep; - ret = wlcore_acx_average_rssi(wl, wlvif, rssi_dbm); + ret = wlcore_acx_average_rssi(wl, wlvif, &rssi_dbm); if (ret < 0) goto out_sleep; + sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->signal = rssi_dbm; + out_sleep: wl1271_ps_elp_sleep(wl); out: mutex_unlock(&wl->mutex); - - return ret; } static bool wl1271_tx_frames_pending(struct ieee80211_hw *hw) @@ -5606,7 +5608,7 @@ static const struct ieee80211_ops wl1271_ops = { .assign_vif_chanctx = wlcore_op_assign_vif_chanctx, .unassign_vif_chanctx = wlcore_op_unassign_vif_chanctx, .sta_rc_update = wlcore_op_sta_rc_update, - .get_rssi = wlcore_op_get_rssi, + .sta_statistics = wlcore_op_sta_statistics, CFG80211_TESTMODE_CMD(wl1271_tm_cmd) }; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 555a845ad51e..123f2308958a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2708,6 +2708,14 @@ enum ieee80211_reconfig_type { * is only used if the configured rate control algorithm actually uses * the new rate table API, and is therefore optional. Must be atomic. * + * @sta_statistics: Get statistics for this station. For example with beacon + * filtering, the statistics kept by mac80211 might not be accurate, so + * let the driver pre-fill the statistics. The driver can fill most of + * the values (indicating which by setting the filled bitmap), but not + * all of them make sense - see the source for which ones are possible. + * Statistics that the driver doesn't fill will be filled by mac80211. + * The callback can sleep. + * * @conf_tx: Configure TX queue parameters (EDCF (aifs, cw_min, cw_max), * bursting) for a hardware TX queue. * Returns a negative error code on failure. @@ -2868,9 +2876,6 @@ enum ieee80211_reconfig_type { * @get_et_strings: Ethtool API to get a set of strings to describe stats * and perhaps other supported types of ethtool data-sets. * - * @get_rssi: Get current signal strength in dBm, the function is optional - * and can sleep. - * * @mgd_prepare_tx: Prepare for transmitting a management frame for association * before associated. 
In multi-channel scenarios, a virtual interface is * bound to a channel before it is associated, but as it isn't associated @@ -3071,6 +3076,10 @@ struct ieee80211_ops { void (*sta_rate_tbl_update)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, struct ieee80211_sta *sta); + void (*sta_statistics)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + struct ieee80211_sta *sta, + struct station_info *sinfo); int (*conf_tx)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u16 ac, const struct ieee80211_tx_queue_params *params); @@ -3138,8 +3147,6 @@ struct ieee80211_ops { void (*get_et_strings)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u32 sset, u8 *data); - int (*get_rssi)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - struct ieee80211_sta *sta, s8 *rssi_dbm); void (*mgd_prepare_tx)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 2ebc9ead9695..fdeda17b8dd2 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -639,6 +639,21 @@ static inline void drv_sta_rate_tbl_update(struct ieee80211_local *local, trace_drv_return_void(local); } +static inline void drv_sta_statistics(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta, + struct station_info *sinfo) +{ + sdata = get_bss_sdata(sdata); + if (!check_sdata_in_driver(sdata)) + return; + + trace_drv_sta_statistics(local, sdata, sta); + if (local->ops->sta_statistics) + local->ops->sta_statistics(&local->hw, &sdata->vif, sta, sinfo); + trace_drv_return_void(local); +} + static inline int drv_conf_tx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, u16 ac, const struct ieee80211_tx_queue_params *params) @@ -966,21 +981,6 @@ drv_allow_buffered_frames(struct ieee80211_local *local, trace_drv_return_void(local); } -static inline int drv_get_rssi(struct ieee80211_local *local, - struct ieee80211_sub_if_data *sdata, - struct ieee80211_sta *sta, - s8 *rssi_dbm) -{ - int ret; - - might_sleep(); - - ret = local->ops->get_rssi(&local->hw, &sdata->vif, sta, rssi_dbm); - trace_drv_get_rssi(local, sta, *rssi_dbm, ret); - - return ret; -} - static inline void drv_mgd_prepare_tx(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 388ff0b2ad2b..967b42eae5c2 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1746,7 +1746,6 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) struct ieee80211_local *local = sdata->local; struct rate_control_ref *ref = NULL; struct timespec uptime; - u64 packets = 0; u32 thr = 0; int i, ac; @@ -1755,47 +1754,74 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->generation = sdata->local->sta_generation; - sinfo->filled = STATION_INFO_INACTIVE_TIME | - STATION_INFO_RX_BYTES64 | - STATION_INFO_TX_BYTES64 | - STATION_INFO_RX_PACKETS | - STATION_INFO_TX_PACKETS | - STATION_INFO_TX_RETRIES | - STATION_INFO_TX_FAILED | - STATION_INFO_TX_BITRATE | - STATION_INFO_RX_BITRATE | - STATION_INFO_RX_DROP_MISC | - STATION_INFO_BSS_PARAM | - STATION_INFO_CONNECTED_TIME | - STATION_INFO_STA_FLAGS | - STATION_INFO_BEACON_LOSS_COUNT; + drv_sta_statistics(local, sdata, &sta->sta, sinfo); + + sinfo->filled |= STATION_INFO_INACTIVE_TIME | + STATION_INFO_STA_FLAGS | + STATION_INFO_BSS_PARAM | + STATION_INFO_CONNECTED_TIME | + STATION_INFO_RX_DROP_MISC | + STATION_INFO_BEACON_LOSS_COUNT; ktime_get_ts(&uptime); 
sinfo->connected_time = uptime.tv_sec - sta->last_connected; - sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); - sinfo->tx_bytes = 0; - for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) { - sinfo->tx_bytes += sta->tx_bytes[ac]; - packets += sta->tx_packets[ac]; + + if (!(sinfo->filled & (STATION_INFO_TX_BYTES64 | + STATION_INFO_TX_BYTES))) { + sinfo->tx_bytes = 0; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + sinfo->tx_bytes += sta->tx_bytes[ac]; + sinfo->filled |= STATION_INFO_TX_BYTES64; + } + + if (!(sinfo->filled & STATION_INFO_TX_PACKETS)) { + sinfo->tx_packets = 0; + for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) + sinfo->tx_packets += sta->tx_packets[ac]; + sinfo->filled |= STATION_INFO_TX_PACKETS; + } + + if (!(sinfo->filled & (STATION_INFO_RX_BYTES64 | + STATION_INFO_RX_BYTES))) { + sinfo->rx_bytes = sta->rx_bytes; + sinfo->filled |= STATION_INFO_RX_BYTES64; + } + + if (!(sinfo->filled & STATION_INFO_RX_PACKETS)) { + sinfo->rx_packets = sta->rx_packets; + sinfo->filled |= STATION_INFO_RX_PACKETS; + } + + if (!(sinfo->filled & STATION_INFO_TX_RETRIES)) { + sinfo->tx_retries = sta->tx_retry_count; + sinfo->filled |= STATION_INFO_TX_RETRIES; + } + + if (!(sinfo->filled & STATION_INFO_TX_FAILED)) { + sinfo->tx_failed = sta->tx_retry_failed; + sinfo->filled |= STATION_INFO_TX_FAILED; } - sinfo->tx_packets = packets; - sinfo->rx_bytes = sta->rx_bytes; - sinfo->rx_packets = sta->rx_packets; - sinfo->tx_retries = sta->tx_retry_count; - sinfo->tx_failed = sta->tx_retry_failed; + sinfo->rx_dropped_misc = sta->rx_dropped; sinfo->beacon_loss_count = sta->beacon_loss_count; if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { - sinfo->filled |= STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; - if (!local->ops->get_rssi || - drv_get_rssi(local, sdata, &sta->sta, &sinfo->signal)) + if (!(sinfo->filled & STATION_INFO_SIGNAL)) { sinfo->signal = (s8)sta->last_signal; - sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); + sinfo->filled |= STATION_INFO_SIGNAL; + } + + if (!(sinfo->filled & STATION_INFO_SIGNAL_AVG)) { + sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); + sinfo->filled |= STATION_INFO_SIGNAL_AVG; + } } - if (sta->chains) { + + if (sta->chains && + !(sinfo->filled & (STATION_INFO_CHAIN_SIGNAL | + STATION_INFO_CHAIN_SIGNAL_AVG))) { sinfo->filled |= STATION_INFO_CHAIN_SIGNAL | STATION_INFO_CHAIN_SIGNAL_AVG; @@ -1807,8 +1833,15 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) } } - sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); - sta_set_rate_info_rx(sta, &sinfo->rxrate); + if (!(sinfo->filled & STATION_INFO_TX_BITRATE)) { + sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); + sinfo->filled |= STATION_INFO_TX_BITRATE; + } + + if (!(sinfo->filled & STATION_INFO_RX_BITRATE)) { + sta_set_rate_info_rx(sta, &sinfo->rxrate); + sinfo->filled |= STATION_INFO_RX_BITRATE; + } if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h index 8e461a02c6a8..263a9561eb26 100644 --- a/net/mac80211/trace.h +++ b/net/mac80211/trace.h @@ -825,6 +825,13 @@ DECLARE_EVENT_CLASS(sta_event, ) ); +DEFINE_EVENT(sta_event, drv_sta_statistics, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + struct ieee80211_sta *sta), + TP_ARGS(local, sdata, sta) +); + DEFINE_EVENT(sta_event, drv_sta_add, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, @@ -1329,32 +1336,6 @@ 
DEFINE_EVENT(release_evt, drv_allow_buffered_frames, TP_ARGS(local, sta, tids, num_frames, reason, more_data) ); -TRACE_EVENT(drv_get_rssi, - TP_PROTO(struct ieee80211_local *local, struct ieee80211_sta *sta, - s8 rssi, int ret), - - TP_ARGS(local, sta, rssi, ret), - - TP_STRUCT__entry( - LOCAL_ENTRY - STA_ENTRY - __field(s8, rssi) - __field(int, ret) - ), - - TP_fast_assign( - LOCAL_ASSIGN; - STA_ASSIGN; - __entry->rssi = rssi; - __entry->ret = ret; - ), - - TP_printk( - LOCAL_PR_FMT STA_PR_FMT " rssi:%d ret:%d", - LOCAL_PR_ARG, STA_PR_ARG, __entry->rssi, __entry->ret - ) -); - DEFINE_EVENT(local_sdata_evt, drv_mgd_prepare_tx, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), -- cgit v1.2.3-70-g09d2 From 319090bf6c75e3ad42a8c74973be5e78ae4f948f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 17 Nov 2014 14:08:11 +0100 Subject: cfg80211: remove enum station_info_flags This is really just duplicating the list of information that's already available in the nl80211 attribute, so remove the list. Two small changes are needed: * remove STATION_INFO_ASSOC_REQ_IES complete, but the length (assoc_req_ies_len) can be used instead * add NL80211_STA_INFO_RX_DROP_MISC which exists internally but not in nl80211 yet This gets rid of the duplicate maintenance of the two lists. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 14 +-- drivers/net/wireless/ath/ath6kl/main.c | 1 - drivers/net/wireless/ath/wil6210/cfg80211.c | 18 +-- drivers/net/wireless/ath/wil6210/wmi.c | 1 - drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c | 11 +- drivers/net/wireless/libertas/cfg.c | 12 +- drivers/net/wireless/mwifiex/cfg80211.c | 10 +- drivers/net/wireless/mwifiex/uap_event.c | 1 - drivers/net/wireless/rndis_wlan.c | 4 +- drivers/net/wireless/ti/wlcore/main.c | 2 +- drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c | 9 +- drivers/staging/wlan-ng/cfg80211.c | 4 +- include/net/cfg80211.h | 77 +----------- include/uapi/linux/nl80211.h | 3 + net/mac80211/ethtool.c | 6 +- net/mac80211/sta_info.c | 80 ++++++------- net/wireless/nl80211.c | 133 ++++++++------------- net/wireless/wext-compat.c | 10 +- 18 files changed, 142 insertions(+), 254 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 7a5337877a0c..44dd6ef923cd 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1799,20 +1799,20 @@ static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev, if (vif->target_stats.rx_byte) { sinfo->rx_bytes = vif->target_stats.rx_byte; - sinfo->filled |= STATION_INFO_RX_BYTES64; + sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64); sinfo->rx_packets = vif->target_stats.rx_pkt; - sinfo->filled |= STATION_INFO_RX_PACKETS; + sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS); } if (vif->target_stats.tx_byte) { sinfo->tx_bytes = vif->target_stats.tx_byte; - sinfo->filled |= STATION_INFO_TX_BYTES64; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64); sinfo->tx_packets = vif->target_stats.tx_pkt; - sinfo->filled |= STATION_INFO_TX_PACKETS; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS); } sinfo->signal = vif->target_stats.cs_rssi; - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); rate = vif->target_stats.tx_ucast_rate; @@ -1844,12 +1844,12 @@ static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev, return 0; } - sinfo->filled |= STATION_INFO_TX_BITRATE; + 
sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); if (test_bit(CONNECTED, &vif->flags) && test_bit(DTIM_PERIOD_AVAIL, &vif->flags) && vif->nw_type == INFRA_NETWORK) { - sinfo->filled |= STATION_INFO_BSS_PARAM; + sinfo->filled |= BIT(NL80211_STA_INFO_BSS_PARAM); sinfo->bss_param.flags = 0; sinfo->bss_param.dtim_period = vif->assoc_bss_dtim_period; sinfo->bss_param.beacon_interval = vif->assoc_bss_beacon_int; diff --git a/drivers/net/wireless/ath/ath6kl/main.c b/drivers/net/wireless/ath/ath6kl/main.c index 933aef025698..b42ba46b5030 100644 --- a/drivers/net/wireless/ath/ath6kl/main.c +++ b/drivers/net/wireless/ath/ath6kl/main.c @@ -488,7 +488,6 @@ void ath6kl_connect_ap_mode_sta(struct ath6kl_vif *vif, u16 aid, u8 *mac_addr, sinfo.assoc_req_ies = ies; sinfo.assoc_req_ies_len = ies_len; - sinfo.filled |= STATION_INFO_ASSOC_REQ_IES; cfg80211_new_sta(vif->ndev, mac_addr, &sinfo, GFP_KERNEL); diff --git a/drivers/net/wireless/ath/wil6210/cfg80211.c b/drivers/net/wireless/ath/wil6210/cfg80211.c index 38332a6dfb3a..e72a95d1ced6 100644 --- a/drivers/net/wireless/ath/wil6210/cfg80211.c +++ b/drivers/net/wireless/ath/wil6210/cfg80211.c @@ -142,14 +142,14 @@ int wil_cid_fill_sinfo(struct wil6210_priv *wil, int cid, sinfo->generation = wil->sinfo_gen; - sinfo->filled = STATION_INFO_RX_BYTES | - STATION_INFO_TX_BYTES | - STATION_INFO_RX_PACKETS | - STATION_INFO_TX_PACKETS | - STATION_INFO_RX_BITRATE | - STATION_INFO_TX_BITRATE | - STATION_INFO_RX_DROP_MISC | - STATION_INFO_TX_FAILED; + sinfo->filled = BIT(NL80211_STA_INFO_RX_BYTES) | + BIT(NL80211_STA_INFO_TX_BYTES) | + BIT(NL80211_STA_INFO_RX_PACKETS) | + BIT(NL80211_STA_INFO_TX_PACKETS) | + BIT(NL80211_STA_INFO_RX_BITRATE) | + BIT(NL80211_STA_INFO_TX_BITRATE) | + BIT(NL80211_STA_INFO_RX_DROP_MISC) | + BIT(NL80211_STA_INFO_TX_FAILED); sinfo->txrate.flags = RATE_INFO_FLAGS_MCS | RATE_INFO_FLAGS_60G; sinfo->txrate.mcs = le16_to_cpu(reply.evt.bf_mcs); @@ -163,7 +163,7 @@ int wil_cid_fill_sinfo(struct wil6210_priv *wil, int cid, sinfo->tx_failed = stats->tx_errors; if (test_bit(wil_status_fwconnected, &wil->status)) { - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); sinfo->signal = reply.evt.sqi; } diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c index 63476c86cd0e..899754920955 100644 --- a/drivers/net/wireless/ath/wil6210/wmi.c +++ b/drivers/net/wireless/ath/wil6210/wmi.c @@ -457,7 +457,6 @@ static void wmi_evt_connect(struct wil6210_priv *wil, int id, void *d, int len) if (assoc_req_ie) { sinfo.assoc_req_ies = assoc_req_ie; sinfo.assoc_req_ies_len = assoc_req_ielen; - sinfo.filled |= STATION_INFO_ASSOC_REQ_IES; } cfg80211_new_sta(ndev, evt->bssid, &sinfo, GFP_KERNEL); diff --git a/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c index 3aecc5f48719..4a88b2381a68 100644 --- a/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c +++ b/drivers/net/wireless/brcm80211/brcmfmac/cfg80211.c @@ -2333,10 +2333,10 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, brcmf_err("GET STA INFO failed, %d\n", err); goto done; } - sinfo->filled = STATION_INFO_INACTIVE_TIME; + sinfo->filled = BIT(NL80211_STA_INFO_INACTIVE_TIME); sinfo->inactive_time = le32_to_cpu(sta_info_le.idle) * 1000; if (le32_to_cpu(sta_info_le.flags) & BRCMF_STA_ASSOC) { - sinfo->filled |= STATION_INFO_CONNECTED_TIME; + sinfo->filled |= BIT(NL80211_STA_INFO_CONNECTED_TIME); sinfo->connected_time = le32_to_cpu(sta_info_le.in); } 
brcmf_dbg(TRACE, "STA idle time : %d ms, connected time :%d sec\n", @@ -2354,7 +2354,7 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, brcmf_err("Could not get rate (%d)\n", err); goto done; } else { - sinfo->filled |= STATION_INFO_TX_BITRATE; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); sinfo->txrate.legacy = rate * 5; brcmf_dbg(CONN, "Rate %d Mbps\n", rate / 2); } @@ -2369,7 +2369,7 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, goto done; } else { rssi = le32_to_cpu(scb_val.val); - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); sinfo->signal = rssi; brcmf_dbg(CONN, "RSSI %d dBm\n", rssi); } @@ -2396,7 +2396,7 @@ brcmf_cfg80211_get_station(struct wiphy *wiphy, struct net_device *ndev, brcmf_dbg(CONN, "DTIM peroid %d\n", dtim_period); } - sinfo->filled |= STATION_INFO_BSS_PARAM; + sinfo->filled |= BIT(NL80211_STA_INFO_BSS_PARAM); } } else err = -EPERM; @@ -4778,7 +4778,6 @@ brcmf_notify_connect_status_ap(struct brcmf_cfg80211_info *cfg, if (((event == BRCMF_E_ASSOC_IND) || (event == BRCMF_E_REASSOC_IND)) && (reason == BRCMF_E_STATUS_SUCCESS)) { memset(&sinfo, 0, sizeof(sinfo)); - sinfo.filled = STATION_INFO_ASSOC_REQ_IES; if (!data) { brcmf_err("No IEs present in ASSOC/REASSOC_IND"); return -EINVAL; diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c index 34f09ef90bb3..a92985a6ea21 100644 --- a/drivers/net/wireless/libertas/cfg.c +++ b/drivers/net/wireless/libertas/cfg.c @@ -1616,10 +1616,10 @@ static int lbs_cfg_get_station(struct wiphy *wiphy, struct net_device *dev, lbs_deb_enter(LBS_DEB_CFG80211); - sinfo->filled |= STATION_INFO_TX_BYTES | - STATION_INFO_TX_PACKETS | - STATION_INFO_RX_BYTES | - STATION_INFO_RX_PACKETS; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES) | + BIT(NL80211_STA_INFO_TX_PACKETS) | + BIT(NL80211_STA_INFO_RX_BYTES) | + BIT(NL80211_STA_INFO_RX_PACKETS); sinfo->tx_bytes = priv->dev->stats.tx_bytes; sinfo->tx_packets = priv->dev->stats.tx_packets; sinfo->rx_bytes = priv->dev->stats.rx_bytes; @@ -1629,14 +1629,14 @@ static int lbs_cfg_get_station(struct wiphy *wiphy, struct net_device *dev, ret = lbs_get_rssi(priv, &signal, &noise); if (ret == 0) { sinfo->signal = signal; - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); } /* Convert priv->cur_rate from hw_value to NL80211 value */ for (i = 0; i < ARRAY_SIZE(lbs_rates); i++) { if (priv->cur_rate == lbs_rates[i].hw_value) { sinfo->txrate.legacy = lbs_rates[i].bitrate; - sinfo->filled |= STATION_INFO_TX_BITRATE; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); break; } } diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 8bd446b69658..71312ff52703 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -910,10 +910,10 @@ mwifiex_dump_station_info(struct mwifiex_private *priv, { u32 rate; - sinfo->filled = STATION_INFO_RX_BYTES | STATION_INFO_TX_BYTES | - STATION_INFO_RX_PACKETS | STATION_INFO_TX_PACKETS | - STATION_INFO_TX_BITRATE | - STATION_INFO_SIGNAL | STATION_INFO_SIGNAL_AVG; + sinfo->filled = BIT(NL80211_STA_INFO_RX_BYTES) | BIT(NL80211_STA_INFO_TX_BYTES) | + BIT(NL80211_STA_INFO_RX_PACKETS) | BIT(NL80211_STA_INFO_TX_PACKETS) | + BIT(NL80211_STA_INFO_TX_BITRATE) | + BIT(NL80211_STA_INFO_SIGNAL) | BIT(NL80211_STA_INFO_SIGNAL_AVG); /* Get signal information from the firmware */ if (mwifiex_send_cmd(priv, HostCmd_CMD_RSSI_INFO, @@ -944,7 
+944,7 @@ mwifiex_dump_station_info(struct mwifiex_private *priv, sinfo->txrate.legacy = rate * 5; if (priv->bss_mode == NL80211_IFTYPE_STATION) { - sinfo->filled |= STATION_INFO_BSS_PARAM; + sinfo->filled |= BIT(NL80211_STA_INFO_BSS_PARAM); sinfo->bss_param.flags = 0; if (priv->curr_bss_params.bss_descriptor.cap_info_bitmap & WLAN_CAPABILITY_SHORT_PREAMBLE) diff --git a/drivers/net/wireless/mwifiex/uap_event.c b/drivers/net/wireless/mwifiex/uap_event.c index c54a537e31fb..3b3a970e2086 100644 --- a/drivers/net/wireless/mwifiex/uap_event.c +++ b/drivers/net/wireless/mwifiex/uap_event.c @@ -68,7 +68,6 @@ int mwifiex_process_uap_event(struct mwifiex_private *priv) len = ETH_ALEN; if (len != -1) { - sinfo.filled = STATION_INFO_ASSOC_REQ_IES; sinfo.assoc_req_ies = &event->data[len]; len = (u8 *)sinfo.assoc_req_ies - (u8 *)&event->frame_control; diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 1a4facd1fbf3..60d44ce9c017 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -2478,7 +2478,7 @@ static void rndis_fill_station_info(struct usbnet *usbdev, ret = rndis_query_oid(usbdev, RNDIS_OID_GEN_LINK_SPEED, &linkspeed, &len); if (ret == 0) { sinfo->txrate.legacy = le32_to_cpu(linkspeed) / 1000; - sinfo->filled |= STATION_INFO_TX_BITRATE; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); } len = sizeof(rssi); @@ -2486,7 +2486,7 @@ static void rndis_fill_station_info(struct usbnet *usbdev, &rssi, &len); if (ret == 0) { sinfo->signal = level_to_qual(le32_to_cpu(rssi)); - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); } } diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c index 8d11b0ca412c..a2133b1fd631 100644 --- a/drivers/net/wireless/ti/wlcore/main.c +++ b/drivers/net/wireless/ti/wlcore/main.c @@ -5401,7 +5401,7 @@ static void wlcore_op_sta_statistics(struct ieee80211_hw *hw, if (ret < 0) goto out_sleep; - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); sinfo->signal = rssi_dbm; out_sleep: diff --git a/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c index 3d26955da724..c76874d72a22 100644 --- a/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c +++ b/drivers/staging/rtl8723au/os_dep/ioctl_cfg80211.c @@ -1092,17 +1092,17 @@ static int cfg80211_rtw_get_station(struct wiphy *wiphy, goto exit; } - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); sinfo->signal = translate_percentage_to_dbm(padapter->recvpriv. 
signal_strength); - sinfo->filled |= STATION_INFO_TX_BITRATE; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); sinfo->txrate.legacy = rtw_get_cur_max_rate(padapter); - sinfo->filled |= STATION_INFO_RX_PACKETS; + sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS); sinfo->rx_packets = sta_rx_data_pkts(psta); - sinfo->filled |= STATION_INFO_TX_PACKETS; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS); sinfo->tx_packets = psta->sta_stats.tx_pkts; } @@ -2365,7 +2365,6 @@ void rtw_cfg80211_indicate_sta_assoc(struct rtw_adapter *padapter, u.reassoc_req.variable); sinfo.filled = 0; - sinfo.filled = STATION_INFO_ASSOC_REQ_IES; sinfo.assoc_req_ies = pmgmt_frame + ie_offset; sinfo.assoc_req_ies_len = frame_len - ie_offset; cfg80211_new_sta(ndev, hdr->addr2, &sinfo, GFP_ATOMIC); diff --git a/drivers/staging/wlan-ng/cfg80211.c b/drivers/staging/wlan-ng/cfg80211.c index 8942dcb44180..7c87aecf4744 100644 --- a/drivers/staging/wlan-ng/cfg80211.c +++ b/drivers/staging/wlan-ng/cfg80211.c @@ -325,9 +325,9 @@ static int prism2_get_station(struct wiphy *wiphy, struct net_device *dev, if (result == 0) { sinfo->txrate.legacy = quality.txrate.data; - sinfo->filled |= STATION_INFO_TX_BITRATE; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); sinfo->signal = quality.level.data; - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); } return result; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 42e3d74f1906..91c133626c32 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -865,75 +865,6 @@ int cfg80211_check_station_change(struct wiphy *wiphy, struct station_parameters *params, enum cfg80211_station_type statype); -/** - * enum station_info_flags - station information flags - * - * Used by the driver to indicate which info in &struct station_info - * it has filled in during get_station() or dump_station(). 
- * - * @STATION_INFO_INACTIVE_TIME: @inactive_time filled - * @STATION_INFO_RX_BYTES: @rx_bytes filled - * @STATION_INFO_TX_BYTES: @tx_bytes filled - * @STATION_INFO_RX_BYTES64: @rx_bytes filled with 64-bit value - * @STATION_INFO_TX_BYTES64: @tx_bytes filled with 64-bit value - * @STATION_INFO_LLID: @llid filled - * @STATION_INFO_PLID: @plid filled - * @STATION_INFO_PLINK_STATE: @plink_state filled - * @STATION_INFO_SIGNAL: @signal filled - * @STATION_INFO_TX_BITRATE: @txrate fields are filled - * (tx_bitrate, tx_bitrate_flags and tx_bitrate_mcs) - * @STATION_INFO_RX_PACKETS: @rx_packets filled with 32-bit value - * @STATION_INFO_TX_PACKETS: @tx_packets filled with 32-bit value - * @STATION_INFO_TX_RETRIES: @tx_retries filled - * @STATION_INFO_TX_FAILED: @tx_failed filled - * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled - * @STATION_INFO_SIGNAL_AVG: @signal_avg filled - * @STATION_INFO_RX_BITRATE: @rxrate fields are filled - * @STATION_INFO_BSS_PARAM: @bss_param filled - * @STATION_INFO_CONNECTED_TIME: @connected_time filled - * @STATION_INFO_ASSOC_REQ_IES: @assoc_req_ies filled - * @STATION_INFO_STA_FLAGS: @sta_flags filled - * @STATION_INFO_BEACON_LOSS_COUNT: @beacon_loss_count filled - * @STATION_INFO_T_OFFSET: @t_offset filled - * @STATION_INFO_LOCAL_PM: @local_pm filled - * @STATION_INFO_PEER_PM: @peer_pm filled - * @STATION_INFO_NONPEER_PM: @nonpeer_pm filled - * @STATION_INFO_CHAIN_SIGNAL: @chain_signal filled - * @STATION_INFO_CHAIN_SIGNAL_AVG: @chain_signal_avg filled - * @STATION_INFO_EXPECTED_THROUGHPUT: @expected_throughput filled - */ -enum station_info_flags { - STATION_INFO_INACTIVE_TIME = BIT(0), - STATION_INFO_RX_BYTES = BIT(1), - STATION_INFO_TX_BYTES = BIT(2), - STATION_INFO_LLID = BIT(3), - STATION_INFO_PLID = BIT(4), - STATION_INFO_PLINK_STATE = BIT(5), - STATION_INFO_SIGNAL = BIT(6), - STATION_INFO_TX_BITRATE = BIT(7), - STATION_INFO_RX_PACKETS = BIT(8), - STATION_INFO_TX_PACKETS = BIT(9), - STATION_INFO_TX_RETRIES = BIT(10), - STATION_INFO_TX_FAILED = BIT(11), - STATION_INFO_RX_DROP_MISC = BIT(12), - STATION_INFO_SIGNAL_AVG = BIT(13), - STATION_INFO_RX_BITRATE = BIT(14), - STATION_INFO_BSS_PARAM = BIT(15), - STATION_INFO_CONNECTED_TIME = BIT(16), - STATION_INFO_ASSOC_REQ_IES = BIT(17), - STATION_INFO_STA_FLAGS = BIT(18), - STATION_INFO_BEACON_LOSS_COUNT = BIT(19), - STATION_INFO_T_OFFSET = BIT(20), - STATION_INFO_LOCAL_PM = BIT(21), - STATION_INFO_PEER_PM = BIT(22), - STATION_INFO_NONPEER_PM = BIT(23), - STATION_INFO_RX_BYTES64 = BIT(24), - STATION_INFO_TX_BYTES64 = BIT(25), - STATION_INFO_CHAIN_SIGNAL = BIT(26), - STATION_INFO_CHAIN_SIGNAL_AVG = BIT(27), - STATION_INFO_EXPECTED_THROUGHPUT = BIT(28), -}; - /** * enum station_info_rate_flags - bitrate info flags * @@ -1015,7 +946,8 @@ struct sta_bss_parameters { * * Station information filled by driver for get_station() and dump_station. * - * @filled: bitflag of flags from &enum station_info_flags + * @filled: bitflag of flags using the bits of &enum nl80211_sta_info to + * indicate the relevant values in this struct for them * @connected_time: time(in secs) since a station is last connected * @inactive_time: time since last station activity (tx/rx) in milliseconds * @rx_bytes: bytes received from this station @@ -1094,11 +1026,6 @@ struct station_info { enum nl80211_mesh_power_mode nonpeer_pm; u32 expected_throughput; - - /* - * Note: Add a new enum station_info_flags value for each new field and - * use it to check which fields are initialized. 
- */ }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 2f549a253138..e48ca0bbd07b 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2373,6 +2373,8 @@ enum nl80211_sta_bss_param { * Same format as NL80211_STA_INFO_CHAIN_SIGNAL. * @NL80211_STA_EXPECTED_THROUGHPUT: expected throughput considering also the * 802.11 header (u32, kbps) + * @NL80211_STA_INFO_RX_DROP_MISC: RX packets dropped for unspecified reasons + * (u64) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -2405,6 +2407,7 @@ enum nl80211_sta_info { NL80211_STA_INFO_CHAIN_SIGNAL, NL80211_STA_INFO_CHAIN_SIGNAL_AVG, NL80211_STA_INFO_EXPECTED_THROUGHPUT, + NL80211_STA_INFO_RX_DROP_MISC, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/net/mac80211/ethtool.c b/net/mac80211/ethtool.c index eea742710c0a..52bcea6ad9e8 100644 --- a/net/mac80211/ethtool.c +++ b/net/mac80211/ethtool.c @@ -117,16 +117,16 @@ static void ieee80211_get_stats(struct net_device *dev, data[i++] = sta->sta_state; - if (sinfo.filled & STATION_INFO_TX_BITRATE) + if (sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE)) data[i] = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); i++; - if (sinfo.filled & STATION_INFO_RX_BITRATE) + if (sinfo.filled & BIT(NL80211_STA_INFO_RX_BITRATE)) data[i] = 100000 * cfg80211_calculate_bitrate(&sinfo.rxrate); i++; - if (sinfo.filled & STATION_INFO_SIGNAL_AVG) + if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL_AVG)) data[i] = (u8)sinfo.signal_avg; i++; } else { diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 967b42eae5c2..64b53b943d98 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -1756,51 +1756,51 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) drv_sta_statistics(local, sdata, &sta->sta, sinfo); - sinfo->filled |= STATION_INFO_INACTIVE_TIME | - STATION_INFO_STA_FLAGS | - STATION_INFO_BSS_PARAM | - STATION_INFO_CONNECTED_TIME | - STATION_INFO_RX_DROP_MISC | - STATION_INFO_BEACON_LOSS_COUNT; + sinfo->filled |= BIT(NL80211_STA_INFO_INACTIVE_TIME) | + BIT(NL80211_STA_INFO_STA_FLAGS) | + BIT(NL80211_STA_INFO_BSS_PARAM) | + BIT(NL80211_STA_INFO_CONNECTED_TIME) | + BIT(NL80211_STA_INFO_RX_DROP_MISC) | + BIT(NL80211_STA_INFO_BEACON_LOSS); ktime_get_ts(&uptime); sinfo->connected_time = uptime.tv_sec - sta->last_connected; sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); - if (!(sinfo->filled & (STATION_INFO_TX_BYTES64 | - STATION_INFO_TX_BYTES))) { + if (!(sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES64) | + BIT(NL80211_STA_INFO_TX_BYTES)))) { sinfo->tx_bytes = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->tx_bytes += sta->tx_bytes[ac]; - sinfo->filled |= STATION_INFO_TX_BYTES64; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BYTES64); } - if (!(sinfo->filled & STATION_INFO_TX_PACKETS)) { + if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_PACKETS))) { sinfo->tx_packets = 0; for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) sinfo->tx_packets += sta->tx_packets[ac]; - sinfo->filled |= STATION_INFO_TX_PACKETS; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_PACKETS); } - if (!(sinfo->filled & (STATION_INFO_RX_BYTES64 | - STATION_INFO_RX_BYTES))) { + if (!(sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES64) | + BIT(NL80211_STA_INFO_RX_BYTES)))) { sinfo->rx_bytes = sta->rx_bytes; - sinfo->filled |= STATION_INFO_RX_BYTES64; + sinfo->filled |= BIT(NL80211_STA_INFO_RX_BYTES64); } - if (!(sinfo->filled & STATION_INFO_RX_PACKETS)) { + if 
(!(sinfo->filled & BIT(NL80211_STA_INFO_RX_PACKETS))) { sinfo->rx_packets = sta->rx_packets; - sinfo->filled |= STATION_INFO_RX_PACKETS; + sinfo->filled |= BIT(NL80211_STA_INFO_RX_PACKETS); } - if (!(sinfo->filled & STATION_INFO_TX_RETRIES)) { + if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_RETRIES))) { sinfo->tx_retries = sta->tx_retry_count; - sinfo->filled |= STATION_INFO_TX_RETRIES; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_RETRIES); } - if (!(sinfo->filled & STATION_INFO_TX_FAILED)) { + if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_FAILED))) { sinfo->tx_failed = sta->tx_retry_failed; - sinfo->filled |= STATION_INFO_TX_FAILED; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_FAILED); } sinfo->rx_dropped_misc = sta->rx_dropped; @@ -1808,22 +1808,22 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { - if (!(sinfo->filled & STATION_INFO_SIGNAL)) { + if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL))) { sinfo->signal = (s8)sta->last_signal; - sinfo->filled |= STATION_INFO_SIGNAL; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL); } - if (!(sinfo->filled & STATION_INFO_SIGNAL_AVG)) { + if (!(sinfo->filled & BIT(NL80211_STA_INFO_SIGNAL_AVG))) { sinfo->signal_avg = (s8) -ewma_read(&sta->avg_signal); - sinfo->filled |= STATION_INFO_SIGNAL_AVG; + sinfo->filled |= BIT(NL80211_STA_INFO_SIGNAL_AVG); } } if (sta->chains && - !(sinfo->filled & (STATION_INFO_CHAIN_SIGNAL | - STATION_INFO_CHAIN_SIGNAL_AVG))) { - sinfo->filled |= STATION_INFO_CHAIN_SIGNAL | - STATION_INFO_CHAIN_SIGNAL_AVG; + !(sinfo->filled & (BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | + BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)))) { + sinfo->filled |= BIT(NL80211_STA_INFO_CHAIN_SIGNAL) | + BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG); sinfo->chains = sta->chains; for (i = 0; i < ARRAY_SIZE(sinfo->chain_signal); i++) { @@ -1833,30 +1833,30 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) } } - if (!(sinfo->filled & STATION_INFO_TX_BITRATE)) { + if (!(sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE))) { sta_set_rate_info_tx(sta, &sta->last_tx_rate, &sinfo->txrate); - sinfo->filled |= STATION_INFO_TX_BITRATE; + sinfo->filled |= BIT(NL80211_STA_INFO_TX_BITRATE); } - if (!(sinfo->filled & STATION_INFO_RX_BITRATE)) { + if (!(sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE))) { sta_set_rate_info_rx(sta, &sinfo->rxrate); - sinfo->filled |= STATION_INFO_RX_BITRATE; + sinfo->filled |= BIT(NL80211_STA_INFO_RX_BITRATE); } if (ieee80211_vif_is_mesh(&sdata->vif)) { #ifdef CONFIG_MAC80211_MESH - sinfo->filled |= STATION_INFO_LLID | - STATION_INFO_PLID | - STATION_INFO_PLINK_STATE | - STATION_INFO_LOCAL_PM | - STATION_INFO_PEER_PM | - STATION_INFO_NONPEER_PM; + sinfo->filled |= BIT(NL80211_STA_INFO_LLID) | + BIT(NL80211_STA_INFO_PLID) | + BIT(NL80211_STA_INFO_PLINK_STATE) | + BIT(NL80211_STA_INFO_LOCAL_PM) | + BIT(NL80211_STA_INFO_PEER_PM) | + BIT(NL80211_STA_INFO_NONPEER_PM); sinfo->llid = sta->llid; sinfo->plid = sta->plid; sinfo->plink_state = sta->plink_state; if (test_sta_flag(sta, WLAN_STA_TOFFSET_KNOWN)) { - sinfo->filled |= STATION_INFO_T_OFFSET; + sinfo->filled |= BIT(NL80211_STA_INFO_T_OFFSET); sinfo->t_offset = sta->t_offset; } sinfo->local_pm = sta->local_pm; @@ -1905,7 +1905,7 @@ void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) thr = drv_get_expected_throughput(local, &sta->sta); if (thr != 0) { - sinfo->filled |= STATION_INFO_EXPECTED_THROUGHPUT; + sinfo->filled |= 
BIT(NL80211_STA_INFO_EXPECTED_THROUGHPUT); sinfo->expected_throughput = thr; } } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a75dc91976d3..68faf8a2aa43 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3671,115 +3671,77 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, sinfoattr = nla_nest_start(msg, NL80211_ATTR_STA_INFO); if (!sinfoattr) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_CONNECTED_TIME) && - nla_put_u32(msg, NL80211_STA_INFO_CONNECTED_TIME, - sinfo->connected_time)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_INACTIVE_TIME) && - nla_put_u32(msg, NL80211_STA_INFO_INACTIVE_TIME, - sinfo->inactive_time)) - goto nla_put_failure; - if ((sinfo->filled & (STATION_INFO_RX_BYTES | - STATION_INFO_RX_BYTES64)) && + +#define PUT_SINFO(attr, memb, type) do { \ + if (sinfo->filled & BIT(NL80211_STA_INFO_ ## attr) && \ + nla_put_ ## type(msg, NL80211_STA_INFO_ ## attr, \ + sinfo->memb)) \ + goto nla_put_failure; \ + } while (0) + + PUT_SINFO(CONNECTED_TIME, connected_time, u32); + PUT_SINFO(INACTIVE_TIME, inactive_time, u32); + + if (sinfo->filled & (BIT(NL80211_STA_INFO_RX_BYTES) | + BIT(NL80211_STA_INFO_RX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_RX_BYTES, (u32)sinfo->rx_bytes)) goto nla_put_failure; - if ((sinfo->filled & (STATION_INFO_TX_BYTES | - STATION_INFO_TX_BYTES64)) && + + if (sinfo->filled & (BIT(NL80211_STA_INFO_TX_BYTES) | + BIT(NL80211_STA_INFO_TX_BYTES64)) && nla_put_u32(msg, NL80211_STA_INFO_TX_BYTES, (u32)sinfo->tx_bytes)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_RX_BYTES64) && - nla_put_u64(msg, NL80211_STA_INFO_RX_BYTES64, - sinfo->rx_bytes)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_BYTES64) && - nla_put_u64(msg, NL80211_STA_INFO_TX_BYTES64, - sinfo->tx_bytes)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_LLID) && - nla_put_u16(msg, NL80211_STA_INFO_LLID, sinfo->llid)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_PLID) && - nla_put_u16(msg, NL80211_STA_INFO_PLID, sinfo->plid)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_PLINK_STATE) && - nla_put_u8(msg, NL80211_STA_INFO_PLINK_STATE, - sinfo->plink_state)) - goto nla_put_failure; + + PUT_SINFO(RX_BYTES64, rx_bytes, u64); + PUT_SINFO(TX_BYTES64, tx_bytes, u64); + PUT_SINFO(LLID, llid, u16); + PUT_SINFO(PLID, plid, u16); + PUT_SINFO(PLINK_STATE, plink_state, u8); + switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: - if ((sinfo->filled & STATION_INFO_SIGNAL) && - nla_put_u8(msg, NL80211_STA_INFO_SIGNAL, - sinfo->signal)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_SIGNAL_AVG) && - nla_put_u8(msg, NL80211_STA_INFO_SIGNAL_AVG, - sinfo->signal_avg)) - goto nla_put_failure; + PUT_SINFO(SIGNAL, signal, u8); + PUT_SINFO(SIGNAL_AVG, signal_avg, u8); break; default: break; } - if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL) { + if (sinfo->filled & BIT(NL80211_STA_INFO_CHAIN_SIGNAL)) { if (!nl80211_put_signal(msg, sinfo->chains, sinfo->chain_signal, NL80211_STA_INFO_CHAIN_SIGNAL)) goto nla_put_failure; } - if (sinfo->filled & STATION_INFO_CHAIN_SIGNAL_AVG) { + if (sinfo->filled & BIT(NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) { if (!nl80211_put_signal(msg, sinfo->chains, sinfo->chain_signal_avg, NL80211_STA_INFO_CHAIN_SIGNAL_AVG)) goto nla_put_failure; } - if (sinfo->filled & STATION_INFO_TX_BITRATE) { + if (sinfo->filled & BIT(NL80211_STA_INFO_TX_BITRATE)) { if (!nl80211_put_sta_rate(msg, &sinfo->txrate, 
NL80211_STA_INFO_TX_BITRATE)) goto nla_put_failure; } - if (sinfo->filled & STATION_INFO_RX_BITRATE) { + if (sinfo->filled & BIT(NL80211_STA_INFO_RX_BITRATE)) { if (!nl80211_put_sta_rate(msg, &sinfo->rxrate, NL80211_STA_INFO_RX_BITRATE)) goto nla_put_failure; } - if ((sinfo->filled & STATION_INFO_RX_PACKETS) && - nla_put_u32(msg, NL80211_STA_INFO_RX_PACKETS, - sinfo->rx_packets)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_PACKETS) && - nla_put_u32(msg, NL80211_STA_INFO_TX_PACKETS, - sinfo->tx_packets)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_RETRIES) && - nla_put_u32(msg, NL80211_STA_INFO_TX_RETRIES, - sinfo->tx_retries)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_TX_FAILED) && - nla_put_u32(msg, NL80211_STA_INFO_TX_FAILED, - sinfo->tx_failed)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_EXPECTED_THROUGHPUT) && - nla_put_u32(msg, NL80211_STA_INFO_EXPECTED_THROUGHPUT, - sinfo->expected_throughput)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_BEACON_LOSS_COUNT) && - nla_put_u32(msg, NL80211_STA_INFO_BEACON_LOSS, - sinfo->beacon_loss_count)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_LOCAL_PM) && - nla_put_u32(msg, NL80211_STA_INFO_LOCAL_PM, - sinfo->local_pm)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_PEER_PM) && - nla_put_u32(msg, NL80211_STA_INFO_PEER_PM, - sinfo->peer_pm)) - goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_NONPEER_PM) && - nla_put_u32(msg, NL80211_STA_INFO_NONPEER_PM, - sinfo->nonpeer_pm)) - goto nla_put_failure; - if (sinfo->filled & STATION_INFO_BSS_PARAM) { + + PUT_SINFO(RX_PACKETS, rx_packets, u32); + PUT_SINFO(TX_PACKETS, tx_packets, u32); + PUT_SINFO(TX_RETRIES, tx_retries, u32); + PUT_SINFO(TX_FAILED, tx_failed, u32); + PUT_SINFO(EXPECTED_THROUGHPUT, expected_throughput, u32); + PUT_SINFO(BEACON_LOSS, beacon_loss_count, u32); + PUT_SINFO(LOCAL_PM, local_pm, u32); + PUT_SINFO(PEER_PM, peer_pm, u32); + PUT_SINFO(NONPEER_PM, nonpeer_pm, u32); + + if (sinfo->filled & BIT(NL80211_STA_INFO_BSS_PARAM)) { bss_param = nla_nest_start(msg, NL80211_STA_INFO_BSS_PARAM); if (!bss_param) goto nla_put_failure; @@ -3798,18 +3760,19 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, nla_nest_end(msg, bss_param); } - if ((sinfo->filled & STATION_INFO_STA_FLAGS) && + if ((sinfo->filled & BIT(NL80211_STA_INFO_STA_FLAGS)) && nla_put(msg, NL80211_STA_INFO_STA_FLAGS, sizeof(struct nl80211_sta_flag_update), &sinfo->sta_flags)) goto nla_put_failure; - if ((sinfo->filled & STATION_INFO_T_OFFSET) && - nla_put_u64(msg, NL80211_STA_INFO_T_OFFSET, - sinfo->t_offset)) - goto nla_put_failure; + + PUT_SINFO(T_OFFSET, t_offset, u64); + PUT_SINFO(RX_DROP_MISC, rx_dropped_misc, u64); + +#undef PUT_SINFO nla_nest_end(msg, sinfoattr); - if ((sinfo->filled & STATION_INFO_ASSOC_REQ_IES) && + if (sinfo->assoc_req_ies_len && nla_put(msg, NL80211_ATTR_IE, sinfo->assoc_req_ies_len, sinfo->assoc_req_ies)) goto nla_put_failure; diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 0f47948c572f..5b24d39d7903 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1300,7 +1300,7 @@ static int cfg80211_wext_giwrate(struct net_device *dev, if (err) return err; - if (!(sinfo.filled & STATION_INFO_TX_BITRATE)) + if (!(sinfo.filled & BIT(NL80211_STA_INFO_TX_BITRATE))) return -EOPNOTSUPP; rate->value = 100000 * cfg80211_calculate_bitrate(&sinfo.txrate); @@ -1340,7 +1340,7 @@ static struct iw_statistics 
*cfg80211_wireless_stats(struct net_device *dev) switch (rdev->wiphy.signal_type) { case CFG80211_SIGNAL_TYPE_MBM: - if (sinfo.filled & STATION_INFO_SIGNAL) { + if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL)) { int sig = sinfo.signal; wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; wstats.qual.updated |= IW_QUAL_QUAL_UPDATED; @@ -1354,7 +1354,7 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) break; } case CFG80211_SIGNAL_TYPE_UNSPEC: - if (sinfo.filled & STATION_INFO_SIGNAL) { + if (sinfo.filled & BIT(NL80211_STA_INFO_SIGNAL)) { wstats.qual.updated |= IW_QUAL_LEVEL_UPDATED; wstats.qual.updated |= IW_QUAL_QUAL_UPDATED; wstats.qual.level = sinfo.signal; @@ -1367,9 +1367,9 @@ static struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) } wstats.qual.updated |= IW_QUAL_NOISE_INVALID; - if (sinfo.filled & STATION_INFO_RX_DROP_MISC) + if (sinfo.filled & BIT(NL80211_STA_INFO_RX_DROP_MISC)) wstats.discard.misc = sinfo.rx_dropped_misc; - if (sinfo.filled & STATION_INFO_TX_FAILED) + if (sinfo.filled & BIT(NL80211_STA_INFO_TX_FAILED)) wstats.discard.retries = sinfo.tx_failed; return &wstats; -- cgit v1.2.3-70-g09d2 From a76b1942a10293a94edf3c93c23a6231b63532f5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 17 Nov 2014 14:12:22 +0100 Subject: cfg80211: add nl80211 beacon-only statistics Add these two values: * BEACON_RX: number of beacons received from this peer * BEACON_SIGNAL_AVG: signal strength average for beacons only These can then be used for Android Lollipop's statistics request. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 6 ++++++ include/uapi/linux/nl80211.h | 5 +++++ net/wireless/nl80211.c | 2 ++ 3 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 91c133626c32..ef26ce16b058 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -987,6 +987,9 @@ struct sta_bss_parameters { * @nonpeer_pm: non-peer mesh STA power save mode * @expected_throughput: expected throughput in kbps (including 802.11 headers) * towards this station. 
+ * @rx_beacon: number of beacons received from this peer + * @rx_beacon_signal_avg: signal strength average (in dBm) for beacons received + * from this peer */ struct station_info { u32 filled; @@ -1026,6 +1029,9 @@ struct station_info { enum nl80211_mesh_power_mode nonpeer_pm; u32 expected_throughput; + + u64 rx_beacon; + u8 rx_beacon_signal_avg; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index e48ca0bbd07b..0c3d341c0aeb 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2375,6 +2375,9 @@ enum nl80211_sta_bss_param { * 802.11 header (u32, kbps) * @NL80211_STA_INFO_RX_DROP_MISC: RX packets dropped for unspecified reasons * (u64) + * @NL80211_STA_INFO_BEACON_RX: number of beacons received from this peer (u64) + * @NL80211_STA_INFO_BEACON_SIGNAL_AVG: signal strength average + * for beacons only (u8, dBm) * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -2408,6 +2411,8 @@ enum nl80211_sta_info { NL80211_STA_INFO_CHAIN_SIGNAL_AVG, NL80211_STA_INFO_EXPECTED_THROUGHPUT, NL80211_STA_INFO_RX_DROP_MISC, + NL80211_STA_INFO_BEACON_RX, + NL80211_STA_INFO_BEACON_SIGNAL_AVG, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 68faf8a2aa43..42b968a1f994 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3768,6 +3768,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO(T_OFFSET, t_offset, u64); PUT_SINFO(RX_DROP_MISC, rx_dropped_misc, u64); + PUT_SINFO(BEACON_RX, rx_beacon, u64); + PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8); #undef PUT_SINFO nla_nest_end(msg, sinfoattr); -- cgit v1.2.3-70-g09d2 From 8d791361a4698ca6f01c361a47b39b30d26bf66c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 21 Nov 2014 12:40:05 +0100 Subject: nl80211: clarify packet statistics descriptions The current statistics we keep aren't very clear, some are on MPDUs and some on MSDUs/MMPDUs. Clarify the descriptions based on the counters mac80211 keeps. 
Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 12 ++++++------ include/uapi/linux/nl80211.h | 24 +++++++++++++++--------- 2 files changed, 21 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ef26ce16b058..95420fb61600 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -950,8 +950,8 @@ struct sta_bss_parameters { * indicate the relevant values in this struct for them * @connected_time: time(in secs) since a station is last connected * @inactive_time: time since last station activity (tx/rx) in milliseconds - * @rx_bytes: bytes received from this station - * @tx_bytes: bytes transmitted to this station + * @rx_bytes: bytes (size of MPDUs) received from this station + * @tx_bytes: bytes (size of MPDUs) transmitted to this station * @llid: mesh local link id * @plid: mesh peer link id * @plink_state: mesh peer link state @@ -964,10 +964,10 @@ struct sta_bss_parameters { * @chain_signal_avg: per-chain signal strength average in dBm * @txrate: current unicast bitrate from this station * @rxrate: current unicast bitrate to this station - * @rx_packets: packets received from this station - * @tx_packets: packets transmitted to this station - * @tx_retries: cumulative retry counts - * @tx_failed: number of failed transmissions (retries exceeded, no ACK) + * @rx_packets: packets (MSDUs & MMPDUs) received from this station + * @tx_packets: packets (MSDUs & MMPDUs) transmitted to this station + * @tx_retries: cumulative retry counts (MPDUs) + * @tx_failed: number of failed transmissions (MPDUs) (retries exceeded, no ACK) * @rx_dropped_misc: Dropped for un-specified reason. * @bss_param: current BSS parameters * @generation: generation number for nl80211 dumps. diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 0c3d341c0aeb..b0fb5d598250 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2338,18 +2338,24 @@ enum nl80211_sta_bss_param { * * @__NL80211_STA_INFO_INVALID: attribute number 0 is reserved * @NL80211_STA_INFO_INACTIVE_TIME: time since last activity (u32, msecs) - * @NL80211_STA_INFO_RX_BYTES: total received bytes (u32, from this station) - * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (u32, to this station) - * @NL80211_STA_INFO_RX_BYTES64: total received bytes (u64, from this station) - * @NL80211_STA_INFO_TX_BYTES64: total transmitted bytes (u64, to this station) + * @NL80211_STA_INFO_RX_BYTES: total received bytes (MPDU length) + * (u32, from this station) + * @NL80211_STA_INFO_TX_BYTES: total transmitted bytes (MPDU length) + * (u32, to this station) + * @NL80211_STA_INFO_RX_BYTES64: total received bytes (MPDU length) + * (u64, from this station) + * @NL80211_STA_INFO_TX_BYTES64: total transmitted bytes (MPDU length) + * (u64, to this station) * @NL80211_STA_INFO_SIGNAL: signal strength of last received PPDU (u8, dBm) * @NL80211_STA_INFO_TX_BITRATE: current unicast tx rate, nested attribute * containing info as possible, see &enum nl80211_rate_info - * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station) - * @NL80211_STA_INFO_TX_PACKETS: total transmitted packets (u32, to this - * station) - * @NL80211_STA_INFO_TX_RETRIES: total retries (u32, to this station) - * @NL80211_STA_INFO_TX_FAILED: total failed packets (u32, to this station) + * @NL80211_STA_INFO_RX_PACKETS: total received packet (MSDUs and MMPDUs) + * (u32, from this station) + * @NL80211_STA_INFO_TX_PACKETS: total transmitted packets (MSDUs 
and MMPDUs) + * (u32, to this station) + * @NL80211_STA_INFO_TX_RETRIES: total retries (MPDUs) (u32, to this station) + * @NL80211_STA_INFO_TX_FAILED: total failed packets (MPDUs) + * (u32, to this station) * @NL80211_STA_INFO_SIGNAL_AVG: signal strength average (u8, dBm) * @NL80211_STA_INFO_LLID: the station's mesh LLID * @NL80211_STA_INFO_PLID: the station's mesh PLID -- cgit v1.2.3-70-g09d2 From 6de39808cf1dd7b02bf42e7d8695d80f5eaf645d Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 19 Dec 2014 12:34:00 +0100 Subject: nl80211: support per-TID station statistics The base for the current statistics is pretty mixed up, support exporting RX/TX statistics for MSDUs per TID. This (currently) covers received MSDUs, transmitted MSDUs and retries/failures thereof. Doing it per TID for MSDUs makes more sense than say only per AC because it's symmetric - we could export per-AC statistics for all frames (which AC we used for transmission can be determined also for management frames) but per TID is better and usually data frames are really the ones we care about. Also, on RX we can't determine the AC - but we do know the TID for any QoS MPDU we received. Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 21 +++++++++++++++++++++ include/uapi/linux/nl80211.h | 31 +++++++++++++++++++++++++++++++ net/wireless/nl80211.c | 41 +++++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 95420fb61600..197735788f18 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -939,6 +939,24 @@ struct sta_bss_parameters { u16 beacon_interval; }; +/** + * struct cfg80211_tid_stats - per-TID statistics + * @filled: bitmap of flags using the bits of &enum nl80211_tid_stats to + * indicate the relevant values in this struct are filled + * @rx_msdu: number of received MSDUs + * @tx_msdu: number of (attempted) transmitted MSDUs + * @tx_msdu_retries: number of retries (not counting the first) for + * transmitted MSDUs + * @tx_msdu_failed: number of failed transmitted MSDUs + */ +struct cfg80211_tid_stats { + u32 filled; + u64 rx_msdu; + u64 tx_msdu; + u64 tx_msdu_retries; + u64 tx_msdu_failed; +}; + #define IEEE80211_MAX_CHAINS 4 /** @@ -990,6 +1008,8 @@ struct sta_bss_parameters { * @rx_beacon: number of beacons received from this peer * @rx_beacon_signal_avg: signal strength average (in dBm) for beacons received * from this peer + * @pertid: per-TID statistics, see &struct cfg80211_tid_stats, using the last + * (IEEE80211_NUM_TIDS) index for MSDUs not encapsulated in QoS-MPDUs. */ struct station_info { u32 filled; @@ -1032,6 +1052,7 @@ struct station_info { u64 rx_beacon; u8 rx_beacon_signal_avg; + struct cfg80211_tid_stats pertid[IEEE80211_NUM_TIDS + 1]; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b0fb5d598250..a963d4824c51 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2384,6 +2384,11 @@ enum nl80211_sta_bss_param { * @NL80211_STA_INFO_BEACON_RX: number of beacons received from this peer (u64) * @NL80211_STA_INFO_BEACON_SIGNAL_AVG: signal strength average * for beacons only (u8, dBm) + * @NL80211_STA_INFO_TID_STATS: per-TID statistics (see &enum nl80211_tid_stats) + * This is a nested attribute where each the inner attribute number is the + * TID+1 and the special TID 16 (i.e. 
value 17) is used for non-QoS frames; + * each one of those is again nested with &enum nl80211_tid_stats + * attributes carrying the actual values. * @__NL80211_STA_INFO_AFTER_LAST: internal * @NL80211_STA_INFO_MAX: highest possible station info attribute */ @@ -2419,12 +2424,38 @@ enum nl80211_sta_info { NL80211_STA_INFO_RX_DROP_MISC, NL80211_STA_INFO_BEACON_RX, NL80211_STA_INFO_BEACON_SIGNAL_AVG, + NL80211_STA_INFO_TID_STATS, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, NL80211_STA_INFO_MAX = __NL80211_STA_INFO_AFTER_LAST - 1 }; +/** + * enum nl80211_tid_stats - per TID statistics attributes + * @__NL80211_TID_STATS_INVALID: attribute number 0 is reserved + * @NL80211_TID_STATS_RX_MSDU: number of MSDUs received (u64) + * @NL80211_TID_STATS_TX_MSDU: number of MSDUs transmitted (or + * attempted to transmit; u64) + * @NL80211_TID_STATS_TX_MSDU_RETRIES: number of retries for + * transmitted MSDUs (not counting the first attempt; u64) + * @NL80211_TID_STATS_TX_MSDU_FAILED: number of failed transmitted + * MSDUs (u64) + * @NUM_NL80211_TID_STATS: number of attributes here + * @NL80211_TID_STATS_MAX: highest numbered attribute here + */ +enum nl80211_tid_stats { + __NL80211_TID_STATS_INVALID, + NL80211_TID_STATS_RX_MSDU, + NL80211_TID_STATS_TX_MSDU, + NL80211_TID_STATS_TX_MSDU_RETRIES, + NL80211_TID_STATS_TX_MSDU_FAILED, + + /* keep last */ + NUM_NL80211_TID_STATS, + NL80211_TID_STATS_MAX = NUM_NL80211_TID_STATS - 1 +}; + /** * enum nl80211_mpath_flags - nl80211 mesh path flags * diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 42b968a1f994..7c2ce26e22de 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3772,6 +3772,47 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, PUT_SINFO(BEACON_SIGNAL_AVG, rx_beacon_signal_avg, u8); #undef PUT_SINFO + + if (sinfo->filled & BIT(NL80211_STA_INFO_TID_STATS)) { + struct nlattr *tidsattr; + int tid; + + tidsattr = nla_nest_start(msg, NL80211_STA_INFO_TID_STATS); + if (!tidsattr) + goto nla_put_failure; + + for (tid = 0; tid < IEEE80211_NUM_TIDS + 1; tid++) { + struct cfg80211_tid_stats *tidstats; + struct nlattr *tidattr; + + tidstats = &sinfo->pertid[tid]; + + if (!tidstats->filled) + continue; + + tidattr = nla_nest_start(msg, tid + 1); + if (!tidattr) + goto nla_put_failure; + +#define PUT_TIDVAL(attr, memb, type) do { \ + if (tidstats->filled & BIT(NL80211_TID_STATS_ ## attr) && \ + nla_put_ ## type(msg, NL80211_TID_STATS_ ## attr, \ + tidstats->memb)) \ + goto nla_put_failure; \ + } while (0) + + PUT_TIDVAL(RX_MSDU, rx_msdu, u64); + PUT_TIDVAL(TX_MSDU, tx_msdu, u64); + PUT_TIDVAL(TX_MSDU_RETRIES, tx_msdu_retries, u64); + PUT_TIDVAL(TX_MSDU_FAILED, tx_msdu_failed, u64); + +#undef PUT_TIDVAL + nla_nest_end(msg, tidattr); + } + + nla_nest_end(msg, tidsattr); + } + nla_nest_end(msg, sinfoattr); if (sinfo->assoc_req_ies_len && -- cgit v1.2.3-70-g09d2 From 3b50d9029809b60a5081d90c282aa04d438d3ea1 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Tue, 6 Jan 2015 15:45:31 -0800 Subject: ipv6: fix redefinition of in6_pktinfo and ip6_mtuinfo MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both netinet/in.h and linux/ipv6.h define these two structs, if we include both of them, we got: /usr/include/linux/ipv6.h:19:8: error: redefinition of ‘struct in6_pktinfo’ struct in6_pktinfo { ^ In file included from /usr/include/arpa/inet.h:22:0, from txtimestamp.c:33: /usr/include/netinet/in.h:524:8: note: originally defined here struct in6_pktinfo ^ In file included from 
txtimestamp.c:40:0: /usr/include/linux/ipv6.h:24:8: error: redefinition of ‘struct ip6_mtuinfo’ struct ip6_mtuinfo { ^ In file included from /usr/include/arpa/inet.h:22:0, from txtimestamp.c:33: /usr/include/netinet/in.h:531:8: note: originally defined here struct ip6_mtuinfo ^ So similarly to what we did for in6_addr, we need to sync with libc header on their definitions. Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/ipv6.h | 5 ++++- include/uapi/linux/libc-compat.h | 6 ++++++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index e863d088b9a5..b9b1b7d1c839 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -15,16 +15,19 @@ * *under construction* */ - +#if __UAPI_DEF_IN6_PKTINFO struct in6_pktinfo { struct in6_addr ipi6_addr; int ipi6_ifindex; }; +#endif +#if __UAPI_DEF_IP6_MTUINFO struct ip6_mtuinfo { struct sockaddr_in6 ip6m_addr; __u32 ip6m_mtu; }; +#endif struct in6_ifreq { struct in6_addr ifr6_addr; diff --git a/include/uapi/linux/libc-compat.h b/include/uapi/linux/libc-compat.h index e28807ad17fa..fa673e9cc040 100644 --- a/include/uapi/linux/libc-compat.h +++ b/include/uapi/linux/libc-compat.h @@ -70,6 +70,8 @@ #define __UAPI_DEF_IPV6_MREQ 0 #define __UAPI_DEF_IPPROTO_V6 0 #define __UAPI_DEF_IPV6_OPTIONS 0 +#define __UAPI_DEF_IN6_PKTINFO 0 +#define __UAPI_DEF_IP6_MTUINFO 0 #else @@ -84,6 +86,8 @@ #define __UAPI_DEF_IPV6_MREQ 1 #define __UAPI_DEF_IPPROTO_V6 1 #define __UAPI_DEF_IPV6_OPTIONS 1 +#define __UAPI_DEF_IN6_PKTINFO 1 +#define __UAPI_DEF_IP6_MTUINFO 1 #endif /* _NETINET_IN_H */ @@ -106,6 +110,8 @@ #define __UAPI_DEF_IPV6_MREQ 1 #define __UAPI_DEF_IPPROTO_V6 1 #define __UAPI_DEF_IPV6_OPTIONS 1 +#define __UAPI_DEF_IN6_PKTINFO 1 +#define __UAPI_DEF_IP6_MTUINFO 1 /* Definitions for xattr.h */ #define __UAPI_DEF_XATTR 1 -- cgit v1.2.3-70-g09d2 From db30485408326a6f466a843b291b23535f63eda0 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 7 Jan 2015 13:41:54 +0800 Subject: rhashtable: involve rhashtable_lookup_insert routine Involve a new function called rhashtable_lookup_insert() which makes lookup and insertion atomic under bucket lock protection, helping us avoid to introduce an extra lock when we search and insert an object into hash table. Signed-off-by: Ying Xue Signed-off-by: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/linux/rhashtable.h | 1 + lib/rhashtable.c | 97 +++++++++++++++++++++++++++++++++++++++------- 2 files changed, 83 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index de1459c74c4d..73c913f31574 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -168,6 +168,7 @@ int rhashtable_shrink(struct rhashtable *ht); void *rhashtable_lookup(struct rhashtable *ht, const void *key); void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg); +bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj); void rhashtable_destroy(struct rhashtable *ht); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 20006854fce0..4430233c4e11 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -505,8 +505,26 @@ static void rhashtable_wakeup_worker(struct rhashtable *ht) schedule_delayed_work(&ht->run_work, 0); } +static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, + struct bucket_table *tbl, u32 hash) +{ + struct rhash_head *head = rht_dereference_bucket(tbl->buckets[hash], + tbl, hash); + + if (rht_is_a_nulls(head)) + INIT_RHT_NULLS_HEAD(obj->next, ht, hash); + else + RCU_INIT_POINTER(obj->next, head); + + rcu_assign_pointer(tbl->buckets[hash], obj); + + atomic_inc(&ht->nelems); + + rhashtable_wakeup_worker(ht); +} + /** - * rhashtable_insert - insert object into hash hash table + * rhashtable_insert - insert object into hash table * @ht: hash table * @obj: pointer to hash head inside object * @@ -523,7 +541,6 @@ static void rhashtable_wakeup_worker(struct rhashtable *ht) void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) { struct bucket_table *tbl; - struct rhash_head *head; spinlock_t *lock; unsigned hash; @@ -534,19 +551,9 @@ void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) lock = bucket_lock(tbl, hash); spin_lock_bh(lock); - head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); - if (rht_is_a_nulls(head)) - INIT_RHT_NULLS_HEAD(obj->next, ht, hash); - else - RCU_INIT_POINTER(obj->next, head); - - rcu_assign_pointer(tbl->buckets[hash], obj); + __rhashtable_insert(ht, obj, tbl, hash); spin_unlock_bh(lock); - atomic_inc(&ht->nelems); - - rhashtable_wakeup_worker(ht); - rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rhashtable_insert); @@ -560,7 +567,7 @@ EXPORT_SYMBOL_GPL(rhashtable_insert); * walk the bucket chain upon removal. The removal operation is thus * considerable slow if the hash table is not correctly sized. * - * Will automatically shrink the table via rhashtable_expand() if the the + * Will automatically shrink the table via rhashtable_expand() if the * shrink_decision function specified at rhashtable_init() returns true. * * The caller must ensure that no concurrent table mutations occur. It is @@ -641,7 +648,7 @@ static bool rhashtable_compare(void *ptr, void *arg) * for a entry with an identical key. The first matching entry is returned. * * This lookup function may only be used for fixed key hash table (key_len - * paramter set). It will BUG() if used inappropriately. + * parameter set). It will BUG() if used inappropriately. * * Lookups may occur in parallel with hashtable mutations and resizing. 
*/ @@ -702,6 +709,66 @@ restart: } EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); +/** + * rhashtable_lookup_insert - lookup and insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * + * Locks down the bucket chain in both the old and new table if a resize + * is in progress to ensure that writers can't remove from the old table + * and can't insert to the new table during the atomic operation of search + * and insertion. Searches for duplicates in both the old and new table if + * a resize is in progress. + * + * This lookup function may only be used for fixed key hash table (key_len + * parameter set). It will BUG() if used inappropriately. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) +{ + struct bucket_table *new_tbl, *old_tbl; + spinlock_t *new_bucket_lock, *old_bucket_lock; + u32 new_hash, old_hash; + bool success = true; + + BUG_ON(!ht->p.key_len); + + rcu_read_lock(); + + old_tbl = rht_dereference_rcu(ht->tbl, ht); + old_hash = head_hashfn(ht, old_tbl, obj); + old_bucket_lock = bucket_lock(old_tbl, old_hash); + spin_lock_bh(old_bucket_lock); + + new_tbl = rht_dereference_rcu(ht->future_tbl, ht); + new_hash = head_hashfn(ht, new_tbl, obj); + new_bucket_lock = bucket_lock(new_tbl, new_hash); + if (unlikely(old_tbl != new_tbl)) + spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED); + + if (rhashtable_lookup(ht, rht_obj(ht, obj) + ht->p.key_offset)) { + success = false; + goto exit; + } + + __rhashtable_insert(ht, obj, new_tbl, new_hash); + +exit: + if (unlikely(old_tbl != new_tbl)) + spin_unlock_bh(new_bucket_lock); + spin_unlock_bh(old_bucket_lock); + + rcu_read_unlock(); + + return success; +} +EXPORT_SYMBOL_GPL(rhashtable_lookup_insert); + static size_t rounded_hashtable_size(struct rhashtable_params *params) { return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), -- cgit v1.2.3-70-g09d2 From c0c09bfdc4150b3918526660768585cd477adf35 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 7 Jan 2015 13:41:56 +0800 Subject: rhashtable: avoid unnecessary wakeup for worker queue Move condition statements of verifying whether hash table size exceeds its maximum threshold or reaches its minimum threshold from resizing functions to resizing decision functions, avoiding unnecessary wakeup for worker queue thread. Signed-off-by: Ying Xue Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. 
Miller --- include/linux/rhashtable.h | 2 +- lib/rhashtable.c | 18 +++++++----------- 2 files changed, 8 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 73c913f31574..326acd8c2e9f 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -113,7 +113,7 @@ struct rhashtable { struct bucket_table __rcu *tbl; struct bucket_table __rcu *future_tbl; atomic_t nelems; - size_t shift; + atomic_t shift; struct rhashtable_params p; struct delayed_work run_work; struct mutex mutex; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 1aef942976fe..7fb474b18f1b 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -199,7 +199,8 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) { /* Expand table when exceeding 75% load */ - return atomic_read(&ht->nelems) > (new_size / 4 * 3); + return atomic_read(&ht->nelems) > (new_size / 4 * 3) && + (ht->p.max_shift && atomic_read(&ht->shift) < ht->p.max_shift); } EXPORT_SYMBOL_GPL(rht_grow_above_75); @@ -211,7 +212,8 @@ EXPORT_SYMBOL_GPL(rht_grow_above_75); bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) { /* Shrink table beneath 30% load */ - return atomic_read(&ht->nelems) < (new_size * 3 / 10); + return atomic_read(&ht->nelems) < (new_size * 3 / 10) && + (atomic_read(&ht->shift) > ht->p.min_shift); } EXPORT_SYMBOL_GPL(rht_shrink_below_30); @@ -318,14 +320,11 @@ int rhashtable_expand(struct rhashtable *ht) ASSERT_RHT_MUTEX(ht); - if (ht->p.max_shift && ht->shift >= ht->p.max_shift) - return 0; - new_tbl = bucket_table_alloc(ht, old_tbl->size * 2); if (new_tbl == NULL) return -ENOMEM; - ht->shift++; + atomic_inc(&ht->shift); /* Make insertions go into the new, empty table right away. Deletions * and lookups will be attempted in both tables until we synchronize. @@ -421,9 +420,6 @@ int rhashtable_shrink(struct rhashtable *ht) ASSERT_RHT_MUTEX(ht); - if (ht->shift <= ht->p.min_shift) - return 0; - new_tbl = bucket_table_alloc(ht, tbl->size / 2); if (new_tbl == NULL) return -ENOMEM; @@ -462,7 +458,7 @@ int rhashtable_shrink(struct rhashtable *ht) /* Publish the new, valid hash table */ rcu_assign_pointer(ht->tbl, new_tbl); - ht->shift--; + atomic_dec(&ht->shift); /* Wait for readers. No new readers will have references to the * old hash table. @@ -851,7 +847,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) if (tbl == NULL) return -ENOMEM; - ht->shift = ilog2(tbl->size); + atomic_set(&ht->shift, ilog2(tbl->size)); RCU_INIT_POINTER(ht->tbl, tbl); RCU_INIT_POINTER(ht->future_tbl, tbl); -- cgit v1.2.3-70-g09d2 From 1904a853fae40ee61bed7c231fc5bd2158984441 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sun, 11 Jan 2015 13:50:44 -0800 Subject: Bluetooth: Add opcode parameter to hci_req_complete_t callback When hci_req_run() calls its provided complete function and one of the HCI commands in the sequence fails, then provide the opcode of failing command. In case of success HCI_OP_NOP is provided since all commands completed. This patch fixes the prototype of hci_req_complete_t and all its users. 
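For illustration only (not part of the patch): a minimal request-complete callback using the updated prototype. The function name is hypothetical; per the description above, opcode is HCI_OP_NOP when the whole command sequence succeeded and otherwise identifies the failing command.

static void example_req_complete(struct hci_dev *hdev, u8 status, u16 opcode)
{
	/* status != 0 means one command in the request failed and
	 * opcode names that command; on success opcode is HCI_OP_NOP.
	 */
	if (status)
		BT_ERR("request failed: opcode 0x%4.4x status 0x%2.2x",
		       opcode, status);
}
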
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/bluetooth.h | 2 +- net/bluetooth/hci_conn.c | 2 +- net/bluetooth/hci_core.c | 9 ++++---- net/bluetooth/hci_request.c | 3 ++- net/bluetooth/mgmt.c | 44 ++++++++++++++++++++++----------------- 5 files changed, 34 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 58695ffeb138..e00455aab18c 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -273,7 +273,7 @@ struct l2cap_ctrl { struct hci_dev; -typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status); +typedef void (*hci_req_complete_t)(struct hci_dev *hdev, u8 status, u16 opcode); struct hci_req_ctrl { bool start; diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 75240aaca101..2e724e0b75b9 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -633,7 +633,7 @@ void hci_le_conn_failed(struct hci_conn *conn, u8 status) mgmt_reenable_advertising(hdev); } -static void create_le_conn_complete(struct hci_dev *hdev, u8 status) +static void create_le_conn_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct hci_conn *conn; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index bc5486ea5411..ba0d1fdccbd9 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -141,7 +141,7 @@ static const struct file_operations dut_mode_fops = { /* ---- HCI requests ---- */ -static void hci_req_sync_complete(struct hci_dev *hdev, u8 result) +static void hci_req_sync_complete(struct hci_dev *hdev, u8 result, u16 opcode) { BT_DBG("%s result 0x%2.2x", hdev->name, result); @@ -2754,7 +2754,7 @@ void hci_conn_params_clear_all(struct hci_dev *hdev) BT_DBG("All LE connection parameters were removed"); } -static void inquiry_complete(struct hci_dev *hdev, u8 status) +static void inquiry_complete(struct hci_dev *hdev, u8 status, u16 opcode) { if (status) { BT_ERR("Failed to start inquiry: status %d", status); @@ -2766,7 +2766,8 @@ static void inquiry_complete(struct hci_dev *hdev, u8 status) } } -static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status) +static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { /* General inquiry access code (GIAC) */ u8 lap[3] = { 0x33, 0x8b, 0x9e }; @@ -4159,7 +4160,7 @@ void hci_req_cmd_complete(struct hci_dev *hdev, u16 opcode, u8 status) call_complete: if (req_complete) - req_complete(hdev, status); + req_complete(hdev, status, status ? 
opcode : HCI_OP_NOP); } static void hci_rx_work(struct work_struct *work) diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 324c6418b17c..b59f92c6df0c 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -533,7 +533,8 @@ void __hci_update_background_scan(struct hci_request *req) } } -static void update_background_scan_complete(struct hci_dev *hdev, u8 status) +static void update_background_scan_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { if (status) BT_DBG("HCI request failed to update background scanning: " diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 6b3f5537e441..e531da805923 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -1251,7 +1251,7 @@ static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev) sizeof(settings)); } -static void clean_up_hci_complete(struct hci_dev *hdev, u8 status) +static void clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("%s status 0x%02x", hdev->name, status); @@ -1518,7 +1518,8 @@ static u8 mgmt_le_support(struct hci_dev *hdev) return MGMT_STATUS_SUCCESS; } -static void set_discoverable_complete(struct hci_dev *hdev, u8 status) +static void set_discoverable_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; struct mgmt_mode *cp; @@ -1777,7 +1778,8 @@ static void write_fast_connectable(struct hci_request *req, bool enable) hci_req_add(req, HCI_OP_WRITE_PAGE_SCAN_TYPE, 1, &type); } -static void set_connectable_complete(struct hci_dev *hdev, u8 status) +static void set_connectable_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; struct mgmt_mode *cp; @@ -2195,7 +2197,7 @@ unlock: return err; } -static void le_enable_complete(struct hci_dev *hdev, u8 status) +static void le_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct cmd_lookup match = { NULL, hdev }; @@ -2385,7 +2387,7 @@ unlock: hci_dev_unlock(hdev); } -static void add_uuid_complete(struct hci_dev *hdev, u8 status) +static void add_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("status 0x%02x", status); @@ -2464,7 +2466,7 @@ static bool enable_service_cache(struct hci_dev *hdev) return false; } -static void remove_uuid_complete(struct hci_dev *hdev, u8 status) +static void remove_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("status 0x%02x", status); @@ -2549,7 +2551,7 @@ unlock: return err; } -static void set_class_complete(struct hci_dev *hdev, u8 status) +static void set_class_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("status 0x%02x", status); @@ -3483,7 +3485,7 @@ static void update_name(struct hci_request *req) hci_req_add(req, HCI_OP_WRITE_LOCAL_NAME, sizeof(cp), &cp); } -static void set_name_complete(struct hci_dev *hdev, u8 status) +static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct mgmt_cp_set_local_name *cp; struct pending_cmd *cmd; @@ -3834,7 +3836,8 @@ static bool trigger_discovery(struct hci_request *req, u8 *status) return true; } -static void start_discovery_complete(struct hci_dev *hdev, u8 status) +static void start_discovery_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; unsigned long timeout; @@ -4063,7 +4066,7 @@ failed: return err; } -static void stop_discovery_complete(struct hci_dev *hdev, u8 status) +static void stop_discovery_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct pending_cmd *cmd; @@ -4289,7 
+4292,8 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data, return err; } -static void set_advertising_complete(struct hci_dev *hdev, u8 status) +static void set_advertising_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct cmd_lookup match = { NULL, hdev }; @@ -4496,7 +4500,8 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev, return err; } -static void fast_connectable_complete(struct hci_dev *hdev, u8 status) +static void fast_connectable_complete(struct hci_dev *hdev, u8 status, + u16 opcode) { struct pending_cmd *cmd; @@ -4594,7 +4599,7 @@ unlock: return err; } -static void set_bredr_complete(struct hci_dev *hdev, u8 status) +static void set_bredr_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct pending_cmd *cmd; @@ -5119,7 +5124,8 @@ static int conn_info_cmd_complete(struct pending_cmd *cmd, u8 status) return err; } -static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status) +static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status, + u16 opcode) { struct hci_cp_read_rssi *cp; struct pending_cmd *cmd; @@ -5326,7 +5332,7 @@ complete: return err; } -static void get_clock_info_complete(struct hci_dev *hdev, u8 status) +static void get_clock_info_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct hci_cp_read_clock *hci_cp; struct pending_cmd *cmd; @@ -5504,7 +5510,7 @@ static void device_added(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_ADDED, hdev, &ev, sizeof(ev), sk); } -static void add_device_complete(struct hci_dev *hdev, u8 status) +static void add_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct pending_cmd *cmd; @@ -5627,7 +5633,7 @@ static void device_removed(struct sock *sk, struct hci_dev *hdev, mgmt_event(MGMT_EV_DEVICE_REMOVED, hdev, &ev, sizeof(ev), sk); } -static void remove_device_complete(struct hci_dev *hdev, u8 status) +static void remove_device_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct pending_cmd *cmd; @@ -6205,7 +6211,7 @@ static void restart_le_actions(struct hci_request *req) __hci_update_background_scan(req); } -static void powered_complete(struct hci_dev *hdev, u8 status) +static void powered_complete(struct hci_dev *hdev, u8 status, u16 opcode) { struct cmd_lookup match = { NULL, hdev }; @@ -7316,7 +7322,7 @@ void mgmt_discovering(struct hci_dev *hdev, u8 discovering) mgmt_event(MGMT_EV_DISCOVERING, hdev, &ev, sizeof(ev), NULL); } -static void adv_enable_complete(struct hci_dev *hdev, u8 status) +static void adv_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) { BT_DBG("%s status %u", hdev->name, status); } -- cgit v1.2.3-70-g09d2 From cb9627806ce898c436dc74252718e4a757b33bc3 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 12 Jan 2015 09:21:24 -0800 Subject: Bluetooth: Add defintions for HCI Read Stored Link Key command This patch adds the missing commmand structure and command complete structure for the HCI Read Store Link Key command. 
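A hedged usage sketch, not taken from this patch series: queueing the new command from a request under construction, assuming the hci_req_add() request API seen elsewhere in this series. The helper name and the choice of parameters are illustrative only.

/* Hypothetical helper, for illustration only */
static void example_read_stored_link_key(struct hci_request *req)
{
	struct hci_cp_read_stored_link_key cp;

	bacpy(&cp.bdaddr, BDADDR_ANY);
	cp.read_all = 0x01;	/* ask the controller to report all stored keys */
	hci_req_add(req, HCI_OP_READ_STORED_LINK_KEY, sizeof(cp), &cp);
}
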
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index d0bca316b43b..91cd9d302cd0 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -855,6 +855,17 @@ struct hci_cp_set_event_flt { #define HCI_CONN_SETUP_AUTO_OFF 0x01 #define HCI_CONN_SETUP_AUTO_ON 0x02 +#define HCI_OP_READ_STORED_LINK_KEY 0x0c0d +struct hci_cp_read_stored_link_key { + bdaddr_t bdaddr; + __u8 read_all; +} __packed; +struct hci_rp_read_stored_link_key { + __u8 status; + __u8 max_keys; + __u8 num_keys; +} __packed; + #define HCI_OP_DELETE_STORED_LINK_KEY 0x0c12 struct hci_cp_delete_stored_link_key { bdaddr_t bdaddr; -- cgit v1.2.3-70-g09d2 From c2f0f979276fc4911cef5da2fc113f0daeda3ebc Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 12 Jan 2015 09:21:25 -0800 Subject: Bluetooth: Handle command complete event for HCI Read Stored Link Keys When the HCI Read Stored Link Keys command completes it gives useful information of the current stored keys and maximum keys a controller can actually store. So process this event and store these information in hci_dev structure. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 2 ++ net/bluetooth/hci_event.c | 22 ++++++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 89f4e3c8a097..1f21fe48b38e 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -205,6 +205,8 @@ struct hci_dev { __u16 lmp_subver; __u16 voice_setting; __u8 num_iac; + __u8 stored_max_keys; + __u8 stored_num_keys; __u8 io_capability; __s8 inq_tx_power; __u16 page_scan_interval; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 0881efd0ad2d..b1580daede13 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -214,6 +214,24 @@ static void hci_cc_reset(struct hci_dev *hdev, struct sk_buff *skb) hci_bdaddr_list_clear(&hdev->le_white_list); } +static void hci_cc_read_stored_link_key(struct hci_dev *hdev, + struct sk_buff *skb) +{ + struct hci_rp_read_stored_link_key *rp = (void *)skb->data; + struct hci_cp_read_stored_link_key *sent; + + BT_DBG("%s status 0x%2.2x", hdev->name, rp->status); + + sent = hci_sent_cmd_data(hdev, HCI_OP_READ_STORED_LINK_KEY); + if (!sent) + return; + + if (!rp->status && sent->read_all == 0x01) { + hdev->stored_max_keys = rp->max_keys; + hdev->stored_num_keys = rp->num_keys; + } +} + static void hci_cc_write_local_name(struct hci_dev *hdev, struct sk_buff *skb) { __u8 status = *((__u8 *) skb->data); @@ -2714,6 +2732,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb) hci_cc_reset(hdev, skb); break; + case HCI_OP_READ_STORED_LINK_KEY: + hci_cc_read_stored_link_key(hdev, skb); + break; + case HCI_OP_WRITE_LOCAL_NAME: hci_cc_write_local_name(hdev, skb); break; -- cgit v1.2.3-70-g09d2 From 039d4e410c06ed446566105a0a1c6ce9cad6b56b Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Mon, 12 Jan 2015 09:21:27 -0800 Subject: Bluetooth: Add missing response structure for HCI Delete Stored Link Key This patch adds this missing structure for processing the result of the HCI Delete Stored Link Key command. 
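A sketch of how such a response could be consumed, mirroring the hci_cc_read_stored_link_key() handler added earlier in this series; this handler itself is not part of the patch and its name is hypothetical.

static void example_cc_delete_stored_link_key(struct hci_dev *hdev,
					      struct sk_buff *skb)
{
	struct hci_rp_delete_stored_link_key *rp = (void *)skb->data;

	/* rp->num_keys reports how many stored link keys were deleted */
	BT_DBG("%s status 0x%2.2x deleted %u key(s)", hdev->name,
	       rp->status, rp->num_keys);
}
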
Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 91cd9d302cd0..8e54f825153c 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -871,6 +871,10 @@ struct hci_cp_delete_stored_link_key { bdaddr_t bdaddr; __u8 delete_all; } __packed; +struct hci_rp_delete_stored_link_key { + __u8 status; + __u8 num_keys; +} __packed; #define HCI_MAX_NAME_LENGTH 248 -- cgit v1.2.3-70-g09d2 From 3bf3947526c1053ddf2523f261395d682718f56c Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Thu, 8 Jan 2015 12:31:18 -0800 Subject: vxlan: Improve support for header flags This patch cleans up the header flags of VXLAN in anticipation of defining some new ones: - Move header related definitions from vxlan.c to vxlan.h - Change VXLAN_FLAGS to be VXLAN_HF_VNI (only currently defined flag) - Move check for unknown flags to after we find vxlan_sock, this assumes that some flags may be processed based on tunnel configuration - Add a comment about why the stack treating unknown set flags as an error instead of ignoring them Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 42 ++++++++++++++++++++++++++++-------------- include/net/vxlan.h | 7 +++++++ 2 files changed, 35 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 2ab0922af0b4..3a18d8ed89ca 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -61,12 +61,6 @@ #define FDB_AGE_DEFAULT 300 /* 5 min */ #define FDB_AGE_INTERVAL (10 * HZ) /* rescan interval */ -#define VXLAN_N_VID (1u << 24) -#define VXLAN_VID_MASK (VXLAN_N_VID - 1) -#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) - -#define VXLAN_FLAGS 0x08000000 /* struct vxlanhdr.vx_flags required value. */ - /* UDP port for VXLAN traffic. * The IANA assigned port is 4789, but the Linux default is 8472 * for compatibility with early adopters. @@ -1095,18 +1089,21 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { struct vxlan_sock *vs; struct vxlanhdr *vxh; + u32 flags, vni; /* Need Vxlan and inner Ethernet header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) goto error; - /* Return packets with reserved bits set */ vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); - if (vxh->vx_flags != htonl(VXLAN_FLAGS) || - (vxh->vx_vni & htonl(0xff))) { - netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n", - ntohl(vxh->vx_flags), ntohl(vxh->vx_vni)); - goto error; + flags = ntohl(vxh->vx_flags); + vni = ntohl(vxh->vx_vni); + + if (flags & VXLAN_HF_VNI) { + flags &= ~VXLAN_HF_VNI; + } else { + /* VNI flag always required to be set */ + goto bad_flags; } if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB))) @@ -1116,6 +1113,19 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (!vs) goto drop; + if (flags || (vni & 0xff)) { + /* If there are any unprocessed flags remaining treat + * this as a malformed packet. This behavior diverges from + * VXLAN RFC (RFC7348) which stipulates that bits in reserved + * in reserved fields are to be ignored. The approach here + * maintains compatbility with previous stack code, and also + * is more robust and provides a little more security in + * adding extensions to VXLAN. 
+ */ + + goto bad_flags; + } + vs->rcv(vs, skb, vxh->vx_vni); return 0; @@ -1124,6 +1134,10 @@ drop: kfree_skb(skb); return 0; +bad_flags: + netdev_dbg(skb->dev, "invalid vxlan flags=%#x vni=%#x\n", + ntohl(vxh->vx_flags), ntohl(vxh->vx_vni)); + error: /* Return non vxlan pkt */ return 1; @@ -1563,7 +1577,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, } vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); - vxh->vx_flags = htonl(VXLAN_FLAGS); + vxh->vx_flags = htonl(VXLAN_HF_VNI); vxh->vx_vni = vni; skb_set_inner_protocol(skb, htons(ETH_P_TEB)); @@ -1607,7 +1621,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, return -ENOMEM; vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); - vxh->vx_flags = htonl(VXLAN_FLAGS); + vxh->vx_flags = htonl(VXLAN_HF_VNI); vxh->vx_vni = vni; skb_set_inner_protocol(skb, htons(ETH_P_TEB)); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 903461aa5644..a0d80736224f 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -17,6 +17,13 @@ struct vxlanhdr { __be32 vx_vni; }; +/* VXLAN header flags. */ +#define VXLAN_HF_VNI 0x08000000 + +#define VXLAN_N_VID (1u << 24) +#define VXLAN_VID_MASK (VXLAN_N_VID - 1) +#define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) + struct vxlan_sock; typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key); -- cgit v1.2.3-70-g09d2 From bdced7ef7838c1c4aebe9f295e44b7f0dcae2109 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Sat, 10 Jan 2015 07:31:12 -0800 Subject: bridge: support for multiple vlans and vlan ranges in setlink and dellink requests This patch changes bridge IFLA_AF_SPEC netlink attribute parser to look for more than one IFLA_BRIDGE_VLAN_INFO attribute. This allows userspace to pack more than one vlan in the setlink msg. The dumps were already sending more than one vlan info in the getlink msg. This patch also adds bridge_vlan_info flags BRIDGE_VLAN_INFO_RANGE_BEGIN and BRIDGE_VLAN_INFO_RANGE_END to indicate start and end of vlan range This patch also deletes unused ifla_br_policy. Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- include/uapi/linux/if_bridge.h | 2 + net/bridge/br_netlink.c | 104 +++++++++++++++++++++++++++-------------- 2 files changed, 70 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h index b03ee8f62d3c..eaaea6208b42 100644 --- a/include/uapi/linux/if_bridge.h +++ b/include/uapi/linux/if_bridge.h @@ -125,6 +125,8 @@ enum { #define BRIDGE_VLAN_INFO_MASTER (1<<0) /* Operate on Bridge device as well */ #define BRIDGE_VLAN_INFO_PVID (1<<1) /* VLAN is PVID, ingress untagged */ #define BRIDGE_VLAN_INFO_UNTAGGED (1<<2) /* VLAN egresses untagged */ +#define BRIDGE_VLAN_INFO_RANGE_BEGIN (1<<3) /* VLAN is start of vlan range */ +#define BRIDGE_VLAN_INFO_RANGE_END (1<<4) /* VLAN is end of vlan range */ struct bridge_vlan_info { __u16 flags; diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 9f5eb55a4d3a..6f616a2df0b4 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -218,57 +218,89 @@ out: return err; } -static const struct nla_policy ifla_br_policy[IFLA_MAX+1] = { - [IFLA_BRIDGE_FLAGS] = { .type = NLA_U16 }, - [IFLA_BRIDGE_MODE] = { .type = NLA_U16 }, - [IFLA_BRIDGE_VLAN_INFO] = { .type = NLA_BINARY, - .len = sizeof(struct bridge_vlan_info), }, -}; +static int br_vlan_info(struct net_bridge *br, struct net_bridge_port *p, + int cmd, struct bridge_vlan_info *vinfo) +{ + int err = 0; + + switch (cmd) { + case RTM_SETLINK: + if (p) { + err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); + if (err) + break; + + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + err = br_vlan_add(p->br, vinfo->vid, + vinfo->flags); + } else { + err = br_vlan_add(br, vinfo->vid, vinfo->flags); + } + break; + + case RTM_DELLINK: + if (p) { + nbp_vlan_delete(p, vinfo->vid); + if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) + br_vlan_delete(p->br, vinfo->vid); + } else { + br_vlan_delete(br, vinfo->vid); + } + break; + } + + return err; +} static int br_afspec(struct net_bridge *br, struct net_bridge_port *p, struct nlattr *af_spec, int cmd) { - struct nlattr *tb[IFLA_BRIDGE_MAX+1]; + struct bridge_vlan_info *vinfo_start = NULL; + struct bridge_vlan_info *vinfo = NULL; + struct nlattr *attr; int err = 0; + int rem; - err = nla_parse_nested(tb, IFLA_BRIDGE_MAX, af_spec, ifla_br_policy); - if (err) - return err; + nla_for_each_nested(attr, af_spec, rem) { + if (nla_type(attr) != IFLA_BRIDGE_VLAN_INFO) + continue; + if (nla_len(attr) != sizeof(struct bridge_vlan_info)) + return -EINVAL; + vinfo = nla_data(attr); + if (vinfo->flags & BRIDGE_VLAN_INFO_RANGE_BEGIN) { + if (vinfo_start) + return -EINVAL; + vinfo_start = vinfo; + continue; + } + + if (vinfo_start) { + struct bridge_vlan_info tmp_vinfo; + int v; - if (tb[IFLA_BRIDGE_VLAN_INFO]) { - struct bridge_vlan_info *vinfo; + if (!(vinfo->flags & BRIDGE_VLAN_INFO_RANGE_END)) + return -EINVAL; - vinfo = nla_data(tb[IFLA_BRIDGE_VLAN_INFO]); + if (vinfo->vid <= vinfo_start->vid) + return -EINVAL; - if (!vinfo->vid || vinfo->vid >= VLAN_VID_MASK) - return -EINVAL; + memcpy(&tmp_vinfo, vinfo_start, + sizeof(struct bridge_vlan_info)); - switch (cmd) { - case RTM_SETLINK: - if (p) { - err = nbp_vlan_add(p, vinfo->vid, vinfo->flags); + for (v = vinfo_start->vid; v <= vinfo->vid; v++) { + tmp_vinfo.vid = v; + err = br_vlan_info(br, p, cmd, &tmp_vinfo); if (err) break; - - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - err = br_vlan_add(p->br, vinfo->vid, - vinfo->flags); - } else - err = br_vlan_add(br, vinfo->vid, vinfo->flags); - - break; - - case RTM_DELLINK: - if (p) { - 
nbp_vlan_delete(p, vinfo->vid); - if (vinfo->flags & BRIDGE_VLAN_INFO_MASTER) - br_vlan_delete(p->br, vinfo->vid); - } else - br_vlan_delete(br, vinfo->vid); - break; + } + vinfo_start = NULL; + } else { + err = br_vlan_info(br, p, cmd, vinfo); } + if (err) + break; } return err; -- cgit v1.2.3-70-g09d2 From 35a27cee321e7c4e7cba3550b2f48c2ca44d8a72 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Sat, 10 Jan 2015 07:31:13 -0800 Subject: rtnetlink: new filter RTEXT_FILTER_BRVLAN_COMPRESSED This filter is same as RTEXT_FILTER_BRVLAN except that it tries to compress the consecutive vlans into ranges. This helps on systems with large number of configured vlans. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/uapi/linux/rtnetlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index d81f22d5b390..a1d18593f41e 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -636,6 +636,7 @@ struct tcamsg { /* New extended info filters for IFLA_EXT_MASK */ #define RTEXT_FILTER_VF (1 << 0) #define RTEXT_FILTER_BRVLAN (1 << 1) +#define RTEXT_FILTER_BRVLAN_COMPRESSED (1 << 2) /* End of information exported to user level */ -- cgit v1.2.3-70-g09d2 From 7a868d1e9ab3c534c5ad44e3e5dc46753a1e5636 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 12 Jan 2015 14:52:22 +0800 Subject: rhashtable: involve rhashtable_lookup_compare_insert routine Introduce a new function called rhashtable_lookup_compare_insert() which is very similar to rhashtable_lookup_insert(). But the former makes use of users' given compare function to look for an object, and then inserts it into hash table if found. As the entire process of search and insertion is under protection of per bucket lock, this can help users to avoid the involvement of extra lock. Signed-off-by: Ying Xue Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 5 +++++ lib/rhashtable.c | 42 ++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 45 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 326acd8c2e9f..7b9bd77ed684 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -168,7 +168,12 @@ int rhashtable_shrink(struct rhashtable *ht); void *rhashtable_lookup(struct rhashtable *ht, const void *key); void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg); + bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj); +bool rhashtable_lookup_compare_insert(struct rhashtable *ht, + struct rhash_head *obj, + bool (*compare)(void *, void *), + void *arg); void rhashtable_destroy(struct rhashtable *ht); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 8023b554905c..ed6ae1ad304c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -726,6 +726,43 @@ EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); * to rhashtable_init(). 
*/ bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) +{ + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = rht_obj(ht, obj) + ht->p.key_offset, + }; + + BUG_ON(!ht->p.key_len); + + return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare, + &arg); +} +EXPORT_SYMBOL_GPL(rhashtable_lookup_insert); + +/** + * rhashtable_lookup_compare_insert - search and insert object to hash table + * with compare function + * @ht: hash table + * @obj: pointer to hash head inside object + * @compare: compare function, must return true on match + * @arg: argument passed on to compare function + * + * Locks down the bucket chain in both the old and new table if a resize + * is in progress to ensure that writers can't remove from the old table + * and can't insert to the new table during the atomic operation of search + * and insertion. Searches for duplicates in both the old and new table if + * a resize is in progress. + * + * Lookups may occur in parallel with hashtable mutations and resizing. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +bool rhashtable_lookup_compare_insert(struct rhashtable *ht, + struct rhash_head *obj, + bool (*compare)(void *, void *), + void *arg) { struct bucket_table *new_tbl, *old_tbl; spinlock_t *new_bucket_lock, *old_bucket_lock; @@ -747,7 +784,8 @@ bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) if (unlikely(old_tbl != new_tbl)) spin_lock_bh_nested(new_bucket_lock, RHT_LOCK_NESTED); - if (rhashtable_lookup(ht, rht_obj(ht, obj) + ht->p.key_offset)) { + if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset, + compare, arg)) { success = false; goto exit; } @@ -763,7 +801,7 @@ exit: return success; } -EXPORT_SYMBOL_GPL(rhashtable_lookup_insert); +EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert); static size_t rounded_hashtable_size(struct rhashtable_params *params) { -- cgit v1.2.3-70-g09d2 From 6f73d3b13dc5e16ae06025cd1b12a36b2857caa2 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Mon, 12 Jan 2015 14:52:24 +0800 Subject: rhashtable: add a note for grow and shrink decision functions As commit c0c09bfdc415 ("rhashtable: avoid unnecessary wakeup for worker queue") moves condition statements of verifying whether hash table size exceeds its maximum threshold or reaches its minimum threshold from resizing functions to resizing decision functions, we should add a note in rhashtable.h to indicate the implementation of what the grow and shrink decision function must enforce min/max shift, otherwise, it's failed to take min/max shift's set watermarks into effect. Signed-off-by: Ying Xue Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 7b9bd77ed684..9570832ab07c 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -79,6 +79,10 @@ struct rhashtable; * @obj_hashfn: Function to hash object * @grow_decision: If defined, may return true if table should expand * @shrink_decision: If defined, may return true if table should shrink + * + * Note: when implementing the grow and shrink decision function, min/max + * shift must be enforced, otherwise, resizing watermarks they set may be + * useless. 
*/ struct rhashtable_params { size_t nelem_hint; -- cgit v1.2.3-70-g09d2 From c66ad9ca3f4f55886829a61bd24fc5612d0c05c1 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Mon, 12 Jan 2015 14:29:34 -0500 Subject: ipv6: directly include libc-compat.h in ipv6.h Patch 3b50d9029809 ("ipv6: fix redefinition of in6_pktinfo ...") fixed a libc compatibility issue in ipv6 structure definitions as described in include/uapi/linux/libc-compat.h. It relies on including linux/in6.h to include libc-compat.h itself. Include that file directly to clearly communicate the dependency (libc-compat.h: "This include must be as early as possible"). Signed-off-by: Willem de Bruijn ---- As discussed in http://patchwork.ozlabs.org/patch/427384/ Acked-by: Cong Wang Signed-off-by: David S. Miller --- include/uapi/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index b9b1b7d1c839..73cb02dc3065 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -1,6 +1,7 @@ #ifndef _UAPI_IPV6_H #define _UAPI_IPV6_H +#include #include #include #include -- cgit v1.2.3-70-g09d2 From d8b9605d2697c48fb822c821c5751afbb4567003 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 13 Jan 2015 17:13:43 +0100 Subject: net: sched: fix skb->protocol use in case of accelerated vlan path tc code implicitly considers skb->protocol even in case of accelerated vlan paths and expects vlan protocol type here. However, on rx path, if the vlan header was already stripped, skb->protocol contains value of next header. Similar situation is on tx path. So for skbs that use skb->vlan_tci for tagging, use skb->vlan_proto instead. Reported-by: Jamal Hadi Salim Signed-off-by: Jiri Pirko Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 12 ++++++++++++ net/sched/act_csum.c | 2 +- net/sched/cls_flow.c | 8 ++++---- net/sched/em_ipset.c | 2 +- net/sched/em_meta.c | 2 +- net/sched/sch_api.c | 2 +- net/sched/sch_dsmark.c | 6 +++--- net/sched/sch_teql.c | 4 ++-- 8 files changed, 25 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 27a33833ff4a..fe6e7aac3c56 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -3,6 +3,7 @@ #include #include +#include #include struct qdisc_walker { @@ -114,6 +115,17 @@ int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, int tc_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res); +static inline __be16 tc_skb_protocol(const struct sk_buff *skb) +{ + /* We need to take extra care in case the skb came via + * vlan accelerated path. In that case, use skb->vlan_proto + * as the original vlan header was already stripped. + */ + if (vlan_tx_tag_present(skb)) + return skb->vlan_proto; + return skb->protocol; +} + /* Calculate maximal size of packet seen by hard_start_xmit routine of this device. 
*/ diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c index edbf40dac709..4cd5cf1aedf8 100644 --- a/net/sched/act_csum.c +++ b/net/sched/act_csum.c @@ -509,7 +509,7 @@ static int tcf_csum(struct sk_buff *skb, if (unlikely(action == TC_ACT_SHOT)) goto drop; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case cpu_to_be16(ETH_P_IP): if (!tcf_csum_ipv4(skb, update_flags)) goto drop; diff --git a/net/sched/cls_flow.c b/net/sched/cls_flow.c index 15d68f24a521..461410394d08 100644 --- a/net/sched/cls_flow.c +++ b/net/sched/cls_flow.c @@ -77,7 +77,7 @@ static u32 flow_get_dst(const struct sk_buff *skb, const struct flow_keys *flow) { if (flow->dst) return ntohl(flow->dst); - return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; + return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } static u32 flow_get_proto(const struct sk_buff *skb, const struct flow_keys *flow) @@ -98,7 +98,7 @@ static u32 flow_get_proto_dst(const struct sk_buff *skb, const struct flow_keys if (flow->ports) return ntohs(flow->port16[1]); - return addr_fold(skb_dst(skb)) ^ (__force u16)skb->protocol; + return addr_fold(skb_dst(skb)) ^ (__force u16) tc_skb_protocol(skb); } static u32 flow_get_iif(const struct sk_buff *skb) @@ -144,7 +144,7 @@ static u32 flow_get_nfct(const struct sk_buff *skb) static u32 flow_get_nfct_src(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, src.u3.ip)); case htons(ETH_P_IPV6): @@ -156,7 +156,7 @@ fallback: static u32 flow_get_nfct_dst(const struct sk_buff *skb, const struct flow_keys *flow) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): return ntohl(CTTUPLE(skb, dst.u3.ip)); case htons(ETH_P_IPV6): diff --git a/net/sched/em_ipset.c b/net/sched/em_ipset.c index 5b4a4efe468c..a3d79c8bf3b8 100644 --- a/net/sched/em_ipset.c +++ b/net/sched/em_ipset.c @@ -59,7 +59,7 @@ static int em_ipset_match(struct sk_buff *skb, struct tcf_ematch *em, struct net_device *dev, *indev = NULL; int ret, network_offset; - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): acpar.family = NFPROTO_IPV4; if (!pskb_network_may_pull(skb, sizeof(struct iphdr))) diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index c8f8c399b99a..2159981b604e 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -197,7 +197,7 @@ META_COLLECTOR(int_priority) META_COLLECTOR(int_protocol) { /* Let userspace take care of the byte ordering */ - dst->value = skb->protocol; + dst->value = tc_skb_protocol(skb); } META_COLLECTOR(int_pkttype) diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 76f402e05bd6..243b7d169d61 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1807,7 +1807,7 @@ done: int tc_classify_compat(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - __be16 protocol = skb->protocol; + __be16 protocol = tc_skb_protocol(skb); int err; for (; tp; tp = rcu_dereference_bh(tp->next)) { diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 227114f27f94..66700a6116aa 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -203,7 +203,7 @@ static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch) pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p); if (p->set_tc_index) { - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): if (skb_cow_head(skb, sizeof(struct iphdr))) goto drop; @@ -289,7 
+289,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) index = skb->tc_index & (p->indices - 1); pr_debug("index %d->%d\n", skb->tc_index, index); - switch (skb->protocol) { + switch (tc_skb_protocol(skb)) { case htons(ETH_P_IP): ipv4_change_dsfield(ip_hdr(skb), p->mask[index], p->value[index]); @@ -306,7 +306,7 @@ static struct sk_buff *dsmark_dequeue(struct Qdisc *sch) */ if (p->mask[index] != 0xff || p->value[index]) pr_warn("%s: unsupported protocol %d\n", - __func__, ntohs(skb->protocol)); + __func__, ntohs(tc_skb_protocol(skb))); break; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 4899d4a18aa5..e02687185a59 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -242,8 +242,8 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, char haddr[MAX_ADDR_LEN]; neigh_ha_snapshot(haddr, n, dev); - err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr, - NULL, skb->len); + err = dev_hard_header(skb, dev, ntohs(tc_skb_protocol(skb)), + haddr, NULL, skb->len); if (err < 0) err = -EINVAL; -- cgit v1.2.3-70-g09d2 From df8a39defad46b83694ea6dd868d332976d62cc0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 13 Jan 2015 17:13:44 +0100 Subject: net: rename vlan_tx_* helpers since "tx" is misleading there The same macros are used for rx as well. So rename it. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 4 ++-- drivers/infiniband/hw/nes/nes_nic.c | 13 +++++++------ drivers/net/ethernet/3com/typhoon.c | 4 ++-- drivers/net/ethernet/alteon/acenic.c | 8 ++++---- drivers/net/ethernet/amd/amd8111e.c | 4 ++-- drivers/net/ethernet/amd/xgbe/xgbe-drv.c | 8 ++++---- drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 4 ++-- drivers/net/ethernet/atheros/atl1e/atl1e_main.c | 4 ++-- drivers/net/ethernet/atheros/atlx/atl1.c | 4 ++-- drivers/net/ethernet/atheros/atlx/atl2.c | 4 ++-- drivers/net/ethernet/broadcom/bnx2.c | 4 ++-- drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c | 4 ++-- drivers/net/ethernet/broadcom/tg3.c | 4 ++-- drivers/net/ethernet/brocade/bna/bnad.c | 4 ++-- drivers/net/ethernet/chelsio/cxgb/sge.c | 4 ++-- drivers/net/ethernet/chelsio/cxgb3/sge.c | 6 +++--- drivers/net/ethernet/chelsio/cxgb4/sge.c | 4 ++-- drivers/net/ethernet/chelsio/cxgb4vf/sge.c | 4 ++-- drivers/net/ethernet/cisco/enic/enic_main.c | 4 ++-- drivers/net/ethernet/emulex/benet/be_main.c | 12 ++++++------ drivers/net/ethernet/freescale/gianfar.c | 4 ++-- drivers/net/ethernet/ibm/ehea/ehea_main.c | 4 ++-- drivers/net/ethernet/intel/e1000/e1000_main.c | 5 +++-- drivers/net/ethernet/intel/e1000e/netdev.c | 9 +++++---- drivers/net/ethernet/intel/fm10k/fm10k_main.c | 4 ++-- drivers/net/ethernet/intel/fm10k/fm10k_netdev.c | 2 +- drivers/net/ethernet/intel/i40e/i40e_txrx.c | 4 ++-- drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 4 ++-- drivers/net/ethernet/intel/igb/igb_main.c | 4 ++-- drivers/net/ethernet/intel/igbvf/netdev.c | 5 +++-- drivers/net/ethernet/intel/ixgb/ixgb_main.c | 4 ++-- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 4 ++-- drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 4 ++-- drivers/net/ethernet/jme.c | 4 ++-- drivers/net/ethernet/marvell/sky2.c | 6 +++--- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 12 ++++++------ drivers/net/ethernet/natsemi/ns83820.c | 4 ++-- drivers/net/ethernet/neterion/s2io.c | 4 ++-- drivers/net/ethernet/neterion/vxge/vxge-main.c | 4 ++-- drivers/net/ethernet/nvidia/forcedeth.c | 4 ++-- drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c | 4 ++-- 
drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c | 8 ++++---- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 6 +++--- drivers/net/ethernet/realtek/8139cp.c | 4 ++-- drivers/net/ethernet/realtek/r8169.c | 4 ++-- drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c | 2 +- drivers/net/ethernet/tehuti/tehuti.c | 4 ++-- drivers/net/ethernet/via/via-rhine.c | 6 +++--- drivers/net/ethernet/via/via-velocity.c | 4 ++-- drivers/net/macvtap.c | 6 +++--- drivers/net/tun.c | 4 ++-- drivers/net/usb/r8152.c | 4 ++-- drivers/net/vmxnet3/vmxnet3_drv.c | 4 ++-- drivers/net/vxlan.c | 4 ++-- drivers/s390/net/qeth_l3_main.c | 8 ++++---- drivers/vhost/net.c | 2 +- include/linux/if_vlan.h | 16 ++++++++-------- include/net/pkt_sched.h | 2 +- include/trace/events/net.h | 8 ++++---- net/8021q/vlan_core.c | 2 +- net/bridge/br_netfilter.c | 12 ++++++------ net/bridge/br_private.h | 4 ++-- net/bridge/br_vlan.c | 4 ++-- net/bridge/netfilter/ebt_vlan.c | 4 ++-- net/bridge/netfilter/ebtables.c | 2 +- net/core/dev.c | 10 +++++----- net/core/netpoll.c | 2 +- net/core/skbuff.c | 8 ++++---- net/ipv4/geneve.c | 2 +- net/openvswitch/actions.c | 4 ++-- net/openvswitch/datapath.c | 2 +- net/openvswitch/flow.c | 4 ++-- net/openvswitch/vport-gre.c | 2 +- net/openvswitch/vport.c | 3 ++- net/packet/af_packet.c | 12 ++++++------ net/sched/em_meta.c | 2 +- net/wireless/util.c | 4 ++-- 77 files changed, 195 insertions(+), 190 deletions(-) (limited to 'include') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index 58d08f8d8d80..9930ecfbb465 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -279,8 +279,8 @@ Possible BPF extensions are shown in the following table: hatype skb->dev->type rxhash skb->hash cpu raw_smp_processor_id() - vlan_tci vlan_tx_tag_get(skb) - vlan_pr vlan_tx_tag_present(skb) + vlan_tci skb_vlan_tag_get(skb) + vlan_pr skb_vlan_tag_present(skb) rand prandom_u32() These extensions can also be prefixed with '#'. diff --git a/drivers/infiniband/hw/nes/nes_nic.c b/drivers/infiniband/hw/nes/nes_nic.c index 49eb5111d2cd..70acda91eb2a 100644 --- a/drivers/infiniband/hw/nes/nes_nic.c +++ b/drivers/infiniband/hw/nes/nes_nic.c @@ -373,11 +373,11 @@ static int nes_nic_send(struct sk_buff *skb, struct net_device *netdev) wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; /* setup the VLAN tag if present */ - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... VLAN = %08X\n", - netdev->name, vlan_tx_tag_get(skb)); + netdev->name, skb_vlan_tag_get(skb)); wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE; - wqe_fragment_length[0] = (__force __le16) vlan_tx_tag_get(skb); + wqe_fragment_length[0] = (__force __le16) skb_vlan_tag_get(skb); } else wqe_misc = 0; @@ -576,11 +576,12 @@ tso_sq_no_longer_full: wqe_fragment_length = (__le16 *)&nic_sqe->wqe_words[NES_NIC_SQ_WQE_LENGTH_0_TAG_IDX]; /* setup the VLAN tag if present */ - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { nes_debug(NES_DBG_NIC_TX, "%s: VLAN packet to send... 
VLAN = %08X\n", - netdev->name, vlan_tx_tag_get(skb) ); + netdev->name, + skb_vlan_tag_get(skb)); wqe_misc = NES_NIC_SQ_WQE_TAGVALUE_ENABLE; - wqe_fragment_length[0] = (__force __le16) vlan_tx_tag_get(skb); + wqe_fragment_length[0] = (__force __le16) skb_vlan_tag_get(skb); } else wqe_misc = 0; diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c index dede43f4ce09..8f8418d2ac4a 100644 --- a/drivers/net/ethernet/3com/typhoon.c +++ b/drivers/net/ethernet/3com/typhoon.c @@ -769,11 +769,11 @@ typhoon_start_tx(struct sk_buff *skb, struct net_device *dev) first_txd->processFlags |= TYPHOON_TX_PF_IP_CHKSUM; } - if(vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { first_txd->processFlags |= TYPHOON_TX_PF_INSERT_VLAN | TYPHOON_TX_PF_VLAN_PRIORITY; first_txd->processFlags |= - cpu_to_le32(htons(vlan_tx_tag_get(skb)) << + cpu_to_le32(htons(skb_vlan_tag_get(skb)) << TYPHOON_TX_PF_VLAN_TAG_SHIFT); } diff --git a/drivers/net/ethernet/alteon/acenic.c b/drivers/net/ethernet/alteon/acenic.c index b68074803de3..b90a26b13fdf 100644 --- a/drivers/net/ethernet/alteon/acenic.c +++ b/drivers/net/ethernet/alteon/acenic.c @@ -2429,9 +2429,9 @@ restart: flagsize = (skb->len << 16) | (BD_FLG_END); if (skb->ip_summed == CHECKSUM_PARTIAL) flagsize |= BD_FLG_TCP_UDP_SUM; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { flagsize |= BD_FLG_VLAN_TAG; - vlan_tag = vlan_tx_tag_get(skb); + vlan_tag = skb_vlan_tag_get(skb); } desc = ap->tx_ring + idx; idx = (idx + 1) % ACE_TX_RING_ENTRIES(ap); @@ -2450,9 +2450,9 @@ restart: flagsize = (skb_headlen(skb) << 16); if (skb->ip_summed == CHECKSUM_PARTIAL) flagsize |= BD_FLG_TCP_UDP_SUM; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { flagsize |= BD_FLG_VLAN_TAG; - vlan_tag = vlan_tx_tag_get(skb); + vlan_tag = skb_vlan_tag_get(skb); } ace_load_tx_bd(ap, ap->tx_ring + idx, mapping, flagsize, vlan_tag); diff --git a/drivers/net/ethernet/amd/amd8111e.c b/drivers/net/ethernet/amd/amd8111e.c index 841e6558db68..4c2ae2221780 100644 --- a/drivers/net/ethernet/amd/amd8111e.c +++ b/drivers/net/ethernet/amd/amd8111e.c @@ -1299,11 +1299,11 @@ static netdev_tx_t amd8111e_start_xmit(struct sk_buff *skb, lp->tx_ring[tx_index].tx_flags = 0; #if AMD8111E_VLAN_TAG_USED - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { lp->tx_ring[tx_index].tag_ctrl_cmd |= cpu_to_le16(TCC_VLAN_INSERT); lp->tx_ring[tx_index].tag_ctrl_info = - cpu_to_le16(vlan_tx_tag_get(skb)); + cpu_to_le16(skb_vlan_tag_get(skb)); } #endif diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c index 7bb5f07dbeef..2ba1dd22ad64 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-drv.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-drv.c @@ -1165,8 +1165,8 @@ static void xgbe_prep_tx_tstamp(struct xgbe_prv_data *pdata, static void xgbe_prep_vlan(struct sk_buff *skb, struct xgbe_packet_data *packet) { - if (vlan_tx_tag_present(skb)) - packet->vlan_ctag = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) + packet->vlan_ctag = skb_vlan_tag_get(skb); } static int xgbe_prep_tso(struct sk_buff *skb, struct xgbe_packet_data *packet) @@ -1247,9 +1247,9 @@ static void xgbe_packet_info(struct xgbe_prv_data *pdata, XGMAC_SET_BITS(packet->attributes, TX_PACKET_ATTRIBUTES, CSUM_ENABLE, 1); - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { /* VLAN requires an extra descriptor if tag is different */ - if (vlan_tx_tag_get(skb) != ring->tx.cur_vlan_ctag) + if (skb_vlan_tag_get(skb) != 
ring->tx.cur_vlan_ctag) /* We can share with the TSO context descriptor */ if (!context_desc) { context_desc = 1; diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c index c9946c6c119e..587f63e87588 100644 --- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c +++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c @@ -2235,8 +2235,8 @@ static netdev_tx_t atl1c_xmit_frame(struct sk_buff *skb, return NETDEV_TX_OK; } - if (unlikely(vlan_tx_tag_present(skb))) { - u16 vlan = vlan_tx_tag_get(skb); + if (unlikely(skb_vlan_tag_present(skb))) { + u16 vlan = skb_vlan_tag_get(skb); __le16 tag; vlan = cpu_to_le16(vlan); diff --git a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c index c88abf5b6415..59a03a193e83 100644 --- a/drivers/net/ethernet/atheros/atl1e/atl1e_main.c +++ b/drivers/net/ethernet/atheros/atl1e/atl1e_main.c @@ -1892,8 +1892,8 @@ static netdev_tx_t atl1e_xmit_frame(struct sk_buff *skb, tpd = atl1e_get_tpd(adapter); - if (vlan_tx_tag_present(skb)) { - u16 vlan_tag = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + u16 vlan_tag = skb_vlan_tag_get(skb); u16 atl1e_vlan_tag; tpd->word3 |= 1 << TPD_INS_VL_TAG_SHIFT; diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c index 2c8f398aeda9..eca1d113fee1 100644 --- a/drivers/net/ethernet/atheros/atlx/atl1.c +++ b/drivers/net/ethernet/atheros/atlx/atl1.c @@ -2415,8 +2415,8 @@ static netdev_tx_t atl1_xmit_frame(struct sk_buff *skb, (u16) atomic_read(&tpd_ring->next_to_use)); memset(ptpd, 0, sizeof(struct tx_packet_desc)); - if (vlan_tx_tag_present(skb)) { - vlan_tag = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + vlan_tag = skb_vlan_tag_get(skb); vlan_tag = (vlan_tag << 4) | (vlan_tag >> 13) | ((vlan_tag >> 9) & 0x8); ptpd->word3 |= 1 << TPD_INS_VL_TAG_SHIFT; diff --git a/drivers/net/ethernet/atheros/atlx/atl2.c b/drivers/net/ethernet/atheros/atlx/atl2.c index 482a7cabb0a1..46a535318c7a 100644 --- a/drivers/net/ethernet/atheros/atlx/atl2.c +++ b/drivers/net/ethernet/atheros/atlx/atl2.c @@ -887,8 +887,8 @@ static netdev_tx_t atl2_xmit_frame(struct sk_buff *skb, offset = ((u32)(skb->len-copy_len + 3) & ~3); } #ifdef NETIF_F_HW_VLAN_CTAG_TX - if (vlan_tx_tag_present(skb)) { - u16 vlan_tag = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + u16 vlan_tag = skb_vlan_tag_get(skb); vlan_tag = (vlan_tag << 4) | (vlan_tag >> 13) | ((vlan_tag >> 9) & 0x8); diff --git a/drivers/net/ethernet/broadcom/bnx2.c b/drivers/net/ethernet/broadcom/bnx2.c index 823d01c5684c..02bf0b86995b 100644 --- a/drivers/net/ethernet/broadcom/bnx2.c +++ b/drivers/net/ethernet/broadcom/bnx2.c @@ -6597,9 +6597,9 @@ bnx2_start_xmit(struct sk_buff *skb, struct net_device *dev) vlan_tag_flags |= TX_BD_FLAGS_TCP_UDP_CKSUM; } - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { vlan_tag_flags |= - (TX_BD_FLAGS_VLAN_TAG | (vlan_tx_tag_get(skb) << 16)); + (TX_BD_FLAGS_VLAN_TAG | (skb_vlan_tag_get(skb) << 16)); } if ((mss = skb_shinfo(skb)->gso_size)) { diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c index 1d1147c93d59..b51a18a09d4d 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_cmn.c @@ -3865,9 +3865,9 @@ netdev_tx_t bnx2x_start_xmit(struct sk_buff *skb, struct net_device *dev) "sending pkt %u @%p next_idx %u bd %u @%p\n", pkt_prod, tx_buf, txdata->tx_pkt_prod, bd_prod, 
tx_start_bd); - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { tx_start_bd->vlan_or_ethertype = - cpu_to_le16(vlan_tx_tag_get(skb)); + cpu_to_le16(skb_vlan_tag_get(skb)); tx_start_bd->bd_flags.as_bitfield |= (X_ETH_OUTBAND_VLAN << ETH_TX_BD_FLAGS_VLAN_MODE_SHIFT); } else { diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 356bd5b022a5..4cf43bfbc955 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -8002,9 +8002,9 @@ static netdev_tx_t tg3_start_xmit(struct sk_buff *skb, struct net_device *dev) !mss && skb->len > VLAN_ETH_FRAME_LEN) base_flags |= TXD_FLAG_JMB_PKT; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { base_flags |= TXD_FLAG_VLAN; - vlan = vlan_tx_tag_get(skb); + vlan = skb_vlan_tag_get(skb); } if ((unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) && diff --git a/drivers/net/ethernet/brocade/bna/bnad.c b/drivers/net/ethernet/brocade/bna/bnad.c index 323721838cf9..7714d7790089 100644 --- a/drivers/net/ethernet/brocade/bna/bnad.c +++ b/drivers/net/ethernet/brocade/bna/bnad.c @@ -2824,8 +2824,8 @@ bnad_txq_wi_prepare(struct bnad *bnad, struct bna_tcb *tcb, u32 gso_size; u16 vlan_tag = 0; - if (vlan_tx_tag_present(skb)) { - vlan_tag = (u16)vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + vlan_tag = (u16)skb_vlan_tag_get(skb); flags |= (BNA_TXQ_WI_CF_INS_PRIO | BNA_TXQ_WI_CF_INS_VLAN); } if (test_bit(BNAD_RF_CEE_RUNNING, &bnad->run_flags)) { diff --git a/drivers/net/ethernet/chelsio/cxgb/sge.c b/drivers/net/ethernet/chelsio/cxgb/sge.c index babe2a915b00..526ea74e82d9 100644 --- a/drivers/net/ethernet/chelsio/cxgb/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb/sge.c @@ -1860,9 +1860,9 @@ netdev_tx_t t1_start_xmit(struct sk_buff *skb, struct net_device *dev) } cpl->iff = dev->if_port; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { cpl->vlan_valid = 1; - cpl->vlan = htons(vlan_tx_tag_get(skb)); + cpl->vlan = htons(skb_vlan_tag_get(skb)); st->vlan_insert++; } else cpl->vlan_valid = 0; diff --git a/drivers/net/ethernet/chelsio/cxgb3/sge.c b/drivers/net/ethernet/chelsio/cxgb3/sge.c index 3dfcf600fcc6..d6aa602f168d 100644 --- a/drivers/net/ethernet/chelsio/cxgb3/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb3/sge.c @@ -1148,8 +1148,8 @@ static void write_tx_pkt_wr(struct adapter *adap, struct sk_buff *skb, cpl->len = htonl(skb->len); cntrl = V_TXPKT_INTF(pi->port_id); - if (vlan_tx_tag_present(skb)) - cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(vlan_tx_tag_get(skb)); + if (skb_vlan_tag_present(skb)) + cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(skb_vlan_tag_get(skb)); tso_info = V_LSO_MSS(skb_shinfo(skb)->gso_size); if (tso_info) { @@ -1282,7 +1282,7 @@ netdev_tx_t t3_eth_xmit(struct sk_buff *skb, struct net_device *dev) qs->port_stats[SGE_PSTAT_TX_CSUM]++; if (skb_shinfo(skb)->gso_size) qs->port_stats[SGE_PSTAT_TSO]++; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) qs->port_stats[SGE_PSTAT_VLANINS]++; /* diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c index ca42e2e9dec9..619156112b21 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c @@ -1154,9 +1154,9 @@ out_free: dev_kfree_skb_any(skb); cntrl = TXPKT_L4CSUM_DIS | TXPKT_IPCSUM_DIS; } - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { q->vlan_ins++; - cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(vlan_tx_tag_get(skb)); + cntrl |= TXPKT_VLAN_VLD | 
TXPKT_VLAN(skb_vlan_tag_get(skb)); } cpl->ctrl0 = htonl(TXPKT_OPCODE(CPL_TX_PKT_XT) | diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c index 4424277a7e4d..0545f0de1c52 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/sge.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/sge.c @@ -1326,9 +1326,9 @@ int t4vf_eth_xmit(struct sk_buff *skb, struct net_device *dev) * If there's a VLAN tag present, add that to the list of things to * do in this Work Request. */ - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { txq->vlan_ins++; - cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(vlan_tx_tag_get(skb)); + cntrl |= TXPKT_VLAN_VLD | TXPKT_VLAN(skb_vlan_tag_get(skb)); } /* diff --git a/drivers/net/ethernet/cisco/enic/enic_main.c b/drivers/net/ethernet/cisco/enic/enic_main.c index 9a952df6606e..0535f6fbdc71 100644 --- a/drivers/net/ethernet/cisco/enic/enic_main.c +++ b/drivers/net/ethernet/cisco/enic/enic_main.c @@ -520,10 +520,10 @@ static inline void enic_queue_wq_skb(struct enic *enic, int loopback = 0; int err; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { /* VLAN tag from trunking driver */ vlan_tag_insert = 1; - vlan_tag = vlan_tx_tag_get(skb); + vlan_tag = skb_vlan_tag_get(skb); } else if (enic->loop_enable) { vlan_tag = enic->loop_tag; loopback = 1; diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 37a26b0b7e33..ed46610e5453 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -694,7 +694,7 @@ static inline u16 be_get_tx_vlan_tag(struct be_adapter *adapter, u8 vlan_prio; u16 vlan_tag; - vlan_tag = vlan_tx_tag_get(skb); + vlan_tag = skb_vlan_tag_get(skb); vlan_prio = (vlan_tag & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; /* If vlan priority provided by OS is NOT in available bmap */ if (!(adapter->vlan_prio_bmap & (1 << vlan_prio))) @@ -745,7 +745,7 @@ static void wrb_fill_hdr(struct be_adapter *adapter, struct be_eth_hdr_wrb *hdr, SET_TX_WRB_HDR_BITS(udpcs, hdr, 1); } - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { SET_TX_WRB_HDR_BITS(vlan, hdr, 1); vlan_tag = be_get_tx_vlan_tag(adapter, skb); SET_TX_WRB_HDR_BITS(vlan_tag, hdr, vlan_tag); @@ -864,7 +864,7 @@ static struct sk_buff *be_insert_vlan_in_pkt(struct be_adapter *adapter, if (unlikely(!skb)) return skb; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) vlan_tag = be_get_tx_vlan_tag(adapter, skb); if (qnq_async_evt_rcvd(adapter) && adapter->pvid) { @@ -923,7 +923,7 @@ static bool be_ipv6_exthdr_check(struct sk_buff *skb) static int be_vlan_tag_tx_chk(struct be_adapter *adapter, struct sk_buff *skb) { - return vlan_tx_tag_present(skb) || adapter->pvid || adapter->qnq_vid; + return skb_vlan_tag_present(skb) || adapter->pvid || adapter->qnq_vid; } static int be_ipv6_tx_stall_chk(struct be_adapter *adapter, struct sk_buff *skb) @@ -946,7 +946,7 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, eth_hdr_len = ntohs(skb->protocol) == ETH_P_8021Q ? VLAN_ETH_HLEN : ETH_HLEN; if (skb->len <= 60 && - (lancer_chip(adapter) || vlan_tx_tag_present(skb)) && + (lancer_chip(adapter) || skb_vlan_tag_present(skb)) && is_ipv4_pkt(skb)) { ip = (struct iphdr *)ip_hdr(skb); pskb_trim(skb, eth_hdr_len + ntohs(ip->tot_len)); @@ -964,7 +964,7 @@ static struct sk_buff *be_lancer_xmit_workarounds(struct be_adapter *adapter, * Manually insert VLAN in pkt. 
*/ if (skb->ip_summed != CHECKSUM_PARTIAL && - vlan_tx_tag_present(skb)) { + skb_vlan_tag_present(skb)) { skb = be_insert_vlan_in_pkt(adapter, skb, skip_hw_vlan); if (unlikely(!skb)) goto err; diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index e54b1e39f9b4..93ff846e96f1 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2170,7 +2170,7 @@ static inline void gfar_tx_checksum(struct sk_buff *skb, struct txfcb *fcb, void inline gfar_tx_vlan(struct sk_buff *skb, struct txfcb *fcb) { fcb->flags |= TXFCB_VLN; - fcb->vlctl = vlan_tx_tag_get(skb); + fcb->vlctl = skb_vlan_tag_get(skb); } static inline struct txbd8 *skip_txbd(struct txbd8 *bdp, int stride, @@ -2230,7 +2230,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) regs = tx_queue->grp->regs; do_csum = (CHECKSUM_PARTIAL == skb->ip_summed); - do_vlan = vlan_tx_tag_present(skb); + do_vlan = skb_vlan_tag_present(skb); do_tstamp = (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && priv->hwts_tx_en; diff --git a/drivers/net/ethernet/ibm/ehea/ehea_main.c b/drivers/net/ethernet/ibm/ehea/ehea_main.c index 566b17db135a..e8a1adb7a962 100644 --- a/drivers/net/ethernet/ibm/ehea/ehea_main.c +++ b/drivers/net/ethernet/ibm/ehea/ehea_main.c @@ -2064,9 +2064,9 @@ static int ehea_start_xmit(struct sk_buff *skb, struct net_device *dev) memset(swqe, 0, SWQE_HEADER_SIZE); atomic_dec(&pr->swqe_avail); - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { swqe->tx_control |= EHEA_SWQE_VLAN_INSERT; - swqe->vlan_tag = vlan_tx_tag_get(skb); + swqe->vlan_tag = skb_vlan_tag_get(skb); } pr->tx_packets++; diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index 83140cbb5f01..9242982db3e0 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -3226,9 +3226,10 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, return NETDEV_TX_BUSY; } - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { tx_flags |= E1000_TX_FLAGS_VLAN; - tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT); + tx_flags |= (skb_vlan_tag_get(skb) << + E1000_TX_FLAGS_VLAN_SHIFT); } first = tx_ring->next_to_use; diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index 332a298e95b5..38cb586b1bf4 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -5463,8 +5463,8 @@ static int e1000_transfer_dhcp_info(struct e1000_adapter *adapter, struct e1000_hw *hw = &adapter->hw; u16 length, offset; - if (vlan_tx_tag_present(skb) && - !((vlan_tx_tag_get(skb) == adapter->hw.mng_cookie.vlan_id) && + if (skb_vlan_tag_present(skb) && + !((skb_vlan_tag_get(skb) == adapter->hw.mng_cookie.vlan_id) && (adapter->hw.mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN))) return 0; @@ -5603,9 +5603,10 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb, if (e1000_maybe_stop_tx(tx_ring, count + 2)) return NETDEV_TX_BUSY; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { tx_flags |= E1000_TX_FLAGS_VLAN; - tx_flags |= (vlan_tx_tag_get(skb) << E1000_TX_FLAGS_VLAN_SHIFT); + tx_flags |= (skb_vlan_tag_get(skb) << + E1000_TX_FLAGS_VLAN_SHIFT); } first = tx_ring->next_to_use; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_main.c b/drivers/net/ethernet/intel/fm10k/fm10k_main.c index eb088b129bc7..caa43f7c2931 100644 --- 
a/drivers/net/ethernet/intel/fm10k/fm10k_main.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_main.c @@ -965,8 +965,8 @@ static void fm10k_tx_map(struct fm10k_ring *tx_ring, tx_desc = FM10K_TX_DESC(tx_ring, i); /* add HW VLAN tag */ - if (vlan_tx_tag_present(skb)) - tx_desc->vlan = cpu_to_le16(vlan_tx_tag_get(skb)); + if (skb_vlan_tag_present(skb)) + tx_desc->vlan = cpu_to_le16(skb_vlan_tag_get(skb)); else tx_desc->vlan = 0; diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 8811364b91cb..945b35d31c71 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -609,7 +609,7 @@ static netdev_tx_t fm10k_xmit_frame(struct sk_buff *skb, struct net_device *dev) int err; if ((skb->protocol == htons(ETH_P_8021Q)) && - !vlan_tx_tag_present(skb)) { + !skb_vlan_tag_present(skb)) { /* FM10K only supports hardware tagging, any tags in frame * are considered 2nd level or "outer" tags */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index 04b441460bbd..9f536dd8e1ec 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1772,8 +1772,8 @@ static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, u32 tx_flags = 0; /* if we have a HW VLAN tag being added, default to the HW one */ - if (vlan_tx_tag_present(skb)) { - tx_flags |= vlan_tx_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; + if (skb_vlan_tag_present(skb)) { + tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; tx_flags |= I40E_TX_FLAGS_HW_VLAN; /* else if it is a SW VLAN, check the next protocol and store the tag */ } else if (protocol == htons(ETH_P_8021Q)) { diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c index 04c7c1557a0c..82c3798fdd36 100644 --- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c @@ -1122,8 +1122,8 @@ static int i40e_tx_prepare_vlan_flags(struct sk_buff *skb, u32 tx_flags = 0; /* if we have a HW VLAN tag being added, default to the HW one */ - if (vlan_tx_tag_present(skb)) { - tx_flags |= vlan_tx_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; + if (skb_vlan_tag_present(skb)) { + tx_flags |= skb_vlan_tag_get(skb) << I40E_TX_FLAGS_VLAN_SHIFT; tx_flags |= I40E_TX_FLAGS_HW_VLAN; /* else if it is a SW VLAN, check the next protocol and store the tag */ } else if (protocol == htons(ETH_P_8021Q)) { diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index ff59897a9463..6c25ec314183 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -5035,9 +5035,9 @@ netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, skb_tx_timestamp(skb); - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { tx_flags |= IGB_TX_FLAGS_VLAN; - tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT); + tx_flags |= (skb_vlan_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT); } /* record initial flags and protocol */ diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index 63c807c9b21c..ad2b4897b392 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -2234,9 +2234,10 @@ static netdev_tx_t igbvf_xmit_frame_ring_adv(struct sk_buff *skb, return NETDEV_TX_BUSY; } - if (vlan_tx_tag_present(skb)) { + if 
(skb_vlan_tag_present(skb)) { tx_flags |= IGBVF_TX_FLAGS_VLAN; - tx_flags |= (vlan_tx_tag_get(skb) << IGBVF_TX_FLAGS_VLAN_SHIFT); + tx_flags |= (skb_vlan_tag_get(skb) << + IGBVF_TX_FLAGS_VLAN_SHIFT); } if (skb->protocol == htons(ETH_P_IP)) diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index aa87605b144a..11a1bdbe3fd9 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -1532,9 +1532,9 @@ ixgb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) DESC_NEEDED))) return NETDEV_TX_BUSY; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { tx_flags |= IXGB_TX_FLAGS_VLAN; - vlan_id = vlan_tx_tag_get(skb); + vlan_id = skb_vlan_tag_get(skb); } first = adapter->tx_ring.next_to_use; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 2ed2c7de2304..7bb421bfd84e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7217,8 +7217,8 @@ netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *skb, first->gso_segs = 1; /* if we have a HW VLAN tag being added default to the HW one */ - if (vlan_tx_tag_present(skb)) { - tx_flags |= vlan_tx_tag_get(skb) << IXGBE_TX_FLAGS_VLAN_SHIFT; + if (skb_vlan_tag_present(skb)) { + tx_flags |= skb_vlan_tag_get(skb) << IXGBE_TX_FLAGS_VLAN_SHIFT; tx_flags |= IXGBE_TX_FLAGS_HW_VLAN; /* else if it is a SW VLAN check the next protocol and store the tag */ } else if (protocol == htons(ETH_P_8021Q)) { diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index 62a0d8e0f17d..c9b49bfb51bb 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -3452,8 +3452,8 @@ static int ixgbevf_xmit_frame(struct sk_buff *skb, struct net_device *netdev) first->bytecount = skb->len; first->gso_segs = 1; - if (vlan_tx_tag_present(skb)) { - tx_flags |= vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + tx_flags |= skb_vlan_tag_get(skb); tx_flags <<= IXGBE_TX_FLAGS_VLAN_SHIFT; tx_flags |= IXGBE_TX_FLAGS_VLAN; } diff --git a/drivers/net/ethernet/jme.c b/drivers/net/ethernet/jme.c index 44ce7d88f554..6e9a792097d3 100644 --- a/drivers/net/ethernet/jme.c +++ b/drivers/net/ethernet/jme.c @@ -2154,9 +2154,9 @@ jme_tx_csum(struct jme_adapter *jme, struct sk_buff *skb, u8 *flags) static inline void jme_tx_vlan(struct sk_buff *skb, __le16 *vlan, u8 *flags) { - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { *flags |= TXFLAG_TAGON; - *vlan = cpu_to_le16(vlan_tx_tag_get(skb)); + *vlan = cpu_to_le16(skb_vlan_tag_get(skb)); } } diff --git a/drivers/net/ethernet/marvell/sky2.c b/drivers/net/ethernet/marvell/sky2.c index 867a6a3ef81f..d9f4498832a1 100644 --- a/drivers/net/ethernet/marvell/sky2.c +++ b/drivers/net/ethernet/marvell/sky2.c @@ -1895,14 +1895,14 @@ static netdev_tx_t sky2_xmit_frame(struct sk_buff *skb, ctrl = 0; /* Add VLAN tag, can piggyback on LRGLEN or ADDR64 */ - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { if (!le) { le = get_tx_le(sky2, &slot); le->addr = 0; le->opcode = OP_VLAN|HW_OWNER; } else le->opcode |= OP_VLAN; - le->length = cpu_to_be16(vlan_tx_tag_get(skb)); + le->length = cpu_to_be16(skb_vlan_tag_get(skb)); ctrl |= INS_VLAN; } @@ -2594,7 +2594,7 @@ static struct sk_buff *sky2_receive(struct net_device *dev, sky2->rx_next = (sky2->rx_next + 1) % sky2->rx_pending; 
prefetch(sky2->rx_ring + sky2->rx_next); - if (vlan_tx_tag_present(re->skb)) + if (skb_vlan_tag_present(re->skb)) count -= VLAN_HLEN; /* Account for vlan tag */ /* This chip has hardware problems that generates bogus status. diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index e3357bf523df..359bb1286eb5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -682,8 +682,8 @@ u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, if (dev->num_tc) return skb_tx_hash(dev, skb); - if (vlan_tx_tag_present(skb)) - up = vlan_tx_tag_get(skb) >> VLAN_PRIO_SHIFT; + if (skb_vlan_tag_present(skb)) + up = skb_vlan_tag_get(skb) >> VLAN_PRIO_SHIFT; return fallback(dev, skb) % rings_p_up + up * rings_p_up; } @@ -742,8 +742,8 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_drop; } - if (vlan_tx_tag_present(skb)) - vlan_tag = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) + vlan_tag = skb_vlan_tag_get(skb); netdev_txq_bql_enqueue_prefetchw(ring->tx_queue); @@ -930,7 +930,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) real_size = (real_size / 16) & 0x3f; if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && - !vlan_tx_tag_present(skb) && send_doorbell) { + !skb_vlan_tag_present(skb) && send_doorbell) { tx_desc->ctrl.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); @@ -952,7 +952,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) } else { tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag); tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_VLAN * - !!vlan_tx_tag_present(skb); + !!skb_vlan_tag_present(skb); tx_desc->ctrl.fence_size = real_size; /* Ensure new descriptor hits memory diff --git a/drivers/net/ethernet/natsemi/ns83820.c b/drivers/net/ethernet/natsemi/ns83820.c index 2552e550a78c..eb807b0dc72a 100644 --- a/drivers/net/ethernet/natsemi/ns83820.c +++ b/drivers/net/ethernet/natsemi/ns83820.c @@ -1122,12 +1122,12 @@ again: } #ifdef NS83820_VLAN_ACCEL_SUPPORT - if(vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { /* fetch the vlan tag info out of the * ancillary data if the vlan code * is using hw vlan acceleration */ - short tag = vlan_tx_tag_get(skb); + short tag = skb_vlan_tag_get(skb); extsts |= (EXTSTS_VPKT | htons(tag)); } #endif diff --git a/drivers/net/ethernet/neterion/s2io.c b/drivers/net/ethernet/neterion/s2io.c index f5e4b820128b..0529cad75b10 100644 --- a/drivers/net/ethernet/neterion/s2io.c +++ b/drivers/net/ethernet/neterion/s2io.c @@ -4045,8 +4045,8 @@ static netdev_tx_t s2io_xmit(struct sk_buff *skb, struct net_device *dev) } queue = 0; - if (vlan_tx_tag_present(skb)) - vlan_tag = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) + vlan_tag = skb_vlan_tag_get(skb); if (sp->config.tx_steering_type == TX_DEFAULT_STEERING) { if (skb->protocol == htons(ETH_P_IP)) { struct iphdr *ip; diff --git a/drivers/net/ethernet/neterion/vxge/vxge-main.c b/drivers/net/ethernet/neterion/vxge/vxge-main.c index cc0485e3c621..50d5604833ed 100644 --- a/drivers/net/ethernet/neterion/vxge/vxge-main.c +++ b/drivers/net/ethernet/neterion/vxge/vxge-main.c @@ -890,8 +890,8 @@ vxge_xmit(struct sk_buff *skb, struct net_device *dev) dev->name, __func__, __LINE__, fifo_hw, dtr, dtr_priv); - if (vlan_tx_tag_present(skb)) { - u16 vlan_tag = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + u16 vlan_tag = skb_vlan_tag_get(skb); vxge_hw_fifo_txdl_vlan_set(dtr, vlan_tag); } diff --git 
a/drivers/net/ethernet/nvidia/forcedeth.c b/drivers/net/ethernet/nvidia/forcedeth.c index f39cae620f61..a41bb5e6b954 100644 --- a/drivers/net/ethernet/nvidia/forcedeth.c +++ b/drivers/net/ethernet/nvidia/forcedeth.c @@ -2462,9 +2462,9 @@ static netdev_tx_t nv_start_xmit_optimized(struct sk_buff *skb, NV_TX2_CHECKSUM_L3 | NV_TX2_CHECKSUM_L4 : 0; /* vlan tag */ - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) start_tx->txvlan = cpu_to_le32(NV_TX3_VLAN_TAG_PRESENT | - vlan_tx_tag_get(skb)); + skb_vlan_tag_get(skb)); else start_tx->txvlan = 0; diff --git a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c index 613037584d08..a47fe67fdf58 100644 --- a/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c +++ b/drivers/net/ethernet/qlogic/netxen/netxen_nic_main.c @@ -1893,9 +1893,9 @@ netxen_tso_check(struct net_device *netdev, protocol = vh->h_vlan_encapsulated_proto; flags = FLAGS_VLAN_TAGGED; - } else if (vlan_tx_tag_present(skb)) { + } else if (skb_vlan_tag_present(skb)) { flags = FLAGS_VLAN_OOB; - vid = vlan_tx_tag_get(skb); + vid = skb_vlan_tag_get(skb); netxen_set_tx_vlan_tci(first_desc, vid); vlan_oob = 1; } diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c index d166e534925d..4d2496f28b85 100644 --- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c +++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_io.c @@ -321,8 +321,8 @@ static void qlcnic_send_filter(struct qlcnic_adapter *adapter, if (protocol == ETH_P_8021Q) { vh = (struct vlan_ethhdr *)skb->data; vlan_id = ntohs(vh->h_vlan_TCI); - } else if (vlan_tx_tag_present(skb)) { - vlan_id = vlan_tx_tag_get(skb); + } else if (skb_vlan_tag_present(skb)) { + vlan_id = skb_vlan_tag_get(skb); } } @@ -473,9 +473,9 @@ static int qlcnic_tx_pkt(struct qlcnic_adapter *adapter, flags = QLCNIC_FLAGS_VLAN_TAGGED; vlan_tci = ntohs(vh->h_vlan_TCI); protocol = ntohs(vh->h_vlan_encapsulated_proto); - } else if (vlan_tx_tag_present(skb)) { + } else if (skb_vlan_tag_present(skb)) { flags = QLCNIC_FLAGS_VLAN_OOB; - vlan_tci = vlan_tx_tag_get(skb); + vlan_tci = skb_vlan_tag_get(skb); } if (unlikely(adapter->tx_pvid)) { if (vlan_tci && !(adapter->flags & QLCNIC_TAGGING_ENABLED)) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 6c904a6cad2a..dc0058f90370 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -2660,11 +2660,11 @@ static netdev_tx_t qlge_send(struct sk_buff *skb, struct net_device *ndev) mac_iocb_ptr->frame_len = cpu_to_le16((u16) skb->len); - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { netif_printk(qdev, tx_queued, KERN_DEBUG, qdev->ndev, - "Adding a vlan tag %d.\n", vlan_tx_tag_get(skb)); + "Adding a vlan tag %d.\n", skb_vlan_tag_get(skb)); mac_iocb_ptr->flags3 |= OB_MAC_IOCB_V; - mac_iocb_ptr->vlan_tci = cpu_to_le16(vlan_tx_tag_get(skb)); + mac_iocb_ptr->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb)); } tso = ql_tso(skb, (struct ob_mac_tso_iocb_req *)mac_iocb_ptr); if (tso < 0) { diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c index 9c31e46d1eee..d79e33b3c191 100644 --- a/drivers/net/ethernet/realtek/8139cp.c +++ b/drivers/net/ethernet/realtek/8139cp.c @@ -708,8 +708,8 @@ static void cp_tx (struct cp_private *cp) static inline u32 cp_tx_vlan_tag(struct sk_buff *skb) { - return vlan_tx_tag_present(skb) ? 
- TxVlanTag | swab16(vlan_tx_tag_get(skb)) : 0x00; + return skb_vlan_tag_present(skb) ? + TxVlanTag | swab16(skb_vlan_tag_get(skb)) : 0x00; } static void unwind_tx_frag_mapping(struct cp_private *cp, struct sk_buff *skb, diff --git a/drivers/net/ethernet/realtek/r8169.c b/drivers/net/ethernet/realtek/r8169.c index 3a280598a15a..cd286b0356ab 100644 --- a/drivers/net/ethernet/realtek/r8169.c +++ b/drivers/net/ethernet/realtek/r8169.c @@ -2073,8 +2073,8 @@ static int rtl8169_set_features(struct net_device *dev, static inline u32 rtl8169_tx_vlan_tag(struct sk_buff *skb) { - return (vlan_tx_tag_present(skb)) ? - TxVlanTag | swab16(vlan_tx_tag_get(skb)) : 0x00; + return (skb_vlan_tag_present(skb)) ? + TxVlanTag | swab16(skb_vlan_tag_get(skb)) : 0x00; } static void rtl8169_rx_vlan_tag(struct RxDesc *desc, struct sk_buff *skb) diff --git a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c index b6612d6090ac..23545e1e605a 100644 --- a/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c +++ b/drivers/net/ethernet/samsung/sxgbe/sxgbe_main.c @@ -1272,7 +1272,7 @@ static netdev_tx_t sxgbe_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(skb_is_gso(skb) && tqueue->prev_mss != cur_mss)) ctxt_desc_req = 1; - if (unlikely(vlan_tx_tag_present(skb) || + if (unlikely(skb_vlan_tag_present(skb) || ((skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && tqueue->hwts_tx_en))) ctxt_desc_req = 1; diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c index 6ab36d9ff2ab..a9cac8413e49 100644 --- a/drivers/net/ethernet/tehuti/tehuti.c +++ b/drivers/net/ethernet/tehuti/tehuti.c @@ -1650,9 +1650,9 @@ static netdev_tx_t bdx_tx_transmit(struct sk_buff *skb, txd_mss); } - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { /*Cut VLAN ID to 12 bits */ - txd_vlan_id = vlan_tx_tag_get(skb) & BITS_MASK(12); + txd_vlan_id = skb_vlan_tag_get(skb) & BITS_MASK(12); txd_vtag = 1; } diff --git a/drivers/net/ethernet/via/via-rhine.c b/drivers/net/ethernet/via/via-rhine.c index a191afc23b56..0ac76102b33d 100644 --- a/drivers/net/ethernet/via/via-rhine.c +++ b/drivers/net/ethernet/via/via-rhine.c @@ -1781,8 +1781,8 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb, rp->tx_ring[entry].desc_length = cpu_to_le32(TXDESC | (skb->len >= ETH_ZLEN ? skb->len : ETH_ZLEN)); - if (unlikely(vlan_tx_tag_present(skb))) { - u16 vid_pcp = vlan_tx_tag_get(skb); + if (unlikely(skb_vlan_tag_present(skb))) { + u16 vid_pcp = skb_vlan_tag_get(skb); /* drop CFI/DEI bit, register needs VID and PCP */ vid_pcp = (vid_pcp & VLAN_VID_MASK) | @@ -1803,7 +1803,7 @@ static netdev_tx_t rhine_start_tx(struct sk_buff *skb, /* Non-x86 Todo: explicitly flush cache lines here. 
*/ - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) /* Tx queues are bits 7-0 (first Tx queue: bit 7) */ BYTE_REG_BITS_ON(1 << 7, ioaddr + TQWake); diff --git a/drivers/net/ethernet/via/via-velocity.c b/drivers/net/ethernet/via/via-velocity.c index 282f83a63b67..c20206f83cc1 100644 --- a/drivers/net/ethernet/via/via-velocity.c +++ b/drivers/net/ethernet/via/via-velocity.c @@ -2611,8 +2611,8 @@ static netdev_tx_t velocity_xmit(struct sk_buff *skb, td_ptr->tdesc1.cmd = TCPLS_NORMAL + (tdinfo->nskb_dma + 1) * 16; - if (vlan_tx_tag_present(skb)) { - td_ptr->tdesc1.vlan = cpu_to_le16(vlan_tx_tag_get(skb)); + if (skb_vlan_tag_present(skb)) { + td_ptr->tdesc1.vlan = cpu_to_le16(skb_vlan_tag_get(skb)); td_ptr->tdesc1.TCR |= TCR0_VETAG; } diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 7df221788cd4..d0ed5694dd7d 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -645,7 +645,7 @@ static void macvtap_skb_to_vnet_hdr(struct macvtap_queue *q, if (skb->ip_summed == CHECKSUM_PARTIAL) { vnet_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) vnet_hdr->csum_start = cpu_to_macvtap16(q, skb_checksum_start_offset(skb) + VLAN_HLEN); else @@ -821,13 +821,13 @@ static ssize_t macvtap_put_user(struct macvtap_queue *q, total = vnet_hdr_len; total += skb->len; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { struct { __be16 h_vlan_proto; __be16 h_vlan_TCI; } veth; veth.h_vlan_proto = skb->vlan_proto; - veth.h_vlan_TCI = htons(vlan_tx_tag_get(skb)); + veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); total += VLAN_HLEN; diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 74fdf1158448..be196e89ab6c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -1260,7 +1260,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, int vlan_hlen = 0; int vnet_hdr_sz = 0; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) vlan_hlen = VLAN_HLEN; if (tun->flags & IFF_VNET_HDR) @@ -1337,7 +1337,7 @@ static ssize_t tun_put_user(struct tun_struct *tun, } veth; veth.h_vlan_proto = skb->vlan_proto; - veth.h_vlan_TCI = htons(vlan_tx_tag_get(skb)); + veth.h_vlan_TCI = htons(skb_vlan_tag_get(skb)); vlan_offset = offsetof(struct vlan_ethhdr, h_vlan_proto); diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c index b23426e4952c..e519e6a269b9 100644 --- a/drivers/net/usb/r8152.c +++ b/drivers/net/usb/r8152.c @@ -1421,10 +1421,10 @@ static int msdn_giant_send_check(struct sk_buff *skb) static inline void rtl_tx_vlan_tag(struct tx_desc *desc, struct sk_buff *skb) { - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { u32 opts2; - opts2 = TX_VLAN_TAG | swab16(vlan_tx_tag_get(skb)); + opts2 = TX_VLAN_TAG | swab16(skb_vlan_tag_get(skb)); desc->opts2 |= cpu_to_le32(opts2); } } diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c index 31439818c27e..294214c15292 100644 --- a/drivers/net/vmxnet3/vmxnet3_drv.c +++ b/drivers/net/vmxnet3/vmxnet3_drv.c @@ -1038,9 +1038,9 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq, le32_add_cpu(&tq->shared->txNumDeferred, 1); } - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { gdesc->txd.ti = 1; - gdesc->txd.tci = vlan_tx_tag_get(skb); + gdesc->txd.tci = skb_vlan_tag_get(skb); } /* finally flips the GEN bit of the SOP desc. 
*/ diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 3a18d8ed89ca..985359dd6033 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1561,7 +1561,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, min_headroom = LL_RESERVED_SPACE(dst->dev) + dst->header_len + VXLAN_HLEN + sizeof(struct ipv6hdr) - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); /* Need space for new headers (invalidates iph ptr) */ err = skb_cow_head(skb, min_headroom); @@ -1607,7 +1607,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + VXLAN_HLEN + sizeof(struct iphdr) - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); /* Need space for new headers (invalidates iph ptr) */ err = skb_cow_head(skb, min_headroom); diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 625227ad16ee..dd4ab8d73d34 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -2800,12 +2800,12 @@ static void qeth_l3_fill_header(struct qeth_card *card, struct qeth_hdr *hdr, * before we're going to overwrite this location with next hop ip. * v6 uses passthrough, v4 sets the tag in the QDIO header. */ - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { if ((ipv == 4) || (card->info.type == QETH_CARD_TYPE_IQD)) hdr->hdr.l3.ext_flags = QETH_HDR_EXT_VLAN_FRAME; else hdr->hdr.l3.ext_flags = QETH_HDR_EXT_INCLUDE_VLAN_TAG; - hdr->hdr.l3.vlan_id = vlan_tx_tag_get(skb); + hdr->hdr.l3.vlan_id = skb_vlan_tag_get(skb); } hdr->hdr.l3.length = skb->len - sizeof(struct qeth_hdr); @@ -2986,7 +2986,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) skb_pull(new_skb, ETH_HLEN); } - if (ipv != 4 && vlan_tx_tag_present(new_skb)) { + if (ipv != 4 && skb_vlan_tag_present(new_skb)) { skb_push(new_skb, VLAN_HLEN); skb_copy_to_linear_data(new_skb, new_skb->data + 4, 4); skb_copy_to_linear_data_offset(new_skb, 4, @@ -2995,7 +2995,7 @@ static int qeth_l3_hard_start_xmit(struct sk_buff *skb, struct net_device *dev) new_skb->data + 12, 4); tag = (u16 *)(new_skb->data + 12); *tag = __constant_htons(ETH_P_8021Q); - *(tag + 1) = htons(vlan_tx_tag_get(new_skb)); + *(tag + 1) = htons(skb_vlan_tag_get(new_skb)); } } diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 14419a8ccbb6..bcaf4cabb858 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -469,7 +469,7 @@ static int peek_head_len(struct sock *sk) head = skb_peek(&sk->sk_receive_queue); if (likely(head)) { len = head->len; - if (vlan_tx_tag_present(head)) + if (skb_vlan_tag_present(head)) len += VLAN_HLEN; } diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 515a35e2a48a..bea465f24ebb 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -78,9 +78,9 @@ static inline bool is_vlan_dev(struct net_device *dev) return dev->priv_flags & IFF_802_1Q_VLAN; } -#define vlan_tx_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) -#define vlan_tx_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) -#define vlan_tx_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) +#define skb_vlan_tag_present(__skb) ((__skb)->vlan_tci & VLAN_TAG_PRESENT) +#define skb_vlan_tag_get(__skb) ((__skb)->vlan_tci & ~VLAN_TAG_PRESENT) +#define skb_vlan_tag_get_id(__skb) ((__skb)->vlan_tci & VLAN_VID_MASK) /** * struct vlan_pcpu_stats - VLAN percpu rx/tx stats @@ -376,7 +376,7 @@ static inline struct sk_buff 
*vlan_insert_tag_set_proto(struct sk_buff *skb, static inline struct sk_buff *__vlan_hwaccel_push_inside(struct sk_buff *skb) { skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); + skb_vlan_tag_get(skb)); if (likely(skb)) skb->vlan_tci = 0; return skb; @@ -393,7 +393,7 @@ static inline struct sk_buff *__vlan_hwaccel_push_inside(struct sk_buff *skb) */ static inline struct sk_buff *vlan_hwaccel_push_inside(struct sk_buff *skb) { - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) skb = __vlan_hwaccel_push_inside(skb); return skb; } @@ -442,8 +442,8 @@ static inline int __vlan_get_tag(const struct sk_buff *skb, u16 *vlan_tci) static inline int __vlan_hwaccel_get_tag(const struct sk_buff *skb, u16 *vlan_tci) { - if (vlan_tx_tag_present(skb)) { - *vlan_tci = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + *vlan_tci = skb_vlan_tag_get(skb); return 0; } else { *vlan_tci = 0; @@ -480,7 +480,7 @@ static inline __be16 vlan_get_protocol(const struct sk_buff *skb) { __be16 protocol = 0; - if (vlan_tx_tag_present(skb) || + if (skb_vlan_tag_present(skb) || skb->protocol != cpu_to_be16(ETH_P_8021Q)) protocol = skb->protocol; else { diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index fe6e7aac3c56..2342bf12cb78 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -121,7 +121,7 @@ static inline __be16 tc_skb_protocol(const struct sk_buff *skb) * vlan accelerated path. In that case, use skb->vlan_proto * as the original vlan header was already stripped. */ - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) return skb->vlan_proto; return skb->protocol; } diff --git a/include/trace/events/net.h b/include/trace/events/net.h index 1de256b35807..49cc7c3de252 100644 --- a/include/trace/events/net.h +++ b/include/trace/events/net.h @@ -40,9 +40,9 @@ TRACE_EVENT(net_dev_start_xmit, __assign_str(name, dev->name); __entry->queue_mapping = skb->queue_mapping; __entry->skbaddr = skb; - __entry->vlan_tagged = vlan_tx_tag_present(skb); + __entry->vlan_tagged = skb_vlan_tag_present(skb); __entry->vlan_proto = ntohs(skb->vlan_proto); - __entry->vlan_tci = vlan_tx_tag_get(skb); + __entry->vlan_tci = skb_vlan_tag_get(skb); __entry->protocol = ntohs(skb->protocol); __entry->ip_summed = skb->ip_summed; __entry->len = skb->len; @@ -174,9 +174,9 @@ DECLARE_EVENT_CLASS(net_dev_rx_verbose_template, #endif __entry->queue_mapping = skb->queue_mapping; __entry->skbaddr = skb; - __entry->vlan_tagged = vlan_tx_tag_present(skb); + __entry->vlan_tagged = skb_vlan_tag_present(skb); __entry->vlan_proto = ntohs(skb->vlan_proto); - __entry->vlan_tci = vlan_tx_tag_get(skb); + __entry->vlan_tci = skb_vlan_tag_get(skb); __entry->protocol = ntohs(skb->protocol); __entry->ip_summed = skb->ip_summed; __entry->hash = skb->hash; diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 90cc2bdd4064..61bf2a06e85d 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -9,7 +9,7 @@ bool vlan_do_receive(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; __be16 vlan_proto = skb->vlan_proto; - u16 vlan_id = vlan_tx_tag_get_id(skb); + u16 vlan_id = skb_vlan_tag_get_id(skb); struct net_device *vlan_dev; struct vlan_pcpu_stats *rx_stats; diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index c190d22b6b3d..65728e0dc4ff 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -66,17 +66,17 @@ static int brnf_pass_vlan_indev __read_mostly = 0; #endif #define IS_IP(skb) \ - (!vlan_tx_tag_present(skb) && 
skb->protocol == htons(ETH_P_IP)) + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IP)) #define IS_IPV6(skb) \ - (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_IPV6)) #define IS_ARP(skb) \ - (!vlan_tx_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) + (!skb_vlan_tag_present(skb) && skb->protocol == htons(ETH_P_ARP)) static inline __be16 vlan_proto(const struct sk_buff *skb) { - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) return skb->protocol; else if (skb->protocol == htons(ETH_P_8021Q)) return vlan_eth_hdr(skb)->h_vlan_encapsulated_proto; @@ -436,11 +436,11 @@ static struct net_device *brnf_get_logical_dev(struct sk_buff *skb, const struct struct net_device *vlan, *br; br = bridge_parent(dev); - if (brnf_pass_vlan_indev == 0 || !vlan_tx_tag_present(skb)) + if (brnf_pass_vlan_indev == 0 || !skb_vlan_tag_present(skb)) return br; vlan = __vlan_find_dev_deep_rcu(br, skb->vlan_proto, - vlan_tx_tag_get(skb) & VLAN_VID_MASK); + skb_vlan_tag_get(skb) & VLAN_VID_MASK); return vlan ? vlan : br; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index aea3d1339b3f..d808d766334d 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -628,8 +628,8 @@ static inline int br_vlan_get_tag(const struct sk_buff *skb, u16 *vid) { int err = 0; - if (vlan_tx_tag_present(skb)) - *vid = vlan_tx_tag_get(skb) & VLAN_VID_MASK; + if (skb_vlan_tag_present(skb)) + *vid = skb_vlan_tag_get(skb) & VLAN_VID_MASK; else { *vid = 0; err = -EINVAL; diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 97b8ddf57363..13013fe8db24 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -187,7 +187,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, * sent from vlan device on the bridge device, it does not have * HW accelerated vlan tag. 
*/ - if (unlikely(!vlan_tx_tag_present(skb) && + if (unlikely(!skb_vlan_tag_present(skb) && skb->protocol == proto)) { skb = skb_vlan_untag(skb); if (unlikely(!skb)) @@ -200,7 +200,7 @@ bool br_allowed_ingress(struct net_bridge *br, struct net_port_vlans *v, /* Protocol-mismatch, empty out vlan_tci for new tag */ skb_push(skb, ETH_HLEN); skb = vlan_insert_tag_set_proto(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); + skb_vlan_tag_get(skb)); if (unlikely(!skb)) return false; diff --git a/net/bridge/netfilter/ebt_vlan.c b/net/bridge/netfilter/ebt_vlan.c index 8d3f8c7651f0..618568888128 100644 --- a/net/bridge/netfilter/ebt_vlan.c +++ b/net/bridge/netfilter/ebt_vlan.c @@ -45,8 +45,8 @@ ebt_vlan_mt(const struct sk_buff *skb, struct xt_action_param *par) /* VLAN encapsulated Type/Length field, given from orig frame */ __be16 encap; - if (vlan_tx_tag_present(skb)) { - TCI = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + TCI = skb_vlan_tag_get(skb); encap = skb->protocol; } else { const struct vlan_hdr *fp; diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index d9a8c05d995d..91180a7fc943 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -133,7 +133,7 @@ ebt_basic_match(const struct ebt_entry *e, const struct sk_buff *skb, __be16 ethproto; int verdict, i; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) ethproto = htons(ETH_P_8021Q); else ethproto = h->h_proto; diff --git a/net/core/dev.c b/net/core/dev.c index 805456147c30..1e325adc4367 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2578,7 +2578,7 @@ netdev_features_t netif_skb_features(struct sk_buff *skb) if (skb->encapsulation) features &= dev->hw_enc_features; - if (!vlan_tx_tag_present(skb)) { + if (!skb_vlan_tag_present(skb)) { if (unlikely(protocol == htons(ETH_P_8021Q) || protocol == htons(ETH_P_8021AD))) { struct vlan_ethhdr *veh = (struct vlan_ethhdr *)skb->data; @@ -2659,7 +2659,7 @@ out: static struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t features) { - if (vlan_tx_tag_present(skb) && + if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) skb = __vlan_hwaccel_push_inside(skb); return skb; @@ -3676,7 +3676,7 @@ ncls: if (pfmemalloc && !skb_pfmemalloc_protocol(skb)) goto drop; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { if (pt_prev) { ret = deliver_skb(skb, pt_prev, orig_dev); pt_prev = NULL; @@ -3708,8 +3708,8 @@ ncls: } } - if (unlikely(vlan_tx_tag_present(skb))) { - if (vlan_tx_tag_get_id(skb)) + if (unlikely(skb_vlan_tag_present(skb))) { + if (skb_vlan_tag_get_id(skb)) skb->pkt_type = PACKET_OTHERHOST; /* Note: we might in the future use prio bits * and set skb->priority like in vlan_do_receive() diff --git a/net/core/netpoll.c b/net/core/netpoll.c index e0ad5d16c9c5..c126a878c47c 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -77,7 +77,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, features = netif_skb_features(skb); - if (vlan_tx_tag_present(skb) && + if (skb_vlan_tag_present(skb) && !vlan_hw_offload_capable(features, skb->vlan_proto)) { skb = __vlan_hwaccel_push_inside(skb); if (unlikely(!skb)) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 5a2a2e887a12..56db472e9b86 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -4197,7 +4197,7 @@ struct sk_buff *skb_vlan_untag(struct sk_buff *skb) struct vlan_hdr *vhdr; u16 vlan_tci; - if (unlikely(vlan_tx_tag_present(skb))) { + if 
(unlikely(skb_vlan_tag_present(skb))) { /* vlan_tci is already set-up so leave this for another time */ return skb; } @@ -4283,7 +4283,7 @@ int skb_vlan_pop(struct sk_buff *skb) __be16 vlan_proto; int err; - if (likely(vlan_tx_tag_present(skb))) { + if (likely(skb_vlan_tag_present(skb))) { skb->vlan_tci = 0; } else { if (unlikely((skb->protocol != htons(ETH_P_8021Q) && @@ -4313,7 +4313,7 @@ EXPORT_SYMBOL(skb_vlan_pop); int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) { - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { unsigned int offset = skb->data - skb_mac_header(skb); int err; @@ -4323,7 +4323,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci) */ __skb_push(skb, offset); err = __vlan_insert_tag(skb, skb->vlan_proto, - vlan_tx_tag_get(skb)); + skb_vlan_tag_get(skb)); if (err) return err; skb->protocol = skb->vlan_proto; diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 5b52046ec7a2..23744c7a9718 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -119,7 +119,7 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + GENEVE_BASE_HLEN + opt_len + sizeof(struct iphdr) - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); err = skb_cow_head(skb, min_headroom); if (unlikely(err)) { diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 770064c83711..b4cffe686126 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -212,7 +212,7 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) int err; err = skb_vlan_pop(skb); - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) invalidate_flow_key(key); else key->eth.tci = 0; @@ -222,7 +222,7 @@ static int pop_vlan(struct sk_buff *skb, struct sw_flow_key *key) static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key, const struct ovs_action_push_vlan *vlan) { - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) invalidate_flow_key(key); else key->eth.tci = vlan->vlan_tci; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 4e9a5f035cbc..54854e3ecd83 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -419,7 +419,7 @@ static int queue_userspace_packet(struct datapath *dp, struct sk_buff *skb, if (!dp_ifindex) return -ENODEV; - if (vlan_tx_tag_present(skb)) { + if (skb_vlan_tag_present(skb)) { nskb = skb_clone(skb, GFP_ATOMIC); if (!nskb) return -ENOMEM; diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c index da2fae0873a5..df334fe43d7f 100644 --- a/net/openvswitch/flow.c +++ b/net/openvswitch/flow.c @@ -70,7 +70,7 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags, { struct flow_stats *stats; int node = numa_node_id(); - int len = skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + int len = skb->len + (skb_vlan_tag_present(skb) ? 
VLAN_HLEN : 0); stats = rcu_dereference(flow->stats[node]); @@ -472,7 +472,7 @@ static int key_extract(struct sk_buff *skb, struct sw_flow_key *key) */ key->eth.tci = 0; - if (vlan_tx_tag_present(skb)) + if (skb_vlan_tag_present(skb)) key->eth.tci = htons(skb->vlan_tci); else if (eth->h_proto == htons(ETH_P_8021Q)) if (unlikely(parse_vlan(skb, key))) diff --git a/net/openvswitch/vport-gre.c b/net/openvswitch/vport-gre.c index d4168c442db5..e9aedb7c7106 100644 --- a/net/openvswitch/vport-gre.c +++ b/net/openvswitch/vport-gre.c @@ -166,7 +166,7 @@ static int gre_tnl_send(struct vport *vport, struct sk_buff *skb) min_headroom = LL_RESERVED_SPACE(rt->dst.dev) + rt->dst.header_len + tunnel_hlen + sizeof(struct iphdr) - + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); if (skb_headroom(skb) < min_headroom || skb_header_cloned(skb)) { int head_delta = SKB_DATA_ALIGN(min_headroom - skb_headroom(skb) + diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 2034c6d9cb5a..464739aac0f3 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -480,7 +480,8 @@ void ovs_vport_receive(struct vport *vport, struct sk_buff *skb, stats = this_cpu_ptr(vport->percpu_stats); u64_stats_update_begin(&stats->syncp); stats->rx_packets++; - stats->rx_bytes += skb->len + (vlan_tx_tag_present(skb) ? VLAN_HLEN : 0); + stats->rx_bytes += skb->len + + (skb_vlan_tag_present(skb) ? VLAN_HLEN : 0); u64_stats_update_end(&stats->syncp); OVS_CB(skb)->input_vport = vport; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0f02668dc219..d37075b0d6d5 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -986,8 +986,8 @@ static void prb_clear_rxhash(struct tpacket_kbdq_core *pkc, static void prb_fill_vlan_info(struct tpacket_kbdq_core *pkc, struct tpacket3_hdr *ppd) { - if (vlan_tx_tag_present(pkc->skb)) { - ppd->hv1.tp_vlan_tci = vlan_tx_tag_get(pkc->skb); + if (skb_vlan_tag_present(pkc->skb)) { + ppd->hv1.tp_vlan_tci = skb_vlan_tag_get(pkc->skb); ppd->hv1.tp_vlan_tpid = ntohs(pkc->skb->vlan_proto); ppd->tp_status = TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { @@ -2000,8 +2000,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, h.h2->tp_net = netoff; h.h2->tp_sec = ts.tv_sec; h.h2->tp_nsec = ts.tv_nsec; - if (vlan_tx_tag_present(skb)) { - h.h2->tp_vlan_tci = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + h.h2->tp_vlan_tci = skb_vlan_tag_get(skb); h.h2->tp_vlan_tpid = ntohs(skb->vlan_proto); status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { @@ -3010,8 +3010,8 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, aux.tp_snaplen = skb->len; aux.tp_mac = 0; aux.tp_net = skb_network_offset(skb); - if (vlan_tx_tag_present(skb)) { - aux.tp_vlan_tci = vlan_tx_tag_get(skb); + if (skb_vlan_tag_present(skb)) { + aux.tp_vlan_tci = skb_vlan_tag_get(skb); aux.tp_vlan_tpid = ntohs(skb->vlan_proto); aux.tp_status |= TP_STATUS_VLAN_VALID | TP_STATUS_VLAN_TPID_VALID; } else { diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 2159981b604e..b5294ce20cd4 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -176,7 +176,7 @@ META_COLLECTOR(int_vlan_tag) { unsigned short tag; - tag = vlan_tx_tag_get(skb); + tag = skb_vlan_tag_get(skb); if (!tag && __vlan_get_tag(skb, &tag)) *err = -1; else diff --git a/net/wireless/util.c b/net/wireless/util.c index d0ac795445b7..1d2fcfad06cc 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -708,8 +708,8 @@ 
unsigned int cfg80211_classify8021d(struct sk_buff *skb, if (skb->priority >= 256 && skb->priority <= 263) return skb->priority - 256; - if (vlan_tx_tag_present(skb)) { - vlan_priority = (vlan_tx_tag_get(skb) & VLAN_PRIO_MASK) + if (skb_vlan_tag_present(skb)) { + vlan_priority = (skb_vlan_tag_get(skb) & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT; if (vlan_priority > 0) return vlan_priority; -- cgit v1.2.3-70-g09d2
From 28e98c2c20e9d791cf58a624a5a2b1fca63ea47b Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Tue, 13 Jan 2015 17:16:42 +0000 Subject: xen: add page_to_mfn() pfn_to_mfn(page_to_pfn(p)) is a common use case so add a generic helper for it. Signed-off-by: David Vrabel Signed-off-by: David S. Miller --- include/xen/page.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/xen/page.h b/include/xen/page.h index 12765b6f9517..c5ed20bb3fe9 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -3,6 +3,11 @@ #include +static inline unsigned long page_to_mfn(struct page *page) +{ + return pfn_to_mfn(page_to_pfn(page)); +} + struct xen_memory_region { phys_addr_t start; phys_addr_t size; -- cgit v1.2.3-70-g09d2
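A minimal usage sketch of the helper added above, assuming a hypothetical caller that has just allocated a page; example_page_mfn() and the allocation around it are illustrative only and are not part of the patch.

#include <linux/gfp.h>
#include <xen/page.h>

/* Hypothetical caller: return the machine frame number of a freshly
 * allocated page, e.g. before handing it to the hypervisor. */
static unsigned long example_page_mfn(void)
{
        struct page *page = alloc_page(GFP_KERNEL);

        if (!page)
                return 0;
        /* was: pfn_to_mfn(page_to_pfn(page)) */
        return page_to_mfn(page);
}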
From 2c3e861c94a29a30c75f60f2561b4ee70b3fb3a4 Mon Sep 17 00:00:00 2001 From: Arik Nemtsov Date: Wed, 7 Jan 2015 16:47:19 +0200 Subject: cfg80211: introduce sync regdom set API for self-managed A self-managed device will sometimes need to set its regdomain synchronously. Notably it should be set before usermode has a chance to query it. Expose a new API to accomplish this which requires the RTNL. Signed-off-by: Arik Nemtsov Reviewed-by: Ilan Peer Reviewed-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 14 ++++++++++++++ net/wireless/reg.c | 31 +++++++++++++++++++++++++++++-- 2 files changed, 43 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 197735788f18..38abc07503fd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -3788,6 +3788,20 @@ int regulatory_hint(struct wiphy *wiphy, const char *alpha2); int regulatory_set_wiphy_regd(struct wiphy *wiphy, struct ieee80211_regdomain *rd); +/** + * regulatory_set_wiphy_regd_sync_rtnl - set regdom for self-managed drivers + * @wiphy: the wireless device we want to process the regulatory domain on + * @rd: the regulatory domain information to use for this wiphy + * + * This function requires the RTNL to be held and applies the new regdomain + * synchronously to this wiphy. For more details see + * regulatory_set_wiphy_regd(). + * + * Return: 0 on success. -EINVAL, -EPERM + */ +int regulatory_set_wiphy_regd_sync_rtnl(struct wiphy *wiphy, + struct ieee80211_regdomain *rd); + /** * wiphy_apply_custom_regulatory - apply a custom driver regulatory domain * @wiphy: the wireless device we want to process the regulatory domain on diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 8d232b904210..f8ed79729eb0 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2897,8 +2897,8 @@ int set_regdom(const struct ieee80211_regdomain *rd) return 0; } -int regulatory_set_wiphy_regd(struct wiphy *wiphy, - struct ieee80211_regdomain *rd) +static int __regulatory_set_wiphy_regd(struct wiphy *wiphy, + struct ieee80211_regdomain *rd) { const struct ieee80211_regdomain *regd; const struct ieee80211_regdomain *prev_regd; @@ -2928,12 +2928,39 @@ int regulatory_set_wiphy_regd(struct wiphy *wiphy, spin_unlock(&reg_requests_lock); kfree(prev_regd); + return 0; +} + +int regulatory_set_wiphy_regd(struct wiphy *wiphy, + struct ieee80211_regdomain *rd) +{ + int ret = __regulatory_set_wiphy_regd(wiphy, rd); + + if (ret) + return ret; schedule_work(&reg_work); return 0; } EXPORT_SYMBOL(regulatory_set_wiphy_regd); +int regulatory_set_wiphy_regd_sync_rtnl(struct wiphy *wiphy, + struct ieee80211_regdomain *rd) +{ + int ret; + + ASSERT_RTNL(); + + ret = __regulatory_set_wiphy_regd(wiphy, rd); + if (ret) + return ret; + + /* process the request immediately */ + reg_process_self_managed_hints(); + return 0; +} +EXPORT_SYMBOL(regulatory_set_wiphy_regd_sync_rtnl); + void wiphy_regulatory_register(struct wiphy *wiphy) { struct regulatory_request *lr; -- cgit v1.2.3-70-g09d2
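A minimal sketch of how a self-managed driver might use the new call, assuming the driver has already built an ieee80211_regdomain; my_regd and example_apply_regd() below are placeholders and are not taken from the patch.

#include <linux/rtnetlink.h>
#include <net/cfg80211.h>

/* Hypothetical driver snippet: apply a driver-provided regdomain
 * synchronously, before usermode gets a chance to query it.  The RTNL
 * must be held around the call, as the kernel-doc above requires. */
static int example_apply_regd(struct wiphy *wiphy,
                              struct ieee80211_regdomain *my_regd)
{
        int ret;

        rtnl_lock();
        ret = regulatory_set_wiphy_regd_sync_rtnl(wiphy, my_regd);
        rtnl_unlock();

        return ret;
}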
If no elements are present in the array, it means diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 7c2ce26e22de..380784378df8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1088,6 +1088,11 @@ static int nl80211_send_wowlan(struct sk_buff *msg, return -ENOBUFS; } + if ((rdev->wiphy.wowlan->flags & WIPHY_WOWLAN_NET_DETECT) && + nla_put_u32(msg, NL80211_WOWLAN_TRIG_NET_DETECT, + rdev->wiphy.wowlan->max_nd_match_sets)) + return -ENOBUFS; + if (large && nl80211_send_wowlan_tcp_caps(rdev, msg)) return -ENOBUFS; @@ -8747,6 +8752,48 @@ static int nl80211_send_wowlan_tcp(struct sk_buff *msg, return 0; } +static int nl80211_send_wowlan_nd(struct sk_buff *msg, + struct cfg80211_sched_scan_request *req) +{ + struct nlattr *nd, *freqs, *matches, *match; + int i; + + if (!req) + return 0; + + nd = nla_nest_start(msg, NL80211_WOWLAN_TRIG_NET_DETECT); + if (!nd) + return -ENOBUFS; + + if (nla_put_u32(msg, NL80211_ATTR_SCHED_SCAN_INTERVAL, req->interval)) + return -ENOBUFS; + + freqs = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQUENCIES); + if (!freqs) + return -ENOBUFS; + + for (i = 0; i < req->n_channels; i++) + nla_put_u32(msg, i, req->channels[i]->center_freq); + + nla_nest_end(msg, freqs); + + if (req->n_match_sets) { + matches = nla_nest_start(msg, NL80211_ATTR_SCHED_SCAN_MATCH); + for (i = 0; i < req->n_match_sets; i++) { + match = nla_nest_start(msg, i); + nla_put(msg, NL80211_SCHED_SCAN_MATCH_ATTR_SSID, + req->match_sets[i].ssid.ssid_len, + req->match_sets[i].ssid.ssid); + nla_nest_end(msg, match); + } + nla_nest_end(msg, matches); + } + + nla_nest_end(msg, nd); + + return 0; +} + static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev = info->user_ptr[0]; @@ -8804,6 +8851,11 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.wowlan_config->tcp)) goto nla_put_failure; + if (nl80211_send_wowlan_nd( + msg, + rdev->wiphy.wowlan_config->nd_config)) + goto nla_put_failure; + nla_nest_end(msg, nl_wowlan); } -- cgit v1.2.3-70-g09d2 From a2b12f3c7ac1ea43ae646db74faf0b56c2bba563 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 12 Jan 2015 17:00:37 -0800 Subject: udp: pass udp_offload struct to UDP gro callbacks This patch introduces udp_offload_callbacks which has the same GRO functions (but not a GSO function) as offload_callbacks, except there is an argument to a udp_offload struct passed to gro_receive and gro_complete functions. This additional argument can be used to retrieve the per port structure of the encapsulation for use in gro processing (mostly by doing container_of on the structure). Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 7 +++++-- include/linux/netdevice.h | 15 +++++++++++++-- net/ipv4/fou.c | 12 ++++++++---- net/ipv4/geneve.c | 6 ++++-- net/ipv4/udp_offload.c | 7 +++++-- 5 files changed, 35 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 985359dd6033..5c56a3ff25aa 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -539,7 +539,9 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, return 1; } -static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff *skb) +static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, + struct sk_buff *skb, + struct udp_offload *uoff) { struct sk_buff *p, **pp = NULL; struct vxlanhdr *vh, *vh2; @@ -578,7 +580,8 @@ out: return pp; } -static int vxlan_gro_complete(struct sk_buff *skb, int nhoff) +static int vxlan_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) { udp_tunnel_gro_complete(skb, nhoff); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 679e6e90aa4c..47921c291dd6 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1969,7 +1969,7 @@ struct offload_callbacks { struct sk_buff *(*gso_segment)(struct sk_buff *skb, netdev_features_t features); struct sk_buff **(*gro_receive)(struct sk_buff **head, - struct sk_buff *skb); + struct sk_buff *skb); int (*gro_complete)(struct sk_buff *skb, int nhoff); }; @@ -1979,10 +1979,21 @@ struct packet_offload { struct list_head list; }; +struct udp_offload; + +struct udp_offload_callbacks { + struct sk_buff **(*gro_receive)(struct sk_buff **head, + struct sk_buff *skb, + struct udp_offload *uoff); + int (*gro_complete)(struct sk_buff *skb, + int nhoff, + struct udp_offload *uoff); +}; + struct udp_offload { __be16 port; u8 ipproto; - struct offload_callbacks callbacks; + struct udp_offload_callbacks callbacks; }; /* often modified stats are per cpu, other are shared (netdev->stats) */ diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 2197c36f722f..3bc0cf07661c 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -174,7 +174,8 @@ drop: } static struct sk_buff **fou_gro_receive(struct sk_buff **head, - struct sk_buff *skb) + struct sk_buff *skb, + struct udp_offload *uoff) { const struct net_offload *ops; struct sk_buff **pp = NULL; @@ -195,7 +196,8 @@ out_unlock: return pp; } -static int fou_gro_complete(struct sk_buff *skb, int nhoff) +static int fou_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) { const struct net_offload *ops; u8 proto = NAPI_GRO_CB(skb)->proto; @@ -254,7 +256,8 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, } static struct sk_buff **gue_gro_receive(struct sk_buff **head, - struct sk_buff *skb) + struct sk_buff *skb, + struct udp_offload *uoff) { const struct net_offload **offloads; const struct net_offload *ops; @@ -360,7 +363,8 @@ out: return pp; } -static int gue_gro_complete(struct sk_buff *skb, int nhoff) +static int gue_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) { const struct net_offload **offloads; struct guehdr *guehdr = (struct guehdr *)(skb->data + nhoff); diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 23744c7a9718..9568594ca2f1 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -147,7 +147,8 @@ static int geneve_hlen(struct genevehdr *gh) } static struct sk_buff **geneve_gro_receive(struct sk_buff **head, - struct sk_buff *skb) + struct sk_buff *skb, + struct udp_offload *uoff) { struct sk_buff *p, 
**pp = NULL; struct genevehdr *gh, *gh2; @@ -211,7 +212,8 @@ out: return pp; } -static int geneve_gro_complete(struct sk_buff *skb, int nhoff) +static int geneve_gro_complete(struct sk_buff *skb, int nhoff, + struct udp_offload *uoff) { struct genevehdr *gh; struct packet_offload *ptype; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index d3e537ef6b7f..d10f6f4ead27 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -339,7 +339,8 @@ unflush: skb_gro_pull(skb, sizeof(struct udphdr)); /* pull encapsulating udp header */ skb_gro_postpull_rcsum(skb, uh, sizeof(struct udphdr)); NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - pp = uo_priv->offload->callbacks.gro_receive(head, skb); + pp = uo_priv->offload->callbacks.gro_receive(head, skb, + uo_priv->offload); out_unlock: rcu_read_unlock(); @@ -395,7 +396,9 @@ int udp_gro_complete(struct sk_buff *skb, int nhoff) if (uo_priv != NULL) { NAPI_GRO_CB(skb)->proto = uo_priv->offload->ipproto; - err = uo_priv->offload->callbacks.gro_complete(skb, nhoff + sizeof(struct udphdr)); + err = uo_priv->offload->callbacks.gro_complete(skb, + nhoff + sizeof(struct udphdr), + uo_priv->offload); } rcu_read_unlock(); -- cgit v1.2.3-70-g09d2 From dfd8645ea1bd91277f841e74c33e1f4dbbede808 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 12 Jan 2015 17:00:38 -0800 Subject: vxlan: Remote checksum offload Add support for remote checksum offload in VXLAN. This uses a reserved bit to indicate that RCO is being done, and uses the low order reserved eight bits of the VNI to hold the start and offset values in a compressed manner. Start is encoded in the low order seven bits of VNI. This is start >> 1 so that the checksum start offset is 0-254 using even values only. Checksum offset (transport checksum field) is indicated in the high order bit in the low order byte of the VNI. If the bit is set, the checksum field is for UDP (so offset = start + 6), else checksum field is for TCP (so offset = start + 16). Only TCP and UDP are supported in this implementation. Remote checksum offload for VXLAN is described in: https://tools.ietf.org/html/draft-herbert-vxlan-rco-00 Tested by running 200 TCP_STREAM connections with VXLAN (over IPv4). With UDP checksums and Remote Checksum Offload IPv4 Client 11.84% CPU utilization Server 12.96% CPU utilization 9197 Mbps IPv6 Client 12.46% CPU utilization Server 14.48% CPU utilization 8963 Mbps With UDP checksums, no remote checksum offload IPv4 Client 15.67% CPU utilization Server 14.83% CPU utilization 9094 Mbps IPv6 Client 16.21% CPU utilization Server 14.32% CPU utilization 9058 Mbps No UDP checksums IPv4 Client 15.03% CPU utilization Server 23.09% CPU utilization 9089 Mbps IPv6 Client 16.18% CPU utilization Server 26.57% CPU utilization 8954 Mbps Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 191 +++++++++++++++++++++++++++++++++++++++++-- include/net/vxlan.h | 11 +++ include/uapi/linux/if_link.h | 2 + 3 files changed, 198 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 5c56a3ff25aa..99df0d76157c 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -539,6 +539,46 @@ static int vxlan_fdb_append(struct vxlan_fdb *f, return 1; } +static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, + unsigned int off, + struct vxlanhdr *vh, size_t hdrlen, + u32 data) +{ + size_t start, offset, plen; + __wsum delta; + + if (skb->remcsum_offload) + return vh; + + if (!NAPI_GRO_CB(skb)->csum_valid) + return NULL; + + start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; + offset = start + ((data & VXLAN_RCO_UDP) ? + offsetof(struct udphdr, check) : + offsetof(struct tcphdr, check)); + + plen = hdrlen + offset + sizeof(u16); + + /* Pull checksum that will be written */ + if (skb_gro_header_hard(skb, off + plen)) { + vh = skb_gro_header_slow(skb, off + plen, off); + if (!vh) + return NULL; + } + + delta = remcsum_adjust((void *)vh + hdrlen, + NAPI_GRO_CB(skb)->csum, start, offset); + + /* Adjust skb->csum since we changed the packet */ + skb->csum = csum_add(skb->csum, delta); + NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); + + skb->remcsum_offload = 1; + + return vh; +} + static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct sk_buff *skb, struct udp_offload *uoff) @@ -547,6 +587,9 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, struct vxlanhdr *vh, *vh2; unsigned int hlen, off_vx; int flush = 1; + struct vxlan_sock *vs = container_of(uoff, struct vxlan_sock, + udp_offloads); + u32 flags; off_vx = skb_gro_offset(skb); hlen = off_vx + sizeof(*vh); @@ -557,6 +600,19 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, goto out; } + skb_gro_pull(skb, sizeof(struct vxlanhdr)); /* pull vxlan header */ + skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr)); + + flags = ntohl(vh->vx_flags); + + if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { + vh = vxlan_gro_remcsum(skb, off_vx, vh, sizeof(struct vxlanhdr), + ntohl(vh->vx_vni)); + + if (!vh) + goto out; + } + flush = 0; for (p = *head; p; p = p->next) { @@ -570,8 +626,6 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, } } - skb_gro_pull(skb, sizeof(struct vxlanhdr)); - skb_gro_postpull_rcsum(skb, vh, sizeof(struct vxlanhdr)); pp = eth_gro_receive(head, skb); out: @@ -1087,6 +1141,42 @@ static void vxlan_igmp_leave(struct work_struct *work) dev_put(vxlan->dev); } +static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, + size_t hdrlen, u32 data) +{ + size_t start, offset, plen; + __wsum delta; + + if (skb->remcsum_offload) { + /* Already processed in GRO path */ + skb->remcsum_offload = 0; + return vh; + } + + start = (data & VXLAN_RCO_MASK) << VXLAN_RCO_SHIFT; + offset = start + ((data & VXLAN_RCO_UDP) ? 
+ offsetof(struct udphdr, check) : + offsetof(struct tcphdr, check)); + + plen = hdrlen + offset + sizeof(u16); + + if (!pskb_may_pull(skb, plen)) + return NULL; + + vh = (struct vxlanhdr *)(udp_hdr(skb) + 1); + + if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) + __skb_checksum_complete(skb); + + delta = remcsum_adjust((void *)vh + hdrlen, + skb->csum, start, offset); + + /* Adjust skb->csum since we changed the packet */ + skb->csum = csum_add(skb->csum, delta); + + return vh; +} + /* Callback from net/ipv4/udp.c to receive packets */ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) { @@ -1111,12 +1201,22 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (iptunnel_pull_header(skb, VXLAN_HLEN, htons(ETH_P_TEB))) goto drop; + vxh = (struct vxlanhdr *)(udp_hdr(skb) + 1); vs = rcu_dereference_sk_user_data(sk); if (!vs) goto drop; - if (flags || (vni & 0xff)) { + if ((flags & VXLAN_HF_RCO) && (vs->flags & VXLAN_F_REMCSUM_RX)) { + vxh = vxlan_remcsum(skb, vxh, sizeof(struct vxlanhdr), vni); + if (!vxh) + goto drop; + + flags &= ~VXLAN_HF_RCO; + vni &= VXLAN_VID_MASK; + } + + if (flags || (vni & ~VXLAN_VID_MASK)) { /* If there are any unprocessed flags remaining treat * this as a malformed packet. This behavior diverges from * VXLAN RFC (RFC7348) which stipulates that bits in reserved @@ -1553,8 +1653,23 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, int min_headroom; int err; bool udp_sum = !udp_get_no_check6_tx(vs->sock->sk); + int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + u16 hdrlen = sizeof(struct vxlanhdr); + + if ((vs->flags & VXLAN_F_REMCSUM_TX) && + skb->ip_summed == CHECKSUM_PARTIAL) { + int csum_start = skb_checksum_start_offset(skb); + + if (csum_start <= VXLAN_MAX_REMCSUM_START && + !(csum_start & VXLAN_RCO_SHIFT_MASK) && + (skb->csum_offset == offsetof(struct udphdr, check) || + skb->csum_offset == offsetof(struct tcphdr, check))) { + udp_sum = false; + type |= SKB_GSO_TUNNEL_REMCSUM; + } + } - skb = udp_tunnel_handle_offloads(skb, udp_sum); + skb = iptunnel_handle_offloads(skb, udp_sum, type); if (IS_ERR(skb)) { err = -EINVAL; goto err; @@ -1583,6 +1698,22 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, vxh->vx_flags = htonl(VXLAN_HF_VNI); vxh->vx_vni = vni; + if (type & SKB_GSO_TUNNEL_REMCSUM) { + u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> + VXLAN_RCO_SHIFT; + + if (skb->csum_offset == offsetof(struct udphdr, check)) + data |= VXLAN_RCO_UDP; + + vxh->vx_vni |= htonl(data); + vxh->vx_flags |= htonl(VXLAN_HF_RCO); + + if (!skb_is_gso(skb)) { + skb->ip_summed = CHECKSUM_NONE; + skb->encapsulation = 0; + } + } + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio, @@ -1603,8 +1734,23 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, int min_headroom; int err; bool udp_sum = !vs->sock->sk->sk_no_check_tx; + int type = udp_sum ? 
SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; + u16 hdrlen = sizeof(struct vxlanhdr); + + if ((vs->flags & VXLAN_F_REMCSUM_TX) && + skb->ip_summed == CHECKSUM_PARTIAL) { + int csum_start = skb_checksum_start_offset(skb); + + if (csum_start <= VXLAN_MAX_REMCSUM_START && + !(csum_start & VXLAN_RCO_SHIFT_MASK) && + (skb->csum_offset == offsetof(struct udphdr, check) || + skb->csum_offset == offsetof(struct tcphdr, check))) { + udp_sum = false; + type |= SKB_GSO_TUNNEL_REMCSUM; + } + } - skb = udp_tunnel_handle_offloads(skb, udp_sum); + skb = iptunnel_handle_offloads(skb, udp_sum, type); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -1627,6 +1773,22 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, vxh->vx_flags = htonl(VXLAN_HF_VNI); vxh->vx_vni = vni; + if (type & SKB_GSO_TUNNEL_REMCSUM) { + u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> + VXLAN_RCO_SHIFT; + + if (skb->csum_offset == offsetof(struct udphdr, check)) + data |= VXLAN_RCO_UDP; + + vxh->vx_vni |= htonl(data); + vxh->vx_flags |= htonl(VXLAN_HF_RCO); + + if (!skb_is_gso(skb)) { + skb->ip_summed = CHECKSUM_NONE; + skb->encapsulation = 0; + } + } + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos, @@ -2218,6 +2380,8 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_UDP_CSUM] = { .type = NLA_U8 }, [IFLA_VXLAN_UDP_ZERO_CSUM6_TX] = { .type = NLA_U8 }, [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, + [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 }, + [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 }, }; static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -2339,6 +2503,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, atomic_set(&vs->refcnt, 1); vs->rcv = rcv; vs->data = data; + vs->flags = flags; /* Initialize the vxlan udp offloads structure */ vs->udp_offloads.port = port; @@ -2533,6 +2698,14 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, nla_get_u8(data[IFLA_VXLAN_UDP_ZERO_CSUM6_RX])) vxlan->flags |= VXLAN_F_UDP_ZERO_CSUM6_RX; + if (data[IFLA_VXLAN_REMCSUM_TX] && + nla_get_u8(data[IFLA_VXLAN_REMCSUM_TX])) + vxlan->flags |= VXLAN_F_REMCSUM_TX; + + if (data[IFLA_VXLAN_REMCSUM_RX] && + nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX])) + vxlan->flags |= VXLAN_F_REMCSUM_RX; + if (vxlan_find_vni(net, vni, use_ipv6 ? 
AF_INET6 : AF_INET, vxlan->dst_port)) { pr_info("duplicate VNI %u\n", vni); @@ -2601,6 +2774,8 @@ static size_t vxlan_get_size(const struct net_device *dev) nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_CSUM */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_TX */ nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_UDP_ZERO_CSUM6_RX */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_TX */ + nla_total_size(sizeof(__u8)) + /* IFLA_VXLAN_REMCSUM_RX */ 0; } @@ -2666,7 +2841,11 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_TX, !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_TX)) || nla_put_u8(skb, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, - !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX))) + !!(vxlan->flags & VXLAN_F_UDP_ZERO_CSUM6_RX)) || + nla_put_u8(skb, IFLA_VXLAN_REMCSUM_TX, + !!(vxlan->flags & VXLAN_F_REMCSUM_TX)) || + nla_put_u8(skb, IFLA_VXLAN_REMCSUM_RX, + !!(vxlan->flags & VXLAN_F_REMCSUM_RX))) goto nla_put_failure; if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports)) diff --git a/include/net/vxlan.h b/include/net/vxlan.h index a0d80736224f..0a7443b49133 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -19,6 +19,14 @@ struct vxlanhdr { /* VXLAN header flags. */ #define VXLAN_HF_VNI 0x08000000 +#define VXLAN_HF_RCO 0x00200000 + +/* Remote checksum offload header option */ +#define VXLAN_RCO_MASK 0x7f /* Last byte of vni field */ +#define VXLAN_RCO_UDP 0x80 /* Indicate UDP RCO (TCP when not set *) */ +#define VXLAN_RCO_SHIFT 1 /* Left shift of start */ +#define VXLAN_RCO_SHIFT_MASK ((1 << VXLAN_RCO_SHIFT) - 1) +#define VXLAN_MAX_REMCSUM_START (VXLAN_RCO_MASK << VXLAN_RCO_SHIFT) #define VXLAN_N_VID (1u << 24) #define VXLAN_VID_MASK (VXLAN_N_VID - 1) @@ -38,6 +46,7 @@ struct vxlan_sock { struct hlist_head vni_list[VNI_HASH_SIZE]; atomic_t refcnt; struct udp_offload udp_offloads; + u32 flags; }; #define VXLAN_F_LEARN 0x01 @@ -49,6 +58,8 @@ struct vxlan_sock { #define VXLAN_F_UDP_CSUM 0x40 #define VXLAN_F_UDP_ZERO_CSUM6_TX 0x80 #define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100 +#define VXLAN_F_REMCSUM_TX 0x200 +#define VXLAN_F_REMCSUM_RX 0x400 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index f7d0d2d7173a..b2723f65846f 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -370,6 +370,8 @@ enum { IFLA_VXLAN_UDP_CSUM, IFLA_VXLAN_UDP_ZERO_CSUM6_TX, IFLA_VXLAN_UDP_ZERO_CSUM6_RX, + IFLA_VXLAN_REMCSUM_TX, + IFLA_VXLAN_REMCSUM_RX, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) -- cgit v1.2.3-70-g09d2 From 3511494ce2f3d3b77544c79b87511a4ddb61dc89 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 15 Jan 2015 03:53:55 +0100 Subject: vxlan: Group Policy extension Implements supports for the Group Policy VXLAN extension [0] to provide a lightweight and simple security label mechanism across network peers based on VXLAN. The security context and associated metadata is mapped to/from skb->mark. This allows further mapping to a SELinux context using SECMARK, to implement ACLs directly with nftables, iptables, OVS, tc, etc. The group membership is defined by the lower 16 bits of skb->mark, the upper 16 bits are used for flags. SELinux allows to manage label to secure local resources. However, distributed applications require ACLs to implemented across hosts. 
This is typically achieved by matching on L2-L4 fields to identify the original sending host and process on the receiver. On top of that, netlabel and specifically CIPSO [1] allow to map security contexts to universal labels. However, netlabel and CIPSO are relatively complex. This patch provides a lightweight alternative for overlay network environments with a trusted underlay. No additional control protocol is required. Host 1: Host 2: Group A Group B Group B Group A +-----+ +-------------+ +-------+ +-----+ | lxc | | SELinux CTX | | httpd | | VM | +--+--+ +--+----------+ +---+---+ +--+--+ \---+---/ \----+---/ | | +---+---+ +---+---+ | vxlan | | vxlan | +---+---+ +---+---+ +------------------------------+ Backwards compatibility: A VXLAN-GBP socket can receive standard VXLAN frames and will assign the default group 0x0000 to such frames. A Linux VXLAN socket will drop VXLAN-GBP frames. The extension is therefore disabled by default and needs to be specifically enabled: ip link add [...] type vxlan [...] gbp In a mixed environment with VXLAN and VXLAN-GBP sockets, the GBP socket must run on a separate port number. Examples: iptables: host1# iptables -I OUTPUT -m owner --uid-owner 101 -j MARK --set-mark 0x200 host2# iptables -I INPUT -m mark --mark 0x200 -j DROP OVS: # ovs-ofctl add-flow br0 'in_port=1,actions=load:0x200->NXM_NX_TUN_GBP_ID[],NORMAL' # ovs-ofctl add-flow br0 'in_port=2,tun_gbp_id=0x200,actions=drop' [0] https://tools.ietf.org/html/draft-smith-vxlan-group-policy [1] http://lwn.net/Articles/204905/ Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 84 ++++++++++++++++++++++++++++++++++++------- include/net/vxlan.h | 79 +++++++++++++++++++++++++++++++++++++--- include/uapi/linux/if_link.h | 1 + net/openvswitch/vport-vxlan.c | 9 +++-- 4 files changed, 152 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 99df0d76157c..6dbf8e041922 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -620,7 +620,8 @@ static struct sk_buff **vxlan_gro_receive(struct sk_buff **head, continue; vh2 = (struct vxlanhdr *)(p->data + off_vx); - if (vh->vx_vni != vh2->vx_vni) { + if (vh->vx_flags != vh2->vx_flags || + vh->vx_vni != vh2->vx_vni) { NAPI_GRO_CB(p)->same_flow = 0; continue; } @@ -1183,6 +1184,7 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) struct vxlan_sock *vs; struct vxlanhdr *vxh; u32 flags, vni; + struct vxlan_metadata md = {0}; /* Need Vxlan and inner Ethernet header to be present */ if (!pskb_may_pull(skb, VXLAN_HLEN)) @@ -1216,6 +1218,24 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) vni &= VXLAN_VID_MASK; } + /* For backwards compatibility, only allow reserved fields to be + * used by VXLAN extensions if explicitly requested. + */ + if ((flags & VXLAN_HF_GBP) && (vs->flags & VXLAN_F_GBP)) { + struct vxlanhdr_gbp *gbp; + + gbp = (struct vxlanhdr_gbp *)vxh; + md.gbp = ntohs(gbp->policy_id); + + if (gbp->dont_learn) + md.gbp |= VXLAN_GBP_DONT_LEARN; + + if (gbp->policy_applied) + md.gbp |= VXLAN_GBP_POLICY_APPLIED; + + flags &= ~VXLAN_GBP_USED_BITS; + } + if (flags || (vni & ~VXLAN_VID_MASK)) { /* If there are any unprocessed flags remaining treat * this as a malformed packet. 
This behavior diverges from @@ -1229,7 +1249,8 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) goto bad_flags; } - vs->rcv(vs, skb, vxh->vx_vni); + md.vni = vxh->vx_vni; + vs->rcv(vs, skb, &md); return 0; drop: @@ -1246,8 +1267,8 @@ error: return 1; } -static void vxlan_rcv(struct vxlan_sock *vs, - struct sk_buff *skb, __be32 vx_vni) +static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, + struct vxlan_metadata *md) { struct iphdr *oip = NULL; struct ipv6hdr *oip6 = NULL; @@ -1258,7 +1279,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, int err = 0; union vxlan_addr *remote_ip; - vni = ntohl(vx_vni) >> 8; + vni = ntohl(md->vni) >> 8; /* Is this VNI defined? */ vxlan = vxlan_vs_find_vni(vs, vni); if (!vxlan) @@ -1292,6 +1313,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, goto drop; skb_reset_network_header(skb); + skb->mark = md->gbp; if (oip6) err = IP6_ECN_decapsulate(oip6, skb); @@ -1641,13 +1663,30 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } +static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs, + struct vxlan_metadata *md) +{ + struct vxlanhdr_gbp *gbp; + + gbp = (struct vxlanhdr_gbp *)vxh; + vxh->vx_flags |= htonl(VXLAN_HF_GBP); + + if (md->gbp & VXLAN_GBP_DONT_LEARN) + gbp->dont_learn = 1; + + if (md->gbp & VXLAN_GBP_POLICY_APPLIED) + gbp->policy_applied = 1; + + gbp->policy_id = htons(md->gbp & VXLAN_GBP_ID_MASK); +} + #if IS_ENABLED(CONFIG_IPV6) static int vxlan6_xmit_skb(struct vxlan_sock *vs, struct dst_entry *dst, struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, - __be16 src_port, __be16 dst_port, __be32 vni, - bool xnet) + __be16 src_port, __be16 dst_port, + struct vxlan_metadata *md, bool xnet) { struct vxlanhdr *vxh; int min_headroom; @@ -1696,7 +1735,7 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_HF_VNI); - vxh->vx_vni = vni; + vxh->vx_vni = md->vni; if (type & SKB_GSO_TUNNEL_REMCSUM) { u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> @@ -1714,6 +1753,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, } } + if (vs->flags & VXLAN_F_GBP) + vxlan_build_gbp_hdr(vxh, vs, md); + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio, @@ -1728,7 +1770,8 @@ err: int vxlan_xmit_skb(struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni, bool xnet) + __be16 src_port, __be16 dst_port, + struct vxlan_metadata *md, bool xnet) { struct vxlanhdr *vxh; int min_headroom; @@ -1771,7 +1814,7 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, vxh = (struct vxlanhdr *) __skb_push(skb, sizeof(*vxh)); vxh->vx_flags = htonl(VXLAN_HF_VNI); - vxh->vx_vni = vni; + vxh->vx_vni = md->vni; if (type & SKB_GSO_TUNNEL_REMCSUM) { u32 data = (skb_checksum_start_offset(skb) - hdrlen) >> @@ -1789,6 +1832,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, } } + if (vs->flags & VXLAN_F_GBP) + vxlan_build_gbp_hdr(vxh, vs, md); + skb_set_inner_protocol(skb, htons(ETH_P_TEB)); return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos, @@ -1849,6 +1895,7 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, const struct iphdr *old_iph; struct flowi4 fl4; union vxlan_addr *dst; + struct vxlan_metadata md; __be16 src_port = 0, dst_port; u32 vni; __be16 df = 0; @@ 
-1919,11 +1966,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, tos = ip_tunnel_ecn_encap(tos, old_iph, skb); ttl = ttl ? : ip4_dst_hoplimit(&rt->dst); + md.vni = htonl(vni << 8); + md.gbp = skb->mark; err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb, fl4.saddr, dst->sin.sin_addr.s_addr, - tos, ttl, df, src_port, dst_port, - htonl(vni << 8), + tos, ttl, df, src_port, dst_port, &md, !net_eq(vxlan->net, dev_net(vxlan->dev))); if (err < 0) { /* skb is already freed. */ @@ -1976,10 +2024,12 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, } ttl = ttl ? : ip6_dst_hoplimit(ndst); + md.vni = htonl(vni << 8); + md.gbp = skb->mark; err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb, dev, &fl6.saddr, &fl6.daddr, 0, ttl, - src_port, dst_port, htonl(vni << 8), + src_port, dst_port, &md, !net_eq(vxlan->net, dev_net(vxlan->dev))); #endif } @@ -2382,6 +2432,7 @@ static const struct nla_policy vxlan_policy[IFLA_VXLAN_MAX + 1] = { [IFLA_VXLAN_UDP_ZERO_CSUM6_RX] = { .type = NLA_U8 }, [IFLA_VXLAN_REMCSUM_TX] = { .type = NLA_U8 }, [IFLA_VXLAN_REMCSUM_RX] = { .type = NLA_U8 }, + [IFLA_VXLAN_GBP] = { .type = NLA_FLAG, }, }; static int vxlan_validate(struct nlattr *tb[], struct nlattr *data[]) @@ -2706,6 +2757,9 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, nla_get_u8(data[IFLA_VXLAN_REMCSUM_RX])) vxlan->flags |= VXLAN_F_REMCSUM_RX; + if (data[IFLA_VXLAN_GBP]) + vxlan->flags |= VXLAN_F_GBP; + if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET, vxlan->dst_port)) { pr_info("duplicate VNI %u\n", vni); @@ -2851,6 +2905,10 @@ static int vxlan_fill_info(struct sk_buff *skb, const struct net_device *dev) if (nla_put(skb, IFLA_VXLAN_PORT_RANGE, sizeof(ports), &ports)) goto nla_put_failure; + if (vxlan->flags & VXLAN_F_GBP && + nla_put_flag(skb, IFLA_VXLAN_GBP)) + goto nla_put_failure; + return 0; nla_put_failure: diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 0a7443b49133..f4a3583171bd 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -11,15 +11,76 @@ #define VNI_HASH_BITS 10 #define VNI_HASH_SIZE (1<" +#endif + __be16 policy_id; + __be32 vx_vni; +}; + +#define VXLAN_GBP_USED_BITS (VXLAN_HF_GBP | 0xFFFFFF) + +/* skb->mark mapping + * + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |R|R|R|R|R|R|R|R|R|D|R|R|A|R|R|R| Group Policy ID | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +#define VXLAN_GBP_DONT_LEARN (BIT(6) << 16) +#define VXLAN_GBP_POLICY_APPLIED (BIT(3) << 16) +#define VXLAN_GBP_ID_MASK (0xFFFF) + +/* VXLAN protocol header: + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * |G|R|R|R|I|R|R|C| Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | VXLAN Network Identifier (VNI) | Reserved | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * G = 1 Group Policy (VXLAN-GBP) + * I = 1 VXLAN Network Identifier (VNI) present + * C = 1 Remote checksum offload (RCO) + */ struct vxlanhdr { __be32 vx_flags; __be32 vx_vni; }; /* VXLAN header flags. 
*/ -#define VXLAN_HF_VNI 0x08000000 -#define VXLAN_HF_RCO 0x00200000 +#define VXLAN_HF_RCO BIT(24) +#define VXLAN_HF_VNI BIT(27) +#define VXLAN_HF_GBP BIT(31) /* Remote checksum offload header option */ #define VXLAN_RCO_MASK 0x7f /* Last byte of vni field */ @@ -32,8 +93,14 @@ struct vxlanhdr { #define VXLAN_VID_MASK (VXLAN_N_VID - 1) #define VXLAN_HLEN (sizeof(struct udphdr) + sizeof(struct vxlanhdr)) +struct vxlan_metadata { + __be32 vni; + u32 gbp; +}; + struct vxlan_sock; -typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, __be32 key); +typedef void (vxlan_rcv_t)(struct vxlan_sock *vh, struct sk_buff *skb, + struct vxlan_metadata *md); /* per UDP socket information */ struct vxlan_sock { @@ -60,6 +127,7 @@ struct vxlan_sock { #define VXLAN_F_UDP_ZERO_CSUM6_RX 0x100 #define VXLAN_F_REMCSUM_TX 0x200 #define VXLAN_F_REMCSUM_RX 0x400 +#define VXLAN_F_GBP 0x800 struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, @@ -70,7 +138,8 @@ void vxlan_sock_release(struct vxlan_sock *vs); int vxlan_xmit_skb(struct vxlan_sock *vs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, - __be16 src_port, __be16 dst_port, __be32 vni, bool xnet); + __be16 src_port, __be16 dst_port, struct vxlan_metadata *md, + bool xnet); static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, netdev_features_t features) diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index b2723f65846f..2a8380edbb7e 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -372,6 +372,7 @@ enum { IFLA_VXLAN_UDP_ZERO_CSUM6_RX, IFLA_VXLAN_REMCSUM_TX, IFLA_VXLAN_REMCSUM_RX, + IFLA_VXLAN_GBP, __IFLA_VXLAN_MAX }; #define IFLA_VXLAN_MAX (__IFLA_VXLAN_MAX - 1) diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 1435a053a870..9919d71c52c3 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -59,7 +59,8 @@ static inline struct vxlan_port *vxlan_vport(const struct vport *vport) } /* Called with rcu_read_lock and BH disabled. 
*/ -static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) +static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, + struct vxlan_metadata *md) { struct ovs_tunnel_info tun_info; struct vport *vport = vs->data; @@ -68,7 +69,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be32 vx_vni) /* Save outer tunnel values */ iph = ip_hdr(skb); - key = cpu_to_be64(ntohl(vx_vni) >> 8); + key = cpu_to_be64(ntohl(md->vni) >> 8); ovs_flow_tun_info_init(&tun_info, iph, udp_hdr(skb)->source, udp_hdr(skb)->dest, key, TUNNEL_KEY, NULL, 0); @@ -146,6 +147,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) struct vxlan_port *vxlan_port = vxlan_vport(vport); __be16 dst_port = inet_sk(vxlan_port->vs->sock->sk)->inet_sport; const struct ovs_key_ipv4_tunnel *tun_key; + struct vxlan_metadata md = {0}; struct rtable *rt; struct flowi4 fl; __be16 src_port; @@ -170,12 +172,13 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) skb->ignore_df = 1; src_port = udp_flow_src_port(net, skb, 0, 0, true); + md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, src_port, dst_port, - htonl(be64_to_cpu(tun_key->tun_id) << 8), + &md, false); if (err < 0) ip_rt_put(rt); -- cgit v1.2.3-70-g09d2 From ac5132d1a03fe1ebbefb2382b36e829dff056283 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 15 Jan 2015 03:53:56 +0100 Subject: vxlan: Only bind to sockets with compatible flags enabled A VXLAN net_device looking for an appropriate socket may only consider a socket which has a matching set of flags/extensions enabled. If incompatible flags are enabled, return a conflict to have the caller create a distinct socket with distinct port. The OVS VXLAN port is kept unaware of extensions at this point. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 29 ++++++++++++++++++----------- include/net/vxlan.h | 3 +++ 2 files changed, 21 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6dbf8e041922..6b6b45622a0a 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -263,15 +263,19 @@ static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb) return list_first_entry(&fdb->remotes, struct vxlan_rdst, list); } -/* Find VXLAN socket based on network namespace, address family and UDP port */ -static struct vxlan_sock *vxlan_find_sock(struct net *net, - sa_family_t family, __be16 port) +/* Find VXLAN socket based on network namespace, address family and UDP port + * and enabled unshareable flags. 
+ */ +static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family, + __be16 port, u32 flags) { struct vxlan_sock *vs; + u32 match_flags = flags & VXLAN_F_UNSHAREABLE; hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) { if (inet_sk(vs->sock->sk)->inet_sport == port && - inet_sk(vs->sock->sk)->sk.sk_family == family) + inet_sk(vs->sock->sk)->sk.sk_family == family && + (vs->flags & VXLAN_F_UNSHAREABLE) == match_flags) return vs; } return NULL; @@ -291,11 +295,12 @@ static struct vxlan_dev *vxlan_vs_find_vni(struct vxlan_sock *vs, u32 id) /* Look up VNI in a per net namespace table */ static struct vxlan_dev *vxlan_find_vni(struct net *net, u32 id, - sa_family_t family, __be16 port) + sa_family_t family, __be16 port, + u32 flags) { struct vxlan_sock *vs; - vs = vxlan_find_sock(net, family, port); + vs = vxlan_find_sock(net, family, port, flags); if (!vs) return NULL; @@ -1957,7 +1962,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, ip_rt_put(rt); dst_vxlan = vxlan_find_vni(vxlan->net, vni, - dst->sa.sa_family, dst_port); + dst->sa.sa_family, dst_port, + vxlan->flags); if (!dst_vxlan) goto tx_error; vxlan_encap_bypass(skb, vxlan, dst_vxlan); @@ -2016,7 +2022,8 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, dst_release(ndst); dst_vxlan = vxlan_find_vni(vxlan->net, vni, - dst->sa.sa_family, dst_port); + dst->sa.sa_family, dst_port, + vxlan->flags); if (!dst_vxlan) goto tx_error; vxlan_encap_bypass(skb, vxlan, dst_vxlan); @@ -2186,7 +2193,7 @@ static int vxlan_init(struct net_device *dev) spin_lock(&vn->sock_lock); vs = vxlan_find_sock(vxlan->net, ipv6 ? AF_INET6 : AF_INET, - vxlan->dst_port); + vxlan->dst_port, vxlan->flags); if (vs && atomic_add_unless(&vs->refcnt, 1, 0)) { /* If we have a socket with same port already, reuse it */ vxlan_vs_add_dev(vs, vxlan); @@ -2593,7 +2600,7 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, return vs; spin_lock(&vn->sock_lock); - vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port); + vs = vxlan_find_sock(net, ipv6 ? AF_INET6 : AF_INET, port, flags); if (vs && ((vs->rcv != rcv) || !atomic_add_unless(&vs->refcnt, 1, 0))) vs = ERR_PTR(-EBUSY); @@ -2761,7 +2768,7 @@ static int vxlan_newlink(struct net *net, struct net_device *dev, vxlan->flags |= VXLAN_F_GBP; if (vxlan_find_vni(net, vni, use_ipv6 ? AF_INET6 : AF_INET, - vxlan->dst_port)) { + vxlan->dst_port, vxlan->flags)) { pr_info("duplicate VNI %u\n", vni); return -EEXIST; } diff --git a/include/net/vxlan.h b/include/net/vxlan.h index f4a3583171bd..7be8c342fc95 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -129,6 +129,9 @@ struct vxlan_sock { #define VXLAN_F_REMCSUM_RX 0x400 #define VXLAN_F_GBP 0x800 +/* These flags must match in order for a socket to be shareable */ +#define VXLAN_F_UNSHAREABLE VXLAN_F_GBP + struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, bool no_share, u32 flags); -- cgit v1.2.3-70-g09d2 From 1dd144cf5b4b47e12438c2c6883925ce1a9b499f Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 15 Jan 2015 03:53:59 +0100 Subject: openvswitch: Support VXLAN Group Policy extension Introduces support for the group policy extension to the VXLAN virtual port. The extension is disabled by default and only enabled if the user has provided the respective configuration. 
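An illustrative aside, not part of this patch: the ovs-vsctl invocation below is the userspace knob; on the kernel side the request arrives as a flag nested inside OVS_TUNNEL_ATTR_EXTENSION. The helper parse_vxlan_exts() here is hypothetical and only mirrors, in compressed form, what the vxlan_configure_exts() hunk further down in this patch does:

	static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX + 1] = {
		[OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG },
	};

	/* attr: the OVS_TUNNEL_ATTR_EXTENSION attribute from the vport options */
	static int parse_vxlan_exts(struct nlattr *attr, u32 *vxlan_flags)
	{
		struct nlattr *exts[OVS_VXLAN_EXT_MAX + 1];
		int err;

		err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy);
		if (err < 0)
			return err;

		if (exts[OVS_VXLAN_EXT_GBP])
			*vxlan_flags |= VXLAN_F_GBP;	/* request a GBP-capable socket */

		return 0;
	}
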
ovs-vsctl add-port br0 vxlan0 -- \ set Interface vxlan0 type=vxlan options:exts=gbp The configuration interface to enable the extension is based on a new attribute OVS_VXLAN_EXT_GBP nested inside OVS_TUNNEL_ATTR_EXTENSION which can carry additional extensions as needed in the future. The group policy metadata is stored as binary blob (struct ovs_vxlan_opts) internally just like Geneve options but transported as nested Netlink attributes to user space. Renames the existing TUNNEL_OPTIONS_PRESENT to TUNNEL_GENEVE_OPT with the binary value kept intact, a new flag TUNNEL_VXLAN_OPT is introduced. The attributes OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS and existing OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS are implemented mutually exclusive. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip_tunnels.h | 5 +- include/uapi/linux/openvswitch.h | 11 ++++ net/openvswitch/flow_netlink.c | 114 ++++++++++++++++++++++++++++++++++----- net/openvswitch/vport-geneve.c | 15 ++++-- net/openvswitch/vport-vxlan.c | 82 +++++++++++++++++++++++++++- net/openvswitch/vport-vxlan.h | 11 ++++ 6 files changed, 218 insertions(+), 20 deletions(-) create mode 100644 net/openvswitch/vport-vxlan.h (limited to 'include') diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index 25a59eb388a6..ce4db3cc5647 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -97,7 +97,10 @@ struct ip_tunnel { #define TUNNEL_DONT_FRAGMENT __cpu_to_be16(0x0100) #define TUNNEL_OAM __cpu_to_be16(0x0200) #define TUNNEL_CRIT_OPT __cpu_to_be16(0x0400) -#define TUNNEL_OPTIONS_PRESENT __cpu_to_be16(0x0800) +#define TUNNEL_GENEVE_OPT __cpu_to_be16(0x0800) +#define TUNNEL_VXLAN_OPT __cpu_to_be16(0x1000) + +#define TUNNEL_OPTIONS_PRESENT (TUNNEL_GENEVE_OPT | TUNNEL_VXLAN_OPT) struct tnl_ptk_info { __be16 flags; diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index f714e8633352..cd8d933963c2 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -252,11 +252,21 @@ enum ovs_vport_attr { #define OVS_VPORT_ATTR_MAX (__OVS_VPORT_ATTR_MAX - 1) +enum { + OVS_VXLAN_EXT_UNSPEC, + OVS_VXLAN_EXT_GBP, /* Flag or __u32 */ + __OVS_VXLAN_EXT_MAX, +}; + +#define OVS_VXLAN_EXT_MAX (__OVS_VXLAN_EXT_MAX - 1) + + /* OVS_VPORT_ATTR_OPTIONS attributes for tunnels. */ enum { OVS_TUNNEL_ATTR_UNSPEC, OVS_TUNNEL_ATTR_DST_PORT, /* 16-bit UDP port, used by L4 tunnels. */ + OVS_TUNNEL_ATTR_EXTENSION, __OVS_TUNNEL_ATTR_MAX }; @@ -328,6 +338,7 @@ enum ovs_tunnel_key_attr { OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, /* Array of Geneve options. */ OVS_TUNNEL_KEY_ATTR_TP_SRC, /* be16 src Transport Port. */ OVS_TUNNEL_KEY_ATTR_TP_DST, /* be16 dst Transport Port. */ + OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS, /* Nested OVS_VXLAN_EXT_* */ __OVS_TUNNEL_KEY_ATTR_MAX }; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 518941c5bdf1..d210d1be3470 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -49,6 +49,7 @@ #include #include "flow_netlink.h" +#include "vport-vxlan.h" struct ovs_len_tbl { int len; @@ -268,6 +269,9 @@ size_t ovs_tun_key_attr_size(void) + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_CSUM */ + nla_total_size(0) /* OVS_TUNNEL_KEY_ATTR_OAM */ + nla_total_size(256) /* OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS */ + /* OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS is mutually exclusive with + * OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS and covered by it. 
+ */ + nla_total_size(2) /* OVS_TUNNEL_KEY_ATTR_TP_SRC */ + nla_total_size(2); /* OVS_TUNNEL_KEY_ATTR_TP_DST */ } @@ -308,6 +312,7 @@ static const struct ovs_len_tbl ovs_tunnel_key_lens[OVS_TUNNEL_KEY_ATTR_MAX + 1] [OVS_TUNNEL_KEY_ATTR_TP_DST] = { .len = sizeof(u16) }, [OVS_TUNNEL_KEY_ATTR_OAM] = { .len = 0 }, [OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS] = { .len = OVS_ATTR_NESTED }, + [OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS] = { .len = OVS_ATTR_NESTED }, }; /* The size of the argument for each %OVS_KEY_ATTR_* Netlink attribute. */ @@ -460,6 +465,41 @@ static int genev_tun_opt_from_nlattr(const struct nlattr *a, return 0; } +static const struct nla_policy vxlan_opt_policy[OVS_VXLAN_EXT_MAX + 1] = { + [OVS_VXLAN_EXT_GBP] = { .type = NLA_U32 }, +}; + +static int vxlan_tun_opt_from_nlattr(const struct nlattr *a, + struct sw_flow_match *match, bool is_mask, + bool log) +{ + struct nlattr *tb[OVS_VXLAN_EXT_MAX+1]; + unsigned long opt_key_offset; + struct ovs_vxlan_opts opts; + int err; + + BUILD_BUG_ON(sizeof(opts) > sizeof(match->key->tun_opts)); + + err = nla_parse_nested(tb, OVS_VXLAN_EXT_MAX, a, vxlan_opt_policy); + if (err < 0) + return err; + + memset(&opts, 0, sizeof(opts)); + + if (tb[OVS_VXLAN_EXT_GBP]) + opts.gbp = nla_get_u32(tb[OVS_VXLAN_EXT_GBP]); + + if (!is_mask) + SW_FLOW_KEY_PUT(match, tun_opts_len, sizeof(opts), false); + else + SW_FLOW_KEY_PUT(match, tun_opts_len, 0xff, true); + + opt_key_offset = TUN_METADATA_OFFSET(sizeof(opts)); + SW_FLOW_KEY_MEMCPY_OFFSET(match, opt_key_offset, &opts, sizeof(opts), + is_mask); + return 0; +} + static int ipv4_tun_from_nlattr(const struct nlattr *attr, struct sw_flow_match *match, bool is_mask, bool log) @@ -468,6 +508,7 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, int rem; bool ttl = false; __be16 tun_flags = 0; + int opts_type = 0; nla_for_each_nested(a, attr, rem) { int type = nla_type(a); @@ -527,11 +568,30 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, tun_flags |= TUNNEL_OAM; break; case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + err = genev_tun_opt_from_nlattr(a, match, is_mask, log); if (err) return err; - tun_flags |= TUNNEL_OPTIONS_PRESENT; + tun_flags |= TUNNEL_GENEVE_OPT; + opts_type = type; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + if (opts_type) { + OVS_NLERR(log, "Multiple metadata blocks provided"); + return -EINVAL; + } + + err = vxlan_tun_opt_from_nlattr(a, match, is_mask, log); + if (err) + return err; + + tun_flags |= TUNNEL_VXLAN_OPT; + opts_type = type; break; default: OVS_NLERR(log, "Unknown IPv4 tunnel attribute %d", @@ -560,6 +620,23 @@ static int ipv4_tun_from_nlattr(const struct nlattr *attr, } } + return opts_type; +} + +static int vxlan_opt_to_nlattr(struct sk_buff *skb, + const void *tun_opts, int swkey_tun_opts_len) +{ + const struct ovs_vxlan_opts *opts = tun_opts; + struct nlattr *nla; + + nla = nla_nest_start(skb, OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS); + if (!nla) + return -EMSGSIZE; + + if (nla_put_u32(skb, OVS_VXLAN_EXT_GBP, opts->gbp) < 0) + return -EMSGSIZE; + + nla_nest_end(skb, nla); return 0; } @@ -596,10 +673,15 @@ static int __ipv4_tun_to_nlattr(struct sk_buff *skb, if ((output->tun_flags & TUNNEL_OAM) && nla_put_flag(skb, OVS_TUNNEL_KEY_ATTR_OAM)) return -EMSGSIZE; - if (tun_opts && - nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, - swkey_tun_opts_len, tun_opts)) - return -EMSGSIZE; + if (tun_opts) { + if (output->tun_flags & TUNNEL_GENEVE_OPT && + nla_put(skb, OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS, + 
swkey_tun_opts_len, tun_opts)) + return -EMSGSIZE; + else if (output->tun_flags & TUNNEL_VXLAN_OPT && + vxlan_opt_to_nlattr(skb, tun_opts, swkey_tun_opts_len)) + return -EMSGSIZE; + } return 0; } @@ -680,7 +762,7 @@ static int metadata_from_nlattrs(struct sw_flow_match *match, u64 *attrs, } if (*attrs & (1 << OVS_KEY_ATTR_TUNNEL)) { if (ipv4_tun_from_nlattr(a[OVS_KEY_ATTR_TUNNEL], match, - is_mask, log)) + is_mask, log) < 0) return -EINVAL; *attrs &= ~(1 << OVS_KEY_ATTR_TUNNEL); } @@ -1578,17 +1660,23 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, struct sw_flow_key key; struct ovs_tunnel_info *tun_info; struct nlattr *a; - int err, start; + int err, start, opts_type; ovs_match_init(&match, &key, NULL); - err = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); - if (err) - return err; + opts_type = ipv4_tun_from_nlattr(nla_data(attr), &match, false, log); + if (opts_type < 0) + return opts_type; if (key.tun_opts_len) { - err = validate_geneve_opts(&key); - if (err < 0) - return err; + switch (opts_type) { + case OVS_TUNNEL_KEY_ATTR_GENEVE_OPTS: + err = validate_geneve_opts(&key); + if (err < 0) + return err; + break; + case OVS_TUNNEL_KEY_ATTR_VXLAN_OPTS: + break; + } }; start = add_nested_action_start(sfa, OVS_ACTION_ATTR_SET, log); diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 88a010c98c05..7ca3d454ff3b 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -88,7 +88,7 @@ static void geneve_rcv(struct geneve_sock *gs, struct sk_buff *skb) opts_len = geneveh->opt_len * 4; - flags = TUNNEL_KEY | TUNNEL_OPTIONS_PRESENT | + flags = TUNNEL_KEY | TUNNEL_GENEVE_OPT | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0) | (geneveh->oam ? TUNNEL_OAM : 0) | (geneveh->critical ? 
TUNNEL_CRIT_OPT : 0); @@ -178,7 +178,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) __be16 sport; struct rtable *rt; struct flowi4 fl; - u8 vni[3]; + u8 vni[3], opts_len, *opts; __be16 df; int err; @@ -200,11 +200,18 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) tunnel_id_to_vni(tun_key->tun_id, vni); skb->ignore_df = 1; + if (tun_key->tun_flags & TUNNEL_GENEVE_OPT) { + opts = (u8 *)tun_info->options; + opts_len = tun_info->options_len; + } else { + opts = NULL; + opts_len = 0; + } + err = geneve_xmit_skb(geneve_port->gs, rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, sport, dport, - tun_key->tun_flags, vni, - tun_info->options_len, (u8 *)tun_info->options, + tun_key->tun_flags, vni, opts_len, opts, false); if (err < 0) ip_rt_put(rt); diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 9919d71c52c3..8a2d54cba9ba 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -40,6 +40,7 @@ #include "datapath.h" #include "vport.h" +#include "vport-vxlan.h" /** * struct vxlan_port - Keeps track of open UDP ports @@ -49,6 +50,7 @@ struct vxlan_port { struct vxlan_sock *vs; char name[IFNAMSIZ]; + u32 exts; /* VXLAN_F_* in */ }; static struct vport_ops ovs_vxlan_vport_ops; @@ -63,16 +65,26 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, struct vxlan_metadata *md) { struct ovs_tunnel_info tun_info; + struct vxlan_port *vxlan_port; struct vport *vport = vs->data; struct iphdr *iph; + struct ovs_vxlan_opts opts = { + .gbp = md->gbp, + }; __be64 key; + __be16 flags; + + flags = TUNNEL_KEY; + vxlan_port = vxlan_vport(vport); + if (vxlan_port->exts & VXLAN_F_GBP) + flags |= TUNNEL_VXLAN_OPT; /* Save outer tunnel values */ iph = ip_hdr(skb); key = cpu_to_be64(ntohl(md->vni) >> 8); ovs_flow_tun_info_init(&tun_info, iph, udp_hdr(skb)->source, udp_hdr(skb)->dest, - key, TUNNEL_KEY, NULL, 0); + key, flags, &opts, sizeof(opts)); ovs_vport_receive(vport, skb, &tun_info); } @@ -84,6 +96,21 @@ static int vxlan_get_options(const struct vport *vport, struct sk_buff *skb) if (nla_put_u16(skb, OVS_TUNNEL_ATTR_DST_PORT, ntohs(dst_port))) return -EMSGSIZE; + + if (vxlan_port->exts) { + struct nlattr *exts; + + exts = nla_nest_start(skb, OVS_TUNNEL_ATTR_EXTENSION); + if (!exts) + return -EMSGSIZE; + + if (vxlan_port->exts & VXLAN_F_GBP && + nla_put_flag(skb, OVS_VXLAN_EXT_GBP)) + return -EMSGSIZE; + + nla_nest_end(skb, exts); + } + return 0; } @@ -96,6 +123,31 @@ static void vxlan_tnl_destroy(struct vport *vport) ovs_vport_deferred_free(vport); } +static const struct nla_policy exts_policy[OVS_VXLAN_EXT_MAX+1] = { + [OVS_VXLAN_EXT_GBP] = { .type = NLA_FLAG, }, +}; + +static int vxlan_configure_exts(struct vport *vport, struct nlattr *attr) +{ + struct nlattr *exts[OVS_VXLAN_EXT_MAX+1]; + struct vxlan_port *vxlan_port; + int err; + + if (nla_len(attr) < sizeof(struct nlattr)) + return -EINVAL; + + err = nla_parse_nested(exts, OVS_VXLAN_EXT_MAX, attr, exts_policy); + if (err < 0) + return err; + + vxlan_port = vxlan_vport(vport); + + if (exts[OVS_VXLAN_EXT_GBP]) + vxlan_port->exts |= VXLAN_F_GBP; + + return 0; +} + static struct vport *vxlan_tnl_create(const struct vport_parms *parms) { struct net *net = ovs_dp_get_net(parms->dp); @@ -128,7 +180,17 @@ static struct vport *vxlan_tnl_create(const struct vport_parms *parms) vxlan_port = vxlan_vport(vport); strncpy(vxlan_port->name, parms->name, IFNAMSIZ); - vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, 
true, 0); + a = nla_find_nested(options, OVS_TUNNEL_ATTR_EXTENSION); + if (a) { + err = vxlan_configure_exts(vport, a); + if (err) { + ovs_vport_free(vport); + goto error; + } + } + + vs = vxlan_sock_add(net, htons(dst_port), vxlan_rcv, vport, true, + vxlan_port->exts); if (IS_ERR(vs)) { ovs_vport_free(vport); return (void *)vs; @@ -141,6 +203,21 @@ error: return ERR_PTR(err); } +static int vxlan_ext_gbp(struct sk_buff *skb) +{ + const struct ovs_tunnel_info *tun_info; + const struct ovs_vxlan_opts *opts; + + tun_info = OVS_CB(skb)->egress_tun_info; + opts = tun_info->options; + + if (tun_info->tunnel.tun_flags & TUNNEL_VXLAN_OPT && + tun_info->options_len >= sizeof(*opts)) + return opts->gbp; + else + return 0; +} + static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) { struct net *net = ovs_dp_get_net(vport->dp); @@ -173,6 +250,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) src_port = udp_flow_src_port(net, skb, 0, 0, true); md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); + md.gbp = vxlan_ext_gbp(skb); err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, fl.saddr, tun_key->ipv4_dst, diff --git a/net/openvswitch/vport-vxlan.h b/net/openvswitch/vport-vxlan.h new file mode 100644 index 000000000000..4b08233e73d5 --- /dev/null +++ b/net/openvswitch/vport-vxlan.h @@ -0,0 +1,11 @@ +#ifndef VPORT_VXLAN_H +#define VPORT_VXLAN_H 1 + +#include +#include + +struct ovs_vxlan_opts { + __u32 gbp; +}; + +#endif -- cgit v1.2.3-70-g09d2 From f89903d53f4d39577be98940f7cfa49d66f86db5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 15 Jan 2015 16:02:46 +0100 Subject: mac80211: remove 80+80 MHz rate reporting These rates are treated the same as 160 MHz in the spec, so it makes no sense to distinguish them. As no driver uses them yet, this is also not a problem, just remove them. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 6 ++---- net/mac80211/cfg.c | 2 -- net/mac80211/rx.c | 5 ----- net/mac80211/util.c | 2 -- 4 files changed, 2 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 123f2308958a..275ee56152ad 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -931,15 +931,13 @@ enum mac80211_rx_flags { * These flags are used with the @vht_flag member of * &struct ieee80211_rx_status. 
* @RX_VHT_FLAG_80MHZ: 80 MHz was used - * @RX_VHT_FLAG_80P80MHZ: 80+80 MHz was used * @RX_VHT_FLAG_160MHZ: 160 MHz was used * @RX_VHT_FLAG_BF: packet was beamformed */ enum mac80211_rx_vht_flags { RX_VHT_FLAG_80MHZ = BIT(0), - RX_VHT_FLAG_80P80MHZ = BIT(1), - RX_VHT_FLAG_160MHZ = BIT(2), - RX_VHT_FLAG_BF = BIT(3), + RX_VHT_FLAG_160MHZ = BIT(1), + RX_VHT_FLAG_BF = BIT(2), }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fd6860d7f557..6d5076fbf87a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -465,8 +465,6 @@ void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80MHZ) rinfo->flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; - if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80P80MHZ) - rinfo->flags |= RATE_INFO_FLAGS_80P80_MHZ_WIDTH; if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_160MHZ) rinfo->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; } diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 3a1a3ba40bd8..3d79d498e7f6 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -361,9 +361,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, u16 known = local->hw.radiotap_vht_details; rthdr->it_present |= cpu_to_le32(1 << IEEE80211_RADIOTAP_VHT); - /* known field - how to handle 80+80? */ - if (status->vht_flag & RX_VHT_FLAG_80P80MHZ) - known &= ~IEEE80211_RADIOTAP_VHT_KNOWN_BANDWIDTH; put_unaligned_le16(known, pos); pos += 2; /* flags */ @@ -378,8 +375,6 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local, /* bandwidth */ if (status->vht_flag & RX_VHT_FLAG_80MHZ) *pos++ = 4; - else if (status->vht_flag & RX_VHT_FLAG_80P80MHZ) - *pos++ = 0; /* marked not known above */ else if (status->vht_flag & RX_VHT_FLAG_160MHZ) *pos++ = 11; else if (status->flag & RX_FLAG_40MHZ) diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 83ba6cd9cf8d..db7216124736 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2552,8 +2552,6 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, ri.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; if (status->vht_flag & RX_VHT_FLAG_80MHZ) ri.flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; - if (status->vht_flag & RX_VHT_FLAG_80P80MHZ) - ri.flags |= RATE_INFO_FLAGS_80P80_MHZ_WIDTH; if (status->vht_flag & RX_VHT_FLAG_160MHZ) ri.flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; if (status->flag & RX_FLAG_SHORT_GI) -- cgit v1.2.3-70-g09d2 From 97d910d0aaa619ca530d08e2b1125b8014ccb030 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 15 Jan 2015 16:05:21 +0100 Subject: cfg80211: remove 80+80 MHz rate reporting These rates are treated the same as 160 MHz in the spec, so it makes no sense to distinguish them. As no driver uses them yet, this is also not a problem, just remove them. In the userspace API the field remains reserved to preserve API and ABI. 
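An illustrative aside, not part of this patch: with the 80+80 flag gone, a driver that receives an 80+80 MHz VHT frame simply reports it with the 160 MHz width flag. report_vht_80p80() below is a hypothetical helper using only the rate_info fields that remain after this change (the following patch then replaces these width flags with an explicit bandwidth field):

	/* hypothetical driver helper; assumes mcs/nss were already validated */
	static void report_vht_80p80(struct rate_info *ri, u8 mcs, u8 nss)
	{
		ri->flags = RATE_INFO_FLAGS_VHT_MCS |
			    RATE_INFO_FLAGS_160_MHZ_WIDTH;	/* 80+80 counted as 160 */
		ri->mcs = mcs;
		ri->nss = nss;
	}
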
Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 8 +++----- include/uapi/linux/nl80211.h | 3 ++- net/wireless/nl80211.c | 3 --- net/wireless/util.c | 3 +-- 4 files changed, 6 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 38abc07503fd..0322048fddab 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -875,7 +875,6 @@ int cfg80211_check_station_change(struct wiphy *wiphy, * @RATE_INFO_FLAGS_VHT_MCS: mcs field filled with VHT MCS * @RATE_INFO_FLAGS_40_MHZ_WIDTH: 40 MHz width transmission * @RATE_INFO_FLAGS_80_MHZ_WIDTH: 80 MHz width transmission - * @RATE_INFO_FLAGS_80P80_MHZ_WIDTH: 80+80 MHz width transmission * @RATE_INFO_FLAGS_160_MHZ_WIDTH: 160 MHz width transmission * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval * @RATE_INFO_FLAGS_60G: 60GHz MCS @@ -885,10 +884,9 @@ enum rate_info_flags { RATE_INFO_FLAGS_VHT_MCS = BIT(1), RATE_INFO_FLAGS_40_MHZ_WIDTH = BIT(2), RATE_INFO_FLAGS_80_MHZ_WIDTH = BIT(3), - RATE_INFO_FLAGS_80P80_MHZ_WIDTH = BIT(4), - RATE_INFO_FLAGS_160_MHZ_WIDTH = BIT(5), - RATE_INFO_FLAGS_SHORT_GI = BIT(6), - RATE_INFO_FLAGS_60G = BIT(7), + RATE_INFO_FLAGS_160_MHZ_WIDTH = BIT(4), + RATE_INFO_FLAGS_SHORT_GI = BIT(5), + RATE_INFO_FLAGS_60G = BIT(6), }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index b6c1a00bd8d2..11cdb85ac646 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2278,7 +2278,8 @@ struct nl80211_sta_flag_update { * @NL80211_RATE_INFO_VHT_MCS: MCS index for VHT (u8) * @NL80211_RATE_INFO_VHT_NSS: number of streams in VHT (u8) * @NL80211_RATE_INFO_80_MHZ_WIDTH: 80 MHz VHT rate - * @NL80211_RATE_INFO_80P80_MHZ_WIDTH: 80+80 MHz VHT rate + * @NL80211_RATE_INFO_80P80_MHZ_WIDTH: unused - 80+80 is treated the + * same as 160 for purposes of the bitrates * @NL80211_RATE_INFO_160_MHZ_WIDTH: 160 MHz VHT rate * @__NL80211_RATE_INFO_AFTER_LAST: internal use */ diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 380784378df8..8998484ea970 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3614,9 +3614,6 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, if (info->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH && nla_put_flag(msg, NL80211_RATE_INFO_80_MHZ_WIDTH)) return false; - if (info->flags & RATE_INFO_FLAGS_80P80_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_80P80_MHZ_WIDTH)) - return false; if (info->flags & RATE_INFO_FLAGS_160_MHZ_WIDTH && nla_put_flag(msg, NL80211_RATE_INFO_160_MHZ_WIDTH)) return false; diff --git a/net/wireless/util.c b/net/wireless/util.c index d0ac795445b7..6942d48f1ac5 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1073,8 +1073,7 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) if (WARN_ON_ONCE(rate->mcs > 9)) return 0; - idx = rate->flags & (RATE_INFO_FLAGS_160_MHZ_WIDTH | - RATE_INFO_FLAGS_80P80_MHZ_WIDTH) ? 3 : + idx = rate->flags & RATE_INFO_FLAGS_160_MHZ_WIDTH ? 3 : rate->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH ? 2 : rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH ? 1 : 0; -- cgit v1.2.3-70-g09d2 From b51f3beecfbbfc946749a91fb444cb8917cf444f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 15 Jan 2015 16:14:02 +0100 Subject: cfg80211: change bandwidth reporting to explicit field For some reason, we made the bandwidth separate flags, which is rather confusing - a single rate cannot have different bandwidths at the same time. 
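As a hedged illustration of that confusion (not taken from the patch), nothing in the old flag-based scheme stops a driver from combining width flags:

	ri->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH |
		     RATE_INFO_FLAGS_80_MHZ_WIDTH;	/* representable, but nonsensical */

which is exactly the ambiguity addressed next.
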
Change this to no longer be flags but use a separate field for the bandwidth ('bw') instead. While at it, add support for 5 and 10 MHz rates - these are reported as regular legacy rates with their real bitrate, but tagged as 5/10 now to make it easier to distinguish them. In the nl80211 API, the flags are preserved, but the code now can also clearly only set a single one of the flags. Signed-off-by: Johannes Berg --- drivers/net/wireless/ath/ath6kl/cfg80211.c | 3 ++- drivers/net/wireless/mwifiex/cfg80211.c | 11 ++++---- include/net/cfg80211.h | 33 ++++++++++++++++++------ include/uapi/linux/nl80211.h | 8 ++++++ net/mac80211/cfg.c | 31 +++++++++++++++-------- net/mac80211/util.c | 25 +++++++++++++------ net/wireless/nl80211.c | 40 +++++++++++++++++++++--------- net/wireless/util.c | 24 ++++++++++++++---- 8 files changed, 125 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index 44dd6ef923cd..85da63a67faf 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -1827,6 +1827,7 @@ static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev, } sinfo->txrate.flags |= RATE_INFO_FLAGS_MCS; + sinfo->txrate.bw = RATE_INFO_BW_20; } else if (is_rate_ht40(rate, &mcs, &sgi)) { if (sgi) { sinfo->txrate.flags |= RATE_INFO_FLAGS_SHORT_GI; @@ -1835,7 +1836,7 @@ static int ath6kl_get_station(struct wiphy *wiphy, struct net_device *dev, sinfo->txrate.mcs = mcs; } - sinfo->txrate.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; + sinfo->txrate.bw = RATE_INFO_BW_40; sinfo->txrate.flags |= RATE_INFO_FLAGS_MCS; } else { ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, diff --git a/drivers/net/wireless/mwifiex/cfg80211.c b/drivers/net/wireless/mwifiex/cfg80211.c index 71312ff52703..1996a8b612d2 100644 --- a/drivers/net/wireless/mwifiex/cfg80211.c +++ b/drivers/net/wireless/mwifiex/cfg80211.c @@ -856,16 +856,16 @@ mwifiex_parse_htinfo(struct mwifiex_private *priv, u8 tx_htinfo, /* HT or VHT */ switch (tx_htinfo & (BIT(3) | BIT(2))) { case 0: - /* This will be 20MHz */ + rate->bw = RATE_INFO_BW_20; break; case (BIT(2)): - rate->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; + rate->bw = RATE_INFO_BW_40; break; case (BIT(3)): - rate->flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; + rate->bw = RATE_INFO_BW_80; break; case (BIT(3) | BIT(2)): - rate->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; + rate->bw = RATE_INFO_BW_160; break; } @@ -885,8 +885,9 @@ mwifiex_parse_htinfo(struct mwifiex_private *priv, u8 tx_htinfo, if ((tx_htinfo & BIT(0)) && (priv->tx_rate < 16)) { rate->mcs = priv->tx_rate; rate->flags |= RATE_INFO_FLAGS_MCS; + rate->bw = RATE_INFO_BW_20; if (tx_htinfo & BIT(1)) - rate->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; + rate->bw = RATE_INFO_BW_40; if (tx_htinfo & BIT(2)) rate->flags |= RATE_INFO_FLAGS_SHORT_GI; } diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0322048fddab..7b44ba0a7632 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -873,20 +873,35 @@ int cfg80211_check_station_change(struct wiphy *wiphy, * * @RATE_INFO_FLAGS_MCS: mcs field filled with HT MCS * @RATE_INFO_FLAGS_VHT_MCS: mcs field filled with VHT MCS - * @RATE_INFO_FLAGS_40_MHZ_WIDTH: 40 MHz width transmission - * @RATE_INFO_FLAGS_80_MHZ_WIDTH: 80 MHz width transmission - * @RATE_INFO_FLAGS_160_MHZ_WIDTH: 160 MHz width transmission * @RATE_INFO_FLAGS_SHORT_GI: 400ns guard interval * @RATE_INFO_FLAGS_60G: 60GHz MCS */ enum rate_info_flags { RATE_INFO_FLAGS_MCS = BIT(0), 
RATE_INFO_FLAGS_VHT_MCS = BIT(1), - RATE_INFO_FLAGS_40_MHZ_WIDTH = BIT(2), - RATE_INFO_FLAGS_80_MHZ_WIDTH = BIT(3), - RATE_INFO_FLAGS_160_MHZ_WIDTH = BIT(4), - RATE_INFO_FLAGS_SHORT_GI = BIT(5), - RATE_INFO_FLAGS_60G = BIT(6), + RATE_INFO_FLAGS_SHORT_GI = BIT(2), + RATE_INFO_FLAGS_60G = BIT(3), +}; + +/** + * enum rate_info_bw - rate bandwidth information + * + * Used by the driver to indicate the rate bandwidth. + * + * @RATE_INFO_BW_5: 5 MHz bandwidth + * @RATE_INFO_BW_10: 10 MHz bandwidth + * @RATE_INFO_BW_20: 20 MHz bandwidth + * @RATE_INFO_BW_40: 40 MHz bandwidth + * @RATE_INFO_BW_80: 80 MHz bandwidth + * @RATE_INFO_BW_160: 160 MHz bandwidth + */ +enum rate_info_bw { + RATE_INFO_BW_5, + RATE_INFO_BW_10, + RATE_INFO_BW_20, + RATE_INFO_BW_40, + RATE_INFO_BW_80, + RATE_INFO_BW_160, }; /** @@ -898,12 +913,14 @@ enum rate_info_flags { * @mcs: mcs index if struct describes a 802.11n bitrate * @legacy: bitrate in 100kbit/s for 802.11abg * @nss: number of streams (VHT only) + * @bw: bandwidth (from &enum rate_info_bw) */ struct rate_info { u8 flags; u8 mcs; u16 legacy; u8 nss; + u8 bw; }; /** diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 11cdb85ac646..f52797a90816 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2281,6 +2281,12 @@ struct nl80211_sta_flag_update { * @NL80211_RATE_INFO_80P80_MHZ_WIDTH: unused - 80+80 is treated the * same as 160 for purposes of the bitrates * @NL80211_RATE_INFO_160_MHZ_WIDTH: 160 MHz VHT rate + * @NL80211_RATE_INFO_10_MHZ_WIDTH: 10 MHz width - note that this is + * a legacy rate and will be reported as the actual bitrate, i.e. + * half the base (20 MHz) rate + * @NL80211_RATE_INFO_5_MHZ_WIDTH: 5 MHz width - note that this is + * a legacy rate and will be reported as the actual bitrate, i.e. 
+ * a quarter of the base (20 MHz) rate * @__NL80211_RATE_INFO_AFTER_LAST: internal use */ enum nl80211_rate_info { @@ -2295,6 +2301,8 @@ enum nl80211_rate_info { NL80211_RATE_INFO_80_MHZ_WIDTH, NL80211_RATE_INFO_80P80_MHZ_WIDTH, NL80211_RATE_INFO_160_MHZ_WIDTH, + NL80211_RATE_INFO_10_MHZ_WIDTH, + NL80211_RATE_INFO_5_MHZ_WIDTH, /* keep last */ __NL80211_RATE_INFO_AFTER_LAST, diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 6d5076fbf87a..ff090ef1ea2c 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -428,11 +428,13 @@ void sta_set_rate_info_tx(struct sta_info *sta, rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); } if (rate->flags & IEEE80211_TX_RC_40_MHZ_WIDTH) - rinfo->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; - if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) - rinfo->flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; - if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) - rinfo->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; + rinfo->bw = RATE_INFO_BW_40; + else if (rate->flags & IEEE80211_TX_RC_80_MHZ_WIDTH) + rinfo->bw = RATE_INFO_BW_80; + else if (rate->flags & IEEE80211_TX_RC_160_MHZ_WIDTH) + rinfo->bw = RATE_INFO_BW_160; + else + rinfo->bw = RATE_INFO_BW_20; if (rate->flags & IEEE80211_TX_RC_SHORT_GI) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; } @@ -459,14 +461,21 @@ void sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo) rinfo->legacy = DIV_ROUND_UP(brate, 1 << shift); } - if (sta->last_rx_rate_flag & RX_FLAG_40MHZ) - rinfo->flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; if (sta->last_rx_rate_flag & RX_FLAG_SHORT_GI) rinfo->flags |= RATE_INFO_FLAGS_SHORT_GI; - if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80MHZ) - rinfo->flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; - if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_160MHZ) - rinfo->flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; + + if (sta->last_rx_rate_flag & RX_FLAG_5MHZ) + rinfo->bw = RATE_INFO_BW_5; + else if (sta->last_rx_rate_flag & RX_FLAG_10MHZ) + rinfo->bw = RATE_INFO_BW_10; + else if (sta->last_rx_rate_flag & RX_FLAG_40MHZ) + rinfo->bw = RATE_INFO_BW_40; + else if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_80MHZ) + rinfo->bw = RATE_INFO_BW_80; + else if (sta->last_rx_rate_vht_flag & RX_VHT_FLAG_160MHZ) + rinfo->bw = RATE_INFO_BW_160; + else + rinfo->bw = RATE_INFO_BW_20; } static int ieee80211_dump_station(struct wiphy *wiphy, struct net_device *dev, diff --git a/net/mac80211/util.c b/net/mac80211/util.c index db7216124736..fbd37d43dfce 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -2541,7 +2541,9 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, ri.mcs = status->rate_idx; ri.flags |= RATE_INFO_FLAGS_MCS; if (status->flag & RX_FLAG_40MHZ) - ri.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; + ri.bw = RATE_INFO_BW_40; + else + ri.bw = RATE_INFO_BW_20; if (status->flag & RX_FLAG_SHORT_GI) ri.flags |= RATE_INFO_FLAGS_SHORT_GI; } else if (status->flag & RX_FLAG_VHT) { @@ -2549,11 +2551,13 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, ri.mcs = status->rate_idx; ri.nss = status->vht_nss; if (status->flag & RX_FLAG_40MHZ) - ri.flags |= RATE_INFO_FLAGS_40_MHZ_WIDTH; - if (status->vht_flag & RX_VHT_FLAG_80MHZ) - ri.flags |= RATE_INFO_FLAGS_80_MHZ_WIDTH; - if (status->vht_flag & RX_VHT_FLAG_160MHZ) - ri.flags |= RATE_INFO_FLAGS_160_MHZ_WIDTH; + ri.bw = RATE_INFO_BW_40; + else if (status->vht_flag & RX_VHT_FLAG_80MHZ) + ri.bw = RATE_INFO_BW_80; + else if (status->vht_flag & RX_VHT_FLAG_160MHZ) + ri.bw = RATE_INFO_BW_160; + else + ri.bw = RATE_INFO_BW_20; if (status->flag & RX_FLAG_SHORT_GI) 
ri.flags |= RATE_INFO_FLAGS_SHORT_GI; } else { @@ -2561,10 +2565,15 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local, int shift = 0; int bitrate; - if (status->flag & RX_FLAG_10MHZ) + if (status->flag & RX_FLAG_10MHZ) { shift = 1; - if (status->flag & RX_FLAG_5MHZ) + ri.bw = RATE_INFO_BW_10; + } else if (status->flag & RX_FLAG_5MHZ) { shift = 2; + ri.bw = RATE_INFO_BW_5; + } else { + ri.bw = RATE_INFO_BW_20; + } sband = local->hw.wiphy->bands[status->band]; bitrate = sband->bitrates[status->rate_idx].bitrate; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 8998484ea970..8e56eeb583aa 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3578,6 +3578,7 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, struct nlattr *rate; u32 bitrate; u16 bitrate_compat; + enum nl80211_attrs rate_flg; rate = nla_nest_start(msg, attr); if (!rate) @@ -3594,12 +3595,36 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, nla_put_u16(msg, NL80211_RATE_INFO_BITRATE, bitrate_compat)) return false; + switch (info->bw) { + case RATE_INFO_BW_5: + rate_flg = NL80211_RATE_INFO_5_MHZ_WIDTH; + break; + case RATE_INFO_BW_10: + rate_flg = NL80211_RATE_INFO_10_MHZ_WIDTH; + break; + default: + WARN_ON(1); + /* fall through */ + case RATE_INFO_BW_20: + rate_flg = 0; + break; + case RATE_INFO_BW_40: + rate_flg = NL80211_RATE_INFO_40_MHZ_WIDTH; + break; + case RATE_INFO_BW_80: + rate_flg = NL80211_RATE_INFO_80_MHZ_WIDTH; + break; + case RATE_INFO_BW_160: + rate_flg = NL80211_RATE_INFO_160_MHZ_WIDTH; + break; + } + + if (rate_flg && nla_put_flag(msg, rate_flg)) + return false; + if (info->flags & RATE_INFO_FLAGS_MCS) { if (nla_put_u8(msg, NL80211_RATE_INFO_MCS, info->mcs)) return false; - if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_40_MHZ_WIDTH)) - return false; if (info->flags & RATE_INFO_FLAGS_SHORT_GI && nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) return false; @@ -3608,15 +3633,6 @@ static bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info, return false; if (nla_put_u8(msg, NL80211_RATE_INFO_VHT_NSS, info->nss)) return false; - if (info->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_40_MHZ_WIDTH)) - return false; - if (info->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_80_MHZ_WIDTH)) - return false; - if (info->flags & RATE_INFO_FLAGS_160_MHZ_WIDTH && - nla_put_flag(msg, NL80211_RATE_INFO_160_MHZ_WIDTH)) - return false; if (info->flags & RATE_INFO_FLAGS_SHORT_GI && nla_put_flag(msg, NL80211_RATE_INFO_SHORT_GI)) return false; diff --git a/net/wireless/util.c b/net/wireless/util.c index 6942d48f1ac5..3535e8ade48f 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -1073,9 +1073,24 @@ static u32 cfg80211_calculate_bitrate_vht(struct rate_info *rate) if (WARN_ON_ONCE(rate->mcs > 9)) return 0; - idx = rate->flags & RATE_INFO_FLAGS_160_MHZ_WIDTH ? 3 : - rate->flags & RATE_INFO_FLAGS_80_MHZ_WIDTH ? 2 : - rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH ? 
1 : 0; + switch (rate->bw) { + case RATE_INFO_BW_160: + idx = 3; + break; + case RATE_INFO_BW_80: + idx = 2; + break; + case RATE_INFO_BW_40: + idx = 1; + break; + case RATE_INFO_BW_5: + case RATE_INFO_BW_10: + default: + WARN_ON(1); + /* fall through */ + case RATE_INFO_BW_20: + idx = 0; + } bitrate = base[idx][rate->mcs]; bitrate *= rate->nss; @@ -1106,8 +1121,7 @@ u32 cfg80211_calculate_bitrate(struct rate_info *rate) modulation = rate->mcs & 7; streams = (rate->mcs >> 3) + 1; - bitrate = (rate->flags & RATE_INFO_FLAGS_40_MHZ_WIDTH) ? - 13500000 : 6500000; + bitrate = (rate->bw == RATE_INFO_BW_40) ? 13500000 : 6500000; if (modulation < 4) bitrate *= (modulation + 1); -- cgit v1.2.3-70-g09d2 From 5055c371bfd53fd369b895051b541318c2bad495 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 14 Jan 2015 15:17:06 -0800 Subject: ipv4: per cpu uncached list RAW sockets with hdrinc suffer from contention on rt_uncached_lock spinlock. One solution is to use percpu lists, since most routes are destroyed by the cpu that created them. It is unclear why we even have to put these routes in uncached_list, as all outgoing packets should be freed when a device is dismantled. Signed-off-by: Eric Dumazet Fixes: caacf05e5ad1 ("ipv4: Properly purge netdev references on uncached routes.") Signed-off-by: David S. Miller --- include/net/route.h | 2 ++ net/ipv4/route.c | 46 +++++++++++++++++++++++++++++++++------------- 2 files changed, 35 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/include/net/route.h b/include/net/route.h index b17cf28f996e..fe22d03afb6a 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -46,6 +46,7 @@ struct fib_nh; struct fib_info; +struct uncached_list; struct rtable { struct dst_entry dst; @@ -64,6 +65,7 @@ struct rtable { u32 rt_pmtu; struct list_head rt_uncached; + struct uncached_list *rt_uncached_list; }; static inline bool rt_is_input_route(const struct rtable *rt) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 6a2155b02602..ce112d0f2698 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1325,14 +1325,22 @@ static bool rt_cache_route(struct fib_nh *nh, struct rtable *rt) return ret; } -static DEFINE_SPINLOCK(rt_uncached_lock); -static LIST_HEAD(rt_uncached_list); +struct uncached_list { + spinlock_t lock; + struct list_head head; +}; + +static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt_uncached_list); static void rt_add_uncached_list(struct rtable *rt) { - spin_lock_bh(&rt_uncached_lock); - list_add_tail(&rt->rt_uncached, &rt_uncached_list); - spin_unlock_bh(&rt_uncached_lock); + struct uncached_list *ul = raw_cpu_ptr(&rt_uncached_list); + + rt->rt_uncached_list = ul; + + spin_lock_bh(&ul->lock); + list_add_tail(&rt->rt_uncached, &ul->head); + spin_unlock_bh(&ul->lock); } static void ipv4_dst_destroy(struct dst_entry *dst) @@ -1340,27 +1348,32 @@ static void ipv4_dst_destroy(struct dst_entry *dst) struct rtable *rt = (struct rtable *) dst; if (!list_empty(&rt->rt_uncached)) { - spin_lock_bh(&rt_uncached_lock); + struct uncached_list *ul = rt->rt_uncached_list; + + spin_lock_bh(&ul->lock); list_del(&rt->rt_uncached); - spin_unlock_bh(&rt_uncached_lock); + spin_unlock_bh(&ul->lock); } } void rt_flush_dev(struct net_device *dev) { - if (!list_empty(&rt_uncached_list)) { - struct net *net = dev_net(dev); - struct rtable *rt; + struct net *net = dev_net(dev); + struct rtable *rt; + int cpu; + + for_each_possible_cpu(cpu) { + struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); - spin_lock_bh(&rt_uncached_lock); - 
list_for_each_entry(rt, &rt_uncached_list, rt_uncached) { + spin_lock_bh(&ul->lock); + list_for_each_entry(rt, &ul->head, rt_uncached) { if (rt->dst.dev != dev) continue; rt->dst.dev = net->loopback_dev; dev_hold(rt->dst.dev); dev_put(dev); } - spin_unlock_bh(&rt_uncached_lock); + spin_unlock_bh(&ul->lock); } } @@ -2717,6 +2730,7 @@ struct ip_rt_acct __percpu *ip_rt_acct __read_mostly; int __init ip_rt_init(void) { int rc = 0; + int cpu; ip_idents = kmalloc(IP_IDENTS_SZ * sizeof(*ip_idents), GFP_KERNEL); if (!ip_idents) @@ -2724,6 +2738,12 @@ int __init ip_rt_init(void) prandom_bytes(ip_idents, IP_IDENTS_SZ * sizeof(*ip_idents)); + for_each_possible_cpu(cpu) { + struct uncached_list *ul = &per_cpu(rt_uncached_list, cpu); + + INIT_LIST_HEAD(&ul->head); + spin_lock_init(&ul->lock); + } #ifdef CONFIG_IP_ROUTE_CLASSID ip_rt_acct = __alloc_percpu(256 * sizeof(struct ip_rt_acct), __alignof__(struct ip_rt_acct)); if (!ip_rt_acct) -- cgit v1.2.3-70-g09d2 From 57699a40b4f2694d3ee63fd5e6465ec8f600b620 Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Fri, 16 Jan 2015 11:13:09 +0800 Subject: rhashtable: Fix race in rhashtable_destroy() and use regular work_struct When we put our declared work task in the global workqueue with schedule_delayed_work(), its delay parameter is always zero. Therefore, we should define a regular work item in the rhashtable structure instead of a delayed work. We also add a check that the resizing functions are non-NULL before cancelling the work, to avoid cancelling an uninitialized work item. Lastly, rhashtable_destroy() holds ht->mutex while it waits with cancel_delayed_work() for previously submitted work items to complete. cancel_delayed_work() does not return until all pending work items have run, and the work function - rht_deferred_worker() - also needs to acquire that same mutex, so a deadlock can occur because the lock is already held. Moving the cancel call outside the locked region avoids the deadlock. Fixes: 97defe1 ("rhashtable: Per bucket locks & deferred expansion/shrinking") Signed-off-by: Ying Xue Cc: Thomas Graf Acked-by: Thomas Graf Signed-off-by: David S.
Miller --- include/linux/rhashtable.h | 2 +- lib/rhashtable.c | 12 ++++++------ 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 9570832ab07c..a2562ed53ea3 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -119,7 +119,7 @@ struct rhashtable { atomic_t nelems; atomic_t shift; struct rhashtable_params p; - struct delayed_work run_work; + struct work_struct run_work; struct mutex mutex; bool being_destroyed; }; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index aca699813ba9..84a78e396a56 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -485,7 +485,7 @@ static void rht_deferred_worker(struct work_struct *work) struct rhashtable *ht; struct bucket_table *tbl; - ht = container_of(work, struct rhashtable, run_work.work); + ht = container_of(work, struct rhashtable, run_work); mutex_lock(&ht->mutex); tbl = rht_dereference(ht->tbl, ht); @@ -507,7 +507,7 @@ static void rhashtable_wakeup_worker(struct rhashtable *ht) if (tbl == new_tbl && ((ht->p.grow_decision && ht->p.grow_decision(ht, size)) || (ht->p.shrink_decision && ht->p.shrink_decision(ht, size)))) - schedule_delayed_work(&ht->run_work, 0); + schedule_work(&ht->run_work); } static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, @@ -903,7 +903,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); if (ht->p.grow_decision || ht->p.shrink_decision) - INIT_DEFERRABLE_WORK(&ht->run_work, rht_deferred_worker); + INIT_WORK(&ht->run_work, rht_deferred_worker); return 0; } @@ -921,11 +921,11 @@ void rhashtable_destroy(struct rhashtable *ht) { ht->being_destroyed = true; - mutex_lock(&ht->mutex); + if (ht->p.grow_decision || ht->p.shrink_decision) + cancel_work_sync(&ht->run_work); - cancel_delayed_work(&ht->run_work); + mutex_lock(&ht->mutex); bucket_table_free(rht_dereference(ht->tbl, ht)); - mutex_unlock(&ht->mutex); } EXPORT_SYMBOL_GPL(rhashtable_destroy); -- cgit v1.2.3-70-g09d2 From 0026b6551b51a9520b912f41b8d447b89a825f5a Mon Sep 17 00:00:00 2001 From: Rickard Strandqvist Date: Sun, 4 Jan 2015 16:55:14 +0100 Subject: Bluetooth: Remove unused function Remove the function hci_conn_change_link_key() that is not used anywhere. This was partially found by using a static code analysis program called cppcheck. 
Signed-off-by: Rickard Strandqvist Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_conn.c | 15 --------------- 2 files changed, 16 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1f21fe48b38e..7777124bff55 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -781,7 +781,6 @@ int hci_conn_check_link_mode(struct hci_conn *conn); int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level); int hci_conn_security(struct hci_conn *conn, __u8 sec_level, __u8 auth_type, bool initiator); -int hci_conn_change_link_key(struct hci_conn *conn); int hci_conn_switch_role(struct hci_conn *conn, __u8 role); void hci_conn_enter_active_mode(struct hci_conn *conn, __u8 force_active); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 2e724e0b75b9..c9b8fa544785 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -1084,21 +1084,6 @@ int hci_conn_check_secure(struct hci_conn *conn, __u8 sec_level) } EXPORT_SYMBOL(hci_conn_check_secure); -/* Change link key */ -int hci_conn_change_link_key(struct hci_conn *conn) -{ - BT_DBG("hcon %p", conn); - - if (!test_and_set_bit(HCI_CONN_AUTH_PEND, &conn->flags)) { - struct hci_cp_change_conn_link_key cp; - cp.handle = cpu_to_le16(conn->handle); - hci_send_cmd(conn->hdev, HCI_OP_CHANGE_CONN_LINK_KEY, - sizeof(cp), &cp); - } - - return 0; -} - /* Switch role */ int hci_conn_switch_role(struct hci_conn *conn, __u8 role) { -- cgit v1.2.3-70-g09d2 From d23b8ad8ab23f5a18b91e2396fb63d10f66b08d6 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 15 Jan 2015 09:52:39 +0100 Subject: tc: add BPF based action This action provides a possibility to exec custom BPF code. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/tc_act/tc_bpf.h | 25 +++++ include/uapi/linux/tc_act/Kbuild | 1 + include/uapi/linux/tc_act/tc_bpf.h | 31 ++++++ net/sched/Kconfig | 12 +++ net/sched/Makefile | 1 + net/sched/act_bpf.c | 205 +++++++++++++++++++++++++++++++++++++ 6 files changed, 275 insertions(+) create mode 100644 include/net/tc_act/tc_bpf.h create mode 100644 include/uapi/linux/tc_act/tc_bpf.h create mode 100644 net/sched/act_bpf.c (limited to 'include') diff --git a/include/net/tc_act/tc_bpf.h b/include/net/tc_act/tc_bpf.h new file mode 100644 index 000000000000..86a070ffc930 --- /dev/null +++ b/include/net/tc_act/tc_bpf.h @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2015 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#ifndef __NET_TC_BPF_H +#define __NET_TC_BPF_H + +#include +#include + +struct tcf_bpf { + struct tcf_common common; + struct bpf_prog *filter; + struct sock_filter *bpf_ops; + u16 bpf_num_ops; +}; +#define to_bpf(a) \ + container_of(a->priv, struct tcf_bpf, common) + +#endif /* __NET_TC_BPF_H */ diff --git a/include/uapi/linux/tc_act/Kbuild b/include/uapi/linux/tc_act/Kbuild index b057da2b87a4..19d5219b0b99 100644 --- a/include/uapi/linux/tc_act/Kbuild +++ b/include/uapi/linux/tc_act/Kbuild @@ -8,3 +8,4 @@ header-y += tc_nat.h header-y += tc_pedit.h header-y += tc_skbedit.h header-y += tc_vlan.h +header-y += tc_bpf.h diff --git a/include/uapi/linux/tc_act/tc_bpf.h b/include/uapi/linux/tc_act/tc_bpf.h new file mode 100644 index 000000000000..5288bd77e63b --- /dev/null +++ b/include/uapi/linux/tc_act/tc_bpf.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2015 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __LINUX_TC_BPF_H +#define __LINUX_TC_BPF_H + +#include + +#define TCA_ACT_BPF 13 + +struct tc_act_bpf { + tc_gen; +}; + +enum { + TCA_ACT_BPF_UNSPEC, + TCA_ACT_BPF_TM, + TCA_ACT_BPF_PARMS, + TCA_ACT_BPF_OPS_LEN, + TCA_ACT_BPF_OPS, + __TCA_ACT_BPF_MAX, +}; +#define TCA_ACT_BPF_MAX (__TCA_ACT_BPF_MAX - 1) + +#endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index c54c9d9d1ffb..466943551581 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -698,6 +698,18 @@ config NET_ACT_VLAN To compile this code as a module, choose M here: the module will be called act_vlan. +config NET_ACT_BPF + tristate "BPF based action" + depends on NET_CLS_ACT + ---help--- + Say Y here to execute BPF code on packets. The BPF code will decide + if the packet should be dropped or not. + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_bpf. + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index 679f24ae7f93..7ca2b4e76312 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -17,6 +17,7 @@ obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o +obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_bpf.c b/net/sched/act_bpf.c new file mode 100644 index 000000000000..1bd257e473a9 --- /dev/null +++ b/net/sched/act_bpf.c @@ -0,0 +1,205 @@ +/* + * Copyright (c) 2015 Jiri Pirko + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#define BPF_TAB_MASK 15 + +static int tcf_bpf(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + struct tcf_bpf *b = a->priv; + int action; + int filter_res; + + spin_lock(&b->tcf_lock); + b->tcf_tm.lastuse = jiffies; + bstats_update(&b->tcf_bstats, skb); + action = b->tcf_action; + + filter_res = BPF_PROG_RUN(b->filter, skb); + if (filter_res == 0) { + /* Return code 0 from the BPF program + * is being interpreted as a drop here. + */ + action = TC_ACT_SHOT; + b->tcf_qstats.drops++; + } + + spin_unlock(&b->tcf_lock); + return action; +} + +static int tcf_bpf_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *tp = skb_tail_pointer(skb); + struct tcf_bpf *b = a->priv; + struct tc_act_bpf opt = { + .index = b->tcf_index, + .refcnt = b->tcf_refcnt - ref, + .bindcnt = b->tcf_bindcnt - bind, + .action = b->tcf_action, + }; + struct tcf_t t; + struct nlattr *nla; + + if (nla_put(skb, TCA_ACT_BPF_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + if (nla_put_u16(skb, TCA_ACT_BPF_OPS_LEN, b->bpf_num_ops)) + goto nla_put_failure; + + nla = nla_reserve(skb, TCA_ACT_BPF_OPS, b->bpf_num_ops * + sizeof(struct sock_filter)); + if (!nla) + goto nla_put_failure; + + memcpy(nla_data(nla), b->bpf_ops, nla_len(nla)); + + t.install = jiffies_to_clock_t(jiffies - b->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - b->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(b->tcf_tm.expires); + if (nla_put(skb, TCA_ACT_BPF_TM, sizeof(t), &t)) + goto nla_put_failure; + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, tp); + return -1; +} + +static const struct nla_policy act_bpf_policy[TCA_ACT_BPF_MAX + 1] = { + [TCA_ACT_BPF_PARMS] = { .len = sizeof(struct tc_act_bpf) }, + [TCA_ACT_BPF_OPS_LEN] = { .type = NLA_U16 }, + [TCA_ACT_BPF_OPS] = { .type = NLA_BINARY, + .len = sizeof(struct sock_filter) * BPF_MAXINSNS }, +}; + +static int tcf_bpf_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) +{ + struct nlattr *tb[TCA_ACT_BPF_MAX + 1]; + struct tc_act_bpf *parm; + struct tcf_bpf *b; + u16 bpf_size, bpf_num_ops; + struct sock_filter *bpf_ops; + struct sock_fprog_kern tmp; + struct bpf_prog *fp; + int ret; + + if (!nla) + return -EINVAL; + + ret = nla_parse_nested(tb, TCA_ACT_BPF_MAX, nla, act_bpf_policy); + if (ret < 0) + return ret; + + if (!tb[TCA_ACT_BPF_PARMS] || + !tb[TCA_ACT_BPF_OPS_LEN] || !tb[TCA_ACT_BPF_OPS]) + return -EINVAL; + parm = nla_data(tb[TCA_ACT_BPF_PARMS]); + + bpf_num_ops = nla_get_u16(tb[TCA_ACT_BPF_OPS_LEN]); + if (bpf_num_ops > BPF_MAXINSNS || bpf_num_ops == 0) + return -EINVAL; + + bpf_size = bpf_num_ops * sizeof(*bpf_ops); + bpf_ops = kzalloc(bpf_size, GFP_KERNEL); + if (!bpf_ops) + return -ENOMEM; + + memcpy(bpf_ops, nla_data(tb[TCA_ACT_BPF_OPS]), bpf_size); + + tmp.len = bpf_num_ops; + tmp.filter = bpf_ops; + + ret = bpf_prog_create(&fp, &tmp); + if (ret) + goto free_bpf_ops; + + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*b), bind); + if (ret) + goto destroy_fp; + + ret = ACT_P_CREATED; + } else { + if (bind) + goto destroy_fp; + tcf_hash_release(a, bind); + if (!ovr) { + ret = -EEXIST; + goto destroy_fp; + } + } + + b = to_bpf(a); + spin_lock_bh(&b->tcf_lock); + b->tcf_action = parm->action; + b->bpf_num_ops = bpf_num_ops; + b->bpf_ops = bpf_ops; + b->filter = fp; + 
spin_unlock_bh(&b->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(a); + return ret; + +destroy_fp: + bpf_prog_destroy(fp); +free_bpf_ops: + kfree(bpf_ops); + return ret; +} + +static void tcf_bpf_cleanup(struct tc_action *a, int bind) +{ + struct tcf_bpf *b = a->priv; + + bpf_prog_destroy(b->filter); +} + +static struct tc_action_ops act_bpf_ops = { + .kind = "bpf", + .type = TCA_ACT_BPF, + .owner = THIS_MODULE, + .act = tcf_bpf, + .dump = tcf_bpf_dump, + .cleanup = tcf_bpf_cleanup, + .init = tcf_bpf_init, +}; + +static int __init bpf_init_module(void) +{ + return tcf_register_action(&act_bpf_ops, BPF_TAB_MASK); +} + +static void __exit bpf_cleanup_module(void) +{ + tcf_unregister_action(&act_bpf_ops); +} + +module_init(bpf_init_module); +module_exit(bpf_cleanup_module); + +MODULE_AUTHOR("Jiri Pirko "); +MODULE_DESCRIPTION("TC BPF based action"); +MODULE_LICENSE("GPL v2"); -- cgit v1.2.3-70-g09d2 From 03bf0c281234028388108d0aee720954f5fe6924 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 15 Jan 2015 23:49:36 +0100 Subject: switchdev: introduce switchdev notifier This patch introduces new notifier for purposes of exposing events which happen on switch driver side. The consumers of the event messages are mainly involved masters, namely bridge and ovs. Suggested-by: Thomas Graf Signed-off-by: Jiri Pirko Signed-off-by: Scott Feldman Signed-off-by: David S. Miller --- include/net/switchdev.h | 31 ++++++++++++++++++++++ net/switchdev/switchdev.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 96 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 8a6d1641fd9b..7f8d74372d87 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -11,12 +11,27 @@ #define _LINUX_SWITCHDEV_H_ #include +#include + +struct netdev_switch_notifier_info { + struct net_device *dev; +}; + +static inline struct net_device * +netdev_switch_notifier_info_to_dev(const struct netdev_switch_notifier_info *info) +{ + return info->dev; +} #ifdef CONFIG_NET_SWITCHDEV int netdev_switch_parent_id_get(struct net_device *dev, struct netdev_phys_item_id *psid); int netdev_switch_port_stp_update(struct net_device *dev, u8 state); +int register_netdev_switch_notifier(struct notifier_block *nb); +int unregister_netdev_switch_notifier(struct notifier_block *nb); +int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, + struct netdev_switch_notifier_info *info); #else @@ -32,6 +47,22 @@ static inline int netdev_switch_port_stp_update(struct net_device *dev, return -EOPNOTSUPP; } +static inline int register_netdev_switch_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int unregister_netdev_switch_notifier(struct notifier_block *nb) +{ + return 0; +} + +static inline int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, + struct netdev_switch_notifier_info *info); +{ + return NOTIFY_DONE; +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index d162b21b14bd..22e02f4edd99 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include #include @@ -50,3 +52,66 @@ int netdev_switch_port_stp_update(struct net_device *dev, u8 state) return ops->ndo_switch_port_stp_update(dev, state); } EXPORT_SYMBOL(netdev_switch_port_stp_update); + +static DEFINE_MUTEX(netdev_switch_mutex); +static RAW_NOTIFIER_HEAD(netdev_switch_notif_chain); + +/** + * 
register_netdev_switch_notifier - Register nofifier + * @nb: notifier_block + * + * Register switch device notifier. This should be used by code + * which needs to monitor events happening in particular device. + * Return values are same as for atomic_notifier_chain_register(). + */ +int register_netdev_switch_notifier(struct notifier_block *nb) +{ + int err; + + mutex_lock(&netdev_switch_mutex); + err = raw_notifier_chain_register(&netdev_switch_notif_chain, nb); + mutex_unlock(&netdev_switch_mutex); + return err; +} +EXPORT_SYMBOL(register_netdev_switch_notifier); + +/** + * unregister_netdev_switch_notifier - Unregister nofifier + * @nb: notifier_block + * + * Unregister switch device notifier. + * Return values are same as for atomic_notifier_chain_unregister(). + */ +int unregister_netdev_switch_notifier(struct notifier_block *nb) +{ + int err; + + mutex_lock(&netdev_switch_mutex); + err = raw_notifier_chain_unregister(&netdev_switch_notif_chain, nb); + mutex_unlock(&netdev_switch_mutex); + return err; +} +EXPORT_SYMBOL(unregister_netdev_switch_notifier); + +/** + * call_netdev_switch_notifiers - Call nofifiers + * @val: value passed unmodified to notifier function + * @dev: port device + * @info: notifier information data + * + * Call all network notifier blocks. This should be called by driver + * when it needs to propagate hardware event. + * Return values are same as for atomic_notifier_call_chain(). + */ +int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, + struct netdev_switch_notifier_info *info) +{ + int err; + + info->dev = dev; + mutex_lock(&netdev_switch_mutex); + err = raw_notifier_call_chain(&netdev_switch_notif_chain, val, info); + mutex_unlock(&netdev_switch_mutex); + return err; +} +EXPORT_SYMBOL(call_netdev_switch_notifiers); -- cgit v1.2.3-70-g09d2 From 3aeb66176ffa8fefd7a9f7d37bda1d8adcf469a1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 15 Jan 2015 23:49:37 +0100 Subject: net: replace br_fdb_external_learn_* calls with switchdev notifier events This patch benefits from newly introduced switchdev notifier and uses it to propagate fdb learn events from rocker driver to bridge. That avoids direct function calls and possible use by other listeners (ovs). Suggested-by: Thomas Graf Signed-off-by: Jiri Pirko Signed-off-by: Scott Feldman Signed-off-by: David S. 
Miller --- drivers/net/ethernet/rocker/rocker.c | 10 +++++-- include/linux/if_bridge.h | 18 ------------- include/net/switchdev.h | 11 ++++++++ net/bridge/br.c | 52 +++++++++++++++++++++++++++++++++++- net/bridge/br_fdb.c | 38 +++----------------------- net/bridge/br_private.h | 4 +++ 6 files changed, 78 insertions(+), 55 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index cad8cf962cdf..964d719b150f 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -3026,11 +3026,17 @@ static void rocker_port_fdb_learn_work(struct work_struct *work) container_of(work, struct rocker_fdb_learn_work, work); bool removing = (lw->flags & ROCKER_OP_FLAG_REMOVE); bool learned = (lw->flags & ROCKER_OP_FLAG_LEARNED); + struct netdev_switch_notifier_fdb_info info; + + info.addr = lw->addr; + info.vid = lw->vid; if (learned && removing) - br_fdb_external_learn_del(lw->dev, lw->addr, lw->vid); + call_netdev_switch_notifiers(NETDEV_SWITCH_FDB_DEL, + lw->dev, &info.info); else if (learned && !removing) - br_fdb_external_learn_add(lw->dev, lw->addr, lw->vid); + call_netdev_switch_notifiers(NETDEV_SWITCH_FDB_ADD, + lw->dev, &info.info); kfree(work); } diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 0a8ce762a47f..a57bca2ea97e 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -50,24 +50,6 @@ extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __use typedef int br_should_route_hook_t(struct sk_buff *skb); extern br_should_route_hook_t __rcu *br_should_route_hook; -#if IS_ENABLED(CONFIG_BRIDGE) -int br_fdb_external_learn_add(struct net_device *dev, - const unsigned char *addr, u16 vid); -int br_fdb_external_learn_del(struct net_device *dev, - const unsigned char *addr, u16 vid); -#else -static inline int br_fdb_external_learn_add(struct net_device *dev, - const unsigned char *addr, u16 vid) -{ - return 0; -} -static inline int br_fdb_external_learn_del(struct net_device *dev, - const unsigned char *addr, u16 vid) -{ - return 0; -} -#endif - #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_IGMP_SNOOPING) int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list); diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 7f8d74372d87..201120e18e4d 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -13,10 +13,21 @@ #include #include +enum netdev_switch_notifier_type { + NETDEV_SWITCH_FDB_ADD = 1, + NETDEV_SWITCH_FDB_DEL, +}; + struct netdev_switch_notifier_info { struct net_device *dev; }; +struct netdev_switch_notifier_fdb_info { + struct netdev_switch_notifier_info info; /* must be first */ + const unsigned char *addr; + u16 vid; +}; + static inline struct net_device * netdev_switch_notifier_info_to_dev(const struct netdev_switch_notifier_info *info) { diff --git a/net/bridge/br.c b/net/bridge/br.c index 44425aff7cba..fb57ab6b24f9 100644 --- a/net/bridge/br.c +++ b/net/bridge/br.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "br_private.h" @@ -120,6 +121,48 @@ static struct notifier_block br_device_notifier = { .notifier_call = br_device_event }; +static int br_netdev_switch_event(struct notifier_block *unused, + unsigned long event, void *ptr) +{ + struct net_device *dev = netdev_switch_notifier_info_to_dev(ptr); + struct net_bridge_port *p; + struct net_bridge *br; + struct netdev_switch_notifier_fdb_info *fdb_info; + int err = NOTIFY_DONE; + + 
rtnl_lock(); + p = br_port_get_rtnl(dev); + if (!p) + goto out; + + br = p->br; + + switch (event) { + case NETDEV_SWITCH_FDB_ADD: + fdb_info = ptr; + err = br_fdb_external_learn_add(br, p, fdb_info->addr, + fdb_info->vid); + if (err) + err = notifier_from_errno(err); + break; + case NETDEV_SWITCH_FDB_DEL: + fdb_info = ptr; + err = br_fdb_external_learn_del(br, p, fdb_info->addr, + fdb_info->vid); + if (err) + err = notifier_from_errno(err); + break; + } + +out: + rtnl_unlock(); + return err; +} + +static struct notifier_block br_netdev_switch_notifier = { + .notifier_call = br_netdev_switch_event, +}; + static void __net_exit br_net_exit(struct net *net) { struct net_device *dev; @@ -169,10 +212,14 @@ static int __init br_init(void) if (err) goto err_out3; - err = br_netlink_init(); + err = register_netdev_switch_notifier(&br_netdev_switch_notifier); if (err) goto err_out4; + err = br_netlink_init(); + if (err) + goto err_out5; + brioctl_set(br_ioctl_deviceless_stub); #if IS_ENABLED(CONFIG_ATM_LANE) @@ -185,6 +232,8 @@ static int __init br_init(void) return 0; +err_out5: + unregister_netdev_switch_notifier(&br_netdev_switch_notifier); err_out4: unregister_netdevice_notifier(&br_device_notifier); err_out3: @@ -202,6 +251,7 @@ static void __exit br_deinit(void) { stp_proto_unregister(&br_stp_proto); br_netlink_fini(); + unregister_netdev_switch_notifier(&br_netdev_switch_notifier); unregister_netdevice_notifier(&br_device_notifier); brioctl_set(NULL); unregister_pernet_subsys(&br_net_ops); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index e6e0372bc3cd..03667e65cc29 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -990,26 +990,14 @@ void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p) } } -int br_fdb_external_learn_add(struct net_device *dev, +int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct net_bridge_port *p; - struct net_bridge *br; struct hlist_head *head; struct net_bridge_fdb_entry *fdb; int err = 0; - rtnl_lock(); - - p = br_port_get_rtnl(dev); - if (!p) { - pr_info("bridge: %s not a bridge port\n", dev->name); - err = -EINVAL; - goto err_rtnl_unlock; - } - - br = p->br; - + ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); head = &br->hash[br_mac_hash(addr, vid)]; @@ -1034,33 +1022,18 @@ int br_fdb_external_learn_add(struct net_device *dev, err_unlock: spin_unlock_bh(&br->hash_lock); -err_rtnl_unlock: - rtnl_unlock(); return err; } -EXPORT_SYMBOL(br_fdb_external_learn_add); -int br_fdb_external_learn_del(struct net_device *dev, +int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, const unsigned char *addr, u16 vid) { - struct net_bridge_port *p; - struct net_bridge *br; struct hlist_head *head; struct net_bridge_fdb_entry *fdb; int err = 0; - rtnl_lock(); - - p = br_port_get_rtnl(dev); - if (!p) { - pr_info("bridge: %s not a bridge port\n", dev->name); - err = -EINVAL; - goto err_rtnl_unlock; - } - - br = p->br; - + ASSERT_RTNL(); spin_lock_bh(&br->hash_lock); head = &br->hash[br_mac_hash(addr, vid)]; @@ -1071,9 +1044,6 @@ int br_fdb_external_learn_del(struct net_device *dev, err = -ENOENT; spin_unlock_bh(&br->hash_lock); -err_rtnl_unlock: - rtnl_unlock(); return err; } -EXPORT_SYMBOL(br_fdb_external_learn_del); diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index d808d766334d..e8e3f3681680 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -402,6 +402,10 @@ int br_fdb_dump(struct sk_buff *skb, struct 
netlink_callback *cb, struct net_device *dev, struct net_device *fdev, int idx); int br_fdb_sync_static(struct net_bridge *br, struct net_bridge_port *p); void br_fdb_unsync_static(struct net_bridge *br, struct net_bridge_port *p); +int br_fdb_external_learn_add(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid); +int br_fdb_external_learn_del(struct net_bridge *br, struct net_bridge_port *p, + const unsigned char *addr, u16 vid); /* br_forward.c */ void br_deliver(const struct net_bridge_port *to, struct sk_buff *skb); -- cgit v1.2.3-70-g09d2 From 053c095a82cf773075e83d7233b5cc19a1f73ece Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 16 Jan 2015 22:09:00 +0100 Subject: netlink: make nlmsg_end() and genlmsg_end() void Contrary to common expectations for an "int" return, these functions return only a positive value -- if used correctly they cannot even return 0 because the message header will necessarily be in the skb. This makes the very common pattern of if (genlmsg_end(...) < 0) { ... } be a whole bunch of dead code. Many places also simply do return nlmsg_end(...); and the caller is expected to deal with it. This also commonly (at least for me) causes errors, because it is very common to write if (my_function(...)) /* error condition */ and if my_function() does "return nlmsg_end()" this is of course wrong. Additionally, there's not a single place in the kernel that actually needs the message length returned, and if anyone needs it later then it'll be very easy to just use skb->len there. Remove this, and make the functions void. This removes a bunch of dead code as described above. The patch adds lines because I did - return nlmsg_end(...); + nlmsg_end(...); + return 0; I could have preserved all the function's return values by returning skb->len, but instead I've audited all the places calling the affected functions and found that none cared. A few places actually compared the return value with <= 0 in dump functionality, but that could just be changed to < 0 with no change in behaviour, so I opted for the more efficient version. One instance of the error I've made numerous times now is also present in net/phonet/pn_netlink.c in the route_dumpit() function - it didn't check for <0 or <=0 and thus broke out of the loop every single time. I've preserved this since it will (I think) have caused the messages to userspace to be formatted differently with just a single message for every SKB returned to userspace. It's possible that this isn't needed for the tools that actually use this, but I don't even know what they are so couldn't test that changing this behaviour would be acceptable. Signed-off-by: Johannes Berg Signed-off-by: David S. 
Miller --- drivers/acpi/event.c | 7 +------ drivers/net/ethernet/rocker/rocker.c | 3 ++- drivers/net/vxlan.c | 3 ++- drivers/net/wireless/mac80211_hwsim.c | 3 ++- drivers/scsi/pmcraid.c | 8 +------- drivers/target/target_core_user.c | 4 +--- drivers/thermal/thermal_core.c | 6 +----- fs/dlm/netlink.c | 7 +------ include/net/genetlink.h | 4 ++-- include/net/netlink.h | 6 +----- kernel/taskstats.c | 13 ++----------- net/bridge/br_fdb.c | 3 ++- net/bridge/br_mdb.c | 3 ++- net/bridge/br_netlink.c | 3 ++- net/can/gw.c | 3 ++- net/core/fib_rules.c | 3 ++- net/core/neighbour.c | 12 ++++++++---- net/core/rtnetlink.c | 9 ++++++--- net/decnet/dn_dev.c | 3 ++- net/decnet/dn_route.c | 3 ++- net/decnet/dn_table.c | 3 ++- net/ieee802154/netlink.c | 12 ++---------- net/ieee802154/nl-mac.c | 3 ++- net/ieee802154/nl-phy.c | 3 ++- net/ieee802154/nl802154.c | 6 ++++-- net/ipv4/devinet.c | 8 +++++--- net/ipv4/fib_semantics.c | 3 ++- net/ipv4/inet_diag.c | 9 ++++++--- net/ipv4/ipmr.c | 3 ++- net/ipv4/route.c | 3 ++- net/ipv4/tcp_metrics.c | 3 ++- net/ipv6/addrconf.c | 32 +++++++++++++++++++------------- net/ipv6/addrlabel.c | 5 +++-- net/ipv6/ip6_fib.c | 1 - net/ipv6/ip6mr.c | 3 ++- net/ipv6/route.c | 3 ++- net/l2tp/l2tp_netlink.c | 10 ++++++---- net/netfilter/ipvs/ip_vs_ctl.c | 9 ++++++--- net/netfilter/nf_tables_api.c | 18 ++++++++++++------ net/netlabel/netlabel_cipso_v4.c | 3 ++- net/netlabel/netlabel_mgmt.c | 6 ++++-- net/netlabel/netlabel_unlabeled.c | 3 ++- net/netlink/diag.c | 3 ++- net/netlink/genetlink.c | 6 ++++-- net/nfc/netlink.c | 12 +++++++----- net/openvswitch/datapath.c | 9 ++++++--- net/packet/diag.c | 3 ++- net/phonet/pn_netlink.c | 16 +++++++++++----- net/unix/diag.c | 3 ++- net/wireless/nl80211.c | 27 ++++++++++++++++++--------- net/xfrm/xfrm_user.c | 27 ++++++++++++++++++--------- 51 files changed, 203 insertions(+), 158 deletions(-) (limited to 'include') diff --git a/drivers/acpi/event.c b/drivers/acpi/event.c index ef2d730734dc..e24ea4e796e4 100644 --- a/drivers/acpi/event.c +++ b/drivers/acpi/event.c @@ -100,7 +100,6 @@ int acpi_bus_generate_netlink_event(const char *device_class, struct acpi_genl_event *event; void *msg_header; int size; - int result; /* allocate memory */ size = nla_total_size(sizeof(struct acpi_genl_event)) + @@ -137,11 +136,7 @@ int acpi_bus_generate_netlink_event(const char *device_class, event->data = data; /* send multicast genetlink message */ - result = genlmsg_end(skb, msg_header); - if (result < 0) { - nlmsg_free(skb); - return result; - } + genlmsg_end(skb, msg_header); genlmsg_multicast(&acpi_event_genl_family, skb, 0, 0, GFP_ATOMIC); return 0; diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 964d719b150f..d54781e71cd4 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -3674,7 +3674,8 @@ static int rocker_fdb_fill_info(struct sk_buff *skb, if (vid && nla_put_u16(skb, NDA_VLAN, vid)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 6b6b45622a0a..c5f79e7513a6 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -363,7 +363,8 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, if (nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/drivers/net/wireless/mac80211_hwsim.c 
b/drivers/net/wireless/mac80211_hwsim.c index 494e7335aa64..4a4c6586a8d2 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -2557,7 +2557,8 @@ static int mac80211_hwsim_get_radio(struct sk_buff *skb, if (res < 0) goto out_err; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; out_err: genlmsg_cancel(skb, hdr); diff --git a/drivers/scsi/pmcraid.c b/drivers/scsi/pmcraid.c index 8c27b6a77ec4..cf222f46eac5 100644 --- a/drivers/scsi/pmcraid.c +++ b/drivers/scsi/pmcraid.c @@ -1473,13 +1473,7 @@ static int pmcraid_notify_aen( } /* send genetlink multicast message to notify appplications */ - result = genlmsg_end(skb, msg_header); - - if (result < 0) { - pmcraid_err("genlmsg_end failed\n"); - nlmsg_free(skb); - return result; - } + genlmsg_end(skb, msg_header); result = genlmsg_multicast(&pmcraid_event_family, skb, 0, 0, GFP_ATOMIC); diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 1157b559683b..1a1bcf71ec9d 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -784,9 +784,7 @@ static int tcmu_netlink_event(enum tcmu_genl_cmd cmd, const char *name, int mino if (ret < 0) goto free_skb; - ret = genlmsg_end(skb, msg_header); - if (ret < 0) - goto free_skb; + genlmsg_end(skb, msg_header); ret = genlmsg_multicast(&tcmu_genl_family, skb, 0, TCMU_MCGRP_CONFIG, GFP_KERNEL); diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 87e0b0782023..48491d1a81d6 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -1759,11 +1759,7 @@ int thermal_generate_netlink_event(struct thermal_zone_device *tz, thermal_event->event = event; /* send multicast genetlink message */ - result = genlmsg_end(skb, msg_header); - if (result < 0) { - nlmsg_free(skb); - return result; - } + genlmsg_end(skb, msg_header); result = genlmsg_multicast(&thermal_event_genl_family, skb, 0, 0, GFP_ATOMIC); diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index e7cfbaf8d0e2..1e6e227134d7 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -56,13 +56,8 @@ static int send_data(struct sk_buff *skb) { struct genlmsghdr *genlhdr = nlmsg_data((struct nlmsghdr *)skb->data); void *data = genlmsg_data(genlhdr); - int rv; - rv = genlmsg_end(skb, data); - if (rv < 0) { - nlmsg_free(skb); - return rv; - } + genlmsg_end(skb, data); return genlmsg_unicast(&init_net, skb, listener_nlportid); } diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 84125088c309..f24aa83b80b6 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -245,9 +245,9 @@ static inline void *genlmsg_put_reply(struct sk_buff *skb, * @skb: socket buffer the message is stored in * @hdr: user specific header */ -static inline int genlmsg_end(struct sk_buff *skb, void *hdr) +static inline void genlmsg_end(struct sk_buff *skb, void *hdr) { - return nlmsg_end(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN); + nlmsg_end(skb, hdr - GENL_HDRLEN - NLMSG_HDRLEN); } /** diff --git a/include/net/netlink.h b/include/net/netlink.h index d5869b90bfbb..e010ee8da41d 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -490,14 +490,10 @@ static inline struct sk_buff *nlmsg_new(size_t payload, gfp_t flags) * Corrects the netlink message header to include the appeneded * attributes. Only necessary if attributes have been added to * the message. - * - * Returns the total data length of the skb. 
*/ -static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) +static inline void nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) { nlh->nlmsg_len = skb_tail_pointer(skb) - (unsigned char *)nlh; - - return skb->len; } /** diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 670fff88a961..21f82c29c914 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -111,13 +111,8 @@ static int send_reply(struct sk_buff *skb, struct genl_info *info) { struct genlmsghdr *genlhdr = nlmsg_data(nlmsg_hdr(skb)); void *reply = genlmsg_data(genlhdr); - int rc; - rc = genlmsg_end(skb, reply); - if (rc < 0) { - nlmsg_free(skb); - return rc; - } + genlmsg_end(skb, reply); return genlmsg_reply(skb, info); } @@ -134,11 +129,7 @@ static void send_cpu_listeners(struct sk_buff *skb, void *reply = genlmsg_data(genlhdr); int rc, delcount = 0; - rc = genlmsg_end(skb, reply); - if (rc < 0) { - nlmsg_free(skb); - return; - } + genlmsg_end(skb, reply); rc = 0; down_read(&listeners->sem); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 03667e65cc29..08bf04bdac58 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -633,7 +633,8 @@ static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, if (fdb->vlan_id && nla_put(skb, NDA_VLAN, sizeof(u16), &fdb->vlan_id)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c index fed61c971b17..409608960899 100644 --- a/net/bridge/br_mdb.c +++ b/net/bridge/br_mdb.c @@ -190,7 +190,8 @@ static int nlmsg_populate_mdb_fill(struct sk_buff *skb, nla_nest_end(skb, nest2); nla_nest_end(skb, nest); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; end: nla_nest_end(skb, nest); diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index 163950b10d8c..528cf2790a5f 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -263,7 +263,8 @@ static int br_fill_ifinfo(struct sk_buff *skb, } done: - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/can/gw.c b/net/can/gw.c index 295f62e62eb3..a6f448e18ea8 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -575,7 +575,8 @@ static int cgw_put_job(struct sk_buff *skb, struct cgw_job *gwj, int type, goto cancel; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; cancel: nlmsg_cancel(skb, nlh); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 185c341fafbd..44706e81b2e0 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -609,7 +609,8 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, if (ops->fill(rule, skb, frh) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 8d614c93f86a..d36d564f149f 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1884,7 +1884,8 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, goto nla_put_failure; read_unlock_bh(&tbl->lock); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: read_unlock_bh(&tbl->lock); @@ -1917,7 +1918,8 @@ static int neightbl_fill_param_info(struct sk_buff *skb, goto errout; read_unlock_bh(&tbl->lock); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: read_unlock_bh(&tbl->lock); nlmsg_cancel(skb, nlh); @@ -2202,7 
+2204,8 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, nla_put(skb, NDA_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2232,7 +2235,8 @@ static int pneigh_fill_info(struct sk_buff *skb, struct pneigh_entry *pn, if (nla_put(skb, NDA_DST, tbl->key_len, pn->key)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index eadc5c0e2dfa..e13b9dbdf154 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1199,7 +1199,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, nla_nest_end(skb, af_spec); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2326,7 +2327,8 @@ static int nlmsg_populate_fdb_fill(struct sk_buff *skb, if (nla_put(skb, NDA_LLADDR, ETH_ALEN, addr)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -2809,7 +2811,8 @@ int ndo_dflt_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, nla_nest_end(skb, protinfo); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); return -EMSGSIZE; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 4400da7739da..b2c26b081134 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -702,7 +702,8 @@ static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, nla_put_string(skb, IFA_LABEL, ifa->ifa_label)) || nla_put_u32(skb, IFA_FLAGS, ifa_flags)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index daccc4a36d80..812e5e6e88fb 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1616,7 +1616,8 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq, nla_put_u32(skb, RTA_IIF, rt->fld.flowidn_iif) < 0) goto errout; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: nlmsg_cancel(skb, nlh); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 3f19fcbf126d..1540b506e3e0 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -367,7 +367,8 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_nest_end(skb, mp_head); } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: nlmsg_cancel(skb, nlh); diff --git a/net/ieee802154/netlink.c b/net/ieee802154/netlink.c index fa1464762d0d..c8133c07ceee 100644 --- a/net/ieee802154/netlink.c +++ b/net/ieee802154/netlink.c @@ -63,13 +63,9 @@ int ieee802154_nl_mcast(struct sk_buff *msg, unsigned int group) struct nlmsghdr *nlh = nlmsg_hdr(msg); void *hdr = genlmsg_data(nlmsg_data(nlh)); - if (genlmsg_end(msg, hdr) < 0) - goto out; + genlmsg_end(msg, hdr); return genlmsg_multicast(&nl802154_family, msg, 0, group, GFP_ATOMIC); -out: - nlmsg_free(msg); - return -ENOBUFS; } struct sk_buff *ieee802154_nl_new_reply(struct genl_info *info, @@ -96,13 +92,9 @@ int ieee802154_nl_reply(struct sk_buff *msg, struct genl_info *info) struct nlmsghdr *nlh = nlmsg_hdr(msg); void *hdr = genlmsg_data(nlmsg_data(nlh)); - if (genlmsg_end(msg, hdr) < 0) - goto out; + genlmsg_end(msg, hdr); return genlmsg_reply(msg, info); -out: - nlmsg_free(msg); - return 
-ENOBUFS; } static const struct genl_ops ieee8021154_ops[] = { diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 3c902e9516fb..9105265920fe 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -136,7 +136,8 @@ static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, } wpan_phy_put(phy); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: wpan_phy_put(phy); diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index 7baf98b14611..1b9d25f6e898 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -65,7 +65,8 @@ static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, goto nla_put_failure; mutex_unlock(&phy->pib_lock); kfree(buf); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: mutex_unlock(&phy->pib_lock); diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c index a25b9bbd077b..a4daf91b8d0a 100644 --- a/net/ieee802154/nl802154.c +++ b/net/ieee802154/nl802154.c @@ -306,7 +306,8 @@ static int nl802154_send_wpan_phy(struct cfg802154_registered_device *rdev, goto nla_put_failure; finish: - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -489,7 +490,8 @@ nl802154_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, if (nla_put_u8(msg, NL802154_ATTR_LBT_MODE, wpan_dev->lbt)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 214882e7d6de..5f344eb3fc25 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1522,7 +1522,8 @@ static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, preferred, valid)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -1566,7 +1567,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RTM_NEWADDR, NLM_F_MULTI) <= 0) { + RTM_NEWADDR, NLM_F_MULTI) < 0) { rcu_read_unlock(); goto done; } @@ -1749,7 +1750,8 @@ static int inet_netconf_fill_devconf(struct sk_buff *skb, int ifindex, IPV4_DEVCONF(*devconf, PROXY_ARP)) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d2b7b5521b1b..265cb72b7c1b 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -1091,7 +1091,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, nla_nest_end(skb, mp); } #endif - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index e34dccbc4d70..81751f12645f 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -203,7 +203,8 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, icsk->icsk_ca_ops->get_info(sk, ext, skb); out: - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; errout: nlmsg_cancel(skb, nlh); @@ -271,7 +272,8 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, } #endif - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, @@ -758,7 +760,8 @@ static int 
inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, } #endif - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index c8034587859d..9d78427652d2 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -2290,7 +2290,8 @@ static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, if (err < 0 && err != -ENOENT) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index ce112d0f2698..f6e43ca5e641 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2390,7 +2390,8 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, error) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index ed9c9a91851c..e5f41bd5ec1b 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -886,7 +886,8 @@ static int tcp_metrics_dump_info(struct sk_buff *skb, if (tcp_metrics_fill_info(skb, tm) < 0) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f7c8bbeb27b7..8975d9501d50 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -489,7 +489,8 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nla_put_s32(skb, NETCONFA_PROXY_NEIGH, devconf->proxy_ndp) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -619,7 +620,7 @@ static int inet6_netconf_dump_devconf(struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) { + -1) < 0) { rcu_read_unlock(); goto done; } @@ -635,7 +636,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -646,7 +647,7 @@ cont: NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNETCONF, NLM_F_MULTI, - -1) <= 0) + -1) < 0) goto done; else h++; @@ -4047,7 +4048,8 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, if (nla_put_u32(skb, IFA_FLAGS, ifa->flags) < 0) goto error; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; error: nlmsg_cancel(skb, nlh); @@ -4076,7 +4078,8 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, return -EMSGSIZE; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, @@ -4101,7 +4104,8 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, return -EMSGSIZE; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } enum addr_type_t { @@ -4134,7 +4138,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; nl_dump_check_consistent(cb, nlmsg_hdr(skb)); } @@ -4151,7 +4155,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_GETMULTICAST, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; } break; @@ -4166,7 +4170,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, 
cb->nlh->nlmsg_seq, RTM_GETANYCAST, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; } break; @@ -4638,7 +4642,8 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, goto nla_put_failure; nla_nest_end(skb, protoinfo); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -4670,7 +4675,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (inet6_fill_ifinfo(skb, idev, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, - RTM_NEWLINK, NLM_F_MULTI) <= 0) + RTM_NEWLINK, NLM_F_MULTI) < 0) goto out; cont: idx++; @@ -4747,7 +4752,8 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, ci.valid_time = ntohl(pinfo->valid); if (nla_put(skb, PREFIX_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index fd0dc47f471d..e43e79d0a612 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -490,7 +490,8 @@ static int ip6addrlbl_fill(struct sk_buff *skb, return -EMSGSIZE; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -510,7 +511,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) cb->nlh->nlmsg_seq, RTM_NEWADDRLABEL, NLM_F_MULTI); - if (err <= 0) + if (err < 0) break; } idx++; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 03c520a4ebeb..53775ee7d376 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -277,7 +277,6 @@ static int fib6_dump_node(struct fib6_walker *w) w->leaf = rt; return 1; } - WARN_ON(res == 0); } w->leaf = NULL; return 0; diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 722669754bbf..34b682617f50 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -2388,7 +2388,8 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, if (err < 0 && err != -ENOENT) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 34dcbb59df75..c60f15775c53 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2725,7 +2725,8 @@ static int rt6_fill_node(struct net *net, if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index 6b16598f31d5..b4e923f77954 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -390,7 +390,8 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla } out: - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -451,7 +452,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - tunnel, L2TP_CMD_TUNNEL_GET) <= 0) + tunnel, L2TP_CMD_TUNNEL_GET) < 0) goto out; ti++; @@ -752,7 +753,8 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl goto nla_put_failure; nla_nest_end(skb, nest); - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -816,7 +818,7 @@ static int 
l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - session, L2TP_CMD_SESSION_GET) <= 0) + session, L2TP_CMD_SESSION_GET) < 0) break; si++; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index b8295a430a56..e55759056361 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2887,7 +2887,8 @@ static int ip_vs_genl_dump_service(struct sk_buff *skb, if (ip_vs_genl_fill_service(skb, svc) < 0) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -3079,7 +3080,8 @@ static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, if (ip_vs_genl_fill_dest(skb, dest) < 0) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -3215,7 +3217,8 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __u32 state, if (ip_vs_genl_fill_daemon(skb, state, mcast_ifn, syncid)) goto nla_put_failure; - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 3b3ddb4fb9ee..70f697827b9b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -427,7 +427,8 @@ static int nf_tables_fill_table_info(struct sk_buff *skb, struct net *net, nla_put_be32(skb, NFTA_TABLE_USE, htonl(table->use))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -971,7 +972,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb, struct net *net, if (nla_put_be32(skb, NFTA_CHAIN_USE, htonl(chain->use))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -1707,7 +1709,8 @@ static int nf_tables_fill_rule_info(struct sk_buff *skb, struct net *net, nla_put(skb, NFTA_RULE_USERDATA, rule->ulen, nft_userdata(rule))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -2361,7 +2364,8 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx, goto nla_put_failure; nla_nest_end(skb, desc); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -3035,7 +3039,8 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb, nla_nest_end(skb, nest); - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); @@ -3324,7 +3329,8 @@ static int nf_tables_fill_gen_info(struct sk_buff *skb, struct net *net, if (nla_put_be32(skb, NFTA_GEN_ID, htonl(net->nft.base_seq))) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_trim(skb, nlh); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index c2f2a53a4879..179625353cac 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -641,7 +641,8 @@ static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg) if (ret_val != 0) goto listall_cb_failure; - return genlmsg_end(cb_arg->skb, data); + genlmsg_end(cb_arg->skb, data); + return 0; listall_cb_failure: genlmsg_cancel(cb_arg->skb, data); diff --git a/net/netlabel/netlabel_mgmt.c 
b/net/netlabel/netlabel_mgmt.c index e66e977ef2fa..8b3b789c43c2 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -456,7 +456,8 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) goto listall_cb_failure; cb_arg->seq++; - return genlmsg_end(cb_arg->skb, data); + genlmsg_end(cb_arg->skb, data); + return 0; listall_cb_failure: genlmsg_cancel(cb_arg->skb, data); @@ -620,7 +621,8 @@ static int netlbl_mgmt_protocols_cb(struct sk_buff *skb, if (ret_val != 0) goto protocols_cb_failure; - return genlmsg_end(skb, data); + genlmsg_end(skb, data); + return 0; protocols_cb_failure: genlmsg_cancel(skb, data); diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 78a63c18779e..aec7994f78cf 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1163,7 +1163,8 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, goto list_cb_failure; cb_arg->seq++; - return genlmsg_end(cb_arg->skb, data); + genlmsg_end(cb_arg->skb, data); + return 0; list_cb_failure: genlmsg_cancel(cb_arg->skb, data); diff --git a/net/netlink/diag.c b/net/netlink/diag.c index bb59a7ed0859..3ee63a3cff30 100644 --- a/net/netlink/diag.c +++ b/net/netlink/diag.c @@ -91,7 +91,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, sk_diag_put_rings_cfg(sk, skb)) goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_nlmsg_trim: nlmsg_cancel(skb, nlh); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 2e11061ef885..f52a7d5734cd 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -756,7 +756,8 @@ static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, nla_nest_end(skb, nla_grps); } - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); @@ -796,7 +797,8 @@ static int ctrl_fill_mcgrp_info(struct genl_family *family, nla_nest_end(skb, nest); nla_nest_end(skb, nla_grps); - return genlmsg_end(skb, hdr); + genlmsg_end(skb, hdr); + return 0; nla_put_failure: genlmsg_cancel(skb, hdr); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 44989fc8cddf..be387e6219a0 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -102,7 +102,8 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, goto nla_put_failure; } - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -518,7 +519,8 @@ static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, nla_put_u8(msg, NFC_ATTR_RF_MODE, dev->rf_mode)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -908,7 +910,8 @@ static int nfc_genl_send_params(struct sk_buff *msg, nla_put_u16(msg, NFC_ATTR_LLC_PARAM_MIUX, be16_to_cpu(local->miux))) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: @@ -1247,8 +1250,7 @@ static int nfc_genl_send_se(struct sk_buff *msg, struct nfc_dev *dev, nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type)) goto nla_put_failure; - if (genlmsg_end(msg, hdr) < 0) - goto nla_put_failure; + genlmsg_end(msg, hdr); } return 0; diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 8bda3cc12344..f45f1bf4422c 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -799,7 +799,8 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int 
dp_ifindex, if (err) goto error; - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + return 0; error: genlmsg_cancel(skb, ovs_header); @@ -1349,7 +1350,8 @@ static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, if (nla_put_u32(skb, OVS_DP_ATTR_USER_FEATURES, dp->user_features)) goto nla_put_failure; - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + return 0; nla_put_failure: genlmsg_cancel(skb, ovs_header); @@ -1723,7 +1725,8 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (err == -EMSGSIZE) goto error; - return genlmsg_end(skb, ovs_header); + genlmsg_end(skb, ovs_header); + return 0; nla_put_failure: err = -EMSGSIZE; diff --git a/net/packet/diag.c b/net/packet/diag.c index 92f2c7107eec..0ed68f0238bf 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -177,7 +177,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, PACKET_DIAG_FILTER)) goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_nlmsg_trim: nlmsg_cancel(skb, nlh); diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index b64151ade6b3..54d766842c2b 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -121,7 +121,8 @@ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, ifm->ifa_index = dev->ifindex; if (nla_put_u8(skb, IFA_LOCAL, addr)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -190,7 +191,8 @@ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst, if (nla_put_u8(skb, RTA_DST, dst) || nla_put_u32(skb, RTA_OIF, dev->ifindex)) goto nla_put_failure; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; nla_put_failure: nlmsg_cancel(skb, nlh); @@ -282,9 +284,13 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (addr_idx++ < addr_start_idx) continue; - if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq, RTM_NEWROUTE)) - goto out; + fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid, + cb->nlh->nlmsg_seq, RTM_NEWROUTE); + /* fill_route() used to return > 0 (or negative errors) but + * never 0 - ignore the return value and just go out to + * call dumpit again from outside to preserve the behavior + */ + goto out; } out: diff --git a/net/unix/diag.c b/net/unix/diag.c index 86fa0f3b2caf..ef542fbca9fe 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -155,7 +155,8 @@ static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if (nla_put_u8(skb, UNIX_DIAG_SHUTDOWN, sk->sk_shutdown)) goto out_nlmsg_trim; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_nlmsg_trim: nlmsg_cancel(skb, nlh); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 380784378df8..4ed9039bd5f9 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1721,7 +1721,8 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, break; } finish: - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -2404,7 +2405,8 @@ static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flag goto nla_put_failure; } - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -3825,7 +3827,8 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid, 
sinfo->assoc_req_ies)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -4555,7 +4558,8 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, nla_nest_end(msg, pinfoattr); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -5507,7 +5511,8 @@ static int nl80211_send_regdom(struct sk_buff *msg, struct netlink_callback *cb, nla_put_flag(msg, NL80211_ATTR_WIPHY_SELF_MANAGED_REG)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -6577,7 +6582,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, nla_nest_end(msg, bss); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; fail_unlock_rcu: rcu_read_unlock(); @@ -6686,7 +6692,8 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, nla_nest_end(msg, infoattr); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -11025,7 +11032,8 @@ static int nl80211_send_scan_msg(struct sk_buff *msg, /* ignore errors and send incomplete event anyway */ nl80211_add_scan_req(msg, rdev); - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); @@ -11048,7 +11056,8 @@ nl80211_send_sched_scan_msg(struct sk_buff *msg, nla_put_u32(msg, NL80211_ATTR_IFINDEX, netdev->ifindex)) goto nla_put_failure; - return genlmsg_end(msg, hdr); + genlmsg_end(msg, hdr); + return 0; nla_put_failure: genlmsg_cancel(msg, hdr); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 8128594ab379..7de2ed9ec46d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1019,7 +1019,8 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, return err; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_set_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1121,7 +1122,8 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, return err; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1842,7 +1844,8 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct if (err) goto out_cancel; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_cancel: nlmsg_cancel(skb, nlh); @@ -2282,7 +2285,8 @@ static int build_migrate(struct sk_buff *skb, const struct xfrm_migrate *m, goto out_cancel; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; out_cancel: nlmsg_cancel(skb, nlh); @@ -2490,7 +2494,8 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct if (err) return err; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_exp_state_notify(struct xfrm_state *x, const struct km_event *c) @@ -2712,7 +2717,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, return err; } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, @@ -2827,7 +2833,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, } upe->hard = !!hard; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_exp_policy_notify(struct xfrm_policy *xp, int dir, const struct km_event *c) 
@@ -2986,7 +2993,8 @@ static int build_report(struct sk_buff *skb, u8 proto, return err; } } - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_send_report(struct net *net, u8 proto, @@ -3031,7 +3039,8 @@ static int build_mapping(struct sk_buff *skb, struct xfrm_state *x, um->old_sport = x->encap->encap_sport; um->reqid = x->props.reqid; - return nlmsg_end(skb, nlh); + nlmsg_end(skb, nlh); + return 0; } static int xfrm_send_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, -- cgit v1.2.3-70-g09d2 From 88340160f3ad22401b00f4efcee44f7ec4769b19 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Fri, 16 Jan 2015 10:11:00 -0800 Subject: ip_tunnel: Create percpu gro_cell In the ipip tunnel, the skb->queue_mapping is lost in ipip_rcv(). All skb will be queued to the same cell->napi_skbs. The gro_cell_poll is pinned to one core under load. In production traffic, we also see severe rx_dropped in the tunl iface and it is probably due to this limit: skb_queue_len(&cell->napi_skbs) > netdev_max_backlog. This patch is trying to alloc_percpu(struct gro_cell) and schedule gro_cell_poll to process the skb in the same core. Signed-off-by: Martin KaFai Lau Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/gro_cells.h | 29 ++++++++++++----------------- 1 file changed, 12 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/include/net/gro_cells.h b/include/net/gro_cells.h index 734d9b5f577a..0f712c0bc0bf 100644 --- a/include/net/gro_cells.h +++ b/include/net/gro_cells.h @@ -8,25 +8,23 @@ struct gro_cell { struct sk_buff_head napi_skbs; struct napi_struct napi; -} ____cacheline_aligned_in_smp; +}; struct gro_cells { - unsigned int gro_cells_mask; - struct gro_cell *cells; + struct gro_cell __percpu *cells; }; static inline void gro_cells_receive(struct gro_cells *gcells, struct sk_buff *skb) { - struct gro_cell *cell = gcells->cells; + struct gro_cell *cell; struct net_device *dev = skb->dev; - if (!cell || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) { + if (!gcells->cells || skb_cloned(skb) || !(dev->features & NETIF_F_GRO)) { netif_rx(skb); return; } - if (skb_rx_queue_recorded(skb)) - cell += skb_get_rx_queue(skb) & gcells->gro_cells_mask; + cell = this_cpu_ptr(gcells->cells); if (skb_queue_len(&cell->napi_skbs) > netdev_max_backlog) { atomic_long_inc(&dev->rx_dropped); @@ -72,15 +70,12 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de { int i; - gcells->gro_cells_mask = roundup_pow_of_two(netif_get_num_default_rss_queues()) - 1; - gcells->cells = kcalloc(gcells->gro_cells_mask + 1, - sizeof(struct gro_cell), - GFP_KERNEL); + gcells->cells = alloc_percpu(struct gro_cell); if (!gcells->cells) return -ENOMEM; - for (i = 0; i <= gcells->gro_cells_mask; i++) { - struct gro_cell *cell = gcells->cells + i; + for_each_possible_cpu(i) { + struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); skb_queue_head_init(&cell->napi_skbs); netif_napi_add(dev, &cell->napi, gro_cell_poll, 64); @@ -91,16 +86,16 @@ static inline int gro_cells_init(struct gro_cells *gcells, struct net_device *de static inline void gro_cells_destroy(struct gro_cells *gcells) { - struct gro_cell *cell = gcells->cells; int i; - if (!cell) + if (!gcells->cells) return; - for (i = 0; i <= gcells->gro_cells_mask; i++,cell++) { + for_each_possible_cpu(i) { + struct gro_cell *cell = per_cpu_ptr(gcells->cells, i); netif_napi_del(&cell->napi); skb_queue_purge(&cell->napi_skbs); } - kfree(gcells->cells); + free_percpu(gcells->cells); 
gcells->cells = NULL; } -- cgit v1.2.3-70-g09d2 From 27c00132852b77b12e625cd6a0ccf43d6bf5795f Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Sun, 18 Jan 2015 10:25:56 +0100 Subject: switchdev: fix typo in inline function definition Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/net/switchdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 201120e18e4d..205e63698da9 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -69,7 +69,7 @@ static inline int unregister_netdev_switch_notifier(struct notifier_block *nb) } static inline int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, - struct netdev_switch_notifier_info *info); + struct netdev_switch_notifier_info *info) { return NOTIFY_DONE; } -- cgit v1.2.3-70-g09d2 From 0c7aecd4bde4b7302cd41986d3a29e4f0b0ed218 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 15 Jan 2015 15:11:15 +0100 Subject: netns: add rtnl cmd to add and get peer netns ids With this patch, a user can define an id for a peer netns by providing a FD or a PID. These ids are local to the netns where it is added (ie valid only into this netns). The main function (ie the one exported to other module), peernet2id(), allows to get the id of a peer netns. If no id has been assigned by the user, this function allocates one. These ids will be used in netlink messages to point to a peer netns, for example in case of a x-netns interface. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- MAINTAINERS | 1 + include/net/net_namespace.h | 4 + include/uapi/linux/Kbuild | 1 + include/uapi/linux/net_namespace.h | 23 ++++ include/uapi/linux/rtnetlink.h | 5 + net/core/net_namespace.c | 211 +++++++++++++++++++++++++++++++++++++ 6 files changed, 245 insertions(+) create mode 100644 include/uapi/linux/net_namespace.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 9de900572633..9b91d9f0257e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6578,6 +6578,7 @@ F: include/linux/netdevice.h F: include/uapi/linux/in.h F: include/uapi/linux/net.h F: include/uapi/linux/netdevice.h +F: include/uapi/linux/net_namespace.h F: tools/net/ F: tools/testing/selftests/net/ F: lib/random32.c diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 2e8756b8c775..36faf4990c4b 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -60,6 +60,7 @@ struct net { struct list_head exit_list; /* Use only net_mutex */ struct user_namespace *user_ns; /* Owning user namespace */ + struct idr netns_ids; struct ns_common ns; @@ -290,6 +291,9 @@ static inline struct net *read_pnet(struct net * const *pnet) #define __net_initconst __initconst #endif +int peernet2id(struct net *net, struct net *peer); +struct net *get_net_ns_by_id(struct net *net, int id); + struct pernet_operations { struct list_head list; int (*init)(struct net *net); diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 00b100023c47..14b7b6e44c77 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -283,6 +283,7 @@ header-y += net.h header-y += netlink_diag.h header-y += netlink.h header-y += netrom.h +header-y += net_namespace.h header-y += net_tstamp.h header-y += nfc.h header-y += nfs2.h diff --git a/include/uapi/linux/net_namespace.h b/include/uapi/linux/net_namespace.h new file mode 100644 index 000000000000..778cd2c3ebf4 --- /dev/null +++ b/include/uapi/linux/net_namespace.h @@ 
-0,0 +1,23 @@ +/* Copyright (c) 2015 6WIND S.A. + * Author: Nicolas Dichtel + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + */ +#ifndef _UAPI_LINUX_NET_NAMESPACE_H_ +#define _UAPI_LINUX_NET_NAMESPACE_H_ + +/* Attributes of RTM_NEWNSID/RTM_GETNSID messages */ +enum { + NETNSA_NONE, +#define NETNSA_NSID_NOT_ASSIGNED -1 + NETNSA_NSID, + NETNSA_PID, + NETNSA_FD, + __NETNSA_MAX, +}; + +#define NETNSA_MAX (__NETNSA_MAX - 1) + +#endif /* _UAPI_LINUX_NET_NAMESPACE_H_ */ diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h index a1d18593f41e..5cc5d66bf519 100644 --- a/include/uapi/linux/rtnetlink.h +++ b/include/uapi/linux/rtnetlink.h @@ -132,6 +132,11 @@ enum { RTM_GETMDB = 86, #define RTM_GETMDB RTM_GETMDB + RTM_NEWNSID = 88, +#define RTM_NEWNSID RTM_NEWNSID + RTM_GETNSID = 90, +#define RTM_GETNSID RTM_GETNSID + __RTM_MAX, #define RTM_MAX (((__RTM_MAX + 3) & ~3) - 1) }; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index ce780c722e48..9d1a4cac83b6 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -15,6 +15,10 @@ #include #include #include +#include +#include +#include +#include #include #include @@ -144,6 +148,77 @@ static void ops_free_list(const struct pernet_operations *ops, } } +static int alloc_netid(struct net *net, struct net *peer, int reqid) +{ + int min = 0, max = 0; + + ASSERT_RTNL(); + + if (reqid >= 0) { + min = reqid; + max = reqid + 1; + } + + return idr_alloc(&net->netns_ids, peer, min, max, GFP_KERNEL); +} + +/* This function is used by idr_for_each(). If net is equal to peer, the + * function returns the id so that idr_for_each() stops. Because we cannot + * returns the id 0 (idr_for_each() will not stop), we return the magic value + * NET_ID_ZERO (-1) for it. + */ +#define NET_ID_ZERO -1 +static int net_eq_idr(int id, void *net, void *peer) +{ + if (net_eq(net, peer)) + return id ? : NET_ID_ZERO; + return 0; +} + +static int __peernet2id(struct net *net, struct net *peer, bool alloc) +{ + int id = idr_for_each(&net->netns_ids, net_eq_idr, peer); + + ASSERT_RTNL(); + + /* Magic value for id 0. */ + if (id == NET_ID_ZERO) + return 0; + if (id > 0) + return id; + + if (alloc) + return alloc_netid(net, peer, -1); + + return -ENOENT; +} + +/* This function returns the id of a peer netns. If no id is assigned, one will + * be allocated and returned. + */ +int peernet2id(struct net *net, struct net *peer) +{ + int id = __peernet2id(net, peer, true); + + return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; +} + +struct net *get_net_ns_by_id(struct net *net, int id) +{ + struct net *peer; + + if (id < 0) + return NULL; + + rcu_read_lock(); + peer = idr_find(&net->netns_ids, id); + if (peer) + get_net(peer); + rcu_read_unlock(); + + return peer; +} + /* * setup_net runs the initializers for the network namespace object. 
*/ @@ -158,6 +233,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) atomic_set(&net->passive, 1); net->dev_base_seq = 1; net->user_ns = user_ns; + idr_init(&net->netns_ids); #ifdef NETNS_REFCNT_DEBUG atomic_set(&net->use_count, 0); @@ -288,6 +364,14 @@ static void cleanup_net(struct work_struct *work) list_for_each_entry(net, &net_kill_list, cleanup_list) { list_del_rcu(&net->list); list_add_tail(&net->exit_list, &net_exit_list); + for_each_net(tmp) { + int id = __peernet2id(tmp, net, false); + + if (id >= 0) + idr_remove(&tmp->netns_ids, id); + } + idr_destroy(&net->netns_ids); + } rtnl_unlock(); @@ -402,6 +486,130 @@ static struct pernet_operations __net_initdata net_ns_ops = { .exit = net_ns_net_exit, }; +static struct nla_policy rtnl_net_policy[NETNSA_MAX + 1] = { + [NETNSA_NONE] = { .type = NLA_UNSPEC }, + [NETNSA_NSID] = { .type = NLA_S32 }, + [NETNSA_PID] = { .type = NLA_U32 }, + [NETNSA_FD] = { .type = NLA_U32 }, +}; + +static int rtnl_net_newid(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[NETNSA_MAX + 1]; + struct net *peer; + int nsid, err; + + err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, + rtnl_net_policy); + if (err < 0) + return err; + if (!tb[NETNSA_NSID]) + return -EINVAL; + nsid = nla_get_s32(tb[NETNSA_NSID]); + + if (tb[NETNSA_PID]) + peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); + else if (tb[NETNSA_FD]) + peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); + else + return -EINVAL; + if (IS_ERR(peer)) + return PTR_ERR(peer); + + if (__peernet2id(net, peer, false) >= 0) { + err = -EEXIST; + goto out; + } + + err = alloc_netid(net, peer, nsid); + if (err > 0) + err = 0; +out: + put_net(peer); + return err; +} + +static int rtnl_net_get_size(void) +{ + return NLMSG_ALIGN(sizeof(struct rtgenmsg)) + + nla_total_size(sizeof(s32)) /* NETNSA_NSID */ + ; +} + +static int rtnl_net_fill(struct sk_buff *skb, u32 portid, u32 seq, int flags, + int cmd, struct net *net, struct net *peer) +{ + struct nlmsghdr *nlh; + struct rtgenmsg *rth; + int id; + + ASSERT_RTNL(); + + nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rth), flags); + if (!nlh) + return -EMSGSIZE; + + rth = nlmsg_data(nlh); + rth->rtgen_family = AF_UNSPEC; + + id = __peernet2id(net, peer, false); + if (id < 0) + id = NETNSA_NSID_NOT_ASSIGNED; + if (nla_put_s32(skb, NETNSA_NSID, id)) + goto nla_put_failure; + + nlmsg_end(skb, nlh); + return 0; + +nla_put_failure: + nlmsg_cancel(skb, nlh); + return -EMSGSIZE; +} + +static int rtnl_net_getid(struct sk_buff *skb, struct nlmsghdr *nlh) +{ + struct net *net = sock_net(skb->sk); + struct nlattr *tb[NETNSA_MAX + 1]; + struct sk_buff *msg; + int err = -ENOBUFS; + struct net *peer; + + err = nlmsg_parse(nlh, sizeof(struct rtgenmsg), tb, NETNSA_MAX, + rtnl_net_policy); + if (err < 0) + return err; + if (tb[NETNSA_PID]) + peer = get_net_ns_by_pid(nla_get_u32(tb[NETNSA_PID])); + else if (tb[NETNSA_FD]) + peer = get_net_ns_by_fd(nla_get_u32(tb[NETNSA_FD])); + else + return -EINVAL; + + if (IS_ERR(peer)) + return PTR_ERR(peer); + + msg = nlmsg_new(rtnl_net_get_size(), GFP_KERNEL); + if (!msg) { + err = -ENOMEM; + goto out; + } + + err = rtnl_net_fill(msg, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, + RTM_GETNSID, net, peer); + if (err < 0) + goto err_out; + + err = rtnl_unicast(msg, net, NETLINK_CB(skb).portid); + goto out; + +err_out: + nlmsg_free(msg); +out: + put_net(peer); + return err; +} + static int __init net_ns_init(void) { struct net_generic *ng; @@ -435,6 
+643,9 @@ static int __init net_ns_init(void) register_pernet_subsys(&net_ns_ops); + rtnl_register(PF_UNSPEC, RTM_NEWNSID, rtnl_net_newid, NULL, NULL); + rtnl_register(PF_UNSPEC, RTM_GETNSID, rtnl_net_getid, NULL, NULL); + return 0; } -- cgit v1.2.3-70-g09d2 From d37512a277dfb2cef8a578e25a3246f61399a55a Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 15 Jan 2015 15:11:16 +0100 Subject: rtnl: add link netns id to interface messages This patch adds a new attribute (IFLA_LINK_NETNSID) which contains the 'link' netns id when this netns is different from the netns where the interface stands (for example for x-net interfaces like ip tunnels). With this attribute, it's possible to interpret correctly all advertised information (like IFLA_LINK, etc.). Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 2 ++ include/uapi/linux/if_link.h | 1 + net/core/rtnetlink.c | 13 +++++++++++++ 3 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index e21b9f9653c0..6c6d5393fc34 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -46,6 +46,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh) * to create when creating a new device. * @get_num_rx_queues: Function to determine number of receive queues * to create when creating a new device. + * @get_link_net: Function to get the i/o netns of the device */ struct rtnl_link_ops { struct list_head list; @@ -93,6 +94,7 @@ struct rtnl_link_ops { int (*fill_slave_info)(struct sk_buff *skb, const struct net_device *dev, const struct net_device *slave_dev); + struct net *(*get_link_net)(const struct net_device *dev); }; int __rtnl_link_register(struct rtnl_link_ops *ops); diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 2a8380edbb7e..0deee3eeddbf 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -146,6 +146,7 @@ enum { IFLA_PHYS_PORT_ID, IFLA_CARRIER_CHANGES, IFLA_PHYS_SWITCH_ID, + IFLA_LINK_NETNSID, __IFLA_MAX }; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 47b39f3e867c..bd6370f0cb31 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -875,6 +875,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(1) /* IFLA_OPERSTATE */ + nla_total_size(1) /* IFLA_LINKMODE */ + nla_total_size(4) /* IFLA_CARRIER_CHANGES */ + + nla_total_size(4) /* IFLA_LINK_NETNSID */ + nla_total_size(ext_filter_mask & RTEXT_FILTER_VF ? 4 : 0) /* IFLA_NUM_VF */ + rtnl_vfinfo_size(dev, ext_filter_mask) /* IFLA_VFINFO_LIST */ @@ -1169,6 +1170,18 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, goto nla_put_failure; } + if (dev->rtnl_link_ops && + dev->rtnl_link_ops->get_link_net) { + struct net *link_net = dev->rtnl_link_ops->get_link_net(dev); + + if (!net_eq(dev_net(dev), link_net)) { + int id = peernet2id(dev_net(dev), link_net); + + if (nla_put_s32(skb, IFLA_LINK_NETNSID, id)) + goto nla_put_failure; + } + } + if (!(af_spec = nla_nest_start(skb, IFLA_AF_SPEC))) goto nla_put_failure; -- cgit v1.2.3-70-g09d2 From 1728d4fabd1bc9965728de25dda0b694b8da6450 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 15 Jan 2015 15:11:17 +0100 Subject: tunnels: advertise link netns via netlink Implement rtnl_link_ops->get_link_net() callback so that IFLA_LINK_NETNSID is added to rtnetlink messages. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 8 ++++++++ include/net/ip6_tunnel.h | 1 + include/net/ip_tunnels.h | 1 + net/ipv4/ip_gre.c | 2 ++ net/ipv4/ip_tunnel.c | 8 ++++++++ net/ipv4/ip_vti.c | 1 + net/ipv4/ipip.c | 1 + net/ipv6/ip6_gre.c | 1 + net/ipv6/ip6_tunnel.c | 9 +++++++++ net/ipv6/ip6_vti.c | 1 + net/ipv6/sit.c | 1 + 11 files changed, 34 insertions(+) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index c5f79e7513a6..0346eaa6d236 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -2923,6 +2923,13 @@ nla_put_failure: return -EMSGSIZE; } +static struct net *vxlan_get_link_net(const struct net_device *dev) +{ + struct vxlan_dev *vxlan = netdev_priv(dev); + + return vxlan->net; +} + static struct rtnl_link_ops vxlan_link_ops __read_mostly = { .kind = "vxlan", .maxtype = IFLA_VXLAN_MAX, @@ -2934,6 +2941,7 @@ static struct rtnl_link_ops vxlan_link_ops __read_mostly = { .dellink = vxlan_dellink, .get_size = vxlan_get_size, .fill_info = vxlan_fill_info, + .get_link_net = vxlan_get_link_net, }; static void vxlan_handle_lowerdev_unregister(struct vxlan_net *vn, diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 9326c41c2d7f..76c091b53dae 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -70,6 +70,7 @@ int ip6_tnl_xmit_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, __u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw); __u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr); +struct net *ip6_tnl_get_link_net(const struct net_device *dev); static inline void ip6tunnel_xmit(struct sk_buff *skb, struct net_device *dev) { diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h index ce4db3cc5647..2c47061a6954 100644 --- a/include/net/ip_tunnels.h +++ b/include/net/ip_tunnels.h @@ -141,6 +141,7 @@ int ip_tunnel_encap_del_ops(const struct ip_tunnel_encap_ops *op, int ip_tunnel_init(struct net_device *dev); void ip_tunnel_uninit(struct net_device *dev); void ip_tunnel_dellink(struct net_device *dev, struct list_head *head); +struct net *ip_tunnel_get_link_net(const struct net_device *dev); int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname); diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 942576e27df1..6e7727f27393 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -829,6 +829,7 @@ static struct rtnl_link_ops ipgre_link_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { @@ -843,6 +844,7 @@ static struct rtnl_link_ops ipgre_tap_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = ipgre_get_size, .fill_info = ipgre_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static int __net_init ipgre_tap_init_net(struct net *net) diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c index d3e447936720..2cd08280c77b 100644 --- a/net/ipv4/ip_tunnel.c +++ b/net/ipv4/ip_tunnel.c @@ -972,6 +972,14 @@ void ip_tunnel_dellink(struct net_device *dev, struct list_head *head) } EXPORT_SYMBOL_GPL(ip_tunnel_dellink); +struct net *ip_tunnel_get_link_net(const struct net_device *dev) +{ + struct ip_tunnel *tunnel = netdev_priv(dev); + + return tunnel->net; +} +EXPORT_SYMBOL(ip_tunnel_get_link_net); + int ip_tunnel_init_net(struct net *net, int ip_tnl_net_id, struct rtnl_link_ops *ops, char *devname) { diff --git a/net/ipv4/ip_vti.c 
b/net/ipv4/ip_vti.c index 1a7e979e80ba..94efe148181c 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -531,6 +531,7 @@ static struct rtnl_link_ops vti_link_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = vti_get_size, .fill_info = vti_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static int __init vti_init(void) diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 40403114f00a..b58d6689874c 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -498,6 +498,7 @@ static struct rtnl_link_ops ipip_link_ops __read_mostly = { .dellink = ip_tunnel_dellink, .get_size = ipip_get_size, .fill_info = ipip_fill_info, + .get_link_net = ip_tunnel_get_link_net, }; static struct xfrm_tunnel ipip_handler __read_mostly = { diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 13cda4c6313b..9306a5ff9149 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1662,6 +1662,7 @@ static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { .dellink = ip6gre_dellink, .get_size = ip6gre_get_size, .fill_info = ip6gre_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 92b3da571980..266a264ec212 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1760,6 +1760,14 @@ nla_put_failure: return -EMSGSIZE; } +struct net *ip6_tnl_get_link_net(const struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + + return tunnel->net; +} +EXPORT_SYMBOL(ip6_tnl_get_link_net); + static const struct nla_policy ip6_tnl_policy[IFLA_IPTUN_MAX + 1] = { [IFLA_IPTUN_LINK] = { .type = NLA_U32 }, [IFLA_IPTUN_LOCAL] = { .len = sizeof(struct in6_addr) }, @@ -1783,6 +1791,7 @@ static struct rtnl_link_ops ip6_link_ops __read_mostly = { .dellink = ip6_tnl_dellink, .get_size = ip6_tnl_get_size, .fill_info = ip6_tnl_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; static struct xfrm6_tunnel ip4ip6_handler __read_mostly = { diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index ace10d0b3aac..5fb9e212eca8 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -1016,6 +1016,7 @@ static struct rtnl_link_ops vti6_link_ops __read_mostly = { .changelink = vti6_changelink, .get_size = vti6_get_size, .fill_info = vti6_fill_info, + .get_link_net = ip6_tnl_get_link_net, }; static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 213546bd6d5d..3cc197c72b59 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -1763,6 +1763,7 @@ static struct rtnl_link_ops sit_link_ops __read_mostly = { .get_size = ipip6_get_size, .fill_info = ipip6_fill_info, .dellink = ipip6_dellink, + .get_link_net = ip_tunnel_get_link_net, }; static struct xfrm_tunnel sit_handler __read_mostly = { -- cgit v1.2.3-70-g09d2 From 22a5dc0e5e3e8fef804230cd73ed7b0afd4c7bae Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 18 Jan 2015 16:35:14 -0500 Subject: net: sched: Introduce connmark action This tc action allows you to retrieve the connection tracking mark This action has been used heavily by openwrt for a few years now. There are known limitations currently: doesn't work for initial packets, since we only query the ct table. Fine given use case is for returning packets no implicit defrag. frags should be rare so fix later.. won't work for more complex tasks, e.g. lookup of other extensions since we have no means to store results we still have a 2nd lookup later on via normal conntrack path. 
This shouldn't break anything though since skb->nfct isn't altered. V2: remove unnecessary braces (Jiri) change the action identifier to 14 (Jiri) Fix some stylistic issues caught by checkpatch V3: Move module params to bottom (Cong) Get rid of tcf_hashinfo_init and friends and conform to newer API (Cong) Acked-by: Jiri Pirko Signed-off-by: Felix Fietkau Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/tc_act/tc_connmark.h | 14 +++ include/uapi/linux/tc_act/tc_connmark.h | 22 ++++ net/sched/Kconfig | 11 ++ net/sched/Makefile | 1 + net/sched/act_connmark.c | 192 ++++++++++++++++++++++++++++++++ 5 files changed, 240 insertions(+) create mode 100644 include/net/tc_act/tc_connmark.h create mode 100644 include/uapi/linux/tc_act/tc_connmark.h create mode 100644 net/sched/act_connmark.c (limited to 'include') diff --git a/include/net/tc_act/tc_connmark.h b/include/net/tc_act/tc_connmark.h new file mode 100644 index 000000000000..5c1104c2e24f --- /dev/null +++ b/include/net/tc_act/tc_connmark.h @@ -0,0 +1,14 @@ +#ifndef __NET_TC_CONNMARK_H +#define __NET_TC_CONNMARK_H + +#include + +struct tcf_connmark_info { + struct tcf_common common; + u16 zone; +}; + +#define to_connmark(a) \ + container_of(a->priv, struct tcf_connmark_info, common) + +#endif /* __NET_TC_CONNMARK_H */ diff --git a/include/uapi/linux/tc_act/tc_connmark.h b/include/uapi/linux/tc_act/tc_connmark.h new file mode 100644 index 000000000000..994b0971bce2 --- /dev/null +++ b/include/uapi/linux/tc_act/tc_connmark.h @@ -0,0 +1,22 @@ +#ifndef __UAPI_TC_CONNMARK_H +#define __UAPI_TC_CONNMARK_H + +#include +#include + +#define TCA_ACT_CONNMARK 14 + +struct tc_connmark { + tc_gen; + __u16 zone; +}; + +enum { + TCA_CONNMARK_UNSPEC, + TCA_CONNMARK_PARMS, + TCA_CONNMARK_TM, + __TCA_CONNMARK_MAX +}; +#define TCA_CONNMARK_MAX (__TCA_CONNMARK_MAX - 1) + +#endif diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 466943551581..475e35e261ec 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -710,6 +710,17 @@ config NET_ACT_BPF To compile this code as a module, choose M here: the module will be called act_bpf. +config NET_ACT_CONNMARK + tristate "Netfilter Connection Mark Retriever" + depends on NET_CLS_ACT && NETFILTER && IP_NF_IPTABLES + ---help--- + Say Y here to allow retrieving of conn mark + + If unsure, say N. + + To compile this code as a module, choose M here: the + module will be called act_connmark. 
+ config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index 7ca2b4e76312..7ca7f4c1b8c2 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -18,6 +18,7 @@ obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_ACT_VLAN) += act_vlan.o obj-$(CONFIG_NET_ACT_BPF) += act_bpf.o +obj-$(CONFIG_NET_ACT_CONNMARK) += act_connmark.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c new file mode 100644 index 000000000000..8e472518f9f6 --- /dev/null +++ b/net/sched/act_connmark.c @@ -0,0 +1,192 @@ +/* + * net/sched/act_connmark.c netfilter connmark retriever action + * skb mark is over-written + * + * Copyright (c) 2011 Felix Fietkau + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. +*/ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define CONNMARK_TAB_MASK 3 + +static int tcf_connmark(struct sk_buff *skb, const struct tc_action *a, + struct tcf_result *res) +{ + const struct nf_conntrack_tuple_hash *thash; + struct nf_conntrack_tuple tuple; + enum ip_conntrack_info ctinfo; + struct tcf_connmark_info *ca = a->priv; + struct nf_conn *c; + int proto; + + spin_lock(&ca->tcf_lock); + ca->tcf_tm.lastuse = jiffies; + bstats_update(&ca->tcf_bstats, skb); + + if (skb->protocol == htons(ETH_P_IP)) { + if (skb->len < sizeof(struct iphdr)) + goto out; + + proto = NFPROTO_IPV4; + } else if (skb->protocol == htons(ETH_P_IPV6)) { + if (skb->len < sizeof(struct ipv6hdr)) + goto out; + + proto = NFPROTO_IPV6; + } else { + goto out; + } + + c = nf_ct_get(skb, &ctinfo); + if (c) { + skb->mark = c->mark; + /* using overlimits stats to count how many packets marked */ + ca->tcf_qstats.overlimits++; + nf_ct_put(c); + goto out; + } + + if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), + proto, &tuple)) + goto out; + + thash = nf_conntrack_find_get(dev_net(skb->dev), ca->zone, &tuple); + if (!thash) + goto out; + + c = nf_ct_tuplehash_to_ctrack(thash); + /* using overlimits stats to count how many packets marked */ + ca->tcf_qstats.overlimits++; + skb->mark = c->mark; + nf_ct_put(c); + +out: + skb->nfct = NULL; + spin_unlock(&ca->tcf_lock); + return ca->tcf_action; +} + +static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = { + [TCA_CONNMARK_PARMS] = { .len = sizeof(struct tc_connmark) }, +}; + +static int tcf_connmark_init(struct net *net, struct nlattr *nla, + struct nlattr *est, struct tc_action *a, + int ovr, int bind) +{ + struct nlattr *tb[TCA_CONNMARK_MAX + 1]; + struct tcf_connmark_info *ci; + struct tc_connmark *parm; + int ret = 0; + + if (!nla) + return -EINVAL; + + ret = nla_parse_nested(tb, TCA_CONNMARK_MAX, nla, connmark_policy); + if (ret < 0) + return ret; + + parm = nla_data(tb[TCA_CONNMARK_PARMS]); + + if (!tcf_hash_check(parm->index, a, bind)) { + ret = tcf_hash_create(parm->index, est, a, sizeof(*ci), bind); + if (ret) + return ret; + + ci = to_connmark(a); + ci->tcf_action = parm->action; + ci->zone = parm->zone; + + tcf_hash_insert(a); + ret = ACT_P_CREATED; + } else { + ci = 
to_connmark(a); + if (bind) + return 0; + tcf_hash_release(a, bind); + if (!ovr) + return -EEXIST; + /* replacing action and zone */ + ci->tcf_action = parm->action; + ci->zone = parm->zone; + } + + return ret; +} + +static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_connmark_info *ci = a->priv; + + struct tc_connmark opt = { + .index = ci->tcf_index, + .refcnt = ci->tcf_refcnt - ref, + .bindcnt = ci->tcf_bindcnt - bind, + .action = ci->tcf_action, + .zone = ci->zone, + }; + struct tcf_t t; + + if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt)) + goto nla_put_failure; + + t.install = jiffies_to_clock_t(jiffies - ci->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - ci->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(ci->tcf_tm.expires); + if (nla_put(skb, TCA_CONNMARK_TM, sizeof(t), &t)) + goto nla_put_failure; + + return skb->len; +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tc_action_ops act_connmark_ops = { + .kind = "connmark", + .type = TCA_ACT_CONNMARK, + .owner = THIS_MODULE, + .act = tcf_connmark, + .dump = tcf_connmark_dump, + .init = tcf_connmark_init, +}; + +static int __init connmark_init_module(void) +{ + return tcf_register_action(&act_connmark_ops, CONNMARK_TAB_MASK); +} + +static void __exit connmark_cleanup_module(void) +{ + tcf_unregister_action(&act_connmark_ops); +} + +module_init(connmark_init_module); +module_exit(connmark_cleanup_module); +MODULE_AUTHOR("Felix Fietkau "); +MODULE_DESCRIPTION("Connection tracking mark restoring"); +MODULE_LICENSE("GPL"); + -- cgit v1.2.3-70-g09d2 From fa7e1fbcb52cc9efab394526a566d80fb31529bb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Jan 2015 18:44:19 +0100 Subject: mac80211: allow drivers to control software crypto Some drivers unfortunately cannot support software crypto, but mac80211 currently assumes that they do. This has the issue that if the hardware enabling fails for some reason, the software fallback is used, which won't work. This clearly isn't desirable, the error should be reported and the key setting refused. Support this in mac80211 by allowing drivers to set a new HW flag IEEE80211_HW_SW_CRYPTO_CONTROL, in which case mac80211 will only allow software fallback if the set_key() method returns 1. The driver will also need to advertise supported cipher suites so that mac80211 doesn't advertise any (future) software ciphers that the driver can't actually do. While at it, to make it easier to support this, refactor the ieee80211_init_cipher_suites() code. Signed-off-by: Johannes Berg --- include/net/mac80211.h | 12 ++++++++++ net/mac80211/key.c | 10 +++++--- net/mac80211/main.c | 65 ++++++++++++++++++++++++++++++-------------------- 3 files changed, 58 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 275ee56152ad..33b87c50a4cf 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1634,6 +1634,12 @@ struct ieee80211_tx_control { * be created. It is expected user-space will create vifs as * desired (and thus have them named as desired). * + * @IEEE80211_HW_SW_CRYPTO_CONTROL: The driver wants to control which of the + * crypto algorithms can be done in software - so don't automatically + * try to fall back to it if hardware crypto fails, but do so only if + * the driver returns 1. This also forces the driver to advertise its + * supported cipher suites. 
+ * * @IEEE80211_HW_QUEUE_CONTROL: The driver wants to control per-interface * queue mapping in order to use different queues (not just one per AC) * for different virtual interfaces. See the doc section on HW queue @@ -1681,6 +1687,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_MFP_CAPABLE = 1<<13, IEEE80211_HW_WANT_MONITOR_VIF = 1<<14, IEEE80211_HW_NO_AUTO_VIF = 1<<15, + IEEE80211_HW_SW_CRYPTO_CONTROL = 1<<16, /* free slots */ IEEE80211_HW_REPORTS_TX_ACK_STATUS = 1<<18, IEEE80211_HW_CONNECTION_MONITOR = 1<<19, @@ -1955,6 +1962,11 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb); * added; if you return 0 then hw_key_idx must be assigned to the * hardware key index, you are free to use the full u8 range. * + * Note that in the case that the @IEEE80211_HW_SW_CRYPTO_CONTROL flag is + * set, mac80211 will not automatically fall back to software crypto if + * enabling hardware crypto failed. The set_key() call may also return the + * value 1 to permit this specific key/algorithm to be done in software. + * * When the cmd is %DISABLE_KEY then it must succeed. * * Note that it is permissible to not decrypt a frame even if a key diff --git a/net/mac80211/key.c b/net/mac80211/key.c index f8d9f0ee59bf..5167c53aa15f 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -90,7 +90,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) { struct ieee80211_sub_if_data *sdata; struct sta_info *sta; - int ret; + int ret = -EOPNOTSUPP; might_sleep(); @@ -150,7 +150,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) return 0; } - if (ret != -ENOSPC && ret != -EOPNOTSUPP) + if (ret != -ENOSPC && ret != -EOPNOTSUPP && ret != 1) sdata_err(sdata, "failed to set key (%d, %pM) to hardware (%d)\n", key->conf.keyidx, @@ -163,7 +163,11 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_AES_CMAC: - /* all of these we can do in software */ + /* all of these we can do in software - if driver can */ + if (ret == 1) + return 0; + if (key->local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL) + return -EINVAL; return 0; default: return -EINVAL; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 46264cb6604b..ea6b82ac4f0b 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -658,7 +658,6 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) bool have_wep = !(IS_ERR(local->wep_tx_tfm) || IS_ERR(local->wep_rx_tfm)); bool have_mfp = local->hw.flags & IEEE80211_HW_MFP_CAPABLE; - const struct ieee80211_cipher_scheme *cs = local->hw.cipher_schemes; int n_suites = 0, r = 0, w = 0; u32 *suites; static const u32 cipher_suites[] = { @@ -672,12 +671,38 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) WLAN_CIPHER_SUITE_AES_CMAC }; - /* Driver specifies the ciphers, we have nothing to do... */ - if (local->hw.wiphy->cipher_suites && have_wep) - return 0; + if (local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL || + local->hw.wiphy->cipher_suites) { + /* If the driver advertises, or doesn't support SW crypto, + * we only need to remove WEP if necessary. + */ + if (have_wep) + return 0; + + /* well if it has _no_ ciphers ... 
fine */ + if (!local->hw.wiphy->n_cipher_suites) + return 0; + + /* Driver provides cipher suites, but we need to exclude WEP */ + suites = kmemdup(local->hw.wiphy->cipher_suites, + sizeof(u32) * local->hw.wiphy->n_cipher_suites, + GFP_KERNEL); + if (!suites) + return -ENOMEM; - /* Set up cipher suites if driver relies on mac80211 cipher defs */ - if (!local->hw.wiphy->cipher_suites && !cs) { + for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) { + u32 suite = local->hw.wiphy->cipher_suites[r]; + + if (suite == WLAN_CIPHER_SUITE_WEP40 || + suite == WLAN_CIPHER_SUITE_WEP104) + continue; + suites[w++] = suite; + } + } else if (!local->hw.cipher_schemes) { + /* If the driver doesn't have cipher schemes, there's nothing + * else to do other than assign the (software supported and + * perhaps offloaded) cipher suites. + */ local->hw.wiphy->cipher_suites = cipher_suites; local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); @@ -689,12 +714,16 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) local->hw.wiphy->n_cipher_suites -= 2; } + /* not dynamically allocated, so just return */ return 0; - } + } else { + const struct ieee80211_cipher_scheme *cs; - if (!local->hw.wiphy->cipher_suites) { - /* - * Driver specifies cipher schemes only + cs = local->hw.cipher_schemes; + + /* Driver specifies cipher schemes only (but not cipher suites + * including the schemes) + * * We start counting ciphers defined by schemes, TKIP and CCMP */ n_suites = local->hw.n_cipher_schemes + 2; @@ -724,22 +753,6 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) for (r = 0; r < local->hw.n_cipher_schemes; r++) suites[w++] = cs[r].cipher; - } else { - /* Driver provides cipher suites, but we need to exclude WEP */ - suites = kmemdup(local->hw.wiphy->cipher_suites, - sizeof(u32) * local->hw.wiphy->n_cipher_suites, - GFP_KERNEL); - if (!suites) - return -ENOMEM; - - for (r = 0; r < local->hw.wiphy->n_cipher_suites; r++) { - u32 suite = local->hw.wiphy->cipher_suites[r]; - - if (suite == WLAN_CIPHER_SUITE_WEP40 || - suite == WLAN_CIPHER_SUITE_WEP104) - continue; - suites[w++] = suite; - } } local->hw.wiphy->cipher_suites = suites; -- cgit v1.2.3-70-g09d2 From 4b681c82d2f9bef121c912ffcaac89a004af3f2c Mon Sep 17 00:00:00 2001 From: Vadim Kochan Date: Mon, 12 Jan 2015 16:34:05 +0200 Subject: nl80211: Allow set network namespace by fd Added new NL80211_ATTR_NETNS_FD which allows to set namespace via nl80211 by fd. 
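For illustration only (this sketch is not part of the patch): one plausible way for userspace to use the new attribute with libnl-3, moving phy#0 into a namespace created with "ip netns add vnet0" by passing an open netns fd instead of a pid. The wiphy index and namespace name are placeholders, and error handling is omitted.

  #include <fcntl.h>
  #include <unistd.h>
  #include <linux/nl80211.h>
  #include <netlink/netlink.h>
  #include <netlink/genl/genl.h>
  #include <netlink/genl/ctrl.h>

  int main(void)
  {
          struct nl_sock *sk = nl_socket_alloc();
          struct nl_msg *msg = nlmsg_alloc();
          int family, fd, err;

          genl_connect(sk);
          family = genl_ctrl_resolve(sk, "nl80211");

          /* fd referring to the target namespace, e.g. "ip netns add vnet0" */
          fd = open("/var/run/netns/vnet0", O_RDONLY);

          genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
                      NL80211_CMD_SET_WIPHY_NETNS, 0);
          nla_put_u32(msg, NL80211_ATTR_WIPHY, 0);     /* phy#0, placeholder */
          nla_put_u32(msg, NL80211_ATTR_NETNS_FD, fd); /* instead of NL80211_ATTR_PID */

          err = nl_send_auto(sk, msg);

          nlmsg_free(msg);
          close(fd);
          nl_socket_free(sk);
          return err < 0;
  }
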
Signed-off-by: Vadim Kochan Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 ++ net/core/net_namespace.c | 1 + net/wireless/nl80211.c | 16 +++++++++++----- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f52797a90816..f68532b015db 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2098,6 +2098,8 @@ enum nl80211_attrs { NL80211_ATTR_SURVEY_RADIO_STATS, + NL80211_ATTR_NETNS_FD, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 7f155175bba8..5d5ee8f3e4ff 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -361,6 +361,7 @@ struct net *get_net_ns_by_fd(int fd) return ERR_PTR(-EINVAL); } #endif +EXPORT_SYMBOL_GPL(get_net_ns_by_fd); struct net *get_net_ns_by_pid(pid_t pid) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c5661c5ad8f3..c64100ec79e3 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -397,6 +397,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 }, [NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN }, [NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG }, + [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -7762,14 +7763,19 @@ static int nl80211_wiphy_netns(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net *net; int err; - u32 pid; - if (!info->attrs[NL80211_ATTR_PID]) - return -EINVAL; + if (info->attrs[NL80211_ATTR_PID]) { + u32 pid = nla_get_u32(info->attrs[NL80211_ATTR_PID]); + + net = get_net_ns_by_pid(pid); + } else if (info->attrs[NL80211_ATTR_NETNS_FD]) { + u32 fd = nla_get_u32(info->attrs[NL80211_ATTR_NETNS_FD]); - pid = nla_get_u32(info->attrs[NL80211_ATTR_PID]); + net = get_net_ns_by_fd(fd); + } else { + return -EINVAL; + } - net = get_net_ns_by_pid(pid); if (IS_ERR(net)) return PTR_ERR(net); -- cgit v1.2.3-70-g09d2 From db82d8a966ded064bd4cf0e1fcca13442f50d0ae Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 14 Jan 2015 12:55:08 +0100 Subject: mac80211: enable TPC through mac80211 stack Control per packet Transmit Power Control (TPC) in lower drivers according to TX power settings configured by the user. In particular TPC is enabled if value passed in enum nl80211_tx_power_setting is NL80211_TX_POWER_LIMITED (allow using less than specified from userspace), whereas TPC is disabled if nl80211_tx_power_setting is set to NL80211_TX_POWER_FIXED (use value configured from userspace) Signed-off-by: Lorenzo Bianconi Signed-off-by: Johannes Berg --- include/net/mac80211.h | 7 +++++++ net/mac80211/cfg.c | 19 ++++++++++++++++--- net/mac80211/chan.c | 4 ++-- net/mac80211/ieee80211_i.h | 3 ++- net/mac80211/iface.c | 5 +++-- 5 files changed, 30 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 33b87c50a4cf..866073e27ea2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -376,6 +376,12 @@ enum ieee80211_rssi_event { * @ssid_len: Length of SSID given in @ssid. * @hidden_ssid: The SSID of the current vif is hidden. Only valid in AP-mode. * @txpower: TX power in dBm + * @txpower_type: TX power adjustment used to control per packet Transmit + * Power Control (TPC) in lower driver for the current vif. 
In particular + * TPC is enabled if value passed in %txpower_type is + * NL80211_TX_POWER_LIMITED (allow using less than specified from + * userspace), whereas TPC is disabled if %txpower_type is set to + * NL80211_TX_POWER_FIXED (use value configured from userspace) * @p2p_noa_attr: P2P NoA attribute for P2P powersave */ struct ieee80211_bss_conf { @@ -411,6 +417,7 @@ struct ieee80211_bss_conf { size_t ssid_len; bool hidden_ssid; int txpower; + enum nl80211_tx_power_setting txpower_type; struct ieee80211_p2p_noa_attr p2p_noa_attr; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ff090ef1ea2c..a777114d663b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2110,6 +2110,8 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, { struct ieee80211_local *local = wiphy_priv(wiphy); struct ieee80211_sub_if_data *sdata; + enum nl80211_tx_power_setting txp_type = type; + bool update_txp_type = false; if (wdev) { sdata = IEEE80211_WDEV_TO_SUB_IF(wdev); @@ -2117,6 +2119,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, switch (type) { case NL80211_TX_POWER_AUTOMATIC: sdata->user_power_level = IEEE80211_UNSET_POWER_LEVEL; + txp_type = NL80211_TX_POWER_LIMITED; break; case NL80211_TX_POWER_LIMITED: case NL80211_TX_POWER_FIXED: @@ -2126,7 +2129,12 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, break; } - ieee80211_recalc_txpower(sdata); + if (txp_type != sdata->vif.bss_conf.txpower_type) { + update_txp_type = true; + sdata->vif.bss_conf.txpower_type = txp_type; + } + + ieee80211_recalc_txpower(sdata, update_txp_type); return 0; } @@ -2134,6 +2142,7 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, switch (type) { case NL80211_TX_POWER_AUTOMATIC: local->user_power_level = IEEE80211_UNSET_POWER_LEVEL; + txp_type = NL80211_TX_POWER_LIMITED; break; case NL80211_TX_POWER_LIMITED: case NL80211_TX_POWER_FIXED: @@ -2144,10 +2153,14 @@ static int ieee80211_set_tx_power(struct wiphy *wiphy, } mutex_lock(&local->iflist_mtx); - list_for_each_entry(sdata, &local->interfaces, list) + list_for_each_entry(sdata, &local->interfaces, list) { sdata->user_power_level = local->user_power_level; + if (txp_type != sdata->vif.bss_conf.txpower_type) + update_txp_type = true; + sdata->vif.bss_conf.txpower_type = txp_type; + } list_for_each_entry(sdata, &local->interfaces, list) - ieee80211_recalc_txpower(sdata); + ieee80211_recalc_txpower(sdata, update_txp_type); mutex_unlock(&local->iflist_mtx); return 0; diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c index 35b11e11e0c4..ff0d2db09df9 100644 --- a/net/mac80211/chan.c +++ b/net/mac80211/chan.c @@ -655,7 +655,7 @@ out: } if (new_ctx && ieee80211_chanctx_num_assigned(local, new_ctx) > 0) { - ieee80211_recalc_txpower(sdata); + ieee80211_recalc_txpower(sdata, false); ieee80211_recalc_chanctx_min_def(local, new_ctx); } @@ -1387,7 +1387,7 @@ static int ieee80211_vif_use_reserved_switch(struct ieee80211_local *local) ieee80211_bss_info_change_notify(sdata, changed); - ieee80211_recalc_txpower(sdata); + ieee80211_recalc_txpower(sdata, false); } ieee80211_recalc_chanctx_chantype(local, ctx); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 156ea79e0157..6e1b184183fb 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1621,7 +1621,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local); void ieee80211_del_virtual_monitor(struct ieee80211_local *local); bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata); -void ieee80211_recalc_txpower(struct 
ieee80211_sub_if_data *sdata); +void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata, + bool update_bss); static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 677422e11e07..4371c123a95e 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -73,9 +73,10 @@ bool __ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) return false; } -void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata) +void ieee80211_recalc_txpower(struct ieee80211_sub_if_data *sdata, + bool update_bss) { - if (__ieee80211_recalc_txpower(sdata)) + if (__ieee80211_recalc_txpower(sdata) || update_bss) ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_TXPOWER); } -- cgit v1.2.3-70-g09d2 From 9c74893441d3cf4b258a82b19cbf6bfd2ed6e549 Mon Sep 17 00:00:00 2001 From: Luciano Coelho Date: Fri, 16 Jan 2015 16:04:09 +0200 Subject: nl80211: add an attribute to allow delaying the first scheduled scan cycle The userspace may want to delay the the first scheduled scan or net-detect cycle. Add an optional attribute to the scheduled scan configuration to pass the delay to be (optionally) used by the driver. Signed-off-by: Luciano Coelho [add the attribute to the policy to validate it] Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 5 +++++ include/uapi/linux/nl80211.h | 20 ++++++++++++++------ net/wireless/nl80211.c | 5 +++++ 3 files changed, 24 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 7b44ba0a7632..64e09e1e8099 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1493,6 +1493,10 @@ struct cfg80211_match_set { * @rcu_head: RCU callback used to free the struct * @owner_nlportid: netlink portid of owner (if this should is a request * owned by a particular socket) + * @delay: delay in seconds to use before starting the first scan + * cycle. The driver may ignore this parameter and start + * immediately (or at any other time), if this feature is not + * supported. */ struct cfg80211_sched_scan_request { struct cfg80211_ssid *ssids; @@ -1506,6 +1510,7 @@ struct cfg80211_sched_scan_request { struct cfg80211_match_set *match_sets; int n_match_sets; s32 min_rssi_thold; + u32 delay; u8 mac_addr[ETH_ALEN] __aligned(2); u8 mac_addr_mask[ETH_ALEN] __aligned(2); diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index f68532b015db..1cbc3aae425c 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -324,7 +324,9 @@ * if passed, define which channels should be scanned; if not * passed, all channels allowed for the current regulatory domain * are used. Extra IEs can also be passed from the userspace by - * using the %NL80211_ATTR_IE attribute. + * using the %NL80211_ATTR_IE attribute. The first cycle of the + * scheduled scan can be delayed by %NL80211_ATTR_SCHED_SCAN_DELAY + * is supplied. * @NL80211_CMD_STOP_SCHED_SCAN: stop a scheduled scan. Returns -ENOENT if * scheduled scan is not running. The caller may assume that as soon * as the call returns, it is safe to start a new scheduled scan again. @@ -1735,6 +1737,9 @@ enum nl80211_commands { * should be contained in the result as the sum of the respective counters * over all channels. * + * @NL80211_ATTR_SCHED_SCAN_DELAY: delay before a scheduled scan (or a + * WoWLAN net-detect scan) is started, u32 in seconds. 
+ * * @NUM_NL80211_ATTR: total number of nl80211_attrs available * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use @@ -2100,6 +2105,8 @@ enum nl80211_attrs { NL80211_ATTR_NETNS_FD, + NL80211_ATTR_SCHED_SCAN_DELAY, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -3743,11 +3750,12 @@ struct nl80211_pattern_support { * @NL80211_WOWLAN_TRIG_NET_DETECT: wake up when a configured network * is detected. This is a nested attribute that contains the * same attributes used with @NL80211_CMD_START_SCHED_SCAN. It - * specifies how the scan is performed (e.g. the interval and the - * channels to scan) as well as the scan results that will - * trigger a wake (i.e. the matchsets). This attribute is also - * sent in a response to @NL80211_CMD_GET_WIPHY, indicating the - * number of match sets supported by the driver (u32). + * specifies how the scan is performed (e.g. the interval, the + * channels to scan and the initial delay) as well as the scan + * results that will trigger a wake (i.e. the matchsets). This + * attribute is also sent in a response to + * @NL80211_CMD_GET_WIPHY, indicating the number of match sets + * supported by the driver (u32). * @NL80211_WOWLAN_TRIG_NET_DETECT_RESULTS: nested attribute * containing an array with information about what triggered the * wake up. If no elements are present in the array, it means diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c64100ec79e3..4542e8683beb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -398,6 +398,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = { [NL80211_ATTR_MAC_MASK] = { .len = ETH_ALEN }, [NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG }, [NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 }, + [NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 }, }; /* policy for the key attributes */ @@ -6205,6 +6206,10 @@ nl80211_parse_sched_scan(struct wiphy *wiphy, struct wireless_dev *wdev, } } + if (attrs[NL80211_ATTR_SCHED_SCAN_DELAY]) + request->delay = + nla_get_u32(attrs[NL80211_ATTR_SCHED_SCAN_DELAY]); + request->interval = interval; request->scan_start = jiffies; -- cgit v1.2.3-70-g09d2 From a1443f5a273713d4bfda360e45aa6e1d14fe7324 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Fri, 23 Jan 2015 15:42:46 +0200 Subject: Bluetooth: Convert Set SC to use HCI Request This patch converts the Set Secure Connection HCI handling to use a HCI request instead of using a hard-coded callback in hci_event.c. This e.g. ensures that we don't clear the flags incorrectly if something goes wrong with the power up process (not related to a mgmt Set SC command). The code can also be simplified a bit since only one pending Set SC command is allowed, i.e. mgmt_pending_foreach usage is not needed. 
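For context only (not part of this patch): a minimal sketch of the HCI request pattern the handler is converted to, queueing the command with hci_req_add() and resolving the pending mgmt command from the completion callback. The function names example_sc_send()/example_sc_complete() are placeholders; the real callback added by the patch is sc_enable_complete() below.

  #include <net/bluetooth/bluetooth.h>
  #include <net/bluetooth/hci_core.h>

  /* runs asynchronously once the controller has answered the request */
  static void example_sc_complete(struct hci_dev *hdev, u8 status, u16 opcode)
  {
          BT_DBG("%s status %u opcode 0x%4.4x", hdev->name, status, opcode);
          /* look up the pending MGMT_OP_SET_SECURE_CONN command and
           * complete it according to status here
           */
  }

  static int example_sc_send(struct hci_dev *hdev, u8 val)
  {
          struct hci_request req;

          hci_req_init(&req, hdev);
          hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, sizeof(val), &val);
          return hci_req_run(&req, example_sc_complete);
  }
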
Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 - net/bluetooth/hci_event.c | 4 +- net/bluetooth/mgmt.c | 92 +++++++++++++++++++++------------------- 3 files changed, 50 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7777124bff55..0f5e59f1e3cb 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1369,7 +1369,6 @@ int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr, void mgmt_auth_failed(struct hci_conn *conn, u8 status); void mgmt_auth_enable_complete(struct hci_dev *hdev, u8 status); void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status); -void mgmt_sc_enable_complete(struct hci_dev *hdev, u8 enable, u8 status); void mgmt_set_class_of_dev_complete(struct hci_dev *hdev, u8 *dev_class, u8 status); void mgmt_set_local_name_complete(struct hci_dev *hdev, u8 *name, u8 status); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index a58845e98921..e2b81adc232f 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -525,9 +525,7 @@ static void hci_cc_write_sc_support(struct hci_dev *hdev, struct sk_buff *skb) hdev->features[1][0] &= ~LMP_HOST_SC; } - if (test_bit(HCI_MGMT, &hdev->dev_flags)) - mgmt_sc_enable_complete(hdev, sent->support, status); - else if (!status) { + if (!test_bit(HCI_MGMT, &hdev->dev_flags) && !status) { if (sent->support) set_bit(HCI_SC_ENABLED, &hdev->dev_flags); else diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 862a005d9db2..25e40e82b9a2 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -4741,11 +4741,57 @@ unlock: return err; } +static void sc_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode) +{ + struct pending_cmd *cmd; + struct mgmt_mode *cp; + + BT_DBG("%s status %u", hdev->name, status); + + hci_dev_lock(hdev); + + cmd = mgmt_pending_find(MGMT_OP_SET_SECURE_CONN, hdev); + if (!cmd) + goto unlock; + + if (status) { + cmd_status(cmd->sk, cmd->index, cmd->opcode, + mgmt_status(status)); + goto remove; + } + + cp = cmd->param; + + switch (cp->val) { + case 0x00: + clear_bit(HCI_SC_ENABLED, &hdev->dev_flags); + clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + break; + case 0x01: + set_bit(HCI_SC_ENABLED, &hdev->dev_flags); + clear_bit(HCI_SC_ONLY, &hdev->dev_flags); + break; + case 0x02: + set_bit(HCI_SC_ENABLED, &hdev->dev_flags); + set_bit(HCI_SC_ONLY, &hdev->dev_flags); + break; + } + + send_settings_rsp(cmd->sk, MGMT_OP_SET_SECURE_CONN, hdev); + new_settings(hdev, cmd->sk); + +remove: + mgmt_pending_remove(cmd); +unlock: + hci_dev_unlock(hdev); +} + static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, void *data, u16 len) { struct mgmt_mode *cp = data; struct pending_cmd *cmd; + struct hci_request req; u8 val; int err; @@ -4814,17 +4860,14 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev, goto failed; } - err = hci_send_cmd(hdev, HCI_OP_WRITE_SC_SUPPORT, 1, &val); + hci_req_init(&req, hdev); + hci_req_add(&req, HCI_OP_WRITE_SC_SUPPORT, 1, &val); + err = hci_req_run(&req, sc_enable_complete); if (err < 0) { mgmt_pending_remove(cmd); goto failed; } - if (cp->val == 0x02) - set_bit(HCI_SC_ONLY, &hdev->dev_flags); - else - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); - failed: hci_dev_unlock(hdev); return err; @@ -7001,43 +7044,6 @@ void mgmt_ssp_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) hci_req_run(&req, NULL); } -void 
mgmt_sc_enable_complete(struct hci_dev *hdev, u8 enable, u8 status) -{ - struct cmd_lookup match = { NULL, hdev }; - bool changed = false; - - if (status) { - u8 mgmt_err = mgmt_status(status); - - if (enable) { - if (test_and_clear_bit(HCI_SC_ENABLED, - &hdev->dev_flags)) - new_settings(hdev, NULL); - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); - } - - mgmt_pending_foreach(MGMT_OP_SET_SECURE_CONN, hdev, - cmd_status_rsp, &mgmt_err); - return; - } - - if (enable) { - changed = !test_and_set_bit(HCI_SC_ENABLED, &hdev->dev_flags); - } else { - changed = test_and_clear_bit(HCI_SC_ENABLED, &hdev->dev_flags); - clear_bit(HCI_SC_ONLY, &hdev->dev_flags); - } - - mgmt_pending_foreach(MGMT_OP_SET_SECURE_CONN, hdev, - settings_rsp, &match); - - if (changed) - new_settings(hdev, match.sk); - - if (match.sk) - sock_put(match.sk); -} - static void sk_lookup(struct pending_cmd *cmd, void *data) { struct cmd_lookup *match = data; -- cgit v1.2.3-70-g09d2 From c5ed1df781cb544d4e4d189bb5b6ec7336d8888c Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Mon, 19 Jan 2015 08:30:30 +0100 Subject: bcma: use standard bus scanning during early register MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Starting with kernel 3.19-rc1 early registration of bcma on MIPS is done a bit later, with memory allocator available. This allows us to simplify code by using standard bus scanning method. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/bcma_private.h | 7 +---- drivers/bcma/host_soc.c | 2 +- drivers/bcma/main.c | 33 +++++++---------------- drivers/bcma/scan.c | 62 +++---------------------------------------- include/linux/bcma/bcma_soc.h | 2 -- 5 files changed, 15 insertions(+), 91 deletions(-) (limited to 'include') diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index 59422b5fa46c..3f314c98d089 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -28,9 +28,7 @@ void bcma_prepare_core(struct bcma_bus *bus, struct bcma_device *core); void bcma_init_bus(struct bcma_bus *bus); int bcma_bus_register(struct bcma_bus *bus); void bcma_bus_unregister(struct bcma_bus *bus); -int __init bcma_bus_early_register(struct bcma_bus *bus, - struct bcma_device *core_cc, - struct bcma_device *core_mips); +int __init bcma_bus_early_register(struct bcma_bus *bus); #ifdef CONFIG_PM int bcma_bus_suspend(struct bcma_bus *bus); int bcma_bus_resume(struct bcma_bus *bus); @@ -39,9 +37,6 @@ int bcma_bus_resume(struct bcma_bus *bus); /* scan.c */ void bcma_detect_chip(struct bcma_bus *bus); int bcma_bus_scan(struct bcma_bus *bus); -int __init bcma_bus_scan_early(struct bcma_bus *bus, - struct bcma_device_id *match, - struct bcma_device *core); /* sprom.c */ int bcma_sprom_get(struct bcma_bus *bus); diff --git a/drivers/bcma/host_soc.c b/drivers/bcma/host_soc.c index 335cbcfd945b..2dce34789329 100644 --- a/drivers/bcma/host_soc.c +++ b/drivers/bcma/host_soc.c @@ -193,7 +193,7 @@ int __init bcma_host_soc_init(struct bcma_soc *soc) int err; /* Scan bus and initialize it */ - err = bcma_bus_early_register(bus, &soc->core_cc, &soc->core_mips); + err = bcma_bus_early_register(bus); if (err) iounmap(bus->mmio); diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index c166d444fef6..c3c5e0a2d5be 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -489,35 +489,20 @@ void bcma_bus_unregister(struct bcma_bus *bus) kfree(cores[0]); } -int __init bcma_bus_early_register(struct bcma_bus *bus, - struct bcma_device *core_cc, - struct 
bcma_device *core_mips) +/* + * This is a special version of bus registration function designed for SoCs. + * It scans bus and performs basic initialization of main cores only. + * Please note it requires memory allocation, however it won't try to sleep. + */ +int __init bcma_bus_early_register(struct bcma_bus *bus) { int err; struct bcma_device *core; - struct bcma_device_id match; - - match.manuf = BCMA_MANUF_BCM; - match.id = bcma_cc_core_id(bus); - match.class = BCMA_CL_SIM; - match.rev = BCMA_ANY_REV; - - /* Scan for chip common core */ - err = bcma_bus_scan_early(bus, &match, core_cc); - if (err) { - bcma_err(bus, "Failed to scan for common core: %d\n", err); - return -1; - } - - match.manuf = BCMA_MANUF_MIPS; - match.id = BCMA_CORE_MIPS_74K; - match.class = BCMA_CL_SIM; - match.rev = BCMA_ANY_REV; - /* Scan for mips core */ - err = bcma_bus_scan_early(bus, &match, core_mips); + /* Scan for devices (cores) */ + err = bcma_bus_scan(bus); if (err) { - bcma_err(bus, "Failed to scan for mips core: %d\n", err); + bcma_err(bus, "Failed to scan bus: %d\n", err); return -1; } diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c index 5328ee5b4df0..df806b9c5490 100644 --- a/drivers/bcma/scan.c +++ b/drivers/bcma/scan.c @@ -461,6 +461,10 @@ int bcma_bus_scan(struct bcma_bus *bus) int err, core_num = 0; + /* Skip if bus was already scanned (e.g. during early register) */ + if (bus->nr_cores) + return 0; + erombase = bcma_scan_read32(bus, 0, BCMA_CC_EROM); if (bus->hosttype == BCMA_HOSTTYPE_SOC) { eromptr = ioremap_nocache(erombase, BCMA_CORE_SIZE); @@ -519,61 +523,3 @@ out: return err; } - -int __init bcma_bus_scan_early(struct bcma_bus *bus, - struct bcma_device_id *match, - struct bcma_device *core) -{ - u32 erombase; - u32 __iomem *eromptr, *eromend; - - int err = -ENODEV; - int core_num = 0; - - erombase = bcma_scan_read32(bus, 0, BCMA_CC_EROM); - if (bus->hosttype == BCMA_HOSTTYPE_SOC) { - eromptr = ioremap_nocache(erombase, BCMA_CORE_SIZE); - if (!eromptr) - return -ENOMEM; - } else { - eromptr = bus->mmio; - } - - eromend = eromptr + BCMA_CORE_SIZE / sizeof(u32); - - bcma_scan_switch_core(bus, erombase); - - while (eromptr < eromend) { - memset(core, 0, sizeof(*core)); - INIT_LIST_HEAD(&core->list); - core->bus = bus; - - err = bcma_get_next_core(bus, &eromptr, match, core_num, core); - if (err == -ENODEV) { - core_num++; - continue; - } else if (err == -ENXIO) - continue; - else if (err == -ESPIPE) - break; - else if (err < 0) - goto out; - - core->core_index = core_num++; - bus->nr_cores++; - bcma_info(bus, "Core %d found: %s (manuf 0x%03X, id 0x%03X, rev 0x%02X, class 0x%X)\n", - core->core_index, bcma_device_name(&core->id), - core->id.manuf, core->id.id, core->id.rev, - core->id.class); - - list_add_tail(&core->list, &bus->cores); - err = 0; - break; - } - -out: - if (bus->hosttype == BCMA_HOSTTYPE_SOC) - iounmap(eromptr); - - return err; -} diff --git a/include/linux/bcma/bcma_soc.h b/include/linux/bcma/bcma_soc.h index f24d245f8394..1b5fc0c3b1b5 100644 --- a/include/linux/bcma/bcma_soc.h +++ b/include/linux/bcma/bcma_soc.h @@ -5,8 +5,6 @@ struct bcma_soc { struct bcma_bus bus; - struct bcma_device core_cc; - struct bcma_device core_mips; }; int __init bcma_host_soc_register(struct bcma_soc *soc); -- cgit v1.2.3-70-g09d2 From 193523bf937309d57c6dd7839bcf34d7a029dbee Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 20 Jan 2015 15:15:47 +0100 Subject: vxlan: advertise netns of vxlan dev in fdb msg Netlink FDB messages are sent in the link netns. 
The header of these messages contains the ifindex (ndm_ifindex) of the netdevice, but this ifindex is unusable in case of x-netns vxlan. I named the new attribute NDA_NDM_IFINDEX_NETNSID, to avoid confusion with NDA_IFINDEX. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 5 +++++ include/uapi/linux/neighbour.h | 1 + net/core/net_namespace.c | 1 + 3 files changed, 7 insertions(+) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 0346eaa6d236..19d3664ab9dd 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -339,6 +339,11 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, ndm->ndm_flags = fdb->flags; ndm->ndm_type = RTN_UNICAST; + if (!net_eq(dev_net(vxlan->dev), vxlan->net) && + nla_put_s32(skb, NDA_NDM_IFINDEX_NETNSID, + peernet2id(vxlan->net, dev_net(vxlan->dev)))) + goto nla_put_failure; + if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) goto nla_put_failure; diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index f3d77f9f1e0b..38f236853cc0 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -25,6 +25,7 @@ enum { NDA_VNI, NDA_IFINDEX, NDA_MASTER, + NDA_NDM_IFINDEX_NETNSID, __NDA_MAX }; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 9d1a4cac83b6..b7bde551ef76 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -202,6 +202,7 @@ int peernet2id(struct net *net, struct net *peer) return id >= 0 ? id : NETNSA_NSID_NOT_ASSIGNED; } +EXPORT_SYMBOL(peernet2id); struct net *get_net_ns_by_id(struct net *net, int id) { -- cgit v1.2.3-70-g09d2 From d998f8efa47221405ceae129aa93fa6d4ac8510d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 20 Jan 2015 11:23:04 -0800 Subject: udp: Do not require sock in udp_tunnel_xmit_skb The UDP tunnel transmit functions udp_tunnel_xmit_skb and udp_tunnel6_xmit_skb include a socket argument. The socket being passed to the functions (from VXLAN) is a UDP created for receive side. The only thing that the socket is used for in the transmit functions is to get the setting for checksum (enabled or zero). This patch removes the argument and and adds a nocheck argument for checksum setting. This eliminates the unnecessary dependency on a UDP socket for UDP tunnel transmit. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 10 ++++++---- include/net/udp_tunnel.h | 16 ++++++++-------- net/ipv4/geneve.c | 5 +++-- net/ipv4/udp_tunnel.c | 12 ++++++------ net/ipv6/ip6_udp_tunnel.c | 12 ++++++------ 5 files changed, 29 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 19d3664ab9dd..a288ceab502e 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1769,8 +1769,9 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - udp_tunnel6_xmit_skb(vs->sock, dst, skb, dev, saddr, daddr, prio, - ttl, src_port, dst_port); + udp_tunnel6_xmit_skb(dst, skb, dev, saddr, daddr, prio, + ttl, src_port, dst_port, + udp_get_no_check6_tx(vs->sock->sk)); return 0; err: dst_release(dst); @@ -1848,8 +1849,9 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return udp_tunnel_xmit_skb(vs->sock, rt, skb, src, dst, tos, - ttl, df, src_port, dst_port, xnet); + return udp_tunnel_xmit_skb(rt, skb, src, dst, tos, + ttl, df, src_port, dst_port, xnet, + vs->sock->sk->sk_no_check_tx); } EXPORT_SYMBOL_GPL(vxlan_xmit_skb); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index 2a50a70ef587..1a20d33d56bc 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -77,17 +77,17 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, struct udp_tunnel_sock_cfg *sock_cfg); /* Transmit the skb using UDP encapsulation. */ -int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, - struct sk_buff *skb, __be32 src, __be32 dst, - __u8 tos, __u8 ttl, __be16 df, __be16 src_port, - __be16 dst_port, bool xnet); +int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool xnet, bool nocheck); #if IS_ENABLED(CONFIG_IPV6) -int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, - struct sk_buff *skb, struct net_device *dev, - struct in6_addr *saddr, struct in6_addr *daddr, +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, + struct net_device *dev, struct in6_addr *saddr, + struct in6_addr *daddr, __u8 prio, __u8 ttl, __be16 src_port, - __be16 dst_port); + __be16 dst_port, bool nocheck); #endif void udp_tunnel_sock_release(struct socket *sock); diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 9568594ca2f1..93e51199e44b 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -136,8 +136,9 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, skb_set_inner_protocol(skb, htons(ETH_P_TEB)); - return udp_tunnel_xmit_skb(gs->sock, rt, skb, src, dst, - tos, ttl, df, src_port, dst_port, xnet); + return udp_tunnel_xmit_skb(rt, skb, src, dst, + tos, ttl, df, src_port, dst_port, xnet, + gs->sock->sk->sk_no_check_tx); } EXPORT_SYMBOL_GPL(geneve_xmit_skb); diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c index 9996e63ed304..c83b35485056 100644 --- a/net/ipv4/udp_tunnel.c +++ b/net/ipv4/udp_tunnel.c @@ -75,10 +75,10 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, } EXPORT_SYMBOL_GPL(setup_udp_tunnel_sock); -int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, - struct sk_buff *skb, __be32 src, __be32 dst, - __u8 tos, __u8 ttl, __be16 df, __be16 src_port, - __be16 dst_port, bool xnet) +int udp_tunnel_xmit_skb(struct rtable *rt, struct sk_buff *skb, + __be32 src, __be32 dst, __u8 tos, __u8 ttl, + __be16 df, __be16 src_port, __be16 dst_port, + bool 
xnet, bool nocheck) { struct udphdr *uh; @@ -90,9 +90,9 @@ int udp_tunnel_xmit_skb(struct socket *sock, struct rtable *rt, uh->source = src_port; uh->len = htons(skb->len); - udp_set_csum(sock->sk->sk_no_check_tx, skb, src, dst, skb->len); + udp_set_csum(nocheck, skb, src, dst, skb->len); - return iptunnel_xmit(sock->sk, rt, skb, src, dst, IPPROTO_UDP, + return iptunnel_xmit(skb->sk, rt, skb, src, dst, IPPROTO_UDP, tos, ttl, df, xnet); } EXPORT_SYMBOL_GPL(udp_tunnel_xmit_skb); diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 8db6c98fe218..32d9b268e7d8 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -62,14 +62,14 @@ error: } EXPORT_SYMBOL_GPL(udp_sock_create6); -int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, - struct sk_buff *skb, struct net_device *dev, - struct in6_addr *saddr, struct in6_addr *daddr, - __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port) +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, + struct net_device *dev, struct in6_addr *saddr, + struct in6_addr *daddr, + __u8 prio, __u8 ttl, __be16 src_port, + __be16 dst_port, bool nocheck) { struct udphdr *uh; struct ipv6hdr *ip6h; - struct sock *sk = sock->sk; __skb_push(skb, sizeof(*uh)); skb_reset_transport_header(skb); @@ -85,7 +85,7 @@ int udp_tunnel6_xmit_skb(struct socket *sock, struct dst_entry *dst, | IPSKB_REROUTED); skb_dst_set(skb, dst); - udp6_set_csum(udp_get_no_check6_tx(sk), skb, saddr, daddr, skb->len); + udp6_set_csum(nocheck, skb, saddr, daddr, skb->len); __skb_push(skb, sizeof(*ip6h)); skb_reset_network_header(skb); -- cgit v1.2.3-70-g09d2 From af33c1adae1e095e90d14fe35501256ebb07aabf Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Tue, 20 Jan 2015 11:23:05 -0800 Subject: vxlan: Eliminate dependency on UDP socket in transmit path In the vxlan transmit path there is no need to reference the socket for a tunnel which is needed for the receive side. We do, however, need the vxlan_dev flags. This patch eliminate references to the socket in the transmit path, and changes VXLAN_F_UNSHAREABLE to be VXLAN_F_RCV_FLAGS. This mask is used to store the flags applicable to receive (GBP, CSUM6_RX, and REMCSUM_RX) in the vxlan_sock flags. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- drivers/net/vxlan.c | 60 ++++++++++++++++++++----------------------- include/net/vxlan.h | 13 ++++++---- net/openvswitch/vport-vxlan.c | 6 ++--- 3 files changed, 38 insertions(+), 41 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index a288ceab502e..87736e65cd15 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -270,12 +270,13 @@ static struct vxlan_sock *vxlan_find_sock(struct net *net, sa_family_t family, __be16 port, u32 flags) { struct vxlan_sock *vs; - u32 match_flags = flags & VXLAN_F_UNSHAREABLE; + + flags &= VXLAN_F_RCV_FLAGS; hlist_for_each_entry_rcu(vs, vs_head(net, port), hlist) { if (inet_sk(vs->sock->sk)->inet_sport == port && inet_sk(vs->sock->sk)->sk.sk_family == family && - (vs->flags & VXLAN_F_UNSHAREABLE) == match_flags) + vs->flags == flags) return vs; } return NULL; @@ -1674,7 +1675,7 @@ static bool route_shortcircuit(struct net_device *dev, struct sk_buff *skb) return false; } -static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs, +static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, u32 vxflags, struct vxlan_metadata *md) { struct vxlanhdr_gbp *gbp; @@ -1692,21 +1693,20 @@ static void vxlan_build_gbp_hdr(struct vxlanhdr *vxh, struct vxlan_sock *vs, } #if IS_ENABLED(CONFIG_IPV6) -static int vxlan6_xmit_skb(struct vxlan_sock *vs, - struct dst_entry *dst, struct sk_buff *skb, +static int vxlan6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, __be16 src_port, __be16 dst_port, - struct vxlan_metadata *md, bool xnet) + struct vxlan_metadata *md, bool xnet, u32 vxflags) { struct vxlanhdr *vxh; int min_headroom; int err; - bool udp_sum = !udp_get_no_check6_tx(vs->sock->sk); + bool udp_sum = !(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX); int type = udp_sum ? SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; u16 hdrlen = sizeof(struct vxlanhdr); - if ((vs->flags & VXLAN_F_REMCSUM_TX) && + if ((vxflags & VXLAN_F_REMCSUM_TX) && skb->ip_summed == CHECKSUM_PARTIAL) { int csum_start = skb_checksum_start_offset(skb); @@ -1764,14 +1764,14 @@ static int vxlan6_xmit_skb(struct vxlan_sock *vs, } } - if (vs->flags & VXLAN_F_GBP) - vxlan_build_gbp_hdr(vxh, vs, md); + if (vxflags & VXLAN_F_GBP) + vxlan_build_gbp_hdr(vxh, vxflags, md); skb_set_inner_protocol(skb, htons(ETH_P_TEB)); udp_tunnel6_xmit_skb(dst, skb, dev, saddr, daddr, prio, ttl, src_port, dst_port, - udp_get_no_check6_tx(vs->sock->sk)); + !!(vxflags & VXLAN_F_UDP_ZERO_CSUM6_TX)); return 0; err: dst_release(dst); @@ -1779,20 +1779,19 @@ err: } #endif -int vxlan_xmit_skb(struct vxlan_sock *vs, - struct rtable *rt, struct sk_buff *skb, +int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, - struct vxlan_metadata *md, bool xnet) + struct vxlan_metadata *md, bool xnet, u32 vxflags) { struct vxlanhdr *vxh; int min_headroom; int err; - bool udp_sum = !vs->sock->sk->sk_no_check_tx; + bool udp_sum = !!(vxflags & VXLAN_F_UDP_CSUM); int type = udp_sum ? 
SKB_GSO_UDP_TUNNEL_CSUM : SKB_GSO_UDP_TUNNEL; u16 hdrlen = sizeof(struct vxlanhdr); - if ((vs->flags & VXLAN_F_REMCSUM_TX) && + if ((vxflags & VXLAN_F_REMCSUM_TX) && skb->ip_summed == CHECKSUM_PARTIAL) { int csum_start = skb_checksum_start_offset(skb); @@ -1844,14 +1843,14 @@ int vxlan_xmit_skb(struct vxlan_sock *vs, } } - if (vs->flags & VXLAN_F_GBP) - vxlan_build_gbp_hdr(vxh, vs, md); + if (vxflags & VXLAN_F_GBP) + vxlan_build_gbp_hdr(vxh, vxflags, md); skb_set_inner_protocol(skb, htons(ETH_P_TEB)); return udp_tunnel_xmit_skb(rt, skb, src, dst, tos, ttl, df, src_port, dst_port, xnet, - vs->sock->sk->sk_no_check_tx); + !(vxflags & VXLAN_F_UDP_CSUM)); } EXPORT_SYMBOL_GPL(vxlan_xmit_skb); @@ -1983,10 +1982,11 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, md.vni = htonl(vni << 8); md.gbp = skb->mark; - err = vxlan_xmit_skb(vxlan->vn_sock, rt, skb, - fl4.saddr, dst->sin.sin_addr.s_addr, - tos, ttl, df, src_port, dst_port, &md, - !net_eq(vxlan->net, dev_net(vxlan->dev))); + err = vxlan_xmit_skb(rt, skb, fl4.saddr, + dst->sin.sin_addr.s_addr, tos, ttl, df, + src_port, dst_port, &md, + !net_eq(vxlan->net, dev_net(vxlan->dev)), + vxlan->flags); if (err < 0) { /* skb is already freed. */ skb = NULL; @@ -2042,10 +2042,10 @@ static void vxlan_xmit_one(struct sk_buff *skb, struct net_device *dev, md.vni = htonl(vni << 8); md.gbp = skb->mark; - err = vxlan6_xmit_skb(vxlan->vn_sock, ndst, skb, - dev, &fl6.saddr, &fl6.daddr, 0, ttl, - src_port, dst_port, &md, - !net_eq(vxlan->net, dev_net(vxlan->dev))); + err = vxlan6_xmit_skb(ndst, skb, dev, &fl6.saddr, &fl6.daddr, + 0, ttl, src_port, dst_port, &md, + !net_eq(vxlan->net, dev_net(vxlan->dev)), + vxlan->flags); #endif } @@ -2517,15 +2517,11 @@ static struct socket *vxlan_create_sock(struct net *net, bool ipv6, if (ipv6) { udp_conf.family = AF_INET6; - udp_conf.use_udp6_tx_checksums = - !(flags & VXLAN_F_UDP_ZERO_CSUM6_TX); udp_conf.use_udp6_rx_checksums = !(flags & VXLAN_F_UDP_ZERO_CSUM6_RX); } else { udp_conf.family = AF_INET; udp_conf.local_ip.s_addr = INADDR_ANY; - udp_conf.use_udp_checksums = - !!(flags & VXLAN_F_UDP_CSUM); } udp_conf.local_udp_port = port; @@ -2569,7 +2565,7 @@ static struct vxlan_sock *vxlan_socket_create(struct net *net, __be16 port, atomic_set(&vs->refcnt, 1); vs->rcv = rcv; vs->data = data; - vs->flags = flags; + vs->flags = (flags & VXLAN_F_RCV_FLAGS); /* Initialize the vxlan udp offloads structure */ vs->udp_offloads.port = port; diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 7be8c342fc95..2927d6244481 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -129,8 +129,12 @@ struct vxlan_sock { #define VXLAN_F_REMCSUM_RX 0x400 #define VXLAN_F_GBP 0x800 -/* These flags must match in order for a socket to be shareable */ -#define VXLAN_F_UNSHAREABLE VXLAN_F_GBP +/* Flags that are used in the receive patch. 
These flags must match in + * order for a socket to be shareable + */ +#define VXLAN_F_RCV_FLAGS (VXLAN_F_GBP | \ + VXLAN_F_UDP_ZERO_CSUM6_RX | \ + VXLAN_F_REMCSUM_RX) struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, vxlan_rcv_t *rcv, void *data, @@ -138,11 +142,10 @@ struct vxlan_sock *vxlan_sock_add(struct net *net, __be16 port, void vxlan_sock_release(struct vxlan_sock *vs); -int vxlan_xmit_skb(struct vxlan_sock *vs, - struct rtable *rt, struct sk_buff *skb, +int vxlan_xmit_skb(struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, struct vxlan_metadata *md, - bool xnet); + bool xnet, u32 vxflags); static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, netdev_features_t features) diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 8a2d54cba9ba..3cc983bf444a 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -252,12 +252,10 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); md.gbp = vxlan_ext_gbp(skb); - err = vxlan_xmit_skb(vxlan_port->vs, rt, skb, - fl.saddr, tun_key->ipv4_dst, + err = vxlan_xmit_skb(rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, src_port, dst_port, - &md, - false); + &md, false, vxlan_port->exts); if (err < 0) ip_rt_put(rt); return err; -- cgit v1.2.3-70-g09d2 From 872bf2fb69d90e3619befee842fc26db39d8e475 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:35 +0200 Subject: net/mlx4_core: Maintain a persistent memory for mlx4 device Maintain a persistent memory that should survive reset flow/PCI error. This comes as a preparation for coming series to support above flows. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/infiniband/hw/mlx4/alias_GUID.c | 2 +- drivers/infiniband/hw/mlx4/mad.c | 3 +- drivers/infiniband/hw/mlx4/main.c | 17 ++-- drivers/infiniband/hw/mlx4/mr.c | 6 +- drivers/infiniband/hw/mlx4/sysfs.c | 6 +- drivers/net/ethernet/mellanox/mlx4/alloc.c | 15 +-- drivers/net/ethernet/mellanox/mlx4/catas.c | 13 +-- drivers/net/ethernet/mellanox/mlx4/cmd.c | 46 +++++---- drivers/net/ethernet/mellanox/mlx4/en_cq.c | 4 +- drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_main.c | 4 +- drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 2 +- drivers/net/ethernet/mellanox/mlx4/en_rx.c | 4 +- drivers/net/ethernet/mellanox/mlx4/en_tx.c | 4 +- drivers/net/ethernet/mellanox/mlx4/eq.c | 42 ++++---- drivers/net/ethernet/mellanox/mlx4/icm.c | 11 ++- drivers/net/ethernet/mellanox/mlx4/main.c | 106 +++++++++++++-------- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 9 +- drivers/net/ethernet/mellanox/mlx4/mr.c | 8 +- drivers/net/ethernet/mellanox/mlx4/pd.c | 6 +- drivers/net/ethernet/mellanox/mlx4/port.c | 17 ++-- drivers/net/ethernet/mellanox/mlx4/reset.c | 23 +++-- .../net/ethernet/mellanox/mlx4/resource_tracker.c | 36 ++++--- include/linux/mlx4/device.h | 11 ++- 24 files changed, 234 insertions(+), 163 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/alias_GUID.c b/drivers/infiniband/hw/mlx4/alias_GUID.c index 0eb141c41416..a31e031afd87 100644 --- a/drivers/infiniband/hw/mlx4/alias_GUID.c +++ b/drivers/infiniband/hw/mlx4/alias_GUID.c @@ -154,7 +154,7 @@ void mlx4_ib_notify_slaves_on_guid_change(struct mlx4_ib_dev *dev, continue; slave_id = (block_num * NUM_ALIAS_GUID_IN_REC) + i ; - if (slave_id >= dev->dev->num_vfs + 1) + if (slave_id >= dev->dev->persist->num_vfs + 1) return; tmp_cur_ag = *(__be64 *)&p_data[i * GUID_REC_SIZE]; form_cache_ag = get_cached_alias_guid(dev, port_num, diff --git a/drivers/infiniband/hw/mlx4/mad.c b/drivers/infiniband/hw/mlx4/mad.c index 82a7dd87089b..c7619716c31d 100644 --- a/drivers/infiniband/hw/mlx4/mad.c +++ b/drivers/infiniband/hw/mlx4/mad.c @@ -1951,7 +1951,8 @@ static int mlx4_ib_alloc_demux_ctx(struct mlx4_ib_dev *dev, ctx->ib_dev = &dev->ib_dev; for (i = 0; - i < min(dev->dev->caps.sqp_demux, (u16)(dev->dev->num_vfs + 1)); + i < min(dev->dev->caps.sqp_demux, + (u16)(dev->dev->persist->num_vfs + 1)); i++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev->dev, i); diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 57ecc5b204f3..b4fa6f658800 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -198,7 +198,7 @@ static int mlx4_ib_query_device(struct ib_device *ibdev, props->vendor_id = be32_to_cpup((__be32 *) (out_mad->data + 36)) & 0xffffff; - props->vendor_part_id = dev->dev->pdev->device; + props->vendor_part_id = dev->dev->persist->pdev->device; props->hw_ver = be32_to_cpup((__be32 *) (out_mad->data + 32)); memcpy(&props->sys_image_guid, out_mad->data + 4, 8); @@ -1375,7 +1375,7 @@ static ssize_t show_hca(struct device *device, struct device_attribute *attr, { struct mlx4_ib_dev *dev = container_of(device, struct mlx4_ib_dev, ib_dev.dev); - return sprintf(buf, "MT%d\n", dev->dev->pdev->device); + return sprintf(buf, "MT%d\n", dev->dev->persist->pdev->device); } static ssize_t show_fw_ver(struct device *device, struct device_attribute *attr, @@ -1937,7 +1937,8 @@ static void init_pkeys(struct mlx4_ib_dev *ibdev) int i; if (mlx4_is_master(ibdev->dev)) { - for (slave = 0; slave <= ibdev->dev->num_vfs; 
++slave) { + for (slave = 0; slave <= ibdev->dev->persist->num_vfs; + ++slave) { for (port = 1; port <= ibdev->dev->caps.num_ports; ++port) { for (i = 0; i < ibdev->dev->phys_caps.pkey_phys_table_len[port]; @@ -1994,7 +1995,7 @@ static void mlx4_ib_alloc_eqs(struct mlx4_dev *dev, struct mlx4_ib_dev *ibdev) mlx4_foreach_port(i, dev, MLX4_PORT_TYPE_IB) { for (j = 0; j < eq_per_port; j++) { snprintf(name, sizeof(name), "mlx4-ib-%d-%d@%s", - i, j, dev->pdev->bus->name); + i, j, dev->persist->pdev->bus->name); /* Set IRQ for specific name (per ring) */ if (mlx4_assign_eq(dev, name, NULL, &ibdev->eq_table[eq])) { @@ -2058,7 +2059,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev = (struct mlx4_ib_dev *) ib_alloc_device(sizeof *ibdev); if (!ibdev) { - dev_err(&dev->pdev->dev, "Device struct alloc failed\n"); + dev_err(&dev->persist->pdev->dev, + "Device struct alloc failed\n"); return NULL; } @@ -2085,7 +2087,7 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) ibdev->num_ports = num_ports; ibdev->ib_dev.phys_port_cnt = ibdev->num_ports; ibdev->ib_dev.num_comp_vectors = dev->caps.num_comp_vectors; - ibdev->ib_dev.dma_device = &dev->pdev->dev; + ibdev->ib_dev.dma_device = &dev->persist->pdev->dev; if (dev->caps.userspace_caps) ibdev->ib_dev.uverbs_abi_ver = MLX4_IB_UVERBS_ABI_VERSION; @@ -2236,7 +2238,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) sizeof(long), GFP_KERNEL); if (!ibdev->ib_uc_qpns_bitmap) { - dev_err(&dev->pdev->dev, "bit map alloc failed\n"); + dev_err(&dev->persist->pdev->dev, + "bit map alloc failed\n"); goto err_steer_qp_release; } diff --git a/drivers/infiniband/hw/mlx4/mr.c b/drivers/infiniband/hw/mlx4/mr.c index c36ccbd9a644..e0d271782d0a 100644 --- a/drivers/infiniband/hw/mlx4/mr.c +++ b/drivers/infiniband/hw/mlx4/mr.c @@ -401,7 +401,8 @@ struct ib_fast_reg_page_list *mlx4_ib_alloc_fast_reg_page_list(struct ib_device if (!mfrpl->ibfrpl.page_list) goto err_free; - mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->pdev->dev, + mfrpl->mapped_page_list = dma_alloc_coherent(&dev->dev->persist-> + pdev->dev, size, &mfrpl->map, GFP_KERNEL); if (!mfrpl->mapped_page_list) @@ -423,7 +424,8 @@ void mlx4_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list) struct mlx4_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list); int size = page_list->max_page_list_len * sizeof (u64); - dma_free_coherent(&dev->dev->pdev->dev, size, mfrpl->mapped_page_list, + dma_free_coherent(&dev->dev->persist->pdev->dev, size, + mfrpl->mapped_page_list, mfrpl->map); kfree(mfrpl->ibfrpl.page_list); kfree(mfrpl); diff --git a/drivers/infiniband/hw/mlx4/sysfs.c b/drivers/infiniband/hw/mlx4/sysfs.c index cb4c66e723b5..d10c2b8a5dad 100644 --- a/drivers/infiniband/hw/mlx4/sysfs.c +++ b/drivers/infiniband/hw/mlx4/sysfs.c @@ -375,7 +375,7 @@ static void get_name(struct mlx4_ib_dev *dev, char *name, int i, int max) char base_name[9]; /* pci_name format is: bus:dev:func -> xxxx:yy:zz.n */ - strlcpy(name, pci_name(dev->dev->pdev), max); + strlcpy(name, pci_name(dev->dev->persist->pdev), max); strncpy(base_name, name, 8); /*till xxxx:yy:*/ base_name[8] = '\0'; /* with no ARI only 3 last bits are used so when the fn is higher than 8 @@ -792,7 +792,7 @@ static int register_pkey_tree(struct mlx4_ib_dev *device) if (!mlx4_is_master(device->dev)) return 0; - for (i = 0; i <= device->dev->num_vfs; ++i) + for (i = 0; i <= device->dev->persist->num_vfs; ++i) register_one_pkey_tree(device, i); return 0; @@ -807,7 +807,7 @@ static void unregister_pkey_tree(struct mlx4_ib_dev *device) if 
(!mlx4_is_master(device->dev)) return; - for (slave = device->dev->num_vfs; slave >= 0; --slave) { + for (slave = device->dev->persist->num_vfs; slave >= 0; --slave) { list_for_each_entry_safe(p, t, &device->pkeys.pkey_port_list[slave], entry) { diff --git a/drivers/net/ethernet/mellanox/mlx4/alloc.c b/drivers/net/ethernet/mellanox/mlx4/alloc.c index 963dd7e6d547..a716c26e0d99 100644 --- a/drivers/net/ethernet/mellanox/mlx4/alloc.c +++ b/drivers/net/ethernet/mellanox/mlx4/alloc.c @@ -592,7 +592,7 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, buf->nbufs = 1; buf->npages = 1; buf->page_shift = get_order(size) + PAGE_SHIFT; - buf->direct.buf = dma_alloc_coherent(&dev->pdev->dev, + buf->direct.buf = dma_alloc_coherent(&dev->persist->pdev->dev, size, &t, gfp); if (!buf->direct.buf) return -ENOMEM; @@ -619,7 +619,8 @@ int mlx4_buf_alloc(struct mlx4_dev *dev, int size, int max_direct, for (i = 0; i < buf->nbufs; ++i) { buf->page_list[i].buf = - dma_alloc_coherent(&dev->pdev->dev, PAGE_SIZE, + dma_alloc_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, &t, gfp); if (!buf->page_list[i].buf) goto err_free; @@ -657,7 +658,8 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) int i; if (buf->nbufs == 1) - dma_free_coherent(&dev->pdev->dev, size, buf->direct.buf, + dma_free_coherent(&dev->persist->pdev->dev, size, + buf->direct.buf, buf->direct.map); else { if (BITS_PER_LONG == 64 && buf->direct.buf) @@ -665,7 +667,8 @@ void mlx4_buf_free(struct mlx4_dev *dev, int size, struct mlx4_buf *buf) for (i = 0; i < buf->nbufs; ++i) if (buf->page_list[i].buf) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, buf->page_list[i].buf, buf->page_list[i].map); kfree(buf->page_list); @@ -738,7 +741,7 @@ int mlx4_db_alloc(struct mlx4_dev *dev, struct mlx4_db *db, int order, gfp_t gfp if (!mlx4_alloc_db_from_pgdir(pgdir, db, order)) goto out; - pgdir = mlx4_alloc_db_pgdir(&(dev->pdev->dev), gfp); + pgdir = mlx4_alloc_db_pgdir(&dev->persist->pdev->dev, gfp); if (!pgdir) { ret = -ENOMEM; goto out; @@ -775,7 +778,7 @@ void mlx4_db_free(struct mlx4_dev *dev, struct mlx4_db *db) set_bit(i, db->u.pgdir->bits[o]); if (bitmap_full(db->u.pgdir->order1, MLX4_DB_PER_PAGE / 2)) { - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, db->u.pgdir->db_page, db->u.pgdir->db_dma); list_del(&db->u.pgdir->list); kfree(db->u.pgdir); diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 9c656fe4983d..1a102c9bac99 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -70,7 +70,7 @@ static void poll_catas(unsigned long dev_ptr) if (readl(priv->catas_err.map)) { /* If the device is off-line, we cannot try to recover it */ - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) mod_timer(&priv->catas_err.timer, round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); else { @@ -94,6 +94,7 @@ static void catas_reset(struct work_struct *work) { struct mlx4_priv *priv, *tmppriv; struct mlx4_dev *dev; + struct mlx4_dev_persistent *persist; LIST_HEAD(tlist); int ret; @@ -103,20 +104,20 @@ static void catas_reset(struct work_struct *work) spin_unlock_irq(&catas_lock); list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) { - struct pci_dev *pdev = priv->dev.pdev; + struct pci_dev *pdev = priv->dev.persist->pdev; /* If the device is off-line, we cannot reset 
it */ if (pci_channel_offline(pdev)) continue; - ret = mlx4_restart_one(priv->dev.pdev); + ret = mlx4_restart_one(priv->dev.persist->pdev); /* 'priv' now is not valid */ if (ret) pr_err("mlx4 %s: Reset failed (%d)\n", pci_name(pdev), ret); else { - dev = pci_get_drvdata(pdev); - mlx4_dbg(dev, "Reset succeeded\n"); + persist = pci_get_drvdata(pdev); + mlx4_dbg(persist->dev, "Reset succeeded\n"); } } } @@ -134,7 +135,7 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) init_timer(&priv->catas_err.timer); priv->catas_err.map = NULL; - addr = pci_resource_start(dev->pdev, priv->fw.catas_bar) + + addr = pci_resource_start(dev->persist->pdev, priv->fw.catas_bar) + priv->fw.catas_offset; priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 5c93d1451c44..7cd90e6a4272 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -307,7 +307,7 @@ static int cmd_pending(struct mlx4_dev *dev) { u32 status; - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return -EIO; status = readl(mlx4_priv(dev)->cmd.hcr + HCR_STATUS_OFFSET); @@ -328,7 +328,7 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, mutex_lock(&cmd->hcr_mutex); - if (pci_channel_offline(dev->pdev)) { + if (pci_channel_offline(dev->persist->pdev)) { /* * Device is going through error recovery * and cannot accept commands. @@ -342,7 +342,7 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, end += msecs_to_jiffies(GO_BIT_TIMEOUT_MSECS); while (cmd_pending(dev)) { - if (pci_channel_offline(dev->pdev)) { + if (pci_channel_offline(dev->persist->pdev)) { /* * Device is going through error recovery * and cannot accept commands. @@ -464,7 +464,7 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, down(&priv->cmd.poll_sem); - if (pci_channel_offline(dev->pdev)) { + if (pci_channel_offline(dev->persist->pdev)) { /* * Device is going through error recovery * and cannot accept commands. @@ -487,7 +487,7 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, end = msecs_to_jiffies(timeout) + jiffies; while (cmd_pending(dev) && time_before(jiffies, end)) { - if (pci_channel_offline(dev->pdev)) { + if (pci_channel_offline(dev->persist->pdev)) { /* * Device is going through error recovery * and cannot accept commands. 
@@ -612,7 +612,7 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, int out_is_imm, u32 in_modifier, u8 op_modifier, u16 op, unsigned long timeout, int native) { - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return -EIO; if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { @@ -1997,11 +1997,12 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) if (mlx4_is_master(dev)) priv->mfunc.comm = - ioremap(pci_resource_start(dev->pdev, priv->fw.comm_bar) + + ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.comm_bar) + priv->fw.comm_base, MLX4_COMM_PAGESIZE); else priv->mfunc.comm = - ioremap(pci_resource_start(dev->pdev, 2) + + ioremap(pci_resource_start(dev->persist->pdev, 2) + MLX4_SLAVE_COMM_BASE, MLX4_COMM_PAGESIZE); if (!priv->mfunc.comm) { mlx4_err(dev, "Couldn't map communication vector\n"); @@ -2107,9 +2108,9 @@ err_comm_admin: err_comm: iounmap(priv->mfunc.comm); err_vhcr: - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, - priv->mfunc.vhcr, - priv->mfunc.vhcr_dma); + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, + priv->mfunc.vhcr, + priv->mfunc.vhcr_dma); priv->mfunc.vhcr = NULL; return -ENOMEM; } @@ -2130,8 +2131,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } if (!mlx4_is_slave(dev) && !priv->cmd.hcr) { - priv->cmd.hcr = ioremap(pci_resource_start(dev->pdev, 0) + - MLX4_HCR_BASE, MLX4_HCR_SIZE); + priv->cmd.hcr = ioremap(pci_resource_start(dev->persist->pdev, + 0) + MLX4_HCR_BASE, MLX4_HCR_SIZE); if (!priv->cmd.hcr) { mlx4_err(dev, "Couldn't map command register\n"); goto err; @@ -2140,7 +2141,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } if (mlx4_is_mfunc(dev) && !priv->mfunc.vhcr) { - priv->mfunc.vhcr = dma_alloc_coherent(&(dev->pdev->dev), PAGE_SIZE, + priv->mfunc.vhcr = dma_alloc_coherent(&dev->persist->pdev->dev, + PAGE_SIZE, &priv->mfunc.vhcr_dma, GFP_KERNEL); if (!priv->mfunc.vhcr) @@ -2150,7 +2152,8 @@ int mlx4_cmd_init(struct mlx4_dev *dev) } if (!priv->cmd.pool) { - priv->cmd.pool = pci_pool_create("mlx4_cmd", dev->pdev, + priv->cmd.pool = pci_pool_create("mlx4_cmd", + dev->persist->pdev, MLX4_MAILBOX_SIZE, MLX4_MAILBOX_SIZE, 0); if (!priv->cmd.pool) @@ -2202,7 +2205,7 @@ void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask) } if (mlx4_is_mfunc(dev) && priv->mfunc.vhcr && (cleanup_mask & MLX4_CMD_CLEANUP_VHCR)) { - dma_free_coherent(&(dev->pdev->dev), PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, priv->mfunc.vhcr, priv->mfunc.vhcr_dma); priv->mfunc.vhcr = NULL; } @@ -2306,8 +2309,9 @@ u32 mlx4_comm_get_version(void) static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf) { - if ((vf < 0) || (vf >= dev->num_vfs)) { - mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n", vf, dev->num_vfs); + if ((vf < 0) || (vf >= dev->persist->num_vfs)) { + mlx4_err(dev, "Bad vf number:%d (number of activated vf: %d)\n", + vf, dev->persist->num_vfs); return -EINVAL; } @@ -2316,7 +2320,7 @@ static int mlx4_get_slave_indx(struct mlx4_dev *dev, int vf) int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave) { - if (slave < 1 || slave > dev->num_vfs) { + if (slave < 1 || slave > dev->persist->num_vfs) { mlx4_err(dev, "Bad slave number:%d (number of activated slaves: %lu)\n", slave, dev->num_slaves); @@ -2388,7 +2392,7 @@ struct mlx4_slaves_pport mlx4_phys_to_slaves_pport(struct mlx4_dev *dev, if (port <= 0 || port > dev->caps.num_ports) return slaves_pport; - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; i < dev->persist->num_vfs + 1; i++) { struct 
mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, i); if (test_bit(port - 1, actv_ports.ports)) @@ -2408,7 +2412,7 @@ struct mlx4_slaves_pport mlx4_phys_to_slaves_pport_actv( bitmap_zero(slaves_pport.slaves, MLX4_MFUNC_MAX); - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; i < dev->persist->num_vfs + 1; i++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, i); if (bitmap_equal(crit_ports->ports, actv_ports.ports, diff --git a/drivers/net/ethernet/mellanox/mlx4/en_cq.c b/drivers/net/ethernet/mellanox/mlx4/en_cq.c index 82322b1c8411..22da4d0d0f05 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_cq.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_cq.c @@ -70,10 +70,10 @@ int mlx4_en_create_cq(struct mlx4_en_priv *priv, /* Allocate HW buffers on provided NUMA node. * dev->numa_node is used in mtt range allocation flow. */ - set_dev_node(&mdev->dev->pdev->dev, node); + set_dev_node(&mdev->dev->persist->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &cq->wqres, cq->buf_size, 2 * PAGE_SIZE); - set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) goto err_cq; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 90e0f045a6bc..569eda9e83d6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -92,7 +92,7 @@ mlx4_en_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *drvinfo) (u16) (mdev->dev->caps.fw_ver >> 32), (u16) ((mdev->dev->caps.fw_ver >> 16) & 0xffff), (u16) (mdev->dev->caps.fw_ver & 0xffff)); - strlcpy(drvinfo->bus_info, pci_name(mdev->dev->pdev), + strlcpy(drvinfo->bus_info, pci_name(mdev->dev->persist->pdev), sizeof(drvinfo->bus_info)); drvinfo->n_stats = 0; drvinfo->regdump_len = 0; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_main.c b/drivers/net/ethernet/mellanox/mlx4/en_main.c index 9f16f754137b..c643d2bbb7b9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_main.c @@ -241,8 +241,8 @@ static void *mlx4_en_add(struct mlx4_dev *dev) spin_lock_init(&mdev->uar_lock); mdev->dev = dev; - mdev->dma_device = &(dev->pdev->dev); - mdev->pdev = dev->pdev; + mdev->dma_device = &dev->persist->pdev->dev; + mdev->pdev = dev->persist->pdev; mdev->device_up = false; mdev->LSO_support = !!(dev->caps.flags & (1 << 15)); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index d0d6dc1b8e46..43a3f9822f74 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -2457,7 +2457,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, netif_set_real_num_tx_queues(dev, prof->tx_ring_num); netif_set_real_num_rx_queues(dev, prof->rx_ring_num); - SET_NETDEV_DEV(dev, &mdev->dev->pdev->dev); + SET_NETDEV_DEV(dev, &mdev->dev->persist->pdev->dev); dev->dev_port = port - 1; /* diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index a0474eb94aa3..2ba5d368edce 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -387,10 +387,10 @@ int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv, ring->rx_info, tmp); /* Allocate HW buffers on provided NUMA node */ - set_dev_node(&mdev->dev->pdev->dev, node); + set_dev_node(&mdev->dev->persist->pdev->dev, node); err = 
mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); - set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) goto err_info; diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 359bb1286eb5..55f9f5c5344e 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -91,10 +91,10 @@ int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, ring->buf_size = ALIGN(size * ring->stride, MLX4_EN_PAGE_SIZE); /* Allocate HW buffers on provided NUMA node */ - set_dev_node(&mdev->dev->pdev->dev, node); + set_dev_node(&mdev->dev->persist->pdev->dev, node); err = mlx4_alloc_hwq_res(mdev->dev, &ring->wqres, ring->buf_size, 2 * PAGE_SIZE); - set_dev_node(&mdev->dev->pdev->dev, mdev->dev->numa_node); + set_dev_node(&mdev->dev->persist->pdev->dev, mdev->dev->numa_node); if (err) { en_err(priv, "Failed allocating hwq resources\n"); goto err_bounce; diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 3d275fbaf0eb..7538c9ce98a9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -237,7 +237,7 @@ int mlx4_gen_guid_change_eqe(struct mlx4_dev *dev, int slave, u8 port) struct mlx4_eqe eqe; /*don't send if we don't have the that slave */ - if (dev->num_vfs < slave) + if (dev->persist->num_vfs < slave) return 0; memset(&eqe, 0, sizeof eqe); @@ -255,7 +255,7 @@ int mlx4_gen_port_state_change_eqe(struct mlx4_dev *dev, int slave, u8 port, struct mlx4_eqe eqe; /*don't send if we don't have the that slave */ - if (dev->num_vfs < slave) + if (dev->persist->num_vfs < slave) return 0; memset(&eqe, 0, sizeof eqe); @@ -310,7 +310,7 @@ static void set_all_slave_state(struct mlx4_dev *dev, u8 port, int event) struct mlx4_slaves_pport slaves_pport = mlx4_phys_to_slaves_pport(dev, port); - for (i = 0; i < dev->num_vfs + 1; i++) + for (i = 0; i < dev->persist->num_vfs + 1; i++) if (test_bit(i, slaves_pport.slaves)) set_and_calc_slave_port_state(dev, i, port, event, &gen_event); @@ -560,7 +560,8 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) mlx4_priv(dev)->sense.do_sense_port[port] = 1; if (!mlx4_is_master(dev)) break; - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; i < dev->persist->num_vfs + 1; + i++) { if (!test_bit(i, slaves_port.slaves)) continue; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) { @@ -596,7 +597,9 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) if (!mlx4_is_master(dev)) break; if (dev->caps.port_type[port] == MLX4_PORT_TYPE_ETH) - for (i = 0; i < dev->num_vfs + 1; i++) { + for (i = 0; + i < dev->persist->num_vfs + 1; + i++) { if (!test_bit(i, slaves_port.slaves)) continue; if (i == mlx4_master_func_num(dev)) @@ -865,7 +868,7 @@ static void __iomem *mlx4_get_eq_uar(struct mlx4_dev *dev, struct mlx4_eq *eq) if (!priv->eq_table.uar_map[index]) { priv->eq_table.uar_map[index] = - ioremap(pci_resource_start(dev->pdev, 2) + + ioremap(pci_resource_start(dev->persist->pdev, 2) + ((eq->eqn / 4) << PAGE_SHIFT), PAGE_SIZE); if (!priv->eq_table.uar_map[index]) { @@ -928,8 +931,10 @@ static int mlx4_create_eq(struct mlx4_dev *dev, int nent, eq_context = mailbox->buf; for (i = 0; i < npages; ++i) { - eq->page_list[i].buf = dma_alloc_coherent(&dev->pdev->dev, - PAGE_SIZE, &t, GFP_KERNEL); + eq->page_list[i].buf = dma_alloc_coherent(&dev->persist-> + pdev->dev, + PAGE_SIZE, 
&t, + GFP_KERNEL); if (!eq->page_list[i].buf) goto err_out_free_pages; @@ -995,7 +1000,7 @@ err_out_free_eq: err_out_free_pages: for (i = 0; i < npages; ++i) if (eq->page_list[i].buf) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, eq->page_list[i].buf, eq->page_list[i].map); @@ -1044,9 +1049,9 @@ static void mlx4_free_eq(struct mlx4_dev *dev, mlx4_mtt_cleanup(dev, &eq->mtt); for (i = 0; i < npages; ++i) - dma_free_coherent(&dev->pdev->dev, PAGE_SIZE, - eq->page_list[i].buf, - eq->page_list[i].map); + dma_free_coherent(&dev->persist->pdev->dev, PAGE_SIZE, + eq->page_list[i].buf, + eq->page_list[i].map); kfree(eq->page_list); mlx4_bitmap_free(&priv->eq_table.bitmap, eq->eqn, MLX4_USE_RR); @@ -1060,7 +1065,7 @@ static void mlx4_free_irqs(struct mlx4_dev *dev) int i, vec; if (eq_table->have_irq) - free_irq(dev->pdev->irq, dev); + free_irq(dev->persist->pdev->irq, dev); for (i = 0; i < dev->caps.num_comp_vectors + 1; ++i) if (eq_table->eq[i].have_irq) { @@ -1089,7 +1094,8 @@ static int mlx4_map_clr_int(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - priv->clr_base = ioremap(pci_resource_start(dev->pdev, priv->fw.clr_int_bar) + + priv->clr_base = ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.clr_int_bar) + priv->fw.clr_int_base, MLX4_CLR_INT_SIZE); if (!priv->clr_base) { mlx4_err(dev, "Couldn't map interrupt clear register, aborting\n"); @@ -1212,13 +1218,13 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) i * MLX4_IRQNAME_SIZE, MLX4_IRQNAME_SIZE, "mlx4-comp-%d@pci:%s", i, - pci_name(dev->pdev)); + pci_name(dev->persist->pdev)); } else { snprintf(priv->eq_table.irq_names + i * MLX4_IRQNAME_SIZE, MLX4_IRQNAME_SIZE, "mlx4-async@pci:%s", - pci_name(dev->pdev)); + pci_name(dev->persist->pdev)); } eq_name = priv->eq_table.irq_names + @@ -1235,8 +1241,8 @@ int mlx4_init_eq_table(struct mlx4_dev *dev) snprintf(priv->eq_table.irq_names, MLX4_IRQNAME_SIZE, DRV_NAME "@pci:%s", - pci_name(dev->pdev)); - err = request_irq(dev->pdev->irq, mlx4_interrupt, + pci_name(dev->persist->pdev)); + err = request_irq(dev->persist->pdev->irq, mlx4_interrupt, IRQF_SHARED, priv->eq_table.irq_names, dev); if (err) goto err_out_async; diff --git a/drivers/net/ethernet/mellanox/mlx4/icm.c b/drivers/net/ethernet/mellanox/mlx4/icm.c index 97c9b1db1d27..2a9dd460a95f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/icm.c +++ b/drivers/net/ethernet/mellanox/mlx4/icm.c @@ -56,7 +56,7 @@ static void mlx4_free_icm_pages(struct mlx4_dev *dev, struct mlx4_icm_chunk *chu int i; if (chunk->nsg > 0) - pci_unmap_sg(dev->pdev, chunk->mem, chunk->npages, + pci_unmap_sg(dev->persist->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); for (i = 0; i < chunk->npages; ++i) @@ -69,7 +69,8 @@ static void mlx4_free_icm_coherent(struct mlx4_dev *dev, struct mlx4_icm_chunk * int i; for (i = 0; i < chunk->npages; ++i) - dma_free_coherent(&dev->pdev->dev, chunk->mem[i].length, + dma_free_coherent(&dev->persist->pdev->dev, + chunk->mem[i].length, lowmem_page_address(sg_page(&chunk->mem[i])), sg_dma_address(&chunk->mem[i])); } @@ -173,7 +174,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, --cur_order; if (coherent) - ret = mlx4_alloc_icm_coherent(&dev->pdev->dev, + ret = mlx4_alloc_icm_coherent(&dev->persist->pdev->dev, &chunk->mem[chunk->npages], cur_order, gfp_mask); else @@ -193,7 +194,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, if (coherent) ++chunk->nsg; else if (chunk->npages == MLX4_ICM_CHUNK_LEN) { - 
chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, + chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); @@ -208,7 +209,7 @@ struct mlx4_icm *mlx4_alloc_icm(struct mlx4_dev *dev, int npages, } if (!coherent && chunk) { - chunk->nsg = pci_map_sg(dev->pdev, chunk->mem, + chunk->nsg = pci_map_sg(dev->persist->pdev, chunk->mem, chunk->npages, PCI_DMA_BIDIRECTIONAL); diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 03e9eb0dc761..abcee61f8a47 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -318,10 +318,11 @@ static int mlx4_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) return -ENODEV; } - if (dev_cap->uar_size > pci_resource_len(dev->pdev, 2)) { + if (dev_cap->uar_size > pci_resource_len(dev->persist->pdev, 2)) { mlx4_err(dev, "HCA reported UAR size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n", dev_cap->uar_size, - (unsigned long long) pci_resource_len(dev->pdev, 2)); + (unsigned long long) + pci_resource_len(dev->persist->pdev, 2)); return -ENODEV; } @@ -541,8 +542,10 @@ static int mlx4_get_pcie_dev_link_caps(struct mlx4_dev *dev, *speed = PCI_SPEED_UNKNOWN; *width = PCIE_LNK_WIDTH_UNKNOWN; - err1 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP, &lnkcap1); - err2 = pcie_capability_read_dword(dev->pdev, PCI_EXP_LNKCAP2, &lnkcap2); + err1 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP, + &lnkcap1); + err2 = pcie_capability_read_dword(dev->persist->pdev, PCI_EXP_LNKCAP2, + &lnkcap2); if (!err2 && lnkcap2) { /* PCIe r3.0-compliant */ if (lnkcap2 & PCI_EXP_LNKCAP2_SLS_8_0GB) *speed = PCIE_SPEED_8_0GT; @@ -587,7 +590,7 @@ static void mlx4_check_pcie_caps(struct mlx4_dev *dev) return; } - err = pcie_get_minimum_link(dev->pdev, &speed, &width); + err = pcie_get_minimum_link(dev->persist->pdev, &speed, &width); if (err || speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN) { mlx4_warn(dev, @@ -837,10 +840,12 @@ static int mlx4_slave_cap(struct mlx4_dev *dev) if (dev->caps.uar_page_size * (dev->caps.num_uars - dev->caps.reserved_uars) > - pci_resource_len(dev->pdev, 2)) { + pci_resource_len(dev->persist->pdev, + 2)) { mlx4_err(dev, "HCA reported UAR region size of 0x%x bigger than PCI resource 2 size of 0x%llx, aborting\n", dev->caps.uar_page_size * dev->caps.num_uars, - (unsigned long long) pci_resource_len(dev->pdev, 2)); + (unsigned long long) + pci_resource_len(dev->persist->pdev, 2)); goto err_mem; } @@ -1492,9 +1497,9 @@ static int map_bf_area(struct mlx4_dev *dev) if (!dev->caps.bf_reg_size) return -ENXIO; - bf_start = pci_resource_start(dev->pdev, 2) + + bf_start = pci_resource_start(dev->persist->pdev, 2) + (dev->caps.num_uars << PAGE_SHIFT); - bf_len = pci_resource_len(dev->pdev, 2) - + bf_len = pci_resource_len(dev->persist->pdev, 2) - (dev->caps.num_uars << PAGE_SHIFT); priv->bf_mapping = io_mapping_create_wc(bf_start, bf_len); if (!priv->bf_mapping) @@ -1536,7 +1541,8 @@ static int map_internal_clock(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); priv->clock_mapping = - ioremap(pci_resource_start(dev->pdev, priv->fw.clock_bar) + + ioremap(pci_resource_start(dev->persist->pdev, + priv->fw.clock_bar) + priv->fw.clock_offset, MLX4_CLOCK_SIZE); if (!priv->clock_mapping) @@ -1705,7 +1711,8 @@ static void choose_steering_mode(struct mlx4_dev *dev, if (mlx4_log_num_mgm_entry_size <= 0 && dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_FS_EN && (!mlx4_is_mfunc(dev) || - 
(dev_cap->fs_max_num_qp_per_entry >= (dev->num_vfs + 1))) && + (dev_cap->fs_max_num_qp_per_entry >= + (dev->persist->num_vfs + 1))) && choose_log_fs_mgm_entry_size(dev_cap->fs_max_num_qp_per_entry) >= MLX4_MIN_MGM_LOG_ENTRY_SIZE) { dev->oper_log_mgm_entry_size = @@ -2288,7 +2295,8 @@ static void mlx4_enable_msi_x(struct mlx4_dev *dev) for (i = 0; i < nreq; ++i) entries[i].entry = i; - nreq = pci_enable_msix_range(dev->pdev, entries, 2, nreq); + nreq = pci_enable_msix_range(dev->persist->pdev, entries, 2, + nreq); if (nreq < 0) { kfree(entries); @@ -2316,7 +2324,7 @@ no_msi: dev->caps.comp_pool = 0; for (i = 0; i < 2; ++i) - priv->eq_table.eq[i].irq = dev->pdev->irq; + priv->eq_table.eq[i].irq = dev->persist->pdev->irq; } static int mlx4_init_port_info(struct mlx4_dev *dev, int port) @@ -2344,7 +2352,7 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) info->port_attr.show = show_port_type; sysfs_attr_init(&info->port_attr.attr); - err = device_create_file(&dev->pdev->dev, &info->port_attr); + err = device_create_file(&dev->persist->pdev->dev, &info->port_attr); if (err) { mlx4_err(dev, "Failed to create file for port %d\n", port); info->port = -1; @@ -2361,10 +2369,12 @@ static int mlx4_init_port_info(struct mlx4_dev *dev, int port) info->port_mtu_attr.show = show_port_ib_mtu; sysfs_attr_init(&info->port_mtu_attr.attr); - err = device_create_file(&dev->pdev->dev, &info->port_mtu_attr); + err = device_create_file(&dev->persist->pdev->dev, + &info->port_mtu_attr); if (err) { mlx4_err(dev, "Failed to create mtu file for port %d\n", port); - device_remove_file(&info->dev->pdev->dev, &info->port_attr); + device_remove_file(&info->dev->persist->pdev->dev, + &info->port_attr); info->port = -1; } @@ -2376,8 +2386,9 @@ static void mlx4_cleanup_port_info(struct mlx4_port_info *info) if (info->port < 0) return; - device_remove_file(&info->dev->pdev->dev, &info->port_attr); - device_remove_file(&info->dev->pdev->dev, &info->port_mtu_attr); + device_remove_file(&info->dev->persist->pdev->dev, &info->port_attr); + device_remove_file(&info->dev->persist->pdev->dev, + &info->port_mtu_attr); } static int mlx4_init_steering(struct mlx4_dev *dev) @@ -2444,10 +2455,11 @@ static int mlx4_get_ownership(struct mlx4_dev *dev) void __iomem *owner; u32 ret; - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return -EIO; - owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, + owner = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); @@ -2463,10 +2475,11 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) { void __iomem *owner; - if (pci_channel_offline(dev->pdev)) + if (pci_channel_offline(dev->persist->pdev)) return; - owner = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_OWNER_BASE, + owner = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_OWNER_BASE, MLX4_OWNER_SIZE); if (!owner) { mlx4_err(dev, "Failed to obtain ownership bit\n"); @@ -2514,13 +2527,13 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, dev_flags |= MLX4_FLAG_SRIOV | MLX4_FLAG_MASTER; dev_flags &= ~MLX4_FLAG_SLAVE; - dev->num_vfs = total_vfs; + dev->persist->num_vfs = total_vfs; } return dev_flags; disable_sriov: atomic_dec(&pf_loading); - dev->num_vfs = 0; + dev->persist->num_vfs = 0; kfree(dev->dev_vfs); return dev_flags & ~MLX4_FLAG_MASTER; } @@ -2607,7 +2620,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, 
existing_vfs = pci_num_vf(pdev); if (existing_vfs) dev->flags |= MLX4_FLAG_SRIOV; - dev->num_vfs = total_vfs; + dev->persist->num_vfs = total_vfs; } } @@ -2771,12 +2784,14 @@ slave_start: dev->caps.num_ports); goto err_close; } - memcpy(dev->nvfs, nvfs, sizeof(dev->nvfs)); + memcpy(dev->persist->nvfs, nvfs, sizeof(dev->persist->nvfs)); - for (i = 0; i < sizeof(dev->nvfs)/sizeof(dev->nvfs[0]); i++) { + for (i = 0; + i < sizeof(dev->persist->nvfs)/ + sizeof(dev->persist->nvfs[0]); i++) { unsigned j; - for (j = 0; j < dev->nvfs[i]; ++sum, ++j) { + for (j = 0; j < dev->persist->nvfs[i]; ++sum, ++j) { dev->dev_vfs[sum].min_port = i < 2 ? i + 1 : 1; dev->dev_vfs[sum].n_ports = i < 2 ? 1 : dev->caps.num_ports; @@ -2846,7 +2861,7 @@ slave_start: priv->removed = 0; - if (mlx4_is_master(dev) && dev->num_vfs) + if (mlx4_is_master(dev) && dev->persist->num_vfs) atomic_dec(&pf_loading); kfree(dev_cap); @@ -2908,7 +2923,7 @@ err_sriov: if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) pci_disable_sriov(pdev); - if (mlx4_is_master(dev) && dev->num_vfs) + if (mlx4_is_master(dev) && dev->persist->num_vfs) atomic_dec(&pf_loading); kfree(priv->dev.dev_vfs); @@ -3076,20 +3091,28 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) return -ENOMEM; dev = &priv->dev; - dev->pdev = pdev; - pci_set_drvdata(pdev, dev); + dev->persist = kzalloc(sizeof(*dev->persist), GFP_KERNEL); + if (!dev->persist) { + kfree(priv); + return -ENOMEM; + } + dev->persist->pdev = pdev; + dev->persist->dev = dev; + pci_set_drvdata(pdev, dev->persist); priv->pci_dev_data = id->driver_data; ret = __mlx4_init_one(pdev, id->driver_data, priv); - if (ret) + if (ret) { + kfree(dev->persist); kfree(priv); - + } return ret; } static void mlx4_unload_one(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int pci_dev_data; int p; @@ -3155,7 +3178,7 @@ static void mlx4_unload_one(struct pci_dev *pdev) mlx4_warn(dev, "Disabling SR-IOV\n"); pci_disable_sriov(pdev); dev->flags &= ~MLX4_FLAG_SRIOV; - dev->num_vfs = 0; + dev->persist->num_vfs = 0; } if (!mlx4_is_slave(dev)) @@ -3175,26 +3198,29 @@ static void mlx4_unload_one(struct pci_dev *pdev) static void mlx4_remove_one(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); mlx4_unload_one(pdev); pci_release_regions(pdev); pci_disable_device(pdev); + kfree(dev->persist); kfree(priv); pci_set_drvdata(pdev, NULL); } int mlx4_restart_one(struct pci_dev *pdev) { - struct mlx4_dev *dev = pci_get_drvdata(pdev); + struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); + struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int nvfs[MLX4_MAX_PORTS + 1] = {0, 0, 0}; int pci_dev_data, err, total_vfs; pci_dev_data = priv->pci_dev_data; - total_vfs = dev->num_vfs; - memcpy(nvfs, dev->nvfs, sizeof(dev->nvfs)); + total_vfs = dev->persist->num_vfs; + memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); mlx4_unload_one(pdev); err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index bdd4eea2247c..faa37ab75a9d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ 
-221,16 +221,17 @@ extern int mlx4_debug_level; #define mlx4_dbg(mdev, format, ...) \ do { \ if (mlx4_debug_level) \ - dev_printk(KERN_DEBUG, &(mdev)->pdev->dev, format, \ + dev_printk(KERN_DEBUG, \ + &(mdev)->persist->pdev->dev, format, \ ##__VA_ARGS__); \ } while (0) #define mlx4_err(mdev, format, ...) \ - dev_err(&(mdev)->pdev->dev, format, ##__VA_ARGS__) + dev_err(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) #define mlx4_info(mdev, format, ...) \ - dev_info(&(mdev)->pdev->dev, format, ##__VA_ARGS__) + dev_info(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) #define mlx4_warn(mdev, format, ...) \ - dev_warn(&(mdev)->pdev->dev, format, ##__VA_ARGS__) + dev_warn(&(mdev)->persist->pdev->dev, format, ##__VA_ARGS__) extern int mlx4_log_num_mgm_entry_size; extern int log_mtts_per_seg; diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 7094a9c70fd5..8dbdf1d29357 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -708,13 +708,13 @@ static int mlx4_write_mtt_chunk(struct mlx4_dev *dev, struct mlx4_mtt *mtt, if (!mtts) return -ENOMEM; - dma_sync_single_for_cpu(&dev->pdev->dev, dma_handle, + dma_sync_single_for_cpu(&dev->persist->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE); for (i = 0; i < npages; ++i) mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); - dma_sync_single_for_device(&dev->pdev->dev, dma_handle, + dma_sync_single_for_device(&dev->persist->pdev->dev, dma_handle, npages * sizeof (u64), DMA_TO_DEVICE); return 0; @@ -1020,13 +1020,13 @@ int mlx4_map_phys_fmr(struct mlx4_dev *dev, struct mlx4_fmr *fmr, u64 *page_list /* Make sure MPT status is visible before writing MTT entries */ wmb(); - dma_sync_single_for_cpu(&dev->pdev->dev, fmr->dma_handle, + dma_sync_single_for_cpu(&dev->persist->pdev->dev, fmr->dma_handle, npages * sizeof(u64), DMA_TO_DEVICE); for (i = 0; i < npages; ++i) fmr->mtts[i] = cpu_to_be64(page_list[i] | MLX4_MTT_FLAG_PRESENT); - dma_sync_single_for_device(&dev->pdev->dev, fmr->dma_handle, + dma_sync_single_for_device(&dev->persist->pdev->dev, fmr->dma_handle, npages * sizeof(u64), DMA_TO_DEVICE); fmr->mpt->key = cpu_to_be32(key); diff --git a/drivers/net/ethernet/mellanox/mlx4/pd.c b/drivers/net/ethernet/mellanox/mlx4/pd.c index 74216071201f..a42b4c0a9ed9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/pd.c +++ b/drivers/net/ethernet/mellanox/mlx4/pd.c @@ -151,11 +151,13 @@ int mlx4_uar_alloc(struct mlx4_dev *dev, struct mlx4_uar *uar) return -ENOMEM; if (mlx4_is_slave(dev)) - offset = uar->index % ((int) pci_resource_len(dev->pdev, 2) / + offset = uar->index % ((int)pci_resource_len(dev->persist->pdev, + 2) / dev->caps.uar_page_size); else offset = uar->index; - uar->pfn = (pci_resource_start(dev->pdev, 2) >> PAGE_SHIFT) + offset; + uar->pfn = (pci_resource_start(dev->persist->pdev, 2) >> PAGE_SHIFT) + + offset; uar->map = NULL; return 0; } diff --git a/drivers/net/ethernet/mellanox/mlx4/port.c b/drivers/net/ethernet/mellanox/mlx4/port.c index 30eb1ead0fe6..9f268f05290a 100644 --- a/drivers/net/ethernet/mellanox/mlx4/port.c +++ b/drivers/net/ethernet/mellanox/mlx4/port.c @@ -553,9 +553,9 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port) slaves_pport_actv = mlx4_phys_to_slaves_pport_actv( dev, &exclusive_ports); slave_gid -= bitmap_weight(slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } - vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + vfs = 
bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1; if (slave_gid <= ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) % vfs)) return ((MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs) + 1; return (MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS) / vfs; @@ -590,10 +590,10 @@ int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port) slaves_pport_actv = mlx4_phys_to_slaves_pport_actv( dev, &exclusive_ports); slave_gid -= bitmap_weight(slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } gids = MLX4_ROCE_MAX_GIDS - MLX4_ROCE_PF_GIDS; - vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + vfs = bitmap_weight(slaves_pport.slaves, dev->persist->num_vfs + 1) - 1; if (slave_gid <= gids % vfs) return MLX4_ROCE_PF_GIDS + ((gids / vfs) + 1) * (slave_gid - 1); @@ -644,7 +644,7 @@ void mlx4_reset_roce_gids(struct mlx4_dev *dev, int slave) int num_eth_ports, err; int i; - if (slave < 0 || slave > dev->num_vfs) + if (slave < 0 || slave > dev->persist->num_vfs) return; actv_ports = mlx4_get_active_ports(dev, slave); @@ -1214,7 +1214,8 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, return -EINVAL; slaves_pport = mlx4_phys_to_slaves_pport(dev, port); - num_vfs = bitmap_weight(slaves_pport.slaves, dev->num_vfs + 1) - 1; + num_vfs = bitmap_weight(slaves_pport.slaves, + dev->persist->num_vfs + 1) - 1; for (i = 0; i < MLX4_ROCE_MAX_GIDS; i++) { if (!memcmp(priv->port[port].gid_table.roce_gids[i].raw, gid, @@ -1258,7 +1259,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, dev, &exclusive_ports); num_vfs_before += bitmap_weight( slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } /* candidate_slave_gid isn't necessarily the correct slave, but @@ -1288,7 +1289,7 @@ int mlx4_get_slave_from_roce_gid(struct mlx4_dev *dev, int port, u8 *gid, dev, &exclusive_ports); slave_gid += bitmap_weight( slaves_pport_actv.slaves, - dev->num_vfs + 1); + dev->persist->num_vfs + 1); } } *slave_id = slave_gid; diff --git a/drivers/net/ethernet/mellanox/mlx4/reset.c b/drivers/net/ethernet/mellanox/mlx4/reset.c index ea1c6d092145..0076d88587ca 100644 --- a/drivers/net/ethernet/mellanox/mlx4/reset.c +++ b/drivers/net/ethernet/mellanox/mlx4/reset.c @@ -76,19 +76,21 @@ int mlx4_reset(struct mlx4_dev *dev) goto out; } - pcie_cap = pci_pcie_cap(dev->pdev); + pcie_cap = pci_pcie_cap(dev->persist->pdev); for (i = 0; i < 64; ++i) { if (i == 22 || i == 23) continue; - if (pci_read_config_dword(dev->pdev, i * 4, hca_header + i)) { + if (pci_read_config_dword(dev->persist->pdev, i * 4, + hca_header + i)) { err = -ENODEV; mlx4_err(dev, "Couldn't save HCA PCI header, aborting\n"); goto out; } } - reset = ioremap(pci_resource_start(dev->pdev, 0) + MLX4_RESET_BASE, + reset = ioremap(pci_resource_start(dev->persist->pdev, 0) + + MLX4_RESET_BASE, MLX4_RESET_SIZE); if (!reset) { err = -ENOMEM; @@ -122,8 +124,8 @@ int mlx4_reset(struct mlx4_dev *dev) end = jiffies + MLX4_RESET_TIMEOUT_JIFFIES; do { - if (!pci_read_config_word(dev->pdev, PCI_VENDOR_ID, &vendor) && - vendor != 0xffff) + if (!pci_read_config_word(dev->persist->pdev, PCI_VENDOR_ID, + &vendor) && vendor != 0xffff) break; msleep(1); @@ -138,14 +140,16 @@ int mlx4_reset(struct mlx4_dev *dev) /* Now restore the PCI headers */ if (pcie_cap) { devctl = hca_header[(pcie_cap + PCI_EXP_DEVCTL) / 4]; - if (pcie_capability_write_word(dev->pdev, PCI_EXP_DEVCTL, + if (pcie_capability_write_word(dev->persist->pdev, + PCI_EXP_DEVCTL, devctl)) { err = -ENODEV; mlx4_err(dev, 
"Couldn't restore HCA PCI Express Device Control register, aborting\n"); goto out; } linkctl = hca_header[(pcie_cap + PCI_EXP_LNKCTL) / 4]; - if (pcie_capability_write_word(dev->pdev, PCI_EXP_LNKCTL, + if (pcie_capability_write_word(dev->persist->pdev, + PCI_EXP_LNKCTL, linkctl)) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA PCI Express Link control register, aborting\n"); @@ -157,7 +161,8 @@ int mlx4_reset(struct mlx4_dev *dev) if (i * 4 == PCI_COMMAND) continue; - if (pci_write_config_dword(dev->pdev, i * 4, hca_header[i])) { + if (pci_write_config_dword(dev->persist->pdev, i * 4, + hca_header[i])) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA reg %x, aborting\n", i); @@ -165,7 +170,7 @@ int mlx4_reset(struct mlx4_dev *dev) } } - if (pci_write_config_dword(dev->pdev, PCI_COMMAND, + if (pci_write_config_dword(dev->persist->pdev, PCI_COMMAND, hca_header[PCI_COMMAND / 4])) { err = -ENODEV; mlx4_err(dev, "Couldn't restore HCA COMMAND, aborting\n"); diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 4efbd1eca611..3e93879bccce 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -309,12 +309,13 @@ static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave, int allocated, free, reserved, guaranteed, from_free; int from_rsvd; - if (slave > dev->num_vfs) + if (slave > dev->persist->num_vfs) return -EINVAL; spin_lock(&res_alloc->alloc_lock); allocated = (port > 0) ? - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] : + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] : res_alloc->allocated[slave]; free = (port > 0) ? res_alloc->res_port_free[port - 1] : res_alloc->res_free; @@ -352,7 +353,8 @@ static inline int mlx4_grant_resource(struct mlx4_dev *dev, int slave, if (!err) { /* grant the request */ if (port > 0) { - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] += count; + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] += count; res_alloc->res_port_free[port - 1] -= count; res_alloc->res_port_rsvd[port - 1] -= from_rsvd; } else { @@ -376,13 +378,14 @@ static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave, &priv->mfunc.master.res_tracker.res_alloc[res_type]; int allocated, guaranteed, from_rsvd; - if (slave > dev->num_vfs) + if (slave > dev->persist->num_vfs) return; spin_lock(&res_alloc->alloc_lock); allocated = (port > 0) ? 
- res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] : + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] : res_alloc->allocated[slave]; guaranteed = res_alloc->guaranteed[slave]; @@ -397,7 +400,8 @@ static inline void mlx4_release_resource(struct mlx4_dev *dev, int slave, } if (port > 0) { - res_alloc->allocated[(port - 1) * (dev->num_vfs + 1) + slave] -= count; + res_alloc->allocated[(port - 1) * + (dev->persist->num_vfs + 1) + slave] -= count; res_alloc->res_port_free[port - 1] += count; res_alloc->res_port_rsvd[port - 1] += from_rsvd; } else { @@ -415,7 +419,8 @@ static inline void initialize_res_quotas(struct mlx4_dev *dev, enum mlx4_resource res_type, int vf, int num_instances) { - res_alloc->guaranteed[vf] = num_instances / (2 * (dev->num_vfs + 1)); + res_alloc->guaranteed[vf] = num_instances / + (2 * (dev->persist->num_vfs + 1)); res_alloc->quota[vf] = (num_instances / 2) + res_alloc->guaranteed[vf]; if (vf == mlx4_master_func_num(dev)) { res_alloc->res_free = num_instances; @@ -486,21 +491,26 @@ int mlx4_init_resource_tracker(struct mlx4_dev *dev) for (i = 0; i < MLX4_NUM_OF_RESOURCE_TYPE; i++) { struct resource_allocator *res_alloc = &priv->mfunc.master.res_tracker.res_alloc[i]; - res_alloc->quota = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); - res_alloc->guaranteed = kmalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + res_alloc->quota = kmalloc((dev->persist->num_vfs + 1) * + sizeof(int), GFP_KERNEL); + res_alloc->guaranteed = kmalloc((dev->persist->num_vfs + 1) * + sizeof(int), GFP_KERNEL); if (i == RES_MAC || i == RES_VLAN) res_alloc->allocated = kzalloc(MLX4_MAX_PORTS * - (dev->num_vfs + 1) * sizeof(int), - GFP_KERNEL); + (dev->persist->num_vfs + + 1) * + sizeof(int), GFP_KERNEL); else - res_alloc->allocated = kzalloc((dev->num_vfs + 1) * sizeof(int), GFP_KERNEL); + res_alloc->allocated = kzalloc((dev->persist-> + num_vfs + 1) * + sizeof(int), GFP_KERNEL); if (!res_alloc->quota || !res_alloc->guaranteed || !res_alloc->allocated) goto no_mem_err; spin_lock_init(&res_alloc->alloc_lock); - for (t = 0; t < dev->num_vfs + 1; t++) { + for (t = 0; t < dev->persist->num_vfs + 1; t++) { struct mlx4_active_ports actv_ports = mlx4_get_active_ports(dev, t); switch (i) { diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index f1e41b33462f..1069ce65e8b4 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -744,8 +744,15 @@ struct mlx4_vf_dev { u8 n_ports; }; -struct mlx4_dev { +struct mlx4_dev_persistent { struct pci_dev *pdev; + struct mlx4_dev *dev; + int nvfs[MLX4_MAX_PORTS + 1]; + int num_vfs; +}; + +struct mlx4_dev { + struct mlx4_dev_persistent *persist; unsigned long flags; unsigned long num_slaves; struct mlx4_caps caps; @@ -754,13 +761,11 @@ struct mlx4_dev { struct radix_tree_root qp_table_tree; u8 rev_id; char board_id[MLX4_BOARD_ID_LEN]; - int num_vfs; int numa_node; int oper_log_mgm_entry_size; u64 regid_promisc_array[MLX4_MAX_PORTS + 1]; u64 regid_allmulti_array[MLX4_MAX_PORTS + 1]; struct mlx4_vf_dev *dev_vfs; - int nvfs[MLX4_MAX_PORTS + 1]; }; struct mlx4_eqe { -- cgit v1.2.3-70-g09d2 From dd0eefe3abbf47442db296bf68f27eb2860c1cdf Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:36 +0200 Subject: net/mlx4_core: Set device configuration data to be persistent across reset When an HCA enters an internal error state, this is detected by the driver. The driver then should reset the HCA and restart the software stack. 
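In sketch form, the pattern is to copy anything that must survive the teardown into the persistent structure on unload, and replay it once the device is back up. The helper name save_port_types() below is only illustrative (the patch itself open-codes this loop in mlx4_unload_one()); the curr_port_type/curr_port_poss_type fields and restore_current_port_types() are the ones added further down:

    /* illustrative only: the patch inlines this loop in mlx4_unload_one() */
    static void save_port_types(struct mlx4_dev *dev)
    {
            int i;

            for (i = 0; i < dev->caps.num_ports; i++) {
                    dev->persist->curr_port_type[i] =
                            dev->caps.port_type[i + 1];
                    dev->persist->curr_port_poss_type[i] =
                            dev->caps.possible_type[i + 1];
            }
    }

    /* ...and after mlx4_load_one() succeeds again in mlx4_restart_one(): */
    err = restore_current_port_types(dev, dev->persist->curr_port_type,
                                     dev->persist->curr_port_poss_type);
    if (err)
            mlx4_err(dev, "could not restore original port types (%d)\n", err);
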
Keep ports information and some SRIOV configuration in a persistent area to have it valid across reset. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/main.c | 45 +++++++++++++++++++++++++++++-- include/linux/mlx4/device.h | 2 ++ 2 files changed, 45 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index abcee61f8a47..2c5a555dff89 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3109,18 +3109,34 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) return ret; } +static void mlx4_clean_dev(struct mlx4_dev *dev) +{ + struct mlx4_dev_persistent *persist = dev->persist; + struct mlx4_priv *priv = mlx4_priv(dev); + + memset(priv, 0, sizeof(*priv)); + priv->dev.persist = persist; +} + static void mlx4_unload_one(struct pci_dev *pdev) { struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); int pci_dev_data; - int p; + int p, i; int active_vfs = 0; if (priv->removed) return; + /* saving current ports type for further use */ + for (i = 0; i < dev->caps.num_ports; i++) { + dev->persist->curr_port_type[i] = dev->caps.port_type[i + 1]; + dev->persist->curr_port_poss_type[i] = dev->caps. + possible_type[i + 1]; + } + pci_dev_data = priv->pci_dev_data; /* Disabling SR-IOV is not allowed while there are active vf's */ @@ -3191,7 +3207,7 @@ static void mlx4_unload_one(struct pci_dev *pdev) kfree(dev->caps.qp1_proxy); kfree(dev->dev_vfs); - memset(priv, 0, sizeof(*priv)); + mlx4_clean_dev(dev); priv->pci_dev_data = pci_dev_data; priv->removed = 1; } @@ -3210,6 +3226,25 @@ static void mlx4_remove_one(struct pci_dev *pdev) pci_set_drvdata(pdev, NULL); } +static int restore_current_port_types(struct mlx4_dev *dev, + enum mlx4_port_type *types, + enum mlx4_port_type *poss_types) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int err, i; + + mlx4_stop_sense(dev); + + mutex_lock(&priv->port_mutex); + for (i = 0; i < dev->caps.num_ports; i++) + dev->caps.possible_type[i + 1] = poss_types[i]; + err = mlx4_change_port_types(dev, types); + mlx4_start_sense(dev); + mutex_unlock(&priv->port_mutex); + + return err; +} + int mlx4_restart_one(struct pci_dev *pdev) { struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); @@ -3230,6 +3265,12 @@ int mlx4_restart_one(struct pci_dev *pdev) return err; } + err = restore_current_port_types(dev, dev->persist->curr_port_type, + dev->persist->curr_port_poss_type); + if (err) + mlx4_err(dev, "could not restore original port types (%d)\n", + err); + return err; } diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 1069ce65e8b4..8c3837ac1a2d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -749,6 +749,8 @@ struct mlx4_dev_persistent { struct mlx4_dev *dev; int nvfs[MLX4_MAX_PORTS + 1]; int num_vfs; + enum mlx4_port_type curr_port_type[MLX4_MAX_PORTS + 1]; + enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1]; }; struct mlx4_dev { -- cgit v1.2.3-70-g09d2 From ad9a0bf08ffbf32b8f292c3bb78ca0f24bb8f6b2 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:37 +0200 Subject: net/mlx4_core: Refactor the catas flow to work per device Using a WQ per device instead of a single global WQ, this allows independent reset handling per device even when SRIOV is used. 
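A minimal sketch of that per-device plumbing, using the names this patch introduces below (catas_wq and catas_work live in mlx4_dev_persistent and are set up by mlx4_catas_init()):

    int mlx4_catas_init(struct mlx4_dev *dev)
    {
            INIT_WORK(&dev->persist->catas_work, catas_reset);
            dev->persist->catas_wq =
                    create_singlethread_workqueue("mlx4_health");
            if (!dev->persist->catas_wq)
                    return -ENOMEM;

            return 0;
    }

    /* the poller then queues onto the device's own workqueue: */
    queue_work(dev->persist->catas_wq, &dev->persist->catas_work);
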
This comes as a pre-patch for supporting chip reset for both native and SRIOV. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 75 ++++++++++++++---------------- drivers/net/ethernet/mellanox/mlx4/main.c | 12 ++++- drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3 +- include/linux/mlx4/device.h | 2 + 4 files changed, 48 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 1a102c9bac99..5bb9aa6e281d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -40,10 +40,7 @@ enum { MLX4_CATAS_POLL_INTERVAL = 5 * HZ, }; -static DEFINE_SPINLOCK(catas_lock); -static LIST_HEAD(catas_list); -static struct work_struct catas_work; static int internal_err_reset = 1; module_param(internal_err_reset, int, 0644); @@ -77,13 +74,9 @@ static void poll_catas(unsigned long dev_ptr) dump_err_buf(dev); mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); - if (internal_err_reset) { - spin_lock(&catas_lock); - list_add(&priv->catas_err.list, &catas_list); - spin_unlock(&catas_lock); - - queue_work(mlx4_wq, &catas_work); - } + if (internal_err_reset) + queue_work(dev->persist->catas_wq, + &dev->persist->catas_work); } } else mod_timer(&priv->catas_err.timer, @@ -92,34 +85,23 @@ static void poll_catas(unsigned long dev_ptr) static void catas_reset(struct work_struct *work) { - struct mlx4_priv *priv, *tmppriv; - struct mlx4_dev *dev; - struct mlx4_dev_persistent *persist; - - LIST_HEAD(tlist); + struct mlx4_dev_persistent *persist = + container_of(work, struct mlx4_dev_persistent, + catas_work); + struct pci_dev *pdev = persist->pdev; int ret; - spin_lock_irq(&catas_lock); - list_splice_init(&catas_list, &tlist); - spin_unlock_irq(&catas_lock); - - list_for_each_entry_safe(priv, tmppriv, &tlist, catas_err.list) { - struct pci_dev *pdev = priv->dev.persist->pdev; - - /* If the device is off-line, we cannot reset it */ - if (pci_channel_offline(pdev)) - continue; + /* If the device is off-line, we cannot reset it */ + if (pci_channel_offline(pdev)) + return; - ret = mlx4_restart_one(priv->dev.persist->pdev); - /* 'priv' now is not valid */ - if (ret) - pr_err("mlx4 %s: Reset failed (%d)\n", - pci_name(pdev), ret); - else { - persist = pci_get_drvdata(pdev); - mlx4_dbg(persist->dev, "Reset succeeded\n"); - } - } + ret = mlx4_restart_one(pdev); + /* 'priv' now is not valid */ + if (ret) + pr_err("mlx4 %s: Reset failed (%d)\n", + pci_name(pdev), ret); + else + mlx4_dbg(persist->dev, "Reset succeeded\n"); } void mlx4_start_catas_poll(struct mlx4_dev *dev) @@ -158,15 +140,26 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev) del_timer_sync(&priv->catas_err.timer); - if (priv->catas_err.map) + if (priv->catas_err.map) { iounmap(priv->catas_err.map); + priv->catas_err.map = NULL; + } +} - spin_lock_irq(&catas_lock); - list_del(&priv->catas_err.list); - spin_unlock_irq(&catas_lock); +int mlx4_catas_init(struct mlx4_dev *dev) +{ + INIT_WORK(&dev->persist->catas_work, catas_reset); + dev->persist->catas_wq = create_singlethread_workqueue("mlx4_health"); + if (!dev->persist->catas_wq) + return -ENOMEM; + + return 0; } -void __init mlx4_catas_init(void) +void mlx4_catas_end(struct mlx4_dev *dev) { - INIT_WORK(&catas_work, catas_reset); + if (dev->persist->catas_wq) { + destroy_workqueue(dev->persist->catas_wq); + dev->persist->catas_wq = NULL; + } } diff --git 
a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 2c5a555dff89..a61694cc1476 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3064,11 +3064,19 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, } } - err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + err = mlx4_catas_init(&priv->dev); if (err) goto err_release_regions; + + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + if (err) + goto err_catas; + return 0; +err_catas: + mlx4_catas_end(&priv->dev); + err_release_regions: pci_release_regions(pdev); @@ -3219,6 +3227,7 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct mlx4_priv *priv = mlx4_priv(dev); mlx4_unload_one(pdev); + mlx4_catas_end(dev); pci_release_regions(pdev); pci_disable_device(pdev); kfree(dev->persist); @@ -3403,7 +3412,6 @@ static int __init mlx4_init(void) if (mlx4_verify_params()) return -EINVAL; - mlx4_catas_init(); mlx4_wq = create_singlethread_workqueue("mlx4"); if (!mlx4_wq) diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index faa37ab75a9d..d41af84f965b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -995,7 +995,8 @@ void __mlx4_xrcd_free(struct mlx4_dev *dev, u32 xrcdn); void mlx4_start_catas_poll(struct mlx4_dev *dev); void mlx4_stop_catas_poll(struct mlx4_dev *dev); -void mlx4_catas_init(void); +int mlx4_catas_init(struct mlx4_dev *dev); +void mlx4_catas_end(struct mlx4_dev *dev); int mlx4_restart_one(struct pci_dev *pdev); int mlx4_register_device(struct mlx4_dev *dev); void mlx4_unregister_device(struct mlx4_dev *dev); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 8c3837ac1a2d..da425d2f3708 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -751,6 +751,8 @@ struct mlx4_dev_persistent { int num_vfs; enum mlx4_port_type curr_port_type[MLX4_MAX_PORTS + 1]; enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1]; + struct work_struct catas_work; + struct workqueue_struct *catas_wq; }; struct mlx4_dev { -- cgit v1.2.3-70-g09d2 From f6bc11e42646e661e699a5593cbd1e9dba7191d0 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:38 +0200 Subject: net/mlx4_core: Enhance the catas flow to support device reset This includes: - resetting the chip when a fatal error is detected (the current code does not do this). - exposing the ability to enter error state from outside the catas code by calling its functionality. (E.g. FW Command timeout, AER error). - managing a persistent device state. This is needed to sync between reset flow cases. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 122 ++++++++++++++++++++++------- drivers/net/ethernet/mellanox/mlx4/main.c | 6 ++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 +- include/linux/mlx4/device.h | 7 ++ 4 files changed, 108 insertions(+), 29 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 5bb9aa6e281d..588d6b5e1211 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -48,6 +48,83 @@ MODULE_PARM_DESC(internal_err_reset, "Reset device on internal errors if non-zero" " (default 1, in SRIOV mode default is 0)"); +static int read_vendor_id(struct mlx4_dev *dev) +{ + u16 vendor_id = 0; + int ret; + + ret = pci_read_config_word(dev->persist->pdev, 0, &vendor_id); + if (ret) { + mlx4_err(dev, "Failed to read vendor ID, ret=%d\n", ret); + return ret; + } + + if (vendor_id == 0xffff) { + mlx4_err(dev, "PCI can't be accessed to read vendor id\n"); + return -EINVAL; + } + + return 0; +} + +static int mlx4_reset_master(struct mlx4_dev *dev) +{ + int err = 0; + + if (!pci_channel_offline(dev->persist->pdev)) { + err = read_vendor_id(dev); + /* If PCI can't be accessed to read vendor ID we assume that its + * link was disabled and chip was already reset. + */ + if (err) + return 0; + + err = mlx4_reset(dev); + if (err) + mlx4_err(dev, "Fail to reset HCA\n"); + } + + return err; +} + +void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) +{ + int err; + struct mlx4_dev *dev; + + if (!internal_err_reset) + return; + + mutex_lock(&persist->device_state_mutex); + if (persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + goto out; + + dev = persist->dev; + mlx4_err(dev, "device is going to be reset\n"); + err = mlx4_reset_master(dev); + BUG_ON(err != 0); + + dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR; + mlx4_err(dev, "device was reset successfully\n"); + mutex_unlock(&persist->device_state_mutex); + + /* At that step HW was already reset, now notify clients */ + mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); + return; + +out: + mutex_unlock(&persist->device_state_mutex); +} + +static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist) +{ + int err = 0; + + mlx4_enter_error_state(persist); + err = mlx4_restart_one(persist->pdev); + mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n", err); +} + static void dump_err_buf(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -66,21 +143,22 @@ static void poll_catas(unsigned long dev_ptr) struct mlx4_priv *priv = mlx4_priv(dev); if (readl(priv->catas_err.map)) { - /* If the device is off-line, we cannot try to recover it */ - if (pci_channel_offline(dev->persist->pdev)) - mod_timer(&priv->catas_err.timer, - round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); - else { - dump_err_buf(dev); - mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); - - if (internal_err_reset) - queue_work(dev->persist->catas_wq, - &dev->persist->catas_work); - } - } else - mod_timer(&priv->catas_err.timer, - round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); + dump_err_buf(dev); + goto internal_err; + } + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + mlx4_warn(dev, "Internal error mark was detected on device\n"); + goto internal_err; + } + + mod_timer(&priv->catas_err.timer, + round_jiffies(jiffies + MLX4_CATAS_POLL_INTERVAL)); + return; + +internal_err: + if (internal_err_reset) + 
queue_work(dev->persist->catas_wq, &dev->persist->catas_work); } static void catas_reset(struct work_struct *work) @@ -88,20 +166,8 @@ static void catas_reset(struct work_struct *work) struct mlx4_dev_persistent *persist = container_of(work, struct mlx4_dev_persistent, catas_work); - struct pci_dev *pdev = persist->pdev; - int ret; - - /* If the device is off-line, we cannot reset it */ - if (pci_channel_offline(pdev)) - return; - ret = mlx4_restart_one(pdev); - /* 'priv' now is not valid */ - if (ret) - pr_err("mlx4 %s: Reset failed (%d)\n", - pci_name(pdev), ret); - else - mlx4_dbg(persist->dev, "Reset succeeded\n"); + mlx4_handle_error_state(persist); } void mlx4_start_catas_poll(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index a61694cc1476..dc2d910fcc88 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -2624,6 +2624,11 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, } } + /* on load remove any previous indication of internal error, + * device is up. + */ + dev->persist->state = MLX4_DEVICE_STATE_UP; + slave_start: err = mlx4_cmd_init(dev); if (err) { @@ -3108,6 +3113,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) dev->persist->dev = dev; pci_set_drvdata(pdev, dev->persist); priv->pci_dev_data = id->driver_data; + mutex_init(&dev->persist->device_state_mutex); ret = __mlx4_init_one(pdev, id->driver_data, priv); if (ret) { diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index d41af84f965b..aa1ecbc5a606 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -1178,7 +1178,7 @@ void mlx4_qp_event(struct mlx4_dev *dev, u32 qpn, int event_type); void mlx4_srq_event(struct mlx4_dev *dev, u32 srqn, int event_type); -void mlx4_handle_catas_err(struct mlx4_dev *dev); +void mlx4_enter_error_state(struct mlx4_dev_persistent *persist); int mlx4_SENSE_PORT(struct mlx4_dev *dev, int port, enum mlx4_port_type *type); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index da425d2f3708..7d5d317cb7a6 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -411,6 +411,11 @@ enum { MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK = 1 << 4, }; +enum { + MLX4_DEVICE_STATE_UP = 1 << 0, + MLX4_DEVICE_STATE_INTERNAL_ERROR = 1 << 1, +}; + #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK) @@ -753,6 +758,8 @@ struct mlx4_dev_persistent { enum mlx4_port_type curr_port_poss_type[MLX4_MAX_PORTS + 1]; struct work_struct catas_work; struct workqueue_struct *catas_wq; + struct mutex device_state_mutex; /* protect HW state */ + u8 state; }; struct mlx4_dev { -- cgit v1.2.3-70-g09d2 From f5aef5aa35063f2b45c3605871cd525d0cb7fb7a Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:39 +0200 Subject: net/mlx4_core: Activate reset flow upon fatal command cases We activate reset flow upon command fatal errors, when the device enters an erroneous state, and must be reset. The cases below are assumed to be fatal: FW command timed-out, an error from FW on closing commands, pci is offline when posting/pending a command. In those cases we place the device into an error state: chip is reset, pending commands are awakened and completed immediately. Subsequent commands will return immediately. 
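The recovery step is funneled through one small helper (added below); roughly:

    static int mlx4_cmd_reset_flow(struct mlx4_dev *dev, u16 op, u8 op_modifier,
                                   int err)
    {
            /* Only if reset flow is really active is the return code based on
             * the command; otherwise the current error code is returned.
             */
            if (mlx4_internal_err_reset) {
                    mlx4_enter_error_state(dev->persist);
                    err = mlx4_internal_err_ret_value(dev, op, op_modifier);
            }

            return err;
    }
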
The return code in the above cases will depend on the command. Commands which free and close resources will return success (because the chip was reset, so callers may safely free their kernel resources). Other commands will return -EIO. Since the device's state was marked as error, the catas poller will detect this and restart the device's software stack (as is done when a FW internal error is directly detected). The device state is protected by a persistent mutex lives on its mlx4_dev, as such no need any more for the hcr_mutex which is removed. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 11 +- drivers/net/ethernet/mellanox/mlx4/cmd.c | 163 +++++++++++++++++++++++++---- drivers/net/ethernet/mellanox/mlx4/mcg.c | 3 + drivers/net/ethernet/mellanox/mlx4/mlx4.h | 2 +- include/linux/mlx4/cmd.h | 1 + 5 files changed, 153 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 588d6b5e1211..63f14ffcc906 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -42,8 +42,8 @@ enum { -static int internal_err_reset = 1; -module_param(internal_err_reset, int, 0644); +int mlx4_internal_err_reset = 1; +module_param_named(internal_err_reset, mlx4_internal_err_reset, int, 0644); MODULE_PARM_DESC(internal_err_reset, "Reset device on internal errors if non-zero" " (default 1, in SRIOV mode default is 0)"); @@ -92,7 +92,7 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) int err; struct mlx4_dev *dev; - if (!internal_err_reset) + if (!mlx4_internal_err_reset) return; mutex_lock(&persist->device_state_mutex); @@ -110,6 +110,7 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) /* At that step HW was already reset, now notify clients */ mlx4_dispatch_event(dev, MLX4_DEV_EVENT_CATASTROPHIC_ERROR, 0); + mlx4_cmd_wake_completions(dev); return; out: @@ -157,7 +158,7 @@ static void poll_catas(unsigned long dev_ptr) return; internal_err: - if (internal_err_reset) + if (mlx4_internal_err_reset) queue_work(dev->persist->catas_wq, &dev->persist->catas_work); } @@ -177,7 +178,7 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) /*If we are in SRIOV the default of the module param must be 0*/ if (mlx4_is_mfunc(dev)) - internal_err_reset = 0; + mlx4_internal_err_reset = 0; INIT_LIST_HEAD(&priv->catas_err.list); init_timer(&priv->catas_err.timer); diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 7cd90e6a4272..3895b2b5fc92 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -182,6 +182,72 @@ static u8 mlx4_errno_to_status(int errno) } } +static int mlx4_internal_err_ret_value(struct mlx4_dev *dev, u16 op, + u8 op_modifier) +{ + switch (op) { + case MLX4_CMD_UNMAP_ICM: + case MLX4_CMD_UNMAP_ICM_AUX: + case MLX4_CMD_UNMAP_FA: + case MLX4_CMD_2RST_QP: + case MLX4_CMD_HW2SW_EQ: + case MLX4_CMD_HW2SW_CQ: + case MLX4_CMD_HW2SW_SRQ: + case MLX4_CMD_HW2SW_MPT: + case MLX4_CMD_CLOSE_HCA: + case MLX4_QP_FLOW_STEERING_DETACH: + case MLX4_CMD_FREE_RES: + case MLX4_CMD_CLOSE_PORT: + return CMD_STAT_OK; + + case MLX4_CMD_QP_ATTACH: + /* On Detach case return success */ + if (op_modifier == 0) + return CMD_STAT_OK; + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + + default: + return mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + } +} + +static int 
mlx4_closing_cmd_fatal_error(u16 op, u8 fw_status) +{ + /* Any error during the closing commands below is considered fatal */ + if (op == MLX4_CMD_CLOSE_HCA || + op == MLX4_CMD_HW2SW_EQ || + op == MLX4_CMD_HW2SW_CQ || + op == MLX4_CMD_2RST_QP || + op == MLX4_CMD_HW2SW_SRQ || + op == MLX4_CMD_SYNC_TPT || + op == MLX4_CMD_UNMAP_ICM || + op == MLX4_CMD_UNMAP_ICM_AUX || + op == MLX4_CMD_UNMAP_FA) + return 1; + /* Error on MLX4_CMD_HW2SW_MPT is fatal except when fw status equals + * CMD_STAT_REG_BOUND. + * This status indicates that memory region has memory windows bound to it + * which may result from invalid user space usage and is not fatal. + */ + if (op == MLX4_CMD_HW2SW_MPT && fw_status != CMD_STAT_REG_BOUND) + return 1; + return 0; +} + +static int mlx4_cmd_reset_flow(struct mlx4_dev *dev, u16 op, u8 op_modifier, + int err) +{ + /* Only if reset flow is really active return code is based on + * command, otherwise current error code is returned. + */ + if (mlx4_internal_err_reset) { + mlx4_enter_error_state(dev->persist); + err = mlx4_internal_err_ret_value(dev, op, op_modifier); + } + + return err; +} + static int comm_pending(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -258,7 +324,7 @@ static int mlx4_comm_cmd_wait(struct mlx4_dev *dev, u8 op, cmd->free_head = context->next; spin_unlock(&cmd->context_lock); - init_completion(&context->done); + reinit_completion(&context->done); mlx4_comm_cmd_post(dev, op, param); @@ -323,17 +389,21 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, { struct mlx4_cmd *cmd = &mlx4_priv(dev)->cmd; u32 __iomem *hcr = cmd->hcr; - int ret = -EAGAIN; + int ret = -EIO; unsigned long end; - mutex_lock(&cmd->hcr_mutex); - - if (pci_channel_offline(dev->persist->pdev)) { + mutex_lock(&dev->persist->device_state_mutex); + /* To avoid writing to unknown addresses after the device state was + * changed to internal error and the chip was reset, + * check the INTERNAL_ERROR flag which is updated under + * device_state_mutex lock. + */ + if (pci_channel_offline(dev->persist->pdev) || + (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR)) { /* * Device is going through error recovery * and cannot accept commands. */ - ret = -EIO; goto out; } @@ -347,7 +417,6 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, * Device is going through error recovery * and cannot accept commands. */ - ret = -EIO; goto out; } @@ -391,7 +460,11 @@ static int mlx4_cmd_post(struct mlx4_dev *dev, u64 in_param, u64 out_param, ret = 0; out: - mutex_unlock(&cmd->hcr_mutex); + if (ret) + mlx4_warn(dev, "Could not post command 0x%x: ret=%d, in_param=0x%llx, in_mod=0x%x, op_mod=0x%x\n", + op, ret, in_param, in_modifier, op_modifier); + mutex_unlock(&dev->persist->device_state_mutex); + return ret; } @@ -464,12 +537,12 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, down(&priv->cmd.poll_sem); - if (pci_channel_offline(dev->persist->pdev)) { + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { /* * Device is going through error recovery * and cannot accept commands. */ - err = -EIO; + err = mlx4_internal_err_ret_value(dev, op, op_modifier); goto out; } @@ -483,7 +556,7 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, err = mlx4_cmd_post(dev, in_param, out_param ? 
*out_param : 0, in_modifier, op_modifier, op, CMD_POLL_TOKEN, 0); if (err) - goto out; + goto out_reset; end = msecs_to_jiffies(timeout) + jiffies; while (cmd_pending(dev) && time_before(jiffies, end)) { @@ -493,6 +566,11 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, * and cannot accept commands. */ err = -EIO; + goto out_reset; + } + + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + err = mlx4_internal_err_ret_value(dev, op, op_modifier); goto out; } @@ -502,8 +580,8 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, if (cmd_pending(dev)) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); - err = -ETIMEDOUT; - goto out; + err = -EIO; + goto out_reset; } if (out_is_imm) @@ -515,10 +593,17 @@ static int mlx4_cmd_poll(struct mlx4_dev *dev, u64 in_param, u64 *out_param, stat = be32_to_cpu((__force __be32) __raw_readl(hcr + HCR_STATUS_OFFSET)) >> 24; err = mlx4_status_to_errno(stat); - if (err) + if (err) { mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, stat); + if (mlx4_closing_cmd_fatal_error(op, stat)) + goto out_reset; + goto out; + } +out_reset: + if (err) + err = mlx4_cmd_reset_flow(dev, op, op_modifier, err); out: up(&priv->cmd.poll_sem); return err; @@ -565,17 +650,19 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, goto out; } - init_completion(&context->done); + reinit_completion(&context->done); - mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, - in_modifier, op_modifier, op, context->token, 1); + err = mlx4_cmd_post(dev, in_param, out_param ? *out_param : 0, + in_modifier, op_modifier, op, context->token, 1); + if (err) + goto out_reset; if (!wait_for_completion_timeout(&context->done, msecs_to_jiffies(timeout))) { mlx4_warn(dev, "command 0x%x timed out (go bit not cleared)\n", op); - err = -EBUSY; - goto out; + err = -EIO; + goto out_reset; } err = context->result; @@ -592,12 +679,20 @@ static int mlx4_cmd_wait(struct mlx4_dev *dev, u64 in_param, u64 *out_param, else mlx4_err(dev, "command 0x%x failed: fw status = 0x%x\n", op, context->fw_status); + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + err = mlx4_internal_err_ret_value(dev, op, op_modifier); + else if (mlx4_closing_cmd_fatal_error(op, context->fw_status)) + goto out_reset; + goto out; } if (out_is_imm) *out_param = context->out_param; +out_reset: + if (err) + err = mlx4_cmd_reset_flow(dev, op, op_modifier, err); out: spin_lock(&cmd->context_lock); context->next = cmd->free_head; @@ -613,9 +708,12 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, u16 op, unsigned long timeout, int native) { if (pci_channel_offline(dev->persist->pdev)) - return -EIO; + return mlx4_cmd_reset_flow(dev, op, op_modifier, -EIO); if (!mlx4_is_mfunc(dev) || (native && mlx4_is_master(dev))) { + if (dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + return mlx4_internal_err_ret_value(dev, op, + op_modifier); if (mlx4_priv(dev)->cmd.use_events) return mlx4_cmd_wait(dev, in_param, out_param, out_is_imm, in_modifier, @@ -2121,7 +2219,6 @@ int mlx4_cmd_init(struct mlx4_dev *dev) int flags = 0; if (!priv->cmd.initialized) { - mutex_init(&priv->cmd.hcr_mutex); mutex_init(&priv->cmd.slave_cmd_mutex); sema_init(&priv->cmd.poll_sem, 1); priv->cmd.use_events = 0; @@ -2232,6 +2329,11 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) for (i = 0; i < priv->cmd.max_cmds; ++i) { priv->cmd.context[i].token = i; priv->cmd.context[i].next = i + 1; + /* To support fatal error 
flow, initialize all + * cmd contexts to allow simulating completions + * with complete() at any time. + */ + init_completion(&priv->cmd.context[i].done); } priv->cmd.context[priv->cmd.max_cmds - 1].next = -1; @@ -2329,6 +2431,25 @@ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave) return slave - 1; } +void mlx4_cmd_wake_completions(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_cmd_context *context; + int i; + + spin_lock(&priv->cmd.context_lock); + if (priv->cmd.context) { + for (i = 0; i < priv->cmd.max_cmds; ++i) { + context = &priv->cmd.context[i]; + context->fw_status = CMD_STAT_INTERNAL_ERR; + context->result = + mlx4_status_to_errno(CMD_STAT_INTERNAL_ERR); + complete(&context->done); + } + } + spin_unlock(&priv->cmd.context_lock); +} + struct mlx4_active_ports mlx4_get_active_ports(struct mlx4_dev *dev, int slave) { struct mlx4_active_ports actv_ports; diff --git a/drivers/net/ethernet/mellanox/mlx4/mcg.c b/drivers/net/ethernet/mellanox/mlx4/mcg.c index a3867e7ef885..d22d9283d2cd 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mcg.c +++ b/drivers/net/ethernet/mellanox/mlx4/mcg.c @@ -1318,6 +1318,9 @@ out: mutex_unlock(&priv->mcg_table.mutex); mlx4_free_cmd_mailbox(dev, mailbox); + if (err && dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) + /* In case device is under an error, return success as a closing command */ + err = 0; return err; } diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index aa1ecbc5a606..5c772ea4473b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -235,6 +235,7 @@ do { \ extern int mlx4_log_num_mgm_entry_size; extern int log_mtts_per_seg; +extern int mlx4_internal_err_reset; #define MLX4_MAX_NUM_SLAVES (MLX4_MAX_NUM_PF + MLX4_MAX_NUM_VF) #define ALL_SLAVES 0xff @@ -607,7 +608,6 @@ struct mlx4_mgm { struct mlx4_cmd { struct pci_pool *pool; void __iomem *hcr; - struct mutex hcr_mutex; struct mutex slave_cmd_mutex; struct semaphore poll_sem; struct semaphore event_sem; diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index 64d25941b329..e7543844cc7a 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -279,6 +279,7 @@ int mlx4_get_vf_config(struct mlx4_dev *dev, int port, int vf, struct ifla_vf_in int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_state); int mlx4_config_dev_retrieval(struct mlx4_dev *dev, struct mlx4_config_dev_params *params); +void mlx4_cmd_wake_completions(struct mlx4_dev *dev); /* * mlx4_get_slave_default_vlan - * return true if VST ( default vlan) -- cgit v1.2.3-70-g09d2 From c69453e294c9f16da977b68e658a8028b854c209 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:40 +0200 Subject: net/mlx4_core: Manage interface state for Reset flow cases We need to manage the interface state to synchronize the reset flow with other related flows such as remove_one. This has to be done to prevent certain races. For example, if the software stack is already down as a result of an unload call, remove_one should skip the unload phase. This patch implements the remove_one case; handling AER and other cases comes next. The interface can be up or down; upon remove_one, the state will include an extra bit indicating that the device is being cleaned up, forcing other tasks to finish before the final cleanup. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 13 +++++++++++-- drivers/net/ethernet/mellanox/mlx4/intf.c | 2 ++ drivers/net/ethernet/mellanox/mlx4/main.c | 13 ++++++++++++- include/linux/mlx4/device.h | 7 +++++++ 4 files changed, 32 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 63f14ffcc906..3fcf3cfaedfc 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -122,8 +122,14 @@ static void mlx4_handle_error_state(struct mlx4_dev_persistent *persist) int err = 0; mlx4_enter_error_state(persist); - err = mlx4_restart_one(persist->pdev); - mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n", err); + mutex_lock(&persist->interface_state_mutex); + if (persist->interface_state & MLX4_INTERFACE_STATE_UP && + !(persist->interface_state & MLX4_INTERFACE_STATE_DELETION)) { + err = mlx4_restart_one(persist->pdev); + mlx4_info(persist->dev, "mlx4_restart_one was ended, ret=%d\n", + err); + } + mutex_unlock(&persist->interface_state_mutex); } static void dump_err_buf(struct mlx4_dev *dev) @@ -211,6 +217,9 @@ void mlx4_stop_catas_poll(struct mlx4_dev *dev) iounmap(priv->catas_err.map); priv->catas_err.map = NULL; } + + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_DELETION) + flush_workqueue(dev->persist->catas_wq); } int mlx4_catas_init(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 116895ac8b35..fba0b96a6f28 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -138,6 +138,7 @@ int mlx4_register_device(struct mlx4_dev *dev) mutex_lock(&intf_mutex); + dev->persist->interface_state |= MLX4_INTERFACE_STATE_UP; list_add_tail(&priv->dev_list, &dev_list); list_for_each_entry(intf, &intf_list, list) mlx4_add_device(intf, priv); @@ -162,6 +163,7 @@ void mlx4_unregister_device(struct mlx4_dev *dev) mlx4_remove_device(intf, priv); list_del(&priv->dev_list); + dev->persist->interface_state &= ~MLX4_INTERFACE_STATE_UP; mutex_unlock(&intf_mutex); } diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index dc2d910fcc88..d59cae5da3f0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -3114,6 +3114,7 @@ static int mlx4_init_one(struct pci_dev *pdev, const struct pci_device_id *id) pci_set_drvdata(pdev, dev->persist); priv->pci_dev_data = id->driver_data; mutex_init(&dev->persist->device_state_mutex); + mutex_init(&dev->persist->interface_state_mutex); ret = __mlx4_init_one(pdev, id->driver_data, priv); if (ret) { @@ -3232,7 +3233,17 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); - mlx4_unload_one(pdev); + mutex_lock(&persist->interface_state_mutex); + persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; + mutex_unlock(&persist->interface_state_mutex); + + /* device marked to be under deletion running now without the lock + * letting other tasks to be terminated + */ + if (persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_unload_one(pdev); + else + mlx4_info(dev, "%s: interface is down\n", __func__); mlx4_catas_end(dev); pci_release_regions(pdev); pci_disable_device(pdev); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 7d5d317cb7a6..33f9ca71925c 100644 --- 
a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -416,6 +416,11 @@ enum { MLX4_DEVICE_STATE_INTERNAL_ERROR = 1 << 1, }; +enum { + MLX4_INTERFACE_STATE_UP = 1 << 0, + MLX4_INTERFACE_STATE_DELETION = 1 << 1, +}; + #define MSTR_SM_CHANGE_MASK (MLX4_EQ_PORT_INFO_MSTR_SM_SL_CHANGE_MASK | \ MLX4_EQ_PORT_INFO_MSTR_SM_LID_CHANGE_MASK) @@ -760,6 +765,8 @@ struct mlx4_dev_persistent { struct workqueue_struct *catas_wq; struct mutex device_state_mutex; /* protect HW state */ u8 state; + struct mutex interface_state_mutex; /* protect SW state */ + u8 interface_state; }; struct mlx4_dev { -- cgit v1.2.3-70-g09d2 From 55ad359225b2232b9b8f04a0dfa169bd3a7d86d2 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 25 Jan 2015 16:59:42 +0200 Subject: net/mlx4_core: Enable device recovery flow with SRIOV In SRIOV, both the PF and the VF may attempt device recovery whenever they assume that the device is not functioning. When the PF driver resets the device, the VF should detect this and attempt to reinitialize itself. The VF must be able to reset itself under all circumstances, even if the PF is not responsive. The VF shall reset itself in the following cases: 1. Commands are not processed within a reasonable time over the communication channel. This is handled considering the device state and returning the correct return code based on the command, as was done in native mode; that part is done in the next patch. 2. The VF driver receives an internal error event reported by the PF on the communication channel. This occurs when the PF driver resets the device or when the VF is out of sync with the PF. Add 'VF reset' capability, which allows the VF to reinitialize itself even when the PF is not responsive. As the PF and VF may run their reset flows simultaneously, there are several cases that are handled: - Prevent freeing VF resources upon FLR, when PF is in its unloading stage. - Prevent PF getting VF commands before it has finished initializing its resources. - Upon VF startup, check that the comm channel is online before sending commands to the PF, to avoid getting timed out. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S.
Miller --- drivers/net/ethernet/mellanox/mlx4/catas.c | 120 +++++++++++++++++++++---- drivers/net/ethernet/mellanox/mlx4/cmd.c | 77 ++++++++++++---- drivers/net/ethernet/mellanox/mlx4/eq.c | 10 ++- drivers/net/ethernet/mellanox/mlx4/intf.c | 6 +- drivers/net/ethernet/mellanox/mlx4/main.c | 135 +++++++++++++++++++++++------ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 7 +- include/linux/mlx4/cmd.h | 2 + include/linux/mlx4/device.h | 5 ++ 8 files changed, 292 insertions(+), 70 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/catas.c b/drivers/net/ethernet/mellanox/mlx4/catas.c index 3fcf3cfaedfc..715de8affcc9 100644 --- a/drivers/net/ethernet/mellanox/mlx4/catas.c +++ b/drivers/net/ethernet/mellanox/mlx4/catas.c @@ -45,8 +45,7 @@ enum { int mlx4_internal_err_reset = 1; module_param_named(internal_err_reset, mlx4_internal_err_reset, int, 0644); MODULE_PARM_DESC(internal_err_reset, - "Reset device on internal errors if non-zero" - " (default 1, in SRIOV mode default is 0)"); + "Reset device on internal errors if non-zero (default 1)"); static int read_vendor_id(struct mlx4_dev *dev) { @@ -71,6 +70,9 @@ static int mlx4_reset_master(struct mlx4_dev *dev) { int err = 0; + if (mlx4_is_master(dev)) + mlx4_report_internal_err_comm_event(dev); + if (!pci_channel_offline(dev->persist->pdev)) { err = read_vendor_id(dev); /* If PCI can't be accessed to read vendor ID we assume that its @@ -87,6 +89,81 @@ static int mlx4_reset_master(struct mlx4_dev *dev) return err; } +static int mlx4_reset_slave(struct mlx4_dev *dev) +{ +#define COM_CHAN_RST_REQ_OFFSET 0x10 +#define COM_CHAN_RST_ACK_OFFSET 0x08 + + u32 comm_flags; + u32 rst_req; + u32 rst_ack; + unsigned long end; + struct mlx4_priv *priv = mlx4_priv(dev); + + if (pci_channel_offline(dev->persist->pdev)) + return 0; + + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + if (comm_flags == 0xffffffff) { + mlx4_err(dev, "VF reset is not needed\n"); + return 0; + } + + if (!(dev->caps.vf_caps & MLX4_VF_CAP_FLAG_RESET)) { + mlx4_err(dev, "VF reset is not supported\n"); + return -EOPNOTSUPP; + } + + rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >> + COM_CHAN_RST_REQ_OFFSET; + rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >> + COM_CHAN_RST_ACK_OFFSET; + if (rst_req != rst_ack) { + mlx4_err(dev, "Communication channel isn't sync, fail to send reset\n"); + return -EIO; + } + + rst_req ^= 1; + mlx4_warn(dev, "VF is sending reset request to Firmware\n"); + comm_flags = rst_req << COM_CHAN_RST_REQ_OFFSET; + __raw_writel((__force u32)cpu_to_be32(comm_flags), + (__iomem char *)priv->mfunc.comm + MLX4_COMM_CHAN_FLAGS); + /* Make sure that our comm channel write doesn't + * get mixed in with writes from another CPU. + */ + mmiowb(); + + end = msecs_to_jiffies(MLX4_COMM_TIME) + jiffies; + while (time_before(jiffies, end)) { + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + rst_ack = (comm_flags & (u32)(1 << COM_CHAN_RST_ACK_OFFSET)) >> + COM_CHAN_RST_ACK_OFFSET; + + /* Reading rst_req again since the communication channel can + * be reset at any time by the PF and all its bits will be + * set to zero. 
+ */ + rst_req = (comm_flags & (u32)(1 << COM_CHAN_RST_REQ_OFFSET)) >> + COM_CHAN_RST_REQ_OFFSET; + + if (rst_ack == rst_req) { + mlx4_warn(dev, "VF Reset succeed\n"); + return 0; + } + cond_resched(); + } + mlx4_err(dev, "Fail to send reset over the communication channel\n"); + return -ETIMEDOUT; +} + +static int mlx4_comm_internal_err(u32 slave_read) +{ + return (u32)COMM_CHAN_EVENT_INTERNAL_ERR == + (slave_read & (u32)COMM_CHAN_EVENT_INTERNAL_ERR) ? 1 : 0; +} + void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) { int err; @@ -101,7 +178,10 @@ void mlx4_enter_error_state(struct mlx4_dev_persistent *persist) dev = persist->dev; mlx4_err(dev, "device is going to be reset\n"); - err = mlx4_reset_master(dev); + if (mlx4_is_slave(dev)) + err = mlx4_reset_slave(dev); + else + err = mlx4_reset_master(dev); BUG_ON(err != 0); dev->persist->state |= MLX4_DEVICE_STATE_INTERNAL_ERROR; @@ -148,8 +228,15 @@ static void poll_catas(unsigned long dev_ptr) { struct mlx4_dev *dev = (struct mlx4_dev *) dev_ptr; struct mlx4_priv *priv = mlx4_priv(dev); - - if (readl(priv->catas_err.map)) { + u32 slave_read; + + if (mlx4_is_slave(dev)) { + slave_read = swab32(readl(&priv->mfunc.comm->slave_read)); + if (mlx4_comm_internal_err(slave_read)) { + mlx4_warn(dev, "Internal error detected on the communication channel\n"); + goto internal_err; + } + } else if (readl(priv->catas_err.map)) { dump_err_buf(dev); goto internal_err; } @@ -182,22 +269,21 @@ void mlx4_start_catas_poll(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); phys_addr_t addr; - /*If we are in SRIOV the default of the module param must be 0*/ - if (mlx4_is_mfunc(dev)) - mlx4_internal_err_reset = 0; - INIT_LIST_HEAD(&priv->catas_err.list); init_timer(&priv->catas_err.timer); priv->catas_err.map = NULL; - addr = pci_resource_start(dev->persist->pdev, priv->fw.catas_bar) + - priv->fw.catas_offset; - - priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); - if (!priv->catas_err.map) { - mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n", - (unsigned long long) addr); - return; + if (!mlx4_is_slave(dev)) { + addr = pci_resource_start(dev->persist->pdev, + priv->fw.catas_bar) + + priv->fw.catas_offset; + + priv->catas_err.map = ioremap(addr, priv->fw.catas_size * 4); + if (!priv->catas_err.map) { + mlx4_warn(dev, "Failed to map internal error buffer at 0x%llx\n", + (unsigned long long)addr); + return; + } } priv->catas_err.timer.data = (unsigned long) dev; diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 3895b2b5fc92..7652eed4bbc8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -42,6 +42,7 @@ #include #include #include +#include #include @@ -729,7 +730,7 @@ int __mlx4_cmd(struct mlx4_dev *dev, u64 in_param, u64 *out_param, EXPORT_SYMBOL_GPL(__mlx4_cmd); -static int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev) +int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev) { return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_ARM_COMM_CHANNEL, MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); @@ -1945,8 +1946,11 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, break; case MLX4_COMM_CMD_VHCR_POST: if ((slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_EN) && - (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) + (slave_state[slave].last_cmd != MLX4_COMM_CMD_VHCR_POST)) { + mlx4_warn(dev, "slave:%d is out of sync, cmd=0x%x, last command=0x%x, reset is needed\n", + slave, cmd, slave_state[slave].last_cmd); 
goto reset_slave; + } mutex_lock(&priv->cmd.slave_cmd_mutex); if (mlx4_master_process_vhcr(dev, slave, NULL)) { @@ -1980,7 +1984,18 @@ static void mlx4_master_do_cmd(struct mlx4_dev *dev, int slave, u8 cmd, reset_slave: /* cleanup any slave resources */ - mlx4_delete_all_resources_for_slave(dev, slave); + if (dev->persist->interface_state & MLX4_INTERFACE_STATE_UP) + mlx4_delete_all_resources_for_slave(dev, slave); + + if (cmd != MLX4_COMM_CMD_RESET) { + mlx4_warn(dev, "Turn on internal error to force reset, slave=%d, cmd=0x%x\n", + slave, cmd); + /* Turn on internal error letting slave reset itself immeditaly, + * otherwise it might take till timeout on command is passed + */ + reply |= ((u32)COMM_CHAN_EVENT_INTERNAL_ERR); + } + spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); if (!slave_state[slave].is_slave_going_down) slave_state[slave].last_cmd = MLX4_COMM_CMD_RESET; @@ -2056,17 +2071,28 @@ void mlx4_master_comm_channel(struct work_struct *work) static int sync_toggles(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); - int wr_toggle; - int rd_toggle; + u32 wr_toggle; + u32 rd_toggle; unsigned long end; - wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)) >> 31; - end = jiffies + msecs_to_jiffies(5000); + wr_toggle = swab32(readl(&priv->mfunc.comm->slave_write)); + if (wr_toggle == 0xffffffff) + end = jiffies + msecs_to_jiffies(30000); + else + end = jiffies + msecs_to_jiffies(5000); while (time_before(jiffies, end)) { - rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)) >> 31; - if (rd_toggle == wr_toggle) { - priv->cmd.comm_toggle = rd_toggle; + rd_toggle = swab32(readl(&priv->mfunc.comm->slave_read)); + if (wr_toggle == 0xffffffff || rd_toggle == 0xffffffff) { + /* PCI might be offline */ + msleep(100); + wr_toggle = swab32(readl(&priv->mfunc.comm-> + slave_write)); + continue; + } + + if (rd_toggle >> 31 == wr_toggle >> 31) { + priv->cmd.comm_toggle = rd_toggle >> 31; return 0; } @@ -2172,13 +2198,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) if (mlx4_init_resource_tracker(dev)) goto err_thread; - err = mlx4_ARM_COMM_CHANNEL(dev); - if (err) { - mlx4_err(dev, " Failed to arm comm channel eq: %x\n", - err); - goto err_resource; - } - } else { err = sync_toggles(dev); if (err) { @@ -2188,8 +2207,6 @@ int mlx4_multi_func_init(struct mlx4_dev *dev) } return 0; -err_resource: - mlx4_free_resource_tracker(dev, RES_TR_FREE_ALL); err_thread: flush_workqueue(priv->mfunc.master.comm_wq); destroy_workqueue(priv->mfunc.master.comm_wq); @@ -2266,6 +2283,27 @@ err: return -ENOMEM; } +void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + int slave; + u32 slave_read; + + /* Report an internal error event to all + * communication channels. + */ + for (slave = 0; slave < dev->num_slaves; slave++) { + slave_read = swab32(readl(&priv->mfunc.comm[slave].slave_read)); + slave_read |= (u32)COMM_CHAN_EVENT_INTERNAL_ERR; + __raw_writel((__force u32)cpu_to_be32(slave_read), + &priv->mfunc.comm[slave].slave_read); + /* Make sure that our comm channel write doesn't + * get mixed in with writes from another CPU. 
+ */ + mmiowb(); + } +} + void mlx4_multi_func_cleanup(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2281,6 +2319,7 @@ void mlx4_multi_func_cleanup(struct mlx4_dev *dev) kfree(priv->mfunc.master.slave_state); kfree(priv->mfunc.master.vf_admin); kfree(priv->mfunc.master.vf_oper); + dev->num_slaves = 0; } iounmap(priv->mfunc.comm); diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 7538c9ce98a9..2f2e6067426d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -429,8 +429,14 @@ void mlx4_master_handle_slave_flr(struct work_struct *work) if (MLX4_COMM_CMD_FLR == slave_state[i].last_cmd) { mlx4_dbg(dev, "mlx4_handle_slave_flr: clean slave: %d\n", i); - - mlx4_delete_all_resources_for_slave(dev, i); + /* In case of 'Reset flow' FLR can be generated for + * a slave before mlx4_load_one is done. + * make sure interface is up before trying to delete + * slave resources which weren't allocated yet. + */ + if (dev->persist->interface_state & + MLX4_INTERFACE_STATE_UP) + mlx4_delete_all_resources_for_slave(dev, i); /*return the slave to running mode*/ spin_lock_irqsave(&priv->mfunc.master.slave_state_lock, flags); slave_state[i].last_cmd = MLX4_COMM_CMD_RESET; diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index fba0b96a6f28..68d2bad325d5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -144,8 +144,7 @@ int mlx4_register_device(struct mlx4_dev *dev) mlx4_add_device(intf, priv); mutex_unlock(&intf_mutex); - if (!mlx4_is_slave(dev)) - mlx4_start_catas_poll(dev); + mlx4_start_catas_poll(dev); return 0; } @@ -155,8 +154,7 @@ void mlx4_unregister_device(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); struct mlx4_interface *intf; - if (!mlx4_is_slave(dev)) - mlx4_stop_catas_poll(dev); + mlx4_stop_catas_poll(dev); mutex_lock(&intf_mutex); list_for_each_entry(intf, &intf_list, list) diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index 6bb0fca137cd..1baf1f1e2866 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -108,6 +108,8 @@ MODULE_PARM_DESC(enable_64b_cqe_eqe, MLX4_FUNC_CAP_EQE_CQE_STRIDE | \ MLX4_FUNC_CAP_DMFS_A0_STATIC) +#define RESET_PERSIST_MASK_FLAGS (MLX4_FLAG_SRIOV) + static char mlx4_version[] = DRV_NAME ": Mellanox ConnectX core driver v" DRV_VERSION " (" DRV_RELDATE ")\n"; @@ -1579,6 +1581,50 @@ static void mlx4_close_fw(struct mlx4_dev *dev) } } +static int mlx4_comm_check_offline(struct mlx4_dev *dev) +{ +#define COMM_CHAN_OFFLINE_OFFSET 0x09 + + u32 comm_flags; + u32 offline_bit; + unsigned long end; + struct mlx4_priv *priv = mlx4_priv(dev); + + end = msecs_to_jiffies(MLX4_COMM_OFFLINE_TIME_OUT) + jiffies; + while (time_before(jiffies, end)) { + comm_flags = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_FLAGS)); + offline_bit = (comm_flags & + (u32)(1 << COMM_CHAN_OFFLINE_OFFSET)); + if (!offline_bit) + return 0; + /* There are cases as part of AER/Reset flow that PF needs + * around 100 msec to load. We therefore sleep for 100 msec + * to allow other tasks to make use of that CPU during this + * time interval. 
+ */ + msleep(100); + } + mlx4_err(dev, "Communication channel is offline.\n"); + return -EIO; +} + +static void mlx4_reset_vf_support(struct mlx4_dev *dev) +{ +#define COMM_CHAN_RST_OFFSET 0x1e + + struct mlx4_priv *priv = mlx4_priv(dev); + u32 comm_rst; + u32 comm_caps; + + comm_caps = swab32(readl((__iomem char *)priv->mfunc.comm + + MLX4_COMM_CHAN_CAPS)); + comm_rst = (comm_caps & (u32)(1 << COMM_CHAN_RST_OFFSET)); + + if (comm_rst) + dev->caps.vf_caps |= MLX4_VF_CAP_FLAG_RESET; +} + static int mlx4_init_slave(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -1594,6 +1640,12 @@ static int mlx4_init_slave(struct mlx4_dev *dev) mutex_lock(&priv->cmd.slave_cmd_mutex); priv->cmd.max_cmds = 1; + if (mlx4_comm_check_offline(dev)) { + mlx4_err(dev, "PF is not responsive, skipping initialization\n"); + goto err_offline; + } + + mlx4_reset_vf_support(dev); mlx4_warn(dev, "Sending reset\n"); ret_from_reset = mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, MLX4_COMM_TIME); @@ -1637,6 +1689,7 @@ static int mlx4_init_slave(struct mlx4_dev *dev) err: mlx4_comm_cmd(dev, MLX4_COMM_CMD_RESET, 0, 0); +err_offline: mutex_unlock(&priv->cmd.slave_cmd_mutex); return -EIO; } @@ -2494,11 +2547,19 @@ static void mlx4_free_ownership(struct mlx4_dev *dev) !!((flags) & MLX4_FLAG_MASTER)) static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, - u8 total_vfs, int existing_vfs) + u8 total_vfs, int existing_vfs, int reset_flow) { u64 dev_flags = dev->flags; int err = 0; + if (reset_flow) { + dev->dev_vfs = kcalloc(total_vfs, sizeof(*dev->dev_vfs), + GFP_KERNEL); + if (!dev->dev_vfs) + goto free_mem; + return dev_flags; + } + atomic_inc(&pf_loading); if (dev->flags & MLX4_FLAG_SRIOV) { if (existing_vfs != total_vfs) { @@ -2533,6 +2594,7 @@ static u64 mlx4_enable_sriov(struct mlx4_dev *dev, struct pci_dev *pdev, disable_sriov: atomic_dec(&pf_loading); +free_mem: dev->persist->num_vfs = 0; kfree(dev->dev_vfs); return dev_flags & ~MLX4_FLAG_MASTER; @@ -2557,7 +2619,8 @@ static int mlx4_check_dev_cap(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap } static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, - int total_vfs, int *nvfs, struct mlx4_priv *priv) + int total_vfs, int *nvfs, struct mlx4_priv *priv, + int reset_flow) { struct mlx4_dev *dev; unsigned sum = 0; @@ -2679,8 +2742,10 @@ slave_start: goto err_fw; if (!(dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { - u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, - existing_vfs); + u64 dev_flags = mlx4_enable_sriov(dev, pdev, + total_vfs, + existing_vfs, + reset_flow); mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); dev->flags = dev_flags; @@ -2722,7 +2787,7 @@ slave_start: if (dev->flags & MLX4_FLAG_SRIOV) { if (!existing_vfs) pci_disable_sriov(pdev); - if (mlx4_is_master(dev)) + if (mlx4_is_master(dev) && !reset_flow) atomic_dec(&pf_loading); dev->flags &= ~MLX4_FLAG_SRIOV; } @@ -2736,7 +2801,8 @@ slave_start: } if (mlx4_is_master(dev) && (dev_cap->flags2 & MLX4_DEV_CAP_FLAG2_SYS_EQS)) { - u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, existing_vfs); + u64 dev_flags = mlx4_enable_sriov(dev, pdev, total_vfs, + existing_vfs, reset_flow); if ((dev->flags ^ dev_flags) & (MLX4_FLAG_MASTER | MLX4_FLAG_SLAVE)) { mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_VHCR); @@ -2848,6 +2914,17 @@ slave_start: goto err_steer; mlx4_init_quotas(dev); + /* When PF resources are ready arm its comm channel to enable + * getting commands + */ + if (mlx4_is_master(dev)) { + err = mlx4_ARM_COMM_CHANNEL(dev); + if (err) { + mlx4_err(dev, " 
Failed to arm comm channel eq: %x\n", + err); + goto err_steer; + } + } for (port = 1; port <= dev->caps.num_ports; port++) { err = mlx4_init_port_info(dev, port); @@ -2866,7 +2943,7 @@ slave_start: priv->removed = 0; - if (mlx4_is_master(dev) && dev->persist->num_vfs) + if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow) atomic_dec(&pf_loading); kfree(dev_cap); @@ -2925,10 +3002,12 @@ err_cmd: mlx4_cmd_cleanup(dev, MLX4_CMD_CLEANUP_ALL); err_sriov: - if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) + if (dev->flags & MLX4_FLAG_SRIOV && !existing_vfs) { pci_disable_sriov(pdev); + dev->flags &= ~MLX4_FLAG_SRIOV; + } - if (mlx4_is_master(dev) && dev->persist->num_vfs) + if (mlx4_is_master(dev) && dev->persist->num_vfs && !reset_flow) atomic_dec(&pf_loading); kfree(priv->dev.dev_vfs); @@ -3073,7 +3152,7 @@ static int __mlx4_init_one(struct pci_dev *pdev, int pci_dev_data, if (err) goto err_release_regions; - err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 0); if (err) goto err_catas; @@ -3131,9 +3210,11 @@ static void mlx4_clean_dev(struct mlx4_dev *dev) { struct mlx4_dev_persistent *persist = dev->persist; struct mlx4_priv *priv = mlx4_priv(dev); + unsigned long flags = (dev->flags & RESET_PERSIST_MASK_FLAGS); memset(priv, 0, sizeof(*priv)); priv->dev.persist = persist; + priv->dev.flags = flags; } static void mlx4_unload_one(struct pci_dev *pdev) @@ -3143,7 +3224,6 @@ static void mlx4_unload_one(struct pci_dev *pdev) struct mlx4_priv *priv = mlx4_priv(dev); int pci_dev_data; int p, i; - int active_vfs = 0; if (priv->removed) return; @@ -3157,14 +3237,6 @@ static void mlx4_unload_one(struct pci_dev *pdev) pci_dev_data = priv->pci_dev_data; - /* Disabling SR-IOV is not allowed while there are active vf's */ - if (mlx4_is_master(dev)) { - active_vfs = mlx4_how_many_lives_vf(dev); - if (active_vfs) { - pr_warn("Removing PF when there are active VF's !!\n"); - pr_warn("Will not disable SR-IOV.\n"); - } - } mlx4_stop_sense(dev); mlx4_unregister_device(dev); @@ -3208,12 +3280,6 @@ static void mlx4_unload_one(struct pci_dev *pdev) if (dev->flags & MLX4_FLAG_MSI_X) pci_disable_msix(pdev); - if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) { - mlx4_warn(dev, "Disabling SR-IOV\n"); - pci_disable_sriov(pdev); - dev->flags &= ~MLX4_FLAG_SRIOV; - dev->persist->num_vfs = 0; - } if (!mlx4_is_slave(dev)) mlx4_free_ownership(dev); @@ -3235,11 +3301,21 @@ static void mlx4_remove_one(struct pci_dev *pdev) struct mlx4_dev_persistent *persist = pci_get_drvdata(pdev); struct mlx4_dev *dev = persist->dev; struct mlx4_priv *priv = mlx4_priv(dev); + int active_vfs = 0; mutex_lock(&persist->interface_state_mutex); persist->interface_state |= MLX4_INTERFACE_STATE_DELETION; mutex_unlock(&persist->interface_state_mutex); + /* Disabling SR-IOV is not allowed while there are active vf's */ + if (mlx4_is_master(dev) && dev->flags & MLX4_FLAG_SRIOV) { + active_vfs = mlx4_how_many_lives_vf(dev); + if (active_vfs) { + pr_warn("Removing PF when there are active VF's !!\n"); + pr_warn("Will not disable SR-IOV.\n"); + } + } + /* device marked to be under deletion running now without the lock * letting other tasks to be terminated */ @@ -3248,6 +3324,11 @@ static void mlx4_remove_one(struct pci_dev *pdev) else mlx4_info(dev, "%s: interface is down\n", __func__); mlx4_catas_end(dev); + if (dev->flags & MLX4_FLAG_SRIOV && !active_vfs) { + mlx4_warn(dev, "Disabling SR-IOV\n"); + pci_disable_sriov(pdev); + } + pci_release_regions(pdev); 
pci_disable_device(pdev); kfree(dev->persist); @@ -3287,7 +3368,7 @@ int mlx4_restart_one(struct pci_dev *pdev) memcpy(nvfs, dev->persist->nvfs, sizeof(dev->persist->nvfs)); mlx4_unload_one(pdev); - err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv); + err = mlx4_load_one(pdev, pci_dev_data, total_vfs, nvfs, priv, 1); if (err) { mlx4_err(dev, "%s: ERROR: mlx4_load_one failed, pci_name=%s, err=%d\n", __func__, pci_name(pdev), err); @@ -3397,7 +3478,7 @@ static pci_ers_result_t mlx4_pci_slot_reset(struct pci_dev *pdev) mutex_lock(&persist->interface_state_mutex); if (!(persist->interface_state & MLX4_INTERFACE_STATE_UP)) { ret = mlx4_load_one(pdev, priv->pci_dev_data, total_vfs, nvfs, - priv); + priv, 1); if (ret) { mlx4_err(dev, "%s: mlx4_load_one failed, ret=%d\n", __func__, ret); diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 5c772ea4473b..2a15b8248e77 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -85,7 +85,9 @@ enum { MLX4_CLR_INT_SIZE = 0x00008, MLX4_SLAVE_COMM_BASE = 0x0, MLX4_COMM_PAGESIZE = 0x1000, - MLX4_CLOCK_SIZE = 0x00008 + MLX4_CLOCK_SIZE = 0x00008, + MLX4_COMM_CHAN_CAPS = 0x8, + MLX4_COMM_CHAN_FLAGS = 0xc }; enum { @@ -120,6 +122,8 @@ enum mlx4_mpt_state { }; #define MLX4_COMM_TIME 10000 +#define MLX4_COMM_OFFLINE_TIME_OUT 30000 + enum { MLX4_COMM_CMD_RESET, MLX4_COMM_CMD_VHCR0, @@ -1162,6 +1166,7 @@ enum { int mlx4_cmd_init(struct mlx4_dev *dev); void mlx4_cmd_cleanup(struct mlx4_dev *dev, int cleanup_mask); int mlx4_multi_func_init(struct mlx4_dev *dev); +int mlx4_ARM_COMM_CHANNEL(struct mlx4_dev *dev); void mlx4_multi_func_cleanup(struct mlx4_dev *dev); void mlx4_cmd_event(struct mlx4_dev *dev, u16 token, u8 status, u64 out_param); int mlx4_cmd_use_events(struct mlx4_dev *dev); diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index e7543844cc7a..c989442ffc6a 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -280,6 +280,7 @@ int mlx4_set_vf_link_state(struct mlx4_dev *dev, int port, int vf, int link_stat int mlx4_config_dev_retrieval(struct mlx4_dev *dev, struct mlx4_config_dev_params *params); void mlx4_cmd_wake_completions(struct mlx4_dev *dev); +void mlx4_report_internal_err_comm_event(struct mlx4_dev *dev); /* * mlx4_get_slave_default_vlan - * return true if VST ( default vlan) @@ -289,5 +290,6 @@ bool mlx4_get_slave_default_vlan(struct mlx4_dev *dev, int port, int slave, u16 *vlan, u8 *qos); #define MLX4_COMM_GET_IF_REV(cmd_chan_ver) (u8)((cmd_chan_ver) >> 8) +#define COMM_CHAN_EVENT_INTERNAL_ERR (1 << 17) #endif /* MLX4_CMD_H */ diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 33f9ca71925c..5ef54e145e4d 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -208,6 +208,10 @@ enum { MLX4_QUERY_FUNC_FLAGS_A0_RES_QP = 1LL << 1 }; +enum { + MLX4_VF_CAP_FLAG_RESET = 1 << 0 +}; + /* bit enums for an 8-bit flags field indicating special use * QPs which require special handling in qp_reserve_range. 
* Currently, this only includes QPs used by the ETH interface, @@ -545,6 +549,7 @@ struct mlx4_caps { u8 alloc_res_qp_mask; u32 dmfs_high_rate_qpn_base; u32 dmfs_high_rate_qpn_range; + u32 vf_caps; }; struct mlx4_buf_list { -- cgit v1.2.3-70-g09d2 From c2943f14534bdc4230f4da6dcd4ea03c5d8c8162 Mon Sep 17 00:00:00 2001 From: Harout Hedeshian Date: Tue, 20 Jan 2015 10:06:05 -0700 Subject: net: ipv6: Add sysctl entry to disable MTU updates from RA The kernel forcefully applies MTU values received in router advertisements provided the new MTU is less than the current. This behavior is undesirable when the user space is managing the MTU. Instead a sysctl flag 'accept_ra_mtu' is introduced such that the user space can control whether or not RA provided MTU updates should be applied. The default behavior is unchanged; user space must explicitly set this flag to 0 for RA MTUs to be ignored. Signed-off-by: Harout Hedeshian Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 7 +++++++ include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + net/ipv6/addrconf.c | 10 ++++++++++ net/ipv6/ndisc.c | 2 +- 5 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 85b022179104..a5e4c813f17f 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -1287,6 +1287,13 @@ accept_ra_rtr_pref - BOOLEAN Functional default: enabled if accept_ra is enabled. disabled if accept_ra is disabled. +accept_ra_mtu - BOOLEAN + Apply the MTU value specified in RA option 5 (RFC4861). If + disabled, the MTU specified in the RA will be ignored. + + Functional default: enabled if accept_ra is enabled. + disabled if accept_ra is disabled. + accept_redirects - BOOLEAN Accept Redirects. 
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index c694e7baa621..2805062c013f 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -52,6 +52,7 @@ struct ipv6_devconf { __s32 force_tllao; __s32 ndisc_notify; __s32 suppress_frag_ndisc; + __s32 accept_ra_mtu; void *sysctl; }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 73cb02dc3065..437a6a4b125a 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -169,6 +169,7 @@ enum { DEVCONF_SUPPRESS_FRAG_NDISC, DEVCONF_ACCEPT_RA_FROM_LOCAL, DEVCONF_USE_OPTIMISTIC, + DEVCONF_ACCEPT_RA_MTU, DEVCONF_MAX }; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index d6b4f5d08014..7dcc065e2160 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -201,6 +201,7 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .disable_ipv6 = 0, .accept_dad = 1, .suppress_frag_ndisc = 1, + .accept_ra_mtu = 1, }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -238,6 +239,7 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .disable_ipv6 = 0, .accept_dad = 1, .suppress_frag_ndisc = 1, + .accept_ra_mtu = 1, }; /* Check if a valid qdisc is available */ @@ -4380,6 +4382,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_NDISC_NOTIFY] = cnf->ndisc_notify; array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; + array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; } static inline size_t inet6_ifla6_size(void) @@ -5258,6 +5261,13 @@ static struct addrconf_sysctl_table .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "accept_ra_mtu", + .data = &ipv6_devconf.accept_ra_mtu, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { /* sentinel */ } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 682866777d53..8a9d7c19e247 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1348,7 +1348,7 @@ skip_routeinfo: } } - if (ndopts.nd_opts_mtu) { + if (ndopts.nd_opts_mtu && in6_dev->cnf.accept_ra_mtu) { __be32 n; u32 mtu; -- cgit v1.2.3-70-g09d2 From 607954b084d4ad5e6a2e0f795de7803d9c6ae37f Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 21 Jan 2015 11:12:13 +0000 Subject: rhashtable: fix rht_for_each_entry_safe() endless loop "next" is not updated, causing an endless loop for buckets with more than one element. Fixes: 88d6ed15acff ("rhashtable: Convert bucket iterators to take table and index") Signed-off-by: Patrick McHardy Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index a2562ed53ea3..e0337844358e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -260,7 +260,9 @@ void rhashtable_destroy(struct rhashtable *ht); next = !rht_is_a_nulls(pos) ? \ rht_dereference_bucket(pos->next, tbl, hash) : NULL; \ (!rht_is_a_nulls(pos)) && rht_entry(tpos, pos, member); \ - pos = next) + pos = next, \ + next = !rht_is_a_nulls(pos) ? \ + rht_dereference_bucket(pos->next, tbl, hash) : NULL) /** * rht_for_each_rcu_continue - continue iterating over rcu hash chain -- cgit v1.2.3-70-g09d2 From 0b35fa7daefe9da6fdef91d95e07eebb714a8fcc Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 Jan 2015 23:33:30 +0100 Subject: NFC: st21nfcb: Remove useless include include/linux/platform_data/st21nfcb.h is phy generic. 
There is no need to include linux/i2c.h Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/linux/platform_data/st21nfcb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/platform_data/st21nfcb.h b/include/linux/platform_data/st21nfcb.h index c3b432f5b63e..05c54c958cc3 100644 --- a/include/linux/platform_data/st21nfcb.h +++ b/include/linux/platform_data/st21nfcb.h @@ -19,8 +19,6 @@ #ifndef _ST21NFCB_NCI_H_ #define _ST21NFCB_NCI_H_ -#include - #define ST21NFCB_NCI_DRIVER_NAME "st21nfcb_nci" struct st21nfcb_nfc_platform_data { -- cgit v1.2.3-70-g09d2 From 6da7c85c75eed769423b428eb654eaaf89d273c1 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 Jan 2015 23:33:31 +0100 Subject: NFC: st21nfcb: Fix copy/paste error in comment include/linux/platform_data/st21nfcb.h is based on include/linux/platform_data/st21nfca.h. The endif comment is inaccurate for st21nfcb. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/linux/platform_data/st21nfcb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/platform_data/st21nfcb.h b/include/linux/platform_data/st21nfcb.h index 05c54c958cc3..b023373d9874 100644 --- a/include/linux/platform_data/st21nfcb.h +++ b/include/linux/platform_data/st21nfcb.h @@ -26,4 +26,4 @@ struct st21nfcb_nfc_platform_data { unsigned int irq_polarity; }; -#endif /* _ST21NFCA_HCI_H_ */ +#endif /* _ST21NFCB_NCI_H_ */ -- cgit v1.2.3-70-g09d2 From 7b1883cefc288b2725966357edd2d8f321605622 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 21 Jan 2015 16:42:51 -0800 Subject: genetlink: Add genlmsg_parse() helper function. The first user will be the next patch. Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/net/genetlink.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index f24aa83b80b6..d5a9a8b5af37 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -205,6 +205,23 @@ static inline struct nlmsghdr *genlmsg_nlhdr(void *user_hdr, NLMSG_HDRLEN); } +/** + * genlmsg_parse - parse attributes of a genetlink message + * @nlh: netlink message header + * @family: genetlink message family + * @tb: destination array with maxtype+1 elements + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * */ +static inline int genlmsg_parse(const struct nlmsghdr *nlh, + const struct genl_family *family, + struct nlattr *tb[], int maxtype, + const struct nla_policy *policy) +{ + return nlmsg_parse(nlh, family->hdrsize + GENL_HDRLEN, tb, maxtype, + policy); +} + /** * genl_dump_check_consistent - check if sequence is consistent and advertise if not * @cb: netlink callback structure that stores the sequence number -- cgit v1.2.3-70-g09d2 From 74ed7ab9264c54471c7f057409d352052820d750 Mon Sep 17 00:00:00 2001 From: Joe Stringer Date: Wed, 21 Jan 2015 16:42:52 -0800 Subject: openvswitch: Add support for unique flow IDs. Previously, flows were manipulated by userspace specifying a full, unmasked flow key. This places a significant burden on flow serialization/deserialization, particularly when dumping flows. This patch adds an alternative way to refer to flows using a variable-length "unique flow identifier" (UFID).
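As a minimal sketch of the indexing idea that the rest of this description develops (illustrative C only, with hypothetical structures; a single linear-scan array here stands in for the kernel's separate flow table and UFID table): a flow remains reachable through its key, and, when userspace supplied an identifier, through that identifier as well.

#include <stdio.h>
#include <string.h>

#define MAX_FLOWS 8
#define MAX_UFID_LEN 16	/* the patch allows identifiers of 1-16 octets */

struct flow {
	char key[32];			/* stand-in for the full flow key */
	unsigned char ufid[MAX_UFID_LEN];
	size_t ufid_len;		/* 0 means "no UFID was given" */
	int in_use;
};

static struct flow flow_table[MAX_FLOWS];

/* Classic path: find a flow by its key. */
static struct flow *lookup_by_key(const char *key)
{
	for (int i = 0; i < MAX_FLOWS; i++)
		if (flow_table[i].in_use && strcmp(flow_table[i].key, key) == 0)
			return &flow_table[i];
	return NULL;
}

/* UFID path: find a flow by the opaque identifier userspace supplied. */
static struct flow *lookup_by_ufid(const unsigned char *ufid, size_t len)
{
	for (int i = 0; i < MAX_FLOWS; i++)
		if (flow_table[i].in_use && flow_table[i].ufid_len == len &&
		    memcmp(flow_table[i].ufid, ufid, len) == 0)
			return &flow_table[i];
	return NULL;
}

int main(void)
{
	unsigned char id[] = { 0xde, 0xad, 0xbe, 0xef };

	/* Install one flow carrying a 4-octet identifier. */
	flow_table[0].in_use = 1;
	strcpy(flow_table[0].key, "in_port=1,eth_type=0x0800");
	memcpy(flow_table[0].ufid, id, sizeof(id));
	flow_table[0].ufid_len = sizeof(id);

	printf("by key : %s\n", lookup_by_key("in_port=1,eth_type=0x0800") ? "hit" : "miss");
	printf("by ufid: %s\n", lookup_by_ufid(id, sizeof(id)) ? "hit" : "miss");
	return 0;
}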
At flow setup time, userspace may specify a UFID for a flow, which is stored with the flow and inserted into a separate table for lookup, in addition to the standard flow table. Flows created using a UFID must be fetched or deleted using the UFID. All flow dump operations may now be made more terse with OVS_UFID_F_* flags. For example, the OVS_UFID_F_OMIT_KEY flag allows responses to omit the flow key from a datapath operation if the flow has a corresponding UFID. This significantly reduces the time spent assembling and transacting netlink messages. With all OVS_UFID_F_OMIT_* flags enabled, the datapath only returns the UFID and statistics for each flow during flow dump, increasing ovs-vswitchd revalidator performance by 40% or more. Signed-off-by: Joe Stringer Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- Documentation/networking/openvswitch.txt | 13 ++ include/uapi/linux/openvswitch.h | 20 +++ net/openvswitch/datapath.c | 207 +++++++++++++++++++++++-------- net/openvswitch/flow.h | 28 ++++- net/openvswitch/flow_netlink.c | 68 +++++++++- net/openvswitch/flow_netlink.h | 8 +- net/openvswitch/flow_table.c | 187 +++++++++++++++++++++++----- net/openvswitch/flow_table.h | 8 +- 8 files changed, 448 insertions(+), 91 deletions(-) (limited to 'include') diff --git a/Documentation/networking/openvswitch.txt b/Documentation/networking/openvswitch.txt index 37c20ee2455e..b3b9ac61d29d 100644 --- a/Documentation/networking/openvswitch.txt +++ b/Documentation/networking/openvswitch.txt @@ -131,6 +131,19 @@ performs best-effort detection of overlapping wildcarded flows and may reject some but not all of them. However, this behavior may change in future versions. +Unique flow identifiers +----------------------- + +An alternative to using the original match portion of a key as the handle for +flow identification is a unique flow identifier, or "UFID". UFIDs are optional +for both the kernel and user space program. + +User space programs that support UFID are expected to provide it during flow +setup in addition to the flow, then refer to the flow using the UFID for all +future operations. The kernel is not required to index flows by the original +flow key if a UFID is specified. + + Basic rule for evolving flow keys --------------------------------- diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index cd8d933963c2..7a8785a99243 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -459,6 +459,14 @@ struct ovs_key_nd { * a wildcarded match. Omitting attribute is treated as wildcarding all * corresponding fields. Optional for all requests. If not present, * all flow key bits are exact match bits. + * @OVS_FLOW_ATTR_UFID: A value between 1-16 octets specifying a unique + * identifier for the flow. Causes the flow to be indexed by this value rather + * than the value of the %OVS_FLOW_ATTR_KEY attribute. Optional for all + * requests. Present in notifications if the flow was created with this + * attribute. + * @OVS_FLOW_ATTR_UFID_FLAGS: A 32-bit value of OR'd %OVS_UFID_F_* + * flags that provide alternative semantics for flow installation and + * retrieval. Optional for all requests. * * These attributes follow the &struct ovs_header within the Generic Netlink * payload for %OVS_FLOW_* commands. @@ -474,11 +482,23 @@ enum ovs_flow_attr { OVS_FLOW_ATTR_MASK, /* Sequence of OVS_KEY_ATTR_* attributes. */ OVS_FLOW_ATTR_PROBE, /* Flow operation is a feature probe, error * logging should be suppressed. 
*/ + OVS_FLOW_ATTR_UFID, /* Variable length unique flow identifier. */ + OVS_FLOW_ATTR_UFID_FLAGS,/* u32 of OVS_UFID_F_*. */ __OVS_FLOW_ATTR_MAX }; #define OVS_FLOW_ATTR_MAX (__OVS_FLOW_ATTR_MAX - 1) +/** + * Omit attributes for notifications. + * + * If a datapath request contains an %OVS_UFID_F_OMIT_* flag, then the datapath + * may omit the corresponding %OVS_FLOW_ATTR_* from the response. + */ +#define OVS_UFID_F_OMIT_KEY (1 << 0) +#define OVS_UFID_F_OMIT_MASK (1 << 1) +#define OVS_UFID_F_OMIT_ACTIONS (1 << 2) + /** * enum ovs_sample_attr - Attributes for %OVS_ACTION_ATTR_SAMPLE action. * @OVS_SAMPLE_ATTR_PROBABILITY: 32-bit fraction of packets to sample with diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 257b97546b33..ae5e77cdc0ca 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -65,6 +65,8 @@ static struct genl_family dp_packet_genl_family; static struct genl_family dp_flow_genl_family; static struct genl_family dp_datapath_genl_family; +static const struct nla_policy flow_policy[]; + static const struct genl_multicast_group ovs_dp_flow_multicast_group = { .name = OVS_FLOW_MCGROUP, }; @@ -662,15 +664,48 @@ static void get_dp_stats(const struct datapath *dp, struct ovs_dp_stats *stats, } } -static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts) +static bool should_fill_key(const struct sw_flow_id *sfid, uint32_t ufid_flags) +{ + return ovs_identifier_is_ufid(sfid) && + !(ufid_flags & OVS_UFID_F_OMIT_KEY); +} + +static bool should_fill_mask(uint32_t ufid_flags) +{ + return !(ufid_flags & OVS_UFID_F_OMIT_MASK); +} + +static bool should_fill_actions(uint32_t ufid_flags) { - return NLMSG_ALIGN(sizeof(struct ovs_header)) - + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_KEY */ - + nla_total_size(ovs_key_attr_size()) /* OVS_FLOW_ATTR_MASK */ + return !(ufid_flags & OVS_UFID_F_OMIT_ACTIONS); +} + +static size_t ovs_flow_cmd_msg_size(const struct sw_flow_actions *acts, + const struct sw_flow_id *sfid, + uint32_t ufid_flags) +{ + size_t len = NLMSG_ALIGN(sizeof(struct ovs_header)); + + /* OVS_FLOW_ATTR_UFID */ + if (sfid && ovs_identifier_is_ufid(sfid)) + len += nla_total_size(sfid->ufid_len); + + /* OVS_FLOW_ATTR_KEY */ + if (!sfid || should_fill_key(sfid, ufid_flags)) + len += nla_total_size(ovs_key_attr_size()); + + /* OVS_FLOW_ATTR_MASK */ + if (should_fill_mask(ufid_flags)) + len += nla_total_size(ovs_key_attr_size()); + + /* OVS_FLOW_ATTR_ACTIONS */ + if (should_fill_actions(ufid_flags)) + len += nla_total_size(acts->actions_len); + + return len + nla_total_size(sizeof(struct ovs_flow_stats)) /* OVS_FLOW_ATTR_STATS */ + nla_total_size(1) /* OVS_FLOW_ATTR_TCP_FLAGS */ - + nla_total_size(8) /* OVS_FLOW_ATTR_USED */ - + nla_total_size(acts->actions_len); /* OVS_FLOW_ATTR_ACTIONS */ + + nla_total_size(8); /* OVS_FLOW_ATTR_USED */ } /* Called with ovs_mutex or RCU read lock. */ @@ -741,7 +776,7 @@ static int ovs_flow_cmd_fill_actions(const struct sw_flow *flow, /* Called with ovs_mutex or RCU read lock. 
*/ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, struct sk_buff *skb, u32 portid, - u32 seq, u32 flags, u8 cmd) + u32 seq, u32 flags, u8 cmd, u32 ufid_flags) { const int skb_orig_len = skb->len; struct ovs_header *ovs_header; @@ -754,21 +789,31 @@ static int ovs_flow_cmd_fill_info(const struct sw_flow *flow, int dp_ifindex, ovs_header->dp_ifindex = dp_ifindex; - err = ovs_nla_put_unmasked_key(flow, skb); + err = ovs_nla_put_identifier(flow, skb); if (err) goto error; - err = ovs_nla_put_mask(flow, skb); - if (err) - goto error; + if (should_fill_key(&flow->id, ufid_flags)) { + err = ovs_nla_put_masked_key(flow, skb); + if (err) + goto error; + } + + if (should_fill_mask(ufid_flags)) { + err = ovs_nla_put_mask(flow, skb); + if (err) + goto error; + } err = ovs_flow_cmd_fill_stats(flow, skb); if (err) goto error; - err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); - if (err) - goto error; + if (should_fill_actions(ufid_flags)) { + err = ovs_flow_cmd_fill_actions(flow, skb, skb_orig_len); + if (err) + goto error; + } genlmsg_end(skb, ovs_header); return 0; @@ -780,15 +825,19 @@ error: /* May not be called with RCU read lock. */ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *acts, + const struct sw_flow_id *sfid, struct genl_info *info, - bool always) + bool always, + uint32_t ufid_flags) { struct sk_buff *skb; + size_t len; if (!always && !ovs_must_notify(&dp_flow_genl_family, info, 0)) return NULL; - skb = genlmsg_new_unicast(ovs_flow_cmd_msg_size(acts), info, GFP_KERNEL); + len = ovs_flow_cmd_msg_size(acts, sfid, ufid_flags); + skb = genlmsg_new_unicast(len, info, GFP_KERNEL); if (!skb) return ERR_PTR(-ENOMEM); @@ -799,19 +848,19 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(const struct sw_flow_actions *act static struct sk_buff *ovs_flow_cmd_build_info(const struct sw_flow *flow, int dp_ifindex, struct genl_info *info, u8 cmd, - bool always) + bool always, u32 ufid_flags) { struct sk_buff *skb; int retval; - skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), info, - always); + skb = ovs_flow_cmd_alloc_info(ovsl_dereference(flow->sf_acts), + &flow->id, info, always, ufid_flags); if (IS_ERR_OR_NULL(skb)) return skb; retval = ovs_flow_cmd_fill_info(flow, dp_ifindex, skb, info->snd_portid, info->snd_seq, 0, - cmd); + cmd, ufid_flags); BUG_ON(retval < 0); return skb; } @@ -820,12 +869,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) { struct nlattr **a = info->attrs; struct ovs_header *ovs_header = info->userhdr; - struct sw_flow *flow, *new_flow; + struct sw_flow *flow = NULL, *new_flow; struct sw_flow_mask mask; struct sk_buff *reply; struct datapath *dp; + struct sw_flow_key key; struct sw_flow_actions *acts; struct sw_flow_match match; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int error; bool log = !a[OVS_FLOW_ATTR_PROBE]; @@ -850,13 +901,19 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) } /* Extract key. */ - ovs_match_init(&match, &new_flow->unmasked_key, &mask); + ovs_match_init(&match, &key, &mask); error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); if (error) goto err_kfree_flow; - ovs_flow_mask_key(&new_flow->key, &new_flow->unmasked_key, &mask); + ovs_flow_mask_key(&new_flow->key, &key, &mask); + + /* Extract flow identifier. */ + error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID], + &key, log); + if (error) + goto err_kfree_flow; /* Validate actions. 
*/ error = ovs_nla_copy_actions(a[OVS_FLOW_ATTR_ACTIONS], &new_flow->key, @@ -866,7 +923,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_kfree_flow; } - reply = ovs_flow_cmd_alloc_info(acts, info, false); + reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false, + ufid_flags); if (IS_ERR(reply)) { error = PTR_ERR(reply); goto err_kfree_acts; @@ -878,8 +936,12 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) error = -ENODEV; goto err_unlock_ovs; } + /* Check if this is a duplicate flow */ - flow = ovs_flow_tbl_lookup(&dp->table, &new_flow->unmasked_key); + if (ovs_identifier_is_ufid(&new_flow->id)) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &new_flow->id); + if (!flow) + flow = ovs_flow_tbl_lookup(&dp->table, &key); if (likely(!flow)) { rcu_assign_pointer(new_flow->sf_acts, acts); @@ -895,7 +957,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } ovs_unlock(); @@ -913,10 +976,15 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) error = -EEXIST; goto err_unlock_ovs; } - /* The unmasked key has to be the same for flow updates. */ - if (unlikely(!ovs_flow_cmp_unmasked_key(flow, &match))) { - /* Look for any overlapping flow. */ - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + /* The flow identifier has to be the same for flow updates. + * Look for any overlapping flow. + */ + if (unlikely(!ovs_flow_cmp(flow, &match))) { + if (ovs_identifier_is_key(&flow->id)) + flow = ovs_flow_tbl_lookup_exact(&dp->table, + &match); + else /* UFID matches but key is different */ + flow = NULL; if (!flow) { error = -ENOENT; goto err_unlock_ovs; @@ -931,7 +999,8 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } ovs_unlock(); @@ -987,8 +1056,11 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) struct datapath *dp; struct sw_flow_actions *old_acts = NULL, *acts = NULL; struct sw_flow_match match; + struct sw_flow_id sfid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int error; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; /* Extract key. */ error = -EINVAL; @@ -997,6 +1069,7 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) goto error; } + ufid_present = ovs_nla_get_ufid(&sfid, a[OVS_FLOW_ATTR_UFID], log); ovs_match_init(&match, &key, &mask); error = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], a[OVS_FLOW_ATTR_MASK], log); @@ -1013,7 +1086,8 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) } /* Can allocate before locking if have acts. */ - reply = ovs_flow_cmd_alloc_info(acts, info, false); + reply = ovs_flow_cmd_alloc_info(acts, &sfid, info, false, + ufid_flags); if (IS_ERR(reply)) { error = PTR_ERR(reply); goto err_kfree_acts; @@ -1027,7 +1101,10 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) goto err_unlock_ovs; } /* Check that the flow exists. 
*/ - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &sfid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (unlikely(!flow)) { error = -ENOENT; goto err_unlock_ovs; @@ -1043,13 +1120,16 @@ static int ovs_flow_cmd_set(struct sk_buff *skb, struct genl_info *info) ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_NEW); + OVS_FLOW_CMD_NEW, + ufid_flags); BUG_ON(error < 0); } } else { /* Could not alloc without acts before locking. */ reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, - info, OVS_FLOW_CMD_NEW, false); + info, OVS_FLOW_CMD_NEW, false, + ufid_flags); + if (unlikely(IS_ERR(reply))) { error = PTR_ERR(reply); goto err_unlock_ovs; @@ -1086,17 +1166,22 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) struct sw_flow *flow; struct datapath *dp; struct sw_flow_match match; - int err; + struct sw_flow_id ufid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); + int err = 0; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; - if (!a[OVS_FLOW_ATTR_KEY]) { + ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); + if (a[OVS_FLOW_ATTR_KEY]) { + ovs_match_init(&match, &key, NULL); + err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, + log); + } else if (!ufid_present) { OVS_NLERR(log, "Flow get message rejected, Key attribute missing."); - return -EINVAL; + err = -EINVAL; } - - ovs_match_init(&match, &key, NULL); - err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log); if (err) return err; @@ -1107,14 +1192,17 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) goto unlock; } - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (!flow) { err = -ENOENT; goto unlock; } reply = ovs_flow_cmd_build_info(flow, ovs_header->dp_ifindex, info, - OVS_FLOW_CMD_NEW, true); + OVS_FLOW_CMD_NEW, true, ufid_flags); if (IS_ERR(reply)) { err = PTR_ERR(reply); goto unlock; @@ -1133,13 +1221,17 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) struct ovs_header *ovs_header = info->userhdr; struct sw_flow_key key; struct sk_buff *reply; - struct sw_flow *flow; + struct sw_flow *flow = NULL; struct datapath *dp; struct sw_flow_match match; + struct sw_flow_id ufid; + u32 ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); int err; bool log = !a[OVS_FLOW_ATTR_PROBE]; + bool ufid_present; - if (likely(a[OVS_FLOW_ATTR_KEY])) { + ufid_present = ovs_nla_get_ufid(&ufid, a[OVS_FLOW_ATTR_UFID], log); + if (a[OVS_FLOW_ATTR_KEY]) { ovs_match_init(&match, &key, NULL); err = ovs_nla_get_match(&match, a[OVS_FLOW_ATTR_KEY], NULL, log); @@ -1154,12 +1246,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) goto unlock; } - if (unlikely(!a[OVS_FLOW_ATTR_KEY])) { + if (unlikely(!a[OVS_FLOW_ATTR_KEY] && !ufid_present)) { err = ovs_flow_tbl_flush(&dp->table); goto unlock; } - flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); + if (ufid_present) + flow = ovs_flow_tbl_lookup_ufid(&dp->table, &ufid); + else + flow = ovs_flow_tbl_lookup_exact(&dp->table, &match); if (unlikely(!flow)) { err = -ENOENT; goto unlock; @@ -1169,14 +1264,15 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_unlock(); reply = ovs_flow_cmd_alloc_info((const struct sw_flow_actions 
__force *) flow->sf_acts, - info, false); + &flow->id, info, false, ufid_flags); if (likely(reply)) { if (likely(!IS_ERR(reply))) { rcu_read_lock(); /*To keep RCU checker happy. */ err = ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, reply, info->snd_portid, info->snd_seq, 0, - OVS_FLOW_CMD_DEL); + OVS_FLOW_CMD_DEL, + ufid_flags); rcu_read_unlock(); BUG_ON(err < 0); @@ -1195,9 +1291,18 @@ unlock: static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) { + struct nlattr *a[__OVS_FLOW_ATTR_MAX]; struct ovs_header *ovs_header = genlmsg_data(nlmsg_data(cb->nlh)); struct table_instance *ti; struct datapath *dp; + u32 ufid_flags; + int err; + + err = genlmsg_parse(cb->nlh, &dp_flow_genl_family, a, + OVS_FLOW_ATTR_MAX, flow_policy); + if (err) + return err; + ufid_flags = ovs_nla_get_ufid_flags(a[OVS_FLOW_ATTR_UFID_FLAGS]); rcu_read_lock(); dp = get_dp_rcu(sock_net(skb->sk), ovs_header->dp_ifindex); @@ -1220,7 +1325,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) if (ovs_flow_cmd_fill_info(flow, ovs_header->dp_ifindex, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, - OVS_FLOW_CMD_NEW) < 0) + OVS_FLOW_CMD_NEW, ufid_flags) < 0) break; cb->args[0] = bucket; @@ -1236,6 +1341,8 @@ static const struct nla_policy flow_policy[OVS_FLOW_ATTR_MAX + 1] = { [OVS_FLOW_ATTR_ACTIONS] = { .type = NLA_NESTED }, [OVS_FLOW_ATTR_CLEAR] = { .type = NLA_FLAG }, [OVS_FLOW_ATTR_PROBE] = { .type = NLA_FLAG }, + [OVS_FLOW_ATTR_UFID] = { .type = NLA_UNSPEC, .len = 1 }, + [OVS_FLOW_ATTR_UFID_FLAGS] = { .type = NLA_U32 }, }; static const struct genl_ops dp_flow_genl_ops[] = { diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h index d3d0a406562d..a076e445ccc2 100644 --- a/net/openvswitch/flow.h +++ b/net/openvswitch/flow.h @@ -197,6 +197,16 @@ struct sw_flow_match { struct sw_flow_mask *mask; }; +#define MAX_UFID_LENGTH 16 /* 128 bits */ + +struct sw_flow_id { + u32 ufid_len; + union { + u32 ufid[MAX_UFID_LENGTH / 4]; + struct sw_flow_key *unmasked_key; + }; +}; + struct sw_flow_actions { struct rcu_head rcu; u32 actions_len; @@ -213,13 +223,15 @@ struct flow_stats { struct sw_flow { struct rcu_head rcu; - struct hlist_node hash_node[2]; - u32 hash; + struct { + struct hlist_node node[2]; + u32 hash; + } flow_table, ufid_table; int stats_last_writer; /* NUMA-node id of the last writer on * 'stats[0]'. */ struct sw_flow_key key; - struct sw_flow_key unmasked_key; + struct sw_flow_id id; struct sw_flow_mask *mask; struct sw_flow_actions __rcu *sf_acts; struct flow_stats __rcu *stats[]; /* One for each NUMA node. 
First one @@ -243,6 +255,16 @@ struct arp_eth_header { unsigned char ar_tip[4]; /* target IP address */ } __packed; +static inline bool ovs_identifier_is_ufid(const struct sw_flow_id *sfid) +{ + return sfid->ufid_len; +} + +static inline bool ovs_identifier_is_key(const struct sw_flow_id *sfid) +{ + return !ovs_identifier_is_ufid(sfid); +} + void ovs_flow_stats_update(struct sw_flow *, __be16 tcp_flags, const struct sk_buff *); void ovs_flow_stats_get(const struct sw_flow *, struct ovs_flow_stats *, diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 33751f81bfcb..8b9a612b39d1 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1180,6 +1180,59 @@ free_newmask: return err; } +static size_t get_ufid_len(const struct nlattr *attr, bool log) +{ + size_t len; + + if (!attr) + return 0; + + len = nla_len(attr); + if (len < 1 || len > MAX_UFID_LENGTH) { + OVS_NLERR(log, "ufid size %u bytes exceeds the range (1, %d)", + nla_len(attr), MAX_UFID_LENGTH); + return 0; + } + + return len; +} + +/* Initializes 'flow->ufid', returning true if 'attr' contains a valid UFID, + * or false otherwise. + */ +bool ovs_nla_get_ufid(struct sw_flow_id *sfid, const struct nlattr *attr, + bool log) +{ + sfid->ufid_len = get_ufid_len(attr, log); + if (sfid->ufid_len) + memcpy(sfid->ufid, nla_data(attr), sfid->ufid_len); + + return sfid->ufid_len; +} + +int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, + const struct sw_flow_key *key, bool log) +{ + struct sw_flow_key *new_key; + + if (ovs_nla_get_ufid(sfid, ufid, log)) + return 0; + + /* If UFID was not provided, use unmasked key. */ + new_key = kmalloc(sizeof(*new_key), GFP_KERNEL); + if (!new_key) + return -ENOMEM; + memcpy(new_key, key, sizeof(*key)); + sfid->unmasked_key = new_key; + + return 0; +} + +u32 ovs_nla_get_ufid_flags(const struct nlattr *attr) +{ + return attr ? nla_get_u32(attr) : 0; +} + /** * ovs_nla_get_flow_metadata - parses Netlink attributes into a flow key. * @key: Receives extracted in_port, priority, tun_key and skb_mark. @@ -1450,9 +1503,20 @@ int ovs_nla_put_key(const struct sw_flow_key *swkey, } /* Called with ovs_mutex or RCU read lock. */ -int ovs_nla_put_unmasked_key(const struct sw_flow *flow, struct sk_buff *skb) +int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb) +{ + if (ovs_identifier_is_ufid(&flow->id)) + return nla_put(skb, OVS_FLOW_ATTR_UFID, flow->id.ufid_len, + flow->id.ufid); + + return ovs_nla_put_key(flow->id.unmasked_key, flow->id.unmasked_key, + OVS_FLOW_ATTR_KEY, false, skb); +} + +/* Called with ovs_mutex or RCU read lock. 
*/ +int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb) { - return ovs_nla_put_key(&flow->unmasked_key, &flow->unmasked_key, + return ovs_nla_put_key(&flow->mask->key, &flow->key, OVS_FLOW_ATTR_KEY, false, skb); } diff --git a/net/openvswitch/flow_netlink.h b/net/openvswitch/flow_netlink.h index 9ed09e66876a..5c3d75bff310 100644 --- a/net/openvswitch/flow_netlink.h +++ b/net/openvswitch/flow_netlink.h @@ -48,7 +48,8 @@ int ovs_nla_put_key(const struct sw_flow_key *, const struct sw_flow_key *, int ovs_nla_get_flow_metadata(const struct nlattr *, struct sw_flow_key *, bool log); -int ovs_nla_put_unmasked_key(const struct sw_flow *flow, struct sk_buff *skb); +int ovs_nla_put_identifier(const struct sw_flow *flow, struct sk_buff *skb); +int ovs_nla_put_masked_key(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_put_mask(const struct sw_flow *flow, struct sk_buff *skb); int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key, @@ -56,6 +57,11 @@ int ovs_nla_get_match(struct sw_flow_match *, const struct nlattr *key, int ovs_nla_put_egress_tunnel_key(struct sk_buff *, const struct ovs_tunnel_info *); +bool ovs_nla_get_ufid(struct sw_flow_id *, const struct nlattr *, bool log); +int ovs_nla_get_identifier(struct sw_flow_id *sfid, const struct nlattr *ufid, + const struct sw_flow_key *key, bool log); +u32 ovs_nla_get_ufid_flags(const struct nlattr *attr); + int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log); diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c index 9a3f41f26da8..5e57628e6584 100644 --- a/net/openvswitch/flow_table.c +++ b/net/openvswitch/flow_table.c @@ -139,6 +139,8 @@ static void flow_free(struct sw_flow *flow) { int node; + if (ovs_identifier_is_key(&flow->id)) + kfree(flow->id.unmasked_key); kfree((struct sw_flow_actions __force *)flow->sf_acts); for_each_node(node) if (flow->stats[node]) @@ -200,18 +202,28 @@ static struct table_instance *table_instance_alloc(int new_size) int ovs_flow_tbl_init(struct flow_table *table) { - struct table_instance *ti; + struct table_instance *ti, *ufid_ti; ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!ti) return -ENOMEM; + ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!ufid_ti) + goto free_ti; + rcu_assign_pointer(table->ti, ti); + rcu_assign_pointer(table->ufid_ti, ufid_ti); INIT_LIST_HEAD(&table->mask_list); table->last_rehash = jiffies; table->count = 0; + table->ufid_count = 0; return 0; + +free_ti: + __table_instance_destroy(ti); + return -ENOMEM; } static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) @@ -221,13 +233,16 @@ static void flow_tbl_destroy_rcu_cb(struct rcu_head *rcu) __table_instance_destroy(ti); } -static void table_instance_destroy(struct table_instance *ti, bool deferred) +static void table_instance_destroy(struct table_instance *ti, + struct table_instance *ufid_ti, + bool deferred) { int i; if (!ti) return; + BUG_ON(!ufid_ti); if (ti->keep_flows) goto skip_flows; @@ -236,18 +251,24 @@ static void table_instance_destroy(struct table_instance *ti, bool deferred) struct hlist_head *head = flex_array_get(ti->buckets, i); struct hlist_node *n; int ver = ti->node_ver; + int ufid_ver = ufid_ti->node_ver; - hlist_for_each_entry_safe(flow, n, head, hash_node[ver]) { - hlist_del_rcu(&flow->hash_node[ver]); + hlist_for_each_entry_safe(flow, n, head, flow_table.node[ver]) { + hlist_del_rcu(&flow->flow_table.node[ver]); + if (ovs_identifier_is_ufid(&flow->id)) + 
hlist_del_rcu(&flow->ufid_table.node[ufid_ver]); ovs_flow_free(flow, deferred); } } skip_flows: - if (deferred) + if (deferred) { call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); - else + call_rcu(&ufid_ti->rcu, flow_tbl_destroy_rcu_cb); + } else { __table_instance_destroy(ti); + __table_instance_destroy(ufid_ti); + } } /* No need for locking this function is called from RCU callback or @@ -256,8 +277,9 @@ skip_flows: void ovs_flow_tbl_destroy(struct flow_table *table) { struct table_instance *ti = rcu_dereference_raw(table->ti); + struct table_instance *ufid_ti = rcu_dereference_raw(table->ufid_ti); - table_instance_destroy(ti, false); + table_instance_destroy(ti, ufid_ti, false); } struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, @@ -272,7 +294,7 @@ struct sw_flow *ovs_flow_tbl_dump_next(struct table_instance *ti, while (*bucket < ti->n_buckets) { i = 0; head = flex_array_get(ti->buckets, *bucket); - hlist_for_each_entry_rcu(flow, head, hash_node[ver]) { + hlist_for_each_entry_rcu(flow, head, flow_table.node[ver]) { if (i < *last) { i++; continue; @@ -294,16 +316,26 @@ static struct hlist_head *find_bucket(struct table_instance *ti, u32 hash) (hash & (ti->n_buckets - 1))); } -static void table_instance_insert(struct table_instance *ti, struct sw_flow *flow) +static void table_instance_insert(struct table_instance *ti, + struct sw_flow *flow) { struct hlist_head *head; - head = find_bucket(ti, flow->hash); - hlist_add_head_rcu(&flow->hash_node[ti->node_ver], head); + head = find_bucket(ti, flow->flow_table.hash); + hlist_add_head_rcu(&flow->flow_table.node[ti->node_ver], head); +} + +static void ufid_table_instance_insert(struct table_instance *ti, + struct sw_flow *flow) +{ + struct hlist_head *head; + + head = find_bucket(ti, flow->ufid_table.hash); + hlist_add_head_rcu(&flow->ufid_table.node[ti->node_ver], head); } static void flow_table_copy_flows(struct table_instance *old, - struct table_instance *new) + struct table_instance *new, bool ufid) { int old_ver; int i; @@ -318,15 +350,21 @@ static void flow_table_copy_flows(struct table_instance *old, head = flex_array_get(old->buckets, i); - hlist_for_each_entry(flow, head, hash_node[old_ver]) - table_instance_insert(new, flow); + if (ufid) + hlist_for_each_entry(flow, head, + ufid_table.node[old_ver]) + ufid_table_instance_insert(new, flow); + else + hlist_for_each_entry(flow, head, + flow_table.node[old_ver]) + table_instance_insert(new, flow); } old->keep_flows = true; } static struct table_instance *table_instance_rehash(struct table_instance *ti, - int n_buckets) + int n_buckets, bool ufid) { struct table_instance *new_ti; @@ -334,27 +372,38 @@ static struct table_instance *table_instance_rehash(struct table_instance *ti, if (!new_ti) return NULL; - flow_table_copy_flows(ti, new_ti); + flow_table_copy_flows(ti, new_ti, ufid); return new_ti; } int ovs_flow_tbl_flush(struct flow_table *flow_table) { - struct table_instance *old_ti; - struct table_instance *new_ti; + struct table_instance *old_ti, *new_ti; + struct table_instance *old_ufid_ti, *new_ufid_ti; - old_ti = ovsl_dereference(flow_table->ti); new_ti = table_instance_alloc(TBL_MIN_BUCKETS); if (!new_ti) return -ENOMEM; + new_ufid_ti = table_instance_alloc(TBL_MIN_BUCKETS); + if (!new_ufid_ti) + goto err_free_ti; + + old_ti = ovsl_dereference(flow_table->ti); + old_ufid_ti = ovsl_dereference(flow_table->ufid_ti); rcu_assign_pointer(flow_table->ti, new_ti); + rcu_assign_pointer(flow_table->ufid_ti, new_ufid_ti); flow_table->last_rehash = jiffies; flow_table->count = 
0; + flow_table->ufid_count = 0; - table_instance_destroy(old_ti, true); + table_instance_destroy(old_ti, old_ufid_ti, true); return 0; + +err_free_ti: + __table_instance_destroy(new_ti); + return -ENOMEM; } static u32 flow_hash(const struct sw_flow_key *key, @@ -402,14 +451,15 @@ static bool flow_cmp_masked_key(const struct sw_flow *flow, return cmp_key(&flow->key, key, range->start, range->end); } -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_match *match) +static bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, + const struct sw_flow_match *match) { struct sw_flow_key *key = match->key; int key_start = flow_key_start(key); int key_end = match->range.end; - return cmp_key(&flow->unmasked_key, key, key_start, key_end); + BUG_ON(ovs_identifier_is_ufid(&flow->id)); + return cmp_key(flow->id.unmasked_key, key, key_start, key_end); } static struct sw_flow *masked_flow_lookup(struct table_instance *ti, @@ -424,8 +474,8 @@ static struct sw_flow *masked_flow_lookup(struct table_instance *ti, ovs_flow_mask_key(&masked_key, unmasked, mask); hash = flow_hash(&masked_key, &mask->range); head = find_bucket(ti, hash); - hlist_for_each_entry_rcu(flow, head, hash_node[ti->node_ver]) { - if (flow->mask == mask && flow->hash == hash && + hlist_for_each_entry_rcu(flow, head, flow_table.node[ti->node_ver]) { + if (flow->mask == mask && flow->flow_table.hash == hash && flow_cmp_masked_key(flow, &masked_key, &mask->range)) return flow; } @@ -468,7 +518,48 @@ struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, /* Always called under ovs-mutex. */ list_for_each_entry(mask, &tbl->mask_list, list) { flow = masked_flow_lookup(ti, match->key, mask); - if (flow && ovs_flow_cmp_unmasked_key(flow, match)) /* Found */ + if (flow && ovs_identifier_is_key(&flow->id) && + ovs_flow_cmp_unmasked_key(flow, match)) + return flow; + } + return NULL; +} + +static u32 ufid_hash(const struct sw_flow_id *sfid) +{ + return jhash(sfid->ufid, sfid->ufid_len, 0); +} + +static bool ovs_flow_cmp_ufid(const struct sw_flow *flow, + const struct sw_flow_id *sfid) +{ + if (flow->id.ufid_len != sfid->ufid_len) + return false; + + return !memcmp(flow->id.ufid, sfid->ufid, sfid->ufid_len); +} + +bool ovs_flow_cmp(const struct sw_flow *flow, const struct sw_flow_match *match) +{ + if (ovs_identifier_is_ufid(&flow->id)) + return flow_cmp_masked_key(flow, match->key, &match->range); + + return ovs_flow_cmp_unmasked_key(flow, match); +} + +struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *tbl, + const struct sw_flow_id *ufid) +{ + struct table_instance *ti = rcu_dereference_ovsl(tbl->ufid_ti); + struct sw_flow *flow; + struct hlist_head *head; + u32 hash; + + hash = ufid_hash(ufid); + head = find_bucket(ti, hash); + hlist_for_each_entry_rcu(flow, head, ufid_table.node[ti->node_ver]) { + if (flow->ufid_table.hash == hash && + ovs_flow_cmp_ufid(flow, ufid)) return flow; } return NULL; @@ -485,9 +576,10 @@ int ovs_flow_tbl_num_masks(const struct flow_table *table) return num; } -static struct table_instance *table_instance_expand(struct table_instance *ti) +static struct table_instance *table_instance_expand(struct table_instance *ti, + bool ufid) { - return table_instance_rehash(ti, ti->n_buckets * 2); + return table_instance_rehash(ti, ti->n_buckets * 2, ufid); } /* Remove 'mask' from the mask list, if it is not needed any more. 
*/ @@ -512,10 +604,15 @@ static void flow_mask_remove(struct flow_table *tbl, struct sw_flow_mask *mask) void ovs_flow_tbl_remove(struct flow_table *table, struct sw_flow *flow) { struct table_instance *ti = ovsl_dereference(table->ti); + struct table_instance *ufid_ti = ovsl_dereference(table->ufid_ti); BUG_ON(table->count == 0); - hlist_del_rcu(&flow->hash_node[ti->node_ver]); + hlist_del_rcu(&flow->flow_table.node[ti->node_ver]); table->count--; + if (ovs_identifier_is_ufid(&flow->id)) { + hlist_del_rcu(&flow->ufid_table.node[ufid_ti->node_ver]); + table->ufid_count--; + } /* RCU delete the mask. 'flow->mask' is not NULLed, as it should be * accessible as long as the RCU read lock is held. @@ -589,24 +686,46 @@ static void flow_key_insert(struct flow_table *table, struct sw_flow *flow) struct table_instance *new_ti = NULL; struct table_instance *ti; - flow->hash = flow_hash(&flow->key, &flow->mask->range); + flow->flow_table.hash = flow_hash(&flow->key, &flow->mask->range); ti = ovsl_dereference(table->ti); table_instance_insert(ti, flow); table->count++; /* Expand table, if necessary, to make room. */ if (table->count > ti->n_buckets) - new_ti = table_instance_expand(ti); + new_ti = table_instance_expand(ti, false); else if (time_after(jiffies, table->last_rehash + REHASH_INTERVAL)) - new_ti = table_instance_rehash(ti, ti->n_buckets); + new_ti = table_instance_rehash(ti, ti->n_buckets, false); if (new_ti) { rcu_assign_pointer(table->ti, new_ti); - table_instance_destroy(ti, true); + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); table->last_rehash = jiffies; } } +/* Must be called with OVS mutex held. */ +static void flow_ufid_insert(struct flow_table *table, struct sw_flow *flow) +{ + struct table_instance *ti; + + flow->ufid_table.hash = ufid_hash(&flow->id); + ti = ovsl_dereference(table->ufid_ti); + ufid_table_instance_insert(ti, flow); + table->ufid_count++; + + /* Expand table, if necessary, to make room. */ + if (table->ufid_count > ti->n_buckets) { + struct table_instance *new_ti; + + new_ti = table_instance_expand(ti, true); + if (new_ti) { + rcu_assign_pointer(table->ufid_ti, new_ti); + call_rcu(&ti->rcu, flow_tbl_destroy_rcu_cb); + } + } +} + /* Must be called with OVS mutex held. 
*/ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, const struct sw_flow_mask *mask) @@ -617,6 +736,8 @@ int ovs_flow_tbl_insert(struct flow_table *table, struct sw_flow *flow, if (err) return err; flow_key_insert(table, flow); + if (ovs_identifier_is_ufid(&flow->id)) + flow_ufid_insert(table, flow); return 0; } diff --git a/net/openvswitch/flow_table.h b/net/openvswitch/flow_table.h index 309fa6415689..616eda10d955 100644 --- a/net/openvswitch/flow_table.h +++ b/net/openvswitch/flow_table.h @@ -47,9 +47,11 @@ struct table_instance { struct flow_table { struct table_instance __rcu *ti; + struct table_instance __rcu *ufid_ti; struct list_head mask_list; unsigned long last_rehash; unsigned int count; + unsigned int ufid_count; }; extern struct kmem_cache *flow_stats_cache; @@ -78,8 +80,10 @@ struct sw_flow *ovs_flow_tbl_lookup(struct flow_table *, const struct sw_flow_key *); struct sw_flow *ovs_flow_tbl_lookup_exact(struct flow_table *tbl, const struct sw_flow_match *match); -bool ovs_flow_cmp_unmasked_key(const struct sw_flow *flow, - const struct sw_flow_match *match); +struct sw_flow *ovs_flow_tbl_lookup_ufid(struct flow_table *, + const struct sw_flow_id *); + +bool ovs_flow_cmp(const struct sw_flow *, const struct sw_flow_match *); void ovs_flow_mask_key(struct sw_flow_key *dst, const struct sw_flow_key *src, const struct sw_flow_mask *mask); -- cgit v1.2.3-70-g09d2 From aae88261abd58fffef7ee0e00160ce4ea105b0f3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 26 Jan 2015 22:05:38 -0800 Subject: net: phy: document has_fixups field has_fixups was introduced to help keeping track of fixups/quirks running on a PHY device, but we did not update the comment above struct phy_device accordingly. Fixes: b0ae009f3dc14 (net: phy: add "has_fixups" boolean property") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/phy.h b/include/linux/phy.h index 9c189a1fa3a2..1b3690b597d5 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -327,6 +327,7 @@ struct phy_c45_device_ids { * c45_ids: 802.3-c45 Device Identifers if is_c45. * is_c45: Set to true if this phy uses clause 45 addressing. * is_internal: Set to true if this phy is internal to a MAC. + * has_fixups: Set to true if this phy has fixups/quirks. * state: state of the PHY for management purposes * dev_flags: Device-specific flags used by the PHY driver. * addr: Bus address of PHY -- cgit v1.2.3-70-g09d2 From 8a477a6fb6a33651adda772360b85fd813569743 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 26 Jan 2015 22:05:39 -0800 Subject: net: phy: keep track of the PHY suspend state In order to avoid double calls to phydev->drv->suspend and resume, keep track of whether the PHY has already been suspended as a consequence of a successful call to phy_suspend(). We will use this in our MDIO bus suspend/resume hooks to avoid a double suspend call. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/phy_device.c | 22 ++++++++++++++++++---- include/linux/phy.h | 2 ++ 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 3fc91e89f5a5..bdfe51fc3a65 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -699,6 +699,7 @@ int phy_suspend(struct phy_device *phydev) { struct phy_driver *phydrv = to_phy_driver(phydev->dev.driver); struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL }; + int ret = 0; /* If the device has WOL enabled, we cannot suspend the PHY */ phy_ethtool_get_wol(phydev, &wol); @@ -706,18 +707,31 @@ int phy_suspend(struct phy_device *phydev) return -EBUSY; if (phydrv->suspend) - return phydrv->suspend(phydev); - return 0; + ret = phydrv->suspend(phydev); + + if (ret) + return ret; + + phydev->suspended = true; + + return ret; } EXPORT_SYMBOL(phy_suspend); int phy_resume(struct phy_device *phydev) { struct phy_driver *phydrv = to_phy_driver(phydev->dev.driver); + int ret = 0; if (phydrv->resume) - return phydrv->resume(phydev); - return 0; + ret = phydrv->resume(phydev); + + if (ret) + return ret; + + phydev->suspended = false; + + return ret; } EXPORT_SYMBOL(phy_resume); diff --git a/include/linux/phy.h b/include/linux/phy.h index 1b3690b597d5..685809835b5c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -328,6 +328,7 @@ struct phy_c45_device_ids { * is_c45: Set to true if this phy uses clause 45 addressing. * is_internal: Set to true if this phy is internal to a MAC. * has_fixups: Set to true if this phy has fixups/quirks. + * suspended: Set to true if this phy has been suspended successfully. * state: state of the PHY for management purposes * dev_flags: Device-specific flags used by the PHY driver. * addr: Bus address of PHY @@ -365,6 +366,7 @@ struct phy_device { bool is_c45; bool is_internal; bool has_fixups; + bool suspended; enum phy_state state; -- cgit v1.2.3-70-g09d2 From cfcf1682c4ca8f601a4702255958e0b1c9aa12cc Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 24 Jan 2015 19:52:05 +0200 Subject: cfg80211: Add new GCMP, CCMP-256, BIP-GMAC, BIP-CMAC-256 ciphers This makes cfg80211 aware of the GCMP, GCMP-256, CCMP-256, BIP-GMAC-128, BIP-GMAC-256, and BIP-CMAC-256 cipher suites. These new cipher suites were defined in IEEE Std 802.11ac-2013. 
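For a sense of what these additions mean for key material, each new suite pairs with a fixed key length from the ieee80211_key_len additions in this patch, and cfg80211_validate_key_settings() rejects a setkey request whose params->key_len does not match. The helper below is only an illustrative sketch (its name and the idea of a standalone lookup are assumptions, not part of the patch); it restates the suite-to-length mapping enforced by the validation code:

#include <linux/errno.h>
#include <linux/ieee80211.h>

/* Hypothetical helper, for illustration only: expected key length in
 * bytes for the cipher suites this patch teaches cfg80211 about, per
 * the WLAN_KEY_LEN_* values added to include/linux/ieee80211.h.
 */
static int new_suite_key_len(u32 cipher)
{
	switch (cipher) {
	case WLAN_CIPHER_SUITE_CCMP_256:
		return WLAN_KEY_LEN_CCMP_256;		/* 32 bytes */
	case WLAN_CIPHER_SUITE_GCMP:
		return WLAN_KEY_LEN_GCMP;		/* 16 bytes */
	case WLAN_CIPHER_SUITE_GCMP_256:
		return WLAN_KEY_LEN_GCMP_256;		/* 32 bytes */
	case WLAN_CIPHER_SUITE_BIP_GMAC_128:
		return WLAN_KEY_LEN_BIP_GMAC_128;	/* 16 bytes */
	case WLAN_CIPHER_SUITE_BIP_GMAC_256:
		return WLAN_KEY_LEN_BIP_GMAC_256;	/* 32 bytes */
	case WLAN_CIPHER_SUITE_BIP_CMAC_256:
		return WLAN_KEY_LEN_BIP_CMAC_256;	/* 32 bytes */
	default:
		return -EINVAL;	/* suites not touched by this patch */
	}
}

As with the existing cases, the BIP variants are group-only ciphers and are additionally rejected when requested as pairwise keys.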
Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 18 ++++++++++++++++++ net/wireless/util.c | 36 ++++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 4f4eea8a6288..dbf417bf25bf 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1994,9 +1994,15 @@ enum ieee80211_key_len { WLAN_KEY_LEN_WEP40 = 5, WLAN_KEY_LEN_WEP104 = 13, WLAN_KEY_LEN_CCMP = 16, + WLAN_KEY_LEN_CCMP_256 = 32, WLAN_KEY_LEN_TKIP = 32, WLAN_KEY_LEN_AES_CMAC = 16, WLAN_KEY_LEN_SMS4 = 32, + WLAN_KEY_LEN_GCMP = 16, + WLAN_KEY_LEN_GCMP_256 = 32, + WLAN_KEY_LEN_BIP_CMAC_256 = 32, + WLAN_KEY_LEN_BIP_GMAC_128 = 16, + WLAN_KEY_LEN_BIP_GMAC_256 = 32, }; #define IEEE80211_WEP_IV_LEN 4 @@ -2004,9 +2010,16 @@ enum ieee80211_key_len { #define IEEE80211_CCMP_HDR_LEN 8 #define IEEE80211_CCMP_MIC_LEN 8 #define IEEE80211_CCMP_PN_LEN 6 +#define IEEE80211_CCMP_256_HDR_LEN 8 +#define IEEE80211_CCMP_256_MIC_LEN 16 +#define IEEE80211_CCMP_256_PN_LEN 6 #define IEEE80211_TKIP_IV_LEN 8 #define IEEE80211_TKIP_ICV_LEN 4 #define IEEE80211_CMAC_PN_LEN 6 +#define IEEE80211_GMAC_PN_LEN 6 +#define IEEE80211_GCMP_HDR_LEN 8 +#define IEEE80211_GCMP_MIC_LEN 16 +#define IEEE80211_GCMP_PN_LEN 6 /* Public action codes */ enum ieee80211_pub_actioncode { @@ -2230,6 +2243,11 @@ enum ieee80211_sa_query_action { #define WLAN_CIPHER_SUITE_WEP104 0x000FAC05 #define WLAN_CIPHER_SUITE_AES_CMAC 0x000FAC06 #define WLAN_CIPHER_SUITE_GCMP 0x000FAC08 +#define WLAN_CIPHER_SUITE_GCMP_256 0x000FAC09 +#define WLAN_CIPHER_SUITE_CCMP_256 0x000FAC0A +#define WLAN_CIPHER_SUITE_BIP_GMAC_128 0x000FAC0B +#define WLAN_CIPHER_SUITE_BIP_GMAC_256 0x000FAC0C +#define WLAN_CIPHER_SUITE_BIP_CMAC_256 0x000FAC0D #define WLAN_CIPHER_SUITE_SMS4 0x00147201 diff --git a/net/wireless/util.c b/net/wireless/util.c index 08f136ad2ea5..919fee807dd9 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -230,6 +230,9 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, switch (params->cipher) { case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: /* Disallow pairwise keys with non-zero index unless it's WEP * or a vendor specific cipher (because current deployments use * pairwise WEP keys with non-zero indices and for vendor @@ -240,6 +243,9 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, return -EINVAL; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: /* Disallow BIP (group-only) cipher as pairwise cipher */ if (pairwise) return -EINVAL; @@ -261,6 +267,18 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, if (params->key_len != WLAN_KEY_LEN_CCMP) return -EINVAL; break; + case WLAN_CIPHER_SUITE_CCMP_256: + if (params->key_len != WLAN_KEY_LEN_CCMP_256) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_GCMP: + if (params->key_len != WLAN_KEY_LEN_GCMP) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_GCMP_256: + if (params->key_len != WLAN_KEY_LEN_GCMP_256) + return -EINVAL; + break; case WLAN_CIPHER_SUITE_WEP104: if (params->key_len != WLAN_KEY_LEN_WEP104) return -EINVAL; @@ -269,6 +287,18 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, if (params->key_len != WLAN_KEY_LEN_AES_CMAC) return -EINVAL; 
break; + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + if (params->key_len != WLAN_KEY_LEN_BIP_CMAC_256) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + if (params->key_len != WLAN_KEY_LEN_BIP_GMAC_128) + return -EINVAL; + break; + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + if (params->key_len != WLAN_KEY_LEN_BIP_GMAC_256) + return -EINVAL; + break; default: /* * We don't know anything about this algorithm, @@ -288,7 +318,13 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, return -EINVAL; case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_CCMP_256: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: if (params->seq_len != 6) return -EINVAL; break; -- cgit v1.2.3-70-g09d2 From 00b9cfa3ff38401bd70c34b250ca13e5ea347b4a Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 24 Jan 2015 19:52:06 +0200 Subject: mac80111: Add GCMP and GCMP-256 ciphers This allows mac80211 to configure GCMP and GCMP-256 to the driver and also use software-implementation within mac80211 when the driver does not support this with hardware accelaration. Signed-off-by: Jouni Malinen [remove a spurious newline] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 15 ++- net/mac80211/Kconfig | 1 + net/mac80211/Makefile | 1 + net/mac80211/aes_gcm.c | 95 +++++++++++++++++++ net/mac80211/aes_gcm.h | 22 +++++ net/mac80211/cfg.c | 13 +++ net/mac80211/debugfs_key.c | 22 +++++ net/mac80211/key.c | 74 ++++++++++++++- net/mac80211/key.h | 11 +++ net/mac80211/main.c | 9 +- net/mac80211/rx.c | 4 + net/mac80211/tx.c | 5 + net/mac80211/wpa.c | 224 +++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/wpa.h | 5 + 14 files changed, 492 insertions(+), 9 deletions(-) create mode 100644 net/mac80211/aes_gcm.c create mode 100644 net/mac80211/aes_gcm.h (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 866073e27ea2..ae6638436112 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1294,8 +1294,8 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); * @IEEE80211_KEY_FLAG_PAIRWISE: Set by mac80211, this flag indicates * that the key is pairwise rather then a shared key. * @IEEE80211_KEY_FLAG_SW_MGMT_TX: This flag should be set by the driver for a - * CCMP key if it requires CCMP encryption of management frames (MFP) to - * be done in software. + * CCMP/GCMP key if it requires CCMP/GCMP encryption of management frames + * (MFP) to be done in software. * @IEEE80211_KEY_FLAG_PUT_IV_SPACE: This flag should be set by the driver * if space should be prepared for the IV, but the IV * itself should not be generated. Do not set together with @@ -1310,7 +1310,7 @@ struct ieee80211_vif *wdev_to_ieee80211_vif(struct wireless_dev *wdev); * RX, if your crypto engine can't deal with TX you can also set the * %IEEE80211_KEY_FLAG_SW_MGMT_TX flag to encrypt such frames in SW. * @IEEE80211_KEY_FLAG_GENERATE_IV_MGMT: This flag should be set by the - * driver for a CCMP key to indicate that is requires IV generation + * driver for a CCMP/GCMP key to indicate that is requires IV generation * only for managment frames (MFP). 
* @IEEE80211_KEY_FLAG_RESERVE_TAILROOM: This flag should be set by the * driver for a key to indicate that sufficient tailroom must always @@ -4098,6 +4098,8 @@ void ieee80211_aes_cmac_calculate_k1_k2(struct ieee80211_key_conf *keyconf, * reverse order than in packet) * @aes_cmac: PN data, most significant byte first (big endian, * reverse order than in packet) + * @gcmp: PN data, most significant byte first (big endian, + * reverse order than in packet) */ struct ieee80211_key_seq { union { @@ -4111,6 +4113,9 @@ struct ieee80211_key_seq { struct { u8 pn[6]; } aes_cmac; + struct { + u8 pn[6]; + } gcmp; }; }; @@ -4135,7 +4140,7 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, * ieee80211_get_key_rx_seq - get key RX sequence counter * * @keyconf: the parameter passed with the set key - * @tid: The TID, or -1 for the management frame value (CCMP only); + * @tid: The TID, or -1 for the management frame value (CCMP/GCMP only); * the value on TID 0 is also used for non-QoS frames. For * CMAC, only TID 0 is valid. * @seq: buffer to receive the sequence data @@ -4171,7 +4176,7 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf, * ieee80211_set_key_rx_seq - set key RX sequence counter * * @keyconf: the parameter passed with the set key - * @tid: The TID, or -1 for the management frame value (CCMP only); + * @tid: The TID, or -1 for the management frame value (CCMP/GCMP only); * the value on TID 0 is also used for non-QoS frames. For * CMAC, only TID 0 is valid. * @seq: new sequence data diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig index 75cc6801a431..64a012a0c6e5 100644 --- a/net/mac80211/Kconfig +++ b/net/mac80211/Kconfig @@ -5,6 +5,7 @@ config MAC80211 select CRYPTO_ARC4 select CRYPTO_AES select CRYPTO_CCM + select CRYPTO_GCM select CRC32 select AVERAGE ---help--- diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index e53671b1105e..0cbf93618433 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -15,6 +15,7 @@ mac80211-y := \ michael.o \ tkip.o \ aes_ccm.o \ + aes_gcm.o \ aes_cmac.o \ cfg.o \ ethtool.o \ diff --git a/net/mac80211/aes_gcm.c b/net/mac80211/aes_gcm.c new file mode 100644 index 000000000000..c2bf6698d738 --- /dev/null +++ b/net/mac80211/aes_gcm.c @@ -0,0 +1,95 @@ +/* + * Copyright 2014-2015, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include "key.h" +#include "aes_gcm.h" + +void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic) +{ + struct scatterlist assoc, pt, ct[2]; + + char aead_req_data[sizeof(struct aead_request) + + crypto_aead_reqsize(tfm)] + __aligned(__alignof__(struct aead_request)); + struct aead_request *aead_req = (void *)aead_req_data; + + memset(aead_req, 0, sizeof(aead_req_data)); + + sg_init_one(&pt, data, data_len); + sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); + sg_init_table(ct, 2); + sg_set_buf(&ct[0], data, data_len); + sg_set_buf(&ct[1], mic, IEEE80211_GCMP_MIC_LEN); + + aead_request_set_tfm(aead_req, tfm); + aead_request_set_assoc(aead_req, &assoc, assoc.length); + aead_request_set_crypt(aead_req, &pt, ct, data_len, j_0); + + crypto_aead_encrypt(aead_req); +} + +int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic) +{ + struct scatterlist assoc, pt, ct[2]; + char aead_req_data[sizeof(struct aead_request) + + crypto_aead_reqsize(tfm)] + __aligned(__alignof__(struct aead_request)); + struct aead_request *aead_req = (void *)aead_req_data; + + if (data_len == 0) + return -EINVAL; + + memset(aead_req, 0, sizeof(aead_req_data)); + + sg_init_one(&pt, data, data_len); + sg_init_one(&assoc, &aad[2], be16_to_cpup((__be16 *)aad)); + sg_init_table(ct, 2); + sg_set_buf(&ct[0], data, data_len); + sg_set_buf(&ct[1], mic, IEEE80211_GCMP_MIC_LEN); + + aead_request_set_tfm(aead_req, tfm); + aead_request_set_assoc(aead_req, &assoc, assoc.length); + aead_request_set_crypt(aead_req, ct, &pt, + data_len + IEEE80211_GCMP_MIC_LEN, j_0); + + return crypto_aead_decrypt(aead_req); +} + +struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], + size_t key_len) +{ + struct crypto_aead *tfm; + int err; + + tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + return tfm; + + err = crypto_aead_setkey(tfm, key, key_len); + if (!err) + err = crypto_aead_setauthsize(tfm, IEEE80211_GCMP_MIC_LEN); + if (!err) + return tfm; + + crypto_free_aead(tfm); + return ERR_PTR(err); +} + +void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm) +{ + crypto_free_aead(tfm); +} diff --git a/net/mac80211/aes_gcm.h b/net/mac80211/aes_gcm.h new file mode 100644 index 000000000000..1347fda6b76a --- /dev/null +++ b/net/mac80211/aes_gcm.h @@ -0,0 +1,22 @@ +/* + * Copyright 2014-2015, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef AES_GCM_H +#define AES_GCM_H + +#include + +void ieee80211_aes_gcm_encrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic); +int ieee80211_aes_gcm_decrypt(struct crypto_aead *tfm, u8 *j_0, u8 *aad, + u8 *data, size_t data_len, u8 *mic); +struct crypto_aead *ieee80211_aes_gcm_key_setup_encrypt(const u8 key[], + size_t key_len); +void ieee80211_aes_gcm_key_free(struct crypto_aead *tfm); + +#endif /* AES_GCM_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index a777114d663b..1c1d061cff56 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -164,6 +164,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: break; default: cs = ieee80211_cs_get(local, params->cipher, sdata->vif.type); @@ -369,6 +370,18 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq = seq; params.seq_len = 6; break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn64 = atomic64_read(&key->u.gcmp.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + params.seq = seq; + params.seq_len = 6; + break; } params.key = key->conf.key; diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 5523b94c7c90..0e223e602296 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -105,6 +105,13 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn = atomic64_read(&key->u.gcmp.tx_pn); + len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", + (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), + (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); + break; default: return 0; } @@ -151,6 +158,17 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, rpn[3], rpn[4], rpn[5]); len = p - buf; break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) { + rpn = key->u.gcmp.rx_pn[i]; + p += scnprintf(p, sizeof(buf)+buf-p, + "%02x%02x%02x%02x%02x%02x\n", + rpn[0], rpn[1], rpn[2], + rpn[3], rpn[4], rpn[5]); + } + len = p - buf; + break; default: return 0; } @@ -173,6 +191,10 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf, len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.replays); break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + len = scnprintf(buf, sizeof(buf), "%u\n", key->u.gcmp.replays); + break; default: return 0; } diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 5167c53aa15f..cbee2f5180ce 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -24,6 +24,7 @@ #include "debugfs_key.h" #include "aes_ccm.h" #include "aes_cmac.h" +#include "aes_gcm.h" /** @@ -163,6 +164,8 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) case WLAN_CIPHER_SUITE_TKIP: case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: /* all of these we can do in software - if driver can */ if (ret == 1) return 0; @@ -412,6 +415,25 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, return ERR_PTR(err); } break; + case WLAN_CIPHER_SUITE_GCMP: + case 
WLAN_CIPHER_SUITE_GCMP_256: + key->conf.iv_len = IEEE80211_GCMP_HDR_LEN; + key->conf.icv_len = IEEE80211_GCMP_MIC_LEN; + for (i = 0; seq && i < IEEE80211_NUM_TIDS + 1; i++) + for (j = 0; j < IEEE80211_GCMP_PN_LEN; j++) + key->u.gcmp.rx_pn[i][j] = + seq[IEEE80211_GCMP_PN_LEN - j - 1]; + /* Initialize AES key state here as an optimization so that + * it does not need to be initialized for every packet. + */ + key->u.gcmp.tfm = ieee80211_aes_gcm_key_setup_encrypt(key_data, + key_len); + if (IS_ERR(key->u.gcmp.tfm)) { + err = PTR_ERR(key->u.gcmp.tfm); + kfree(key); + return ERR_PTR(err); + } + break; default: if (cs) { size_t len = (seq_len > MAX_PN_LEN) ? @@ -433,10 +455,18 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, static void ieee80211_key_free_common(struct ieee80211_key *key) { - if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP) + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: ieee80211_aes_key_free(key->u.ccmp.tfm); - if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) + break; + case WLAN_CIPHER_SUITE_AES_CMAC: ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); + break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + ieee80211_aes_gcm_key_free(key->u.gcmp.tfm); + break; + } kzfree(key); } @@ -760,6 +790,16 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, seq->ccmp.pn[1] = pn64 >> 32; seq->ccmp.pn[0] = pn64 >> 40; break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn64 = atomic64_read(&key->u.gcmp.tx_pn); + seq->gcmp.pn[5] = pn64; + seq->gcmp.pn[4] = pn64 >> 8; + seq->gcmp.pn[3] = pn64 >> 16; + seq->gcmp.pn[2] = pn64 >> 24; + seq->gcmp.pn[1] = pn64 >> 32; + seq->gcmp.pn[0] = pn64 >> 40; + break; default: WARN_ON(1); } @@ -796,6 +836,16 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, pn = key->u.aes_cmac.rx_pn; memcpy(seq->aes_cmac.pn, pn, IEEE80211_CMAC_PN_LEN); break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) + return; + if (tid < 0) + pn = key->u.gcmp.rx_pn[IEEE80211_NUM_TIDS]; + else + pn = key->u.gcmp.rx_pn[tid]; + memcpy(seq->gcmp.pn, pn, IEEE80211_GCMP_PN_LEN); + break; } } EXPORT_SYMBOL(ieee80211_get_key_rx_seq); @@ -831,6 +881,16 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf, ((u64)seq->aes_cmac.pn[0] << 40); atomic64_set(&key->u.aes_cmac.tx_pn, pn64); break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + pn64 = (u64)seq->gcmp.pn[5] | + ((u64)seq->gcmp.pn[4] << 8) | + ((u64)seq->gcmp.pn[3] << 16) | + ((u64)seq->gcmp.pn[2] << 24) | + ((u64)seq->gcmp.pn[1] << 32) | + ((u64)seq->gcmp.pn[0] << 40); + atomic64_set(&key->u.gcmp.tx_pn, pn64); + break; default: WARN_ON(1); break; @@ -868,6 +928,16 @@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, pn = key->u.aes_cmac.rx_pn; memcpy(pn, seq->aes_cmac.pn, IEEE80211_CMAC_PN_LEN); break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) + return; + if (tid < 0) + pn = key->u.gcmp.rx_pn[IEEE80211_NUM_TIDS]; + else + pn = key->u.gcmp.rx_pn[tid]; + memcpy(pn, seq->gcmp.pn, IEEE80211_GCMP_PN_LEN); + break; default: WARN_ON(1); break; diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 19db68663d75..27580da851c8 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -94,6 +94,17 @@ struct ieee80211_key { u32 replays; /* dot11RSNAStatsCMACReplays */ u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ } aes_cmac; + struct { + 
atomic64_t tx_pn; + /* Last received packet number. The first + * IEEE80211_NUM_TIDS counters are used with Data + * frames and the last counter is used with Robust + * Management frames. + */ + u8 rx_pn[IEEE80211_NUM_TIDS + 1][IEEE80211_GCMP_PN_LEN]; + struct crypto_aead *tfm; + u32 replays; /* dot11RSNAStatsGCMPReplays */ + } gcmp; struct { /* generic cipher scheme */ u8 rx_pn[IEEE80211_NUM_TIDS + 1][MAX_PN_LEN]; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index ea6b82ac4f0b..7223b4e16752 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -666,6 +666,8 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) WLAN_CIPHER_SUITE_WEP104, WLAN_CIPHER_SUITE_TKIP, WLAN_CIPHER_SUITE_CCMP, + WLAN_CIPHER_SUITE_GCMP, + WLAN_CIPHER_SUITE_GCMP_256, /* keep last -- depends on hw flags! */ WLAN_CIPHER_SUITE_AES_CMAC @@ -724,9 +726,10 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) /* Driver specifies cipher schemes only (but not cipher suites * including the schemes) * - * We start counting ciphers defined by schemes, TKIP and CCMP + * We start counting ciphers defined by schemes, TKIP, CCMP, + * GCMP, and GCMP-256 */ - n_suites = local->hw.n_cipher_schemes + 2; + n_suites = local->hw.n_cipher_schemes + 4; /* check if we have WEP40 and WEP104 */ if (have_wep) @@ -742,6 +745,8 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) suites[w++] = WLAN_CIPHER_SUITE_CCMP; suites[w++] = WLAN_CIPHER_SUITE_TKIP; + suites[w++] = WLAN_CIPHER_SUITE_GCMP; + suites[w++] = WLAN_CIPHER_SUITE_GCMP_256; if (have_wep) { suites[w++] = WLAN_CIPHER_SUITE_WEP40; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ed516ae80a3b..a11d2518c365 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1655,6 +1655,10 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) case WLAN_CIPHER_SUITE_AES_CMAC: result = ieee80211_crypto_aes_cmac_decrypt(rx); break; + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + result = ieee80211_crypto_gcmp_decrypt(rx); + break; default: result = ieee80211_crypto_hw_decrypt(rx); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 02ed6f60629a..e4c6fbc4bf7a 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -626,6 +626,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key = NULL; break; case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: if (!ieee80211_is_data_present(hdr->frame_control) && !ieee80211_use_mfp(hdr->frame_control, tx->sta, tx->skb)) @@ -1014,6 +1016,9 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) return ieee80211_crypto_ccmp_encrypt(tx); case WLAN_CIPHER_SUITE_AES_CMAC: return ieee80211_crypto_aes_cmac_encrypt(tx); + case WLAN_CIPHER_SUITE_GCMP: + case WLAN_CIPHER_SUITE_GCMP_256: + return ieee80211_crypto_gcmp_encrypt(tx); default: return ieee80211_crypto_hw_encrypt(tx); } diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 12398fde02e8..96b65c240109 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -22,6 +22,7 @@ #include "tkip.h" #include "aes_ccm.h" #include "aes_cmac.h" +#include "aes_gcm.h" #include "wpa.h" ieee80211_tx_result @@ -546,6 +547,229 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } +static void gcmp_special_blocks(struct sk_buff *skb, u8 *pn, u8 *j_0, u8 *aad) +{ + __le16 mask_fc; + u8 qos_tid; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + + memcpy(j_0, hdr->addr2, ETH_ALEN); + memcpy(&j_0[ETH_ALEN], pn, 
IEEE80211_GCMP_PN_LEN); + j_0[13] = 0; + j_0[14] = 0; + j_0[AES_BLOCK_SIZE - 1] = 0x01; + + /* AAD (extra authenticate-only data) / masked 802.11 header + * FC | A1 | A2 | A3 | SC | [A4] | [QC] + */ + put_unaligned_be16(ieee80211_hdrlen(hdr->frame_control) - 2, &aad[0]); + /* Mask FC: zero subtype b4 b5 b6 (if not mgmt) + * Retry, PwrMgt, MoreData; set Protected + */ + mask_fc = hdr->frame_control; + mask_fc &= ~cpu_to_le16(IEEE80211_FCTL_RETRY | + IEEE80211_FCTL_PM | IEEE80211_FCTL_MOREDATA); + if (!ieee80211_is_mgmt(hdr->frame_control)) + mask_fc &= ~cpu_to_le16(0x0070); + mask_fc |= cpu_to_le16(IEEE80211_FCTL_PROTECTED); + + put_unaligned(mask_fc, (__le16 *)&aad[2]); + memcpy(&aad[4], &hdr->addr1, 3 * ETH_ALEN); + + /* Mask Seq#, leave Frag# */ + aad[22] = *((u8 *)&hdr->seq_ctrl) & 0x0f; + aad[23] = 0; + + if (ieee80211_is_data_qos(hdr->frame_control)) + qos_tid = *ieee80211_get_qos_ctl(hdr) & + IEEE80211_QOS_CTL_TID_MASK; + else + qos_tid = 0; + + if (ieee80211_has_a4(hdr->frame_control)) { + memcpy(&aad[24], hdr->addr4, ETH_ALEN); + aad[30] = qos_tid; + aad[31] = 0; + } else { + memset(&aad[24], 0, ETH_ALEN + IEEE80211_QOS_CTL_LEN); + aad[24] = qos_tid; + } +} + +static inline void gcmp_pn2hdr(u8 *hdr, const u8 *pn, int key_id) +{ + hdr[0] = pn[5]; + hdr[1] = pn[4]; + hdr[2] = 0; + hdr[3] = 0x20 | (key_id << 6); + hdr[4] = pn[3]; + hdr[5] = pn[2]; + hdr[6] = pn[1]; + hdr[7] = pn[0]; +} + +static inline void gcmp_hdr2pn(u8 *pn, const u8 *hdr) +{ + pn[0] = hdr[7]; + pn[1] = hdr[6]; + pn[2] = hdr[5]; + pn[3] = hdr[4]; + pn[4] = hdr[1]; + pn[5] = hdr[0]; +} + +static int gcmp_encrypt_skb(struct ieee80211_tx_data *tx, struct sk_buff *skb) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + struct ieee80211_key *key = tx->key; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); + int hdrlen, len, tail; + u8 *pos; + u8 pn[6]; + u64 pn64; + u8 aad[2 * AES_BLOCK_SIZE]; + u8 j_0[AES_BLOCK_SIZE]; + + if (info->control.hw_key && + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_GENERATE_IV) && + !(info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE) && + !((info->control.hw_key->flags & + IEEE80211_KEY_FLAG_GENERATE_IV_MGMT) && + ieee80211_is_mgmt(hdr->frame_control))) { + /* hwaccel has no need for preallocated room for GCMP + * header or MIC fields + */ + return 0; + } + + hdrlen = ieee80211_hdrlen(hdr->frame_control); + len = skb->len - hdrlen; + + if (info->control.hw_key) + tail = 0; + else + tail = IEEE80211_GCMP_MIC_LEN; + + if (WARN_ON(skb_tailroom(skb) < tail || + skb_headroom(skb) < IEEE80211_GCMP_HDR_LEN)) + return -1; + + pos = skb_push(skb, IEEE80211_GCMP_HDR_LEN); + memmove(pos, pos + IEEE80211_GCMP_HDR_LEN, hdrlen); + skb_set_network_header(skb, skb_network_offset(skb) + + IEEE80211_GCMP_HDR_LEN); + + /* the HW only needs room for the IV, but not the actual IV */ + if (info->control.hw_key && + (info->control.hw_key->flags & IEEE80211_KEY_FLAG_PUT_IV_SPACE)) + return 0; + + hdr = (struct ieee80211_hdr *)pos; + pos += hdrlen; + + pn64 = atomic64_inc_return(&key->u.gcmp.tx_pn); + + pn[5] = pn64; + pn[4] = pn64 >> 8; + pn[3] = pn64 >> 16; + pn[2] = pn64 >> 24; + pn[1] = pn64 >> 32; + pn[0] = pn64 >> 40; + + gcmp_pn2hdr(pos, pn, key->conf.keyidx); + + /* hwaccel - with software GCMP header */ + if (info->control.hw_key) + return 0; + + pos += IEEE80211_GCMP_HDR_LEN; + gcmp_special_blocks(skb, pn, j_0, aad); + ieee80211_aes_gcm_encrypt(key->u.gcmp.tfm, j_0, aad, pos, len, + skb_put(skb, IEEE80211_GCMP_MIC_LEN)); + + return 0; +} + 
+ieee80211_tx_result +ieee80211_crypto_gcmp_encrypt(struct ieee80211_tx_data *tx) +{ + struct sk_buff *skb; + + ieee80211_tx_set_protected(tx); + + skb_queue_walk(&tx->skbs, skb) { + if (gcmp_encrypt_skb(tx, skb) < 0) + return TX_DROP; + } + + return TX_CONTINUE; +} + +ieee80211_rx_result +ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx) +{ + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + int hdrlen; + struct ieee80211_key *key = rx->key; + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + u8 pn[IEEE80211_GCMP_PN_LEN]; + int data_len; + int queue; + + hdrlen = ieee80211_hdrlen(hdr->frame_control); + + if (!ieee80211_is_data(hdr->frame_control) && + !ieee80211_is_robust_mgmt_frame(skb)) + return RX_CONTINUE; + + data_len = skb->len - hdrlen - IEEE80211_GCMP_HDR_LEN - + IEEE80211_GCMP_MIC_LEN; + if (!rx->sta || data_len < 0) + return RX_DROP_UNUSABLE; + + if (status->flag & RX_FLAG_DECRYPTED) { + if (!pskb_may_pull(rx->skb, hdrlen + IEEE80211_GCMP_HDR_LEN)) + return RX_DROP_UNUSABLE; + } else { + if (skb_linearize(rx->skb)) + return RX_DROP_UNUSABLE; + } + + gcmp_hdr2pn(pn, skb->data + hdrlen); + + queue = rx->security_idx; + + if (memcmp(pn, key->u.gcmp.rx_pn[queue], IEEE80211_GCMP_PN_LEN) <= 0) { + key->u.gcmp.replays++; + return RX_DROP_UNUSABLE; + } + + if (!(status->flag & RX_FLAG_DECRYPTED)) { + u8 aad[2 * AES_BLOCK_SIZE]; + u8 j_0[AES_BLOCK_SIZE]; + /* hardware didn't decrypt/verify MIC */ + gcmp_special_blocks(skb, pn, j_0, aad); + + if (ieee80211_aes_gcm_decrypt( + key->u.gcmp.tfm, j_0, aad, + skb->data + hdrlen + IEEE80211_GCMP_HDR_LEN, + data_len, + skb->data + skb->len - IEEE80211_GCMP_MIC_LEN)) + return RX_DROP_UNUSABLE; + } + + memcpy(key->u.gcmp.rx_pn[queue], pn, IEEE80211_GCMP_PN_LEN); + + /* Remove GCMP header and MIC */ + if (pskb_trim(skb, skb->len - IEEE80211_GCMP_MIC_LEN)) + return RX_DROP_UNUSABLE; + memmove(skb->data + IEEE80211_GCMP_HDR_LEN, skb->data, hdrlen); + skb_pull(skb, IEEE80211_GCMP_HDR_LEN); + + return RX_CONTINUE; +} + static ieee80211_tx_result ieee80211_crypto_cs_encrypt(struct ieee80211_tx_data *tx, struct sk_buff *skb) diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h index 62e5a12dfe0a..ea955f278351 100644 --- a/net/mac80211/wpa.h +++ b/net/mac80211/wpa.h @@ -37,4 +37,9 @@ ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx); ieee80211_rx_result ieee80211_crypto_hw_decrypt(struct ieee80211_rx_data *rx); +ieee80211_tx_result +ieee80211_crypto_gcmp_encrypt(struct ieee80211_tx_data *tx); +ieee80211_rx_result +ieee80211_crypto_gcmp_decrypt(struct ieee80211_rx_data *rx); + #endif /* WPA_H */ -- cgit v1.2.3-70-g09d2 From 56c52da2d554f081e8fce58ecbcf6a40c605b95b Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 24 Jan 2015 19:52:08 +0200 Subject: mac80111: Add BIP-CMAC-256 cipher This allows mac80211 to configure BIP-CMAC-256 to the driver and also use software-implementation within mac80211 when the driver does not support this with hardware accelaration. 
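Compared with the existing BIP (AES-128-CMAC), the visible differences are the key length (32 instead of 16 bytes, per WLAN_KEY_LEN_BIP_CMAC_256) and the MIC size: BIP-CMAC-256 carries a 16-octet MIC in the new ieee80211_mmie_16 element rather than the 8-octet MIC of ieee80211_mmie, which is why ieee80211_aes_cmac_key_setup() now takes the key length as a parameter and ieee80211_key_alloc() picks the ICV length per cipher. The sketch below only restates that size selection; the helper name is made up for illustration:

#include <linux/ieee80211.h>

/* Illustrative only -- mirrors the icv_len choice this patch adds to
 * ieee80211_key_alloc(); the function itself is hypothetical.
 */
static size_t bip_cmac_mmie_len(u32 cipher)
{
	if (cipher == WLAN_CIPHER_SUITE_AES_CMAC)
		return sizeof(struct ieee80211_mmie);	/* 8-octet MIC */

	/* WLAN_CIPHER_SUITE_BIP_CMAC_256 */
	return sizeof(struct ieee80211_mmie_16);	/* 16-octet MIC */
}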
Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 9 +++++ net/mac80211/aes_cmac.c | 34 +++++++++++++---- net/mac80211/aes_cmac.h | 5 ++- net/mac80211/cfg.c | 2 + net/mac80211/debugfs_key.c | 4 ++ net/mac80211/key.c | 14 ++++++- net/mac80211/main.c | 13 ++++--- net/mac80211/rx.c | 21 ++++++++--- net/mac80211/tx.c | 3 ++ net/mac80211/wpa.c | 92 ++++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/wpa.h | 4 ++ 11 files changed, 180 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index dbf417bf25bf..b9c7897dc566 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1017,6 +1017,15 @@ struct ieee80211_mmie { u8 mic[8]; } __packed; +/* Management MIC information element (IEEE 802.11w) for GMAC and CMAC-256 */ +struct ieee80211_mmie_16 { + u8 element_id; + u8 length; + __le16 key_id; + u8 sequence_number[6]; + u8 mic[16]; +} __packed; + struct ieee80211_vendor_ie { u8 element_id; u8 len; diff --git a/net/mac80211/aes_cmac.c b/net/mac80211/aes_cmac.c index 9b9009f99551..4192806be3d3 100644 --- a/net/mac80211/aes_cmac.c +++ b/net/mac80211/aes_cmac.c @@ -18,8 +18,8 @@ #include "key.h" #include "aes_cmac.h" -#define AES_CMAC_KEY_LEN 16 #define CMAC_TLEN 8 /* CMAC TLen = 64 bits (8 octets) */ +#define CMAC_TLEN_256 16 /* CMAC TLen = 128 bits (16 octets) */ #define AAD_LEN 20 @@ -35,9 +35,9 @@ static void gf_mulx(u8 *pad) pad[AES_BLOCK_SIZE - 1] ^= 0x87; } - -static void aes_128_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, - const u8 *addr[], const size_t *len, u8 *mac) +static void aes_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, + const u8 *addr[], const size_t *len, u8 *mac, + size_t mac_len) { u8 cbc[AES_BLOCK_SIZE], pad[AES_BLOCK_SIZE]; const u8 *pos, *end; @@ -88,7 +88,7 @@ static void aes_128_cmac_vector(struct crypto_cipher *tfm, size_t num_elem, for (i = 0; i < AES_BLOCK_SIZE; i++) pad[i] ^= cbc[i]; crypto_cipher_encrypt_one(tfm, pad, pad); - memcpy(mac, pad, CMAC_TLEN); + memcpy(mac, pad, mac_len); } @@ -107,17 +107,35 @@ void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, addr[2] = zero; len[2] = CMAC_TLEN; - aes_128_cmac_vector(tfm, 3, addr, len, mic); + aes_cmac_vector(tfm, 3, addr, len, mic, CMAC_TLEN); } +void ieee80211_aes_cmac_256(struct crypto_cipher *tfm, const u8 *aad, + const u8 *data, size_t data_len, u8 *mic) +{ + const u8 *addr[3]; + size_t len[3]; + u8 zero[CMAC_TLEN_256]; + + memset(zero, 0, CMAC_TLEN_256); + addr[0] = aad; + len[0] = AAD_LEN; + addr[1] = data; + len[1] = data_len - CMAC_TLEN_256; + addr[2] = zero; + len[2] = CMAC_TLEN_256; + + aes_cmac_vector(tfm, 3, addr, len, mic, CMAC_TLEN_256); +} -struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[]) +struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[], + size_t key_len) { struct crypto_cipher *tfm; tfm = crypto_alloc_cipher("aes", 0, CRYPTO_ALG_ASYNC); if (!IS_ERR(tfm)) - crypto_cipher_setkey(tfm, key, AES_CMAC_KEY_LEN); + crypto_cipher_setkey(tfm, key, key_len); return tfm; } diff --git a/net/mac80211/aes_cmac.h b/net/mac80211/aes_cmac.h index 0ce6487af795..3702041f44fd 100644 --- a/net/mac80211/aes_cmac.h +++ b/net/mac80211/aes_cmac.h @@ -11,9 +11,12 @@ #include -struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[]); +struct crypto_cipher *ieee80211_aes_cmac_key_setup(const u8 key[], + size_t key_len); void ieee80211_aes_cmac(struct crypto_cipher *tfm, const u8 *aad, const u8 *data, size_t data_len, u8 
*mic); +void ieee80211_aes_cmac_256(struct crypto_cipher *tfm, const u8 *aad, + const u8 *data, size_t data_len, u8 *mic); void ieee80211_aes_cmac_key_free(struct crypto_cipher *tfm); #endif /* AES_CMAC_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ef84441c119c..b7e528bbecce 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -164,6 +164,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: break; @@ -362,6 +363,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq_len = 6; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); seq[0] = pn64; seq[1] = pn64 >> 8; diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index 64de07b16092..d1b60eb014a8 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -101,6 +101,7 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: pn = atomic64_read(&key->u.aes_cmac.tx_pn); len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), @@ -153,6 +154,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, len = p - buf; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: rpn = key->u.aes_cmac.rx_pn; p += scnprintf(p, sizeof(buf)+buf-p, "%02x%02x%02x%02x%02x%02x\n", @@ -191,6 +193,7 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf, len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.replays); break; @@ -214,6 +217,7 @@ static ssize_t key_icverrors_read(struct file *file, char __user *userbuf, switch (key->conf.cipher) { case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.icverrors); break; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 83c61085c3f0..7ceea9d9fcd2 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -165,6 +165,7 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: /* all of these we can do in software - if driver can */ @@ -417,8 +418,12 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, } break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: key->conf.iv_len = 0; - key->conf.icv_len = sizeof(struct ieee80211_mmie); + if (cipher == WLAN_CIPHER_SUITE_AES_CMAC) + key->conf.icv_len = sizeof(struct ieee80211_mmie); + else + key->conf.icv_len = sizeof(struct ieee80211_mmie_16); if (seq) for (j = 0; j < IEEE80211_CMAC_PN_LEN; j++) key->u.aes_cmac.rx_pn[j] = @@ -428,7 +433,7 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, * it does not need to be initialized for every packet. 
*/ key->u.aes_cmac.tfm = - ieee80211_aes_cmac_key_setup(key_data); + ieee80211_aes_cmac_key_setup(key_data, key_len); if (IS_ERR(key->u.aes_cmac.tfm)) { err = PTR_ERR(key->u.aes_cmac.tfm); kfree(key); @@ -481,6 +486,7 @@ static void ieee80211_key_free_common(struct ieee80211_key *key) ieee80211_aes_key_free(key->u.ccmp.tfm); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); break; case WLAN_CIPHER_SUITE_GCMP: @@ -804,6 +810,7 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, seq->ccmp.pn[0] = pn64 >> 40; break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: pn64 = atomic64_read(&key->u.aes_cmac.tx_pn); seq->ccmp.pn[5] = pn64; seq->ccmp.pn[4] = pn64 >> 8; @@ -854,6 +861,7 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, memcpy(seq->ccmp.pn, pn, IEEE80211_CCMP_PN_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: if (WARN_ON(tid != 0)) return; pn = key->u.aes_cmac.rx_pn; @@ -897,6 +905,7 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf, atomic64_set(&key->u.ccmp.tx_pn, pn64); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: pn64 = (u64)seq->aes_cmac.pn[5] | ((u64)seq->aes_cmac.pn[4] << 8) | ((u64)seq->aes_cmac.pn[3] << 16) | @@ -948,6 +957,7 @@ void ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, memcpy(pn, seq->ccmp.pn, IEEE80211_CCMP_PN_LEN); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: if (WARN_ON(tid != 0)) return; pn = key->u.aes_cmac.rx_pn; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index a5ad2d5bb29b..053a17c5023a 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -671,7 +671,8 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) WLAN_CIPHER_SUITE_GCMP_256, /* keep last -- depends on hw flags! 
*/ - WLAN_CIPHER_SUITE_AES_CMAC + WLAN_CIPHER_SUITE_AES_CMAC, + WLAN_CIPHER_SUITE_BIP_CMAC_256, }; if (local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL || @@ -710,7 +711,7 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); if (!have_mfp) - local->hw.wiphy->n_cipher_suites--; + local->hw.wiphy->n_cipher_suites -= 2; if (!have_wep) { local->hw.wiphy->cipher_suites += 2; @@ -736,9 +737,9 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) if (have_wep) n_suites += 2; - /* check if we have AES_CMAC */ + /* check if we have AES_CMAC, BIP-CMAC-256 */ if (have_mfp) - n_suites++; + n_suites += 2; suites = kmalloc(sizeof(u32) * n_suites, GFP_KERNEL); if (!suites) @@ -755,8 +756,10 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) suites[w++] = WLAN_CIPHER_SUITE_WEP104; } - if (have_mfp) + if (have_mfp) { suites[w++] = WLAN_CIPHER_SUITE_AES_CMAC; + suites[w++] = WLAN_CIPHER_SUITE_BIP_CMAC_256; + } for (r = 0; r < local->hw.n_cipher_schemes; r++) suites[w++] = cs[r].cipher; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index e8c6ba5ce70b..93ebc9525478 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -647,6 +647,7 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) { struct ieee80211_mgmt *hdr = (struct ieee80211_mgmt *) skb->data; struct ieee80211_mmie *mmie; + struct ieee80211_mmie_16 *mmie16; if (skb->len < 24 + sizeof(*mmie) || !is_multicast_ether_addr(hdr->da)) return -1; @@ -656,11 +657,18 @@ static int ieee80211_get_mmie_keyidx(struct sk_buff *skb) mmie = (struct ieee80211_mmie *) (skb->data + skb->len - sizeof(*mmie)); - if (mmie->element_id != WLAN_EID_MMIE || - mmie->length != sizeof(*mmie) - 2) - return -1; - - return le16_to_cpu(mmie->key_id); + if (mmie->element_id == WLAN_EID_MMIE && + mmie->length == sizeof(*mmie) - 2) + return le16_to_cpu(mmie->key_id); + + mmie16 = (struct ieee80211_mmie_16 *) + (skb->data + skb->len - sizeof(*mmie16)); + if (skb->len >= 24 + sizeof(*mmie16) && + mmie16->element_id == WLAN_EID_MMIE && + mmie16->length == sizeof(*mmie16) - 2) + return le16_to_cpu(mmie16->key_id); + + return -1; } static int iwl80211_get_cs_keyid(const struct ieee80211_cipher_scheme *cs, @@ -1660,6 +1668,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) case WLAN_CIPHER_SUITE_AES_CMAC: result = ieee80211_crypto_aes_cmac_decrypt(rx); break; + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + result = ieee80211_crypto_aes_cmac_256_decrypt(rx); + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: result = ieee80211_crypto_gcmp_decrypt(rx); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index be57e0afd019..909c27be1fdc 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -639,6 +639,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) ieee80211_is_mgmt(hdr->frame_control); break; case WLAN_CIPHER_SUITE_AES_CMAC: + case WLAN_CIPHER_SUITE_BIP_CMAC_256: if (!ieee80211_is_mgmt(hdr->frame_control)) tx->key = NULL; break; @@ -1021,6 +1022,8 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) tx, IEEE80211_CCMP_256_MIC_LEN); case WLAN_CIPHER_SUITE_AES_CMAC: return ieee80211_crypto_aes_cmac_encrypt(tx); + case WLAN_CIPHER_SUITE_BIP_CMAC_256: + return ieee80211_crypto_aes_cmac_256_encrypt(tx); case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: return ieee80211_crypto_gcmp_encrypt(tx); diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index ae654de9782a..549af118de9f 100644 --- a/net/mac80211/wpa.c 
+++ b/net/mac80211/wpa.c @@ -955,6 +955,48 @@ ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx) return TX_CONTINUE; } +ieee80211_tx_result +ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx) +{ + struct sk_buff *skb; + struct ieee80211_tx_info *info; + struct ieee80211_key *key = tx->key; + struct ieee80211_mmie_16 *mmie; + u8 aad[20]; + u64 pn64; + + if (WARN_ON(skb_queue_len(&tx->skbs) != 1)) + return TX_DROP; + + skb = skb_peek(&tx->skbs); + + info = IEEE80211_SKB_CB(skb); + + if (info->control.hw_key) + return TX_CONTINUE; + + if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie))) + return TX_DROP; + + mmie = (struct ieee80211_mmie_16 *)skb_put(skb, sizeof(*mmie)); + mmie->element_id = WLAN_EID_MMIE; + mmie->length = sizeof(*mmie) - 2; + mmie->key_id = cpu_to_le16(key->conf.keyidx); + + /* PN = PN + 1 */ + pn64 = atomic64_inc_return(&key->u.aes_cmac.tx_pn); + + bip_ipn_set64(mmie->sequence_number, pn64); + + bip_aad(skb, aad); + + /* MIC = AES-256-CMAC(IGTK, AAD || Management Frame Body || MMIE, 128) + */ + ieee80211_aes_cmac_256(key->u.aes_cmac.tfm, aad, + skb->data + 24, skb->len - 24, mmie->mic); + + return TX_CONTINUE; +} ieee80211_rx_result ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) @@ -1006,6 +1048,56 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } +ieee80211_rx_result +ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx) +{ + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_key *key = rx->key; + struct ieee80211_mmie_16 *mmie; + u8 aad[20], mic[16], ipn[6]; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + + if (!ieee80211_is_mgmt(hdr->frame_control)) + return RX_CONTINUE; + + /* management frames are already linear */ + + if (skb->len < 24 + sizeof(*mmie)) + return RX_DROP_UNUSABLE; + + mmie = (struct ieee80211_mmie_16 *) + (skb->data + skb->len - sizeof(*mmie)); + if (mmie->element_id != WLAN_EID_MMIE || + mmie->length != sizeof(*mmie) - 2) + return RX_DROP_UNUSABLE; /* Invalid MMIE */ + + bip_ipn_swap(ipn, mmie->sequence_number); + + if (memcmp(ipn, key->u.aes_cmac.rx_pn, 6) <= 0) { + key->u.aes_cmac.replays++; + return RX_DROP_UNUSABLE; + } + + if (!(status->flag & RX_FLAG_DECRYPTED)) { + /* hardware didn't decrypt/verify MIC */ + bip_aad(skb, aad); + ieee80211_aes_cmac_256(key->u.aes_cmac.tfm, aad, + skb->data + 24, skb->len - 24, mic); + if (memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) { + key->u.aes_cmac.icverrors++; + return RX_DROP_UNUSABLE; + } + } + + memcpy(key->u.aes_cmac.rx_pn, ipn, 6); + + /* Remove MMIE */ + skb_trim(skb, skb->len - sizeof(*mmie)); + + return RX_CONTINUE; +} + ieee80211_tx_result ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx) { diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h index 43e109f27a89..06b7f167a176 100644 --- a/net/mac80211/wpa.h +++ b/net/mac80211/wpa.h @@ -32,8 +32,12 @@ ieee80211_crypto_ccmp_decrypt(struct ieee80211_rx_data *rx, ieee80211_tx_result ieee80211_crypto_aes_cmac_encrypt(struct ieee80211_tx_data *tx); +ieee80211_tx_result +ieee80211_crypto_aes_cmac_256_encrypt(struct ieee80211_tx_data *tx); ieee80211_rx_result ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx); +ieee80211_rx_result +ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx); ieee80211_tx_result ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx); ieee80211_rx_result -- cgit v1.2.3-70-g09d2 From 
8ade538bf39b1ee53418528fdacd36b8e65621b9 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Sat, 24 Jan 2015 19:52:09 +0200 Subject: mac80111: Add BIP-GMAC-128 and BIP-GMAC-256 ciphers This allows mac80211 to configure BIP-GMAC-128 and BIP-GMAC-256 to the driver and also use software-implementation within mac80211 when the driver does not support this with hardware accelaration. Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/net/mac80211.h | 5 +++ net/mac80211/Makefile | 1 + net/mac80211/aes_gmac.c | 84 ++++++++++++++++++++++++++++++++++++ net/mac80211/aes_gmac.h | 20 +++++++++ net/mac80211/cfg.c | 14 ++++++ net/mac80211/debugfs_key.c | 26 +++++++++++ net/mac80211/key.c | 60 ++++++++++++++++++++++++++ net/mac80211/key.h | 7 +++ net/mac80211/main.c | 12 ++++-- net/mac80211/rx.c | 4 ++ net/mac80211/tx.c | 5 +++ net/mac80211/wpa.c | 105 +++++++++++++++++++++++++++++++++++++++++++++ net/mac80211/wpa.h | 4 ++ 13 files changed, 344 insertions(+), 3 deletions(-) create mode 100644 net/mac80211/aes_gmac.c create mode 100644 net/mac80211/aes_gmac.h (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ae6638436112..d52914b75331 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4098,6 +4098,8 @@ void ieee80211_aes_cmac_calculate_k1_k2(struct ieee80211_key_conf *keyconf, * reverse order than in packet) * @aes_cmac: PN data, most significant byte first (big endian, * reverse order than in packet) + * @aes_gmac: PN data, most significant byte first (big endian, + * reverse order than in packet) * @gcmp: PN data, most significant byte first (big endian, * reverse order than in packet) */ @@ -4113,6 +4115,9 @@ struct ieee80211_key_seq { struct { u8 pn[6]; } aes_cmac; + struct { + u8 pn[6]; + } aes_gmac; struct { u8 pn[6]; } gcmp; diff --git a/net/mac80211/Makefile b/net/mac80211/Makefile index 0cbf93618433..3275f01881be 100644 --- a/net/mac80211/Makefile +++ b/net/mac80211/Makefile @@ -17,6 +17,7 @@ mac80211-y := \ aes_ccm.o \ aes_gcm.o \ aes_cmac.o \ + aes_gmac.o \ cfg.o \ ethtool.o \ rx.o \ diff --git a/net/mac80211/aes_gmac.c b/net/mac80211/aes_gmac.c new file mode 100644 index 000000000000..1c72edcb0083 --- /dev/null +++ b/net/mac80211/aes_gmac.c @@ -0,0 +1,84 @@ +/* + * AES-GMAC for IEEE 802.11 BIP-GMAC-128 and BIP-GMAC-256 + * Copyright 2015, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include +#include +#include +#include +#include + +#include +#include "key.h" +#include "aes_gmac.h" + +#define GMAC_MIC_LEN 16 +#define GMAC_NONCE_LEN 12 +#define AAD_LEN 20 + +int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, + const u8 *data, size_t data_len, u8 *mic) +{ + struct scatterlist sg[3], ct[1]; + char aead_req_data[sizeof(struct aead_request) + + crypto_aead_reqsize(tfm)] + __aligned(__alignof__(struct aead_request)); + struct aead_request *aead_req = (void *)aead_req_data; + u8 zero[GMAC_MIC_LEN], iv[AES_BLOCK_SIZE]; + + if (data_len < GMAC_MIC_LEN) + return -EINVAL; + + memset(aead_req, 0, sizeof(aead_req_data)); + + memset(zero, 0, GMAC_MIC_LEN); + sg_init_table(sg, 3); + sg_set_buf(&sg[0], aad, AAD_LEN); + sg_set_buf(&sg[1], data, data_len - GMAC_MIC_LEN); + sg_set_buf(&sg[2], zero, GMAC_MIC_LEN); + + memcpy(iv, nonce, GMAC_NONCE_LEN); + memset(iv + GMAC_NONCE_LEN, 0, sizeof(iv) - GMAC_NONCE_LEN); + iv[AES_BLOCK_SIZE - 1] = 0x01; + + sg_init_table(ct, 1); + sg_set_buf(&ct[0], mic, GMAC_MIC_LEN); + + aead_request_set_tfm(aead_req, tfm); + aead_request_set_assoc(aead_req, sg, AAD_LEN + data_len); + aead_request_set_crypt(aead_req, NULL, ct, 0, iv); + + crypto_aead_encrypt(aead_req); + + return 0; +} + +struct crypto_aead *ieee80211_aes_gmac_key_setup(const u8 key[], + size_t key_len) +{ + struct crypto_aead *tfm; + int err; + + tfm = crypto_alloc_aead("gcm(aes)", 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + return tfm; + + err = crypto_aead_setkey(tfm, key, key_len); + if (!err) + return tfm; + if (!err) + err = crypto_aead_setauthsize(tfm, GMAC_MIC_LEN); + + crypto_free_aead(tfm); + return ERR_PTR(err); +} + +void ieee80211_aes_gmac_key_free(struct crypto_aead *tfm) +{ + crypto_free_aead(tfm); +} diff --git a/net/mac80211/aes_gmac.h b/net/mac80211/aes_gmac.h new file mode 100644 index 000000000000..d328204d73a8 --- /dev/null +++ b/net/mac80211/aes_gmac.h @@ -0,0 +1,20 @@ +/* + * Copyright 2015, Qualcomm Atheros, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef AES_GMAC_H +#define AES_GMAC_H + +#include + +struct crypto_aead *ieee80211_aes_gmac_key_setup(const u8 key[], + size_t key_len); +int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce, + const u8 *data, size_t data_len, u8 *mic); +void ieee80211_aes_gmac_key_free(struct crypto_aead *tfm); + +#endif /* AES_GMAC_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index b7e528bbecce..dd4ff36c557a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -165,6 +165,8 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: break; @@ -374,6 +376,18 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq = seq; params.seq_len = 6; break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); + seq[0] = pn64; + seq[1] = pn64 >> 8; + seq[2] = pn64 >> 16; + seq[3] = pn64 >> 24; + seq[4] = pn64 >> 32; + seq[5] = pn64 >> 40; + params.seq = seq; + params.seq_len = 6; + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: pn64 = atomic64_read(&key->u.gcmp.tx_pn); diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index d1b60eb014a8..71ac1b5f4da5 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -107,6 +107,13 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + pn = atomic64_read(&key->u.aes_gmac.tx_pn); + len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", + (u8)(pn >> 40), (u8)(pn >> 32), (u8)(pn >> 24), + (u8)(pn >> 16), (u8)(pn >> 8), (u8)pn); + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: pn = atomic64_read(&key->u.gcmp.tx_pn); @@ -162,6 +169,15 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, rpn[3], rpn[4], rpn[5]); len = p - buf; break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + rpn = key->u.aes_gmac.rx_pn; + p += scnprintf(p, sizeof(buf)+buf-p, + "%02x%02x%02x%02x%02x%02x\n", + rpn[0], rpn[1], rpn[2], + rpn[3], rpn[4], rpn[5]); + len = p - buf; + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: for (i = 0; i < IEEE80211_NUM_TIDS + 1; i++) { @@ -197,6 +213,11 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf, len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.replays); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + len = scnprintf(buf, sizeof(buf), "%u\n", + key->u.aes_gmac.replays); + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.gcmp.replays); @@ -221,6 +242,11 @@ static ssize_t key_icverrors_read(struct file *file, char __user *userbuf, len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.icverrors); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + len = scnprintf(buf, sizeof(buf), "%u\n", + key->u.aes_gmac.icverrors); + break; default: return 0; } diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 7ceea9d9fcd2..0825d76edcfc 
100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -24,6 +24,7 @@ #include "debugfs_key.h" #include "aes_ccm.h" #include "aes_cmac.h" +#include "aes_gmac.h" #include "aes_gcm.h" @@ -166,6 +167,8 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: /* all of these we can do in software - if driver can */ @@ -440,6 +443,25 @@ ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, return ERR_PTR(err); } break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + key->conf.iv_len = 0; + key->conf.icv_len = sizeof(struct ieee80211_mmie_16); + if (seq) + for (j = 0; j < IEEE80211_GMAC_PN_LEN; j++) + key->u.aes_gmac.rx_pn[j] = + seq[IEEE80211_GMAC_PN_LEN - j - 1]; + /* Initialize AES key state here as an optimization so that + * it does not need to be initialized for every packet. + */ + key->u.aes_gmac.tfm = + ieee80211_aes_gmac_key_setup(key_data, key_len); + if (IS_ERR(key->u.aes_gmac.tfm)) { + err = PTR_ERR(key->u.aes_gmac.tfm); + kfree(key); + return ERR_PTR(err); + } + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: key->conf.iv_len = IEEE80211_GCMP_HDR_LEN; @@ -489,6 +511,10 @@ static void ieee80211_key_free_common(struct ieee80211_key *key) case WLAN_CIPHER_SUITE_BIP_CMAC_256: ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + ieee80211_aes_gmac_key_free(key->u.aes_gmac.tfm); + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: ieee80211_aes_gcm_key_free(key->u.gcmp.tfm); @@ -819,6 +845,16 @@ void ieee80211_get_key_tx_seq(struct ieee80211_key_conf *keyconf, seq->ccmp.pn[1] = pn64 >> 32; seq->ccmp.pn[0] = pn64 >> 40; break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + pn64 = atomic64_read(&key->u.aes_gmac.tx_pn); + seq->ccmp.pn[5] = pn64; + seq->ccmp.pn[4] = pn64 >> 8; + seq->ccmp.pn[3] = pn64 >> 16; + seq->ccmp.pn[2] = pn64 >> 24; + seq->ccmp.pn[1] = pn64 >> 32; + seq->ccmp.pn[0] = pn64 >> 40; + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: pn64 = atomic64_read(&key->u.gcmp.tx_pn); @@ -867,6 +903,13 @@ void ieee80211_get_key_rx_seq(struct ieee80211_key_conf *keyconf, pn = key->u.aes_cmac.rx_pn; memcpy(seq->aes_cmac.pn, pn, IEEE80211_CMAC_PN_LEN); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + if (WARN_ON(tid != 0)) + return; + pn = key->u.aes_gmac.rx_pn; + memcpy(seq->aes_gmac.pn, pn, IEEE80211_GMAC_PN_LEN); + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) @@ -914,6 +957,16 @@ void ieee80211_set_key_tx_seq(struct ieee80211_key_conf *keyconf, ((u64)seq->aes_cmac.pn[0] << 40); atomic64_set(&key->u.aes_cmac.tx_pn, pn64); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + pn64 = (u64)seq->aes_gmac.pn[5] | + ((u64)seq->aes_gmac.pn[4] << 8) | + ((u64)seq->aes_gmac.pn[3] << 16) | + ((u64)seq->aes_gmac.pn[2] << 24) | + ((u64)seq->aes_gmac.pn[1] << 32) | + ((u64)seq->aes_gmac.pn[0] << 40); + atomic64_set(&key->u.aes_gmac.tx_pn, pn64); + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: pn64 = (u64)seq->gcmp.pn[5] | @@ -963,6 +1016,13 @@ void 
ieee80211_set_key_rx_seq(struct ieee80211_key_conf *keyconf, pn = key->u.aes_cmac.rx_pn; memcpy(pn, seq->aes_cmac.pn, IEEE80211_CMAC_PN_LEN); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + if (WARN_ON(tid != 0)) + return; + pn = key->u.aes_gmac.rx_pn; + memcpy(pn, seq->aes_gmac.pn, IEEE80211_GMAC_PN_LEN); + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: if (WARN_ON(tid < -1 || tid >= IEEE80211_NUM_TIDS)) diff --git a/net/mac80211/key.h b/net/mac80211/key.h index 27580da851c8..d57a9915494f 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -94,6 +94,13 @@ struct ieee80211_key { u32 replays; /* dot11RSNAStatsCMACReplays */ u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ } aes_cmac; + struct { + atomic64_t tx_pn; + u8 rx_pn[IEEE80211_GMAC_PN_LEN]; + struct crypto_aead *tfm; + u32 replays; /* dot11RSNAStatsCMACReplays */ + u32 icverrors; /* dot11RSNAStatsCMACICVErrors */ + } aes_gmac; struct { atomic64_t tx_pn; /* Last received packet number. The first diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 053a17c5023a..5e09d354c5a5 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -673,6 +673,8 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) /* keep last -- depends on hw flags! */ WLAN_CIPHER_SUITE_AES_CMAC, WLAN_CIPHER_SUITE_BIP_CMAC_256, + WLAN_CIPHER_SUITE_BIP_GMAC_128, + WLAN_CIPHER_SUITE_BIP_GMAC_256, }; if (local->hw.flags & IEEE80211_HW_SW_CRYPTO_CONTROL || @@ -711,7 +713,7 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) local->hw.wiphy->n_cipher_suites = ARRAY_SIZE(cipher_suites); if (!have_mfp) - local->hw.wiphy->n_cipher_suites -= 2; + local->hw.wiphy->n_cipher_suites -= 4; if (!have_wep) { local->hw.wiphy->cipher_suites += 2; @@ -737,9 +739,11 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) if (have_wep) n_suites += 2; - /* check if we have AES_CMAC, BIP-CMAC-256 */ + /* check if we have AES_CMAC, BIP-CMAC-256, BIP-GMAC-128, + * BIP-GMAC-256 + */ if (have_mfp) - n_suites += 2; + n_suites += 4; suites = kmalloc(sizeof(u32) * n_suites, GFP_KERNEL); if (!suites) @@ -759,6 +763,8 @@ static int ieee80211_init_cipher_suites(struct ieee80211_local *local) if (have_mfp) { suites[w++] = WLAN_CIPHER_SUITE_AES_CMAC; suites[w++] = WLAN_CIPHER_SUITE_BIP_CMAC_256; + suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_128; + suites[w++] = WLAN_CIPHER_SUITE_BIP_GMAC_256; } for (r = 0; r < local->hw.n_cipher_schemes; r++) diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 93ebc9525478..ed38d8302659 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1671,6 +1671,10 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) case WLAN_CIPHER_SUITE_BIP_CMAC_256: result = ieee80211_crypto_aes_cmac_256_decrypt(rx); break; + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + result = ieee80211_crypto_aes_gmac_decrypt(rx); + break; case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: result = ieee80211_crypto_gcmp_decrypt(rx); diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 909c27be1fdc..88a18ffe2975 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -640,6 +640,8 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) break; case WLAN_CIPHER_SUITE_AES_CMAC: case WLAN_CIPHER_SUITE_BIP_CMAC_256: + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: if (!ieee80211_is_mgmt(hdr->frame_control)) tx->key = NULL; break; @@ -1024,6 +1026,9 @@ ieee80211_tx_h_encrypt(struct 
ieee80211_tx_data *tx) return ieee80211_crypto_aes_cmac_encrypt(tx); case WLAN_CIPHER_SUITE_BIP_CMAC_256: return ieee80211_crypto_aes_cmac_256_encrypt(tx); + case WLAN_CIPHER_SUITE_BIP_GMAC_128: + case WLAN_CIPHER_SUITE_BIP_GMAC_256: + return ieee80211_crypto_aes_gmac_encrypt(tx); case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: return ieee80211_crypto_gcmp_encrypt(tx); diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 549af118de9f..75de6fac40d1 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -22,6 +22,7 @@ #include "tkip.h" #include "aes_ccm.h" #include "aes_cmac.h" +#include "aes_gmac.h" #include "aes_gcm.h" #include "wpa.h" @@ -1098,6 +1099,110 @@ ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx) return RX_CONTINUE; } +ieee80211_tx_result +ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx) +{ + struct sk_buff *skb; + struct ieee80211_tx_info *info; + struct ieee80211_key *key = tx->key; + struct ieee80211_mmie_16 *mmie; + struct ieee80211_hdr *hdr; + u8 aad[20]; + u64 pn64; + u8 nonce[12]; + + if (WARN_ON(skb_queue_len(&tx->skbs) != 1)) + return TX_DROP; + + skb = skb_peek(&tx->skbs); + + info = IEEE80211_SKB_CB(skb); + + if (info->control.hw_key) + return TX_CONTINUE; + + if (WARN_ON(skb_tailroom(skb) < sizeof(*mmie))) + return TX_DROP; + + mmie = (struct ieee80211_mmie_16 *)skb_put(skb, sizeof(*mmie)); + mmie->element_id = WLAN_EID_MMIE; + mmie->length = sizeof(*mmie) - 2; + mmie->key_id = cpu_to_le16(key->conf.keyidx); + + /* PN = PN + 1 */ + pn64 = atomic64_inc_return(&key->u.aes_gmac.tx_pn); + + bip_ipn_set64(mmie->sequence_number, pn64); + + bip_aad(skb, aad); + + hdr = (struct ieee80211_hdr *)skb->data; + memcpy(nonce, hdr->addr2, ETH_ALEN); + bip_ipn_swap(nonce + ETH_ALEN, mmie->sequence_number); + + /* MIC = AES-GMAC(IGTK, AAD || Management Frame Body || MMIE, 128) */ + if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce, + skb->data + 24, skb->len - 24, mmie->mic) < 0) + return TX_DROP; + + return TX_CONTINUE; +} + +ieee80211_rx_result +ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx) +{ + struct sk_buff *skb = rx->skb; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); + struct ieee80211_key *key = rx->key; + struct ieee80211_mmie_16 *mmie; + u8 aad[20], mic[16], ipn[6], nonce[12]; + struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; + + if (!ieee80211_is_mgmt(hdr->frame_control)) + return RX_CONTINUE; + + /* management frames are already linear */ + + if (skb->len < 24 + sizeof(*mmie)) + return RX_DROP_UNUSABLE; + + mmie = (struct ieee80211_mmie_16 *) + (skb->data + skb->len - sizeof(*mmie)); + if (mmie->element_id != WLAN_EID_MMIE || + mmie->length != sizeof(*mmie) - 2) + return RX_DROP_UNUSABLE; /* Invalid MMIE */ + + bip_ipn_swap(ipn, mmie->sequence_number); + + if (memcmp(ipn, key->u.aes_gmac.rx_pn, 6) <= 0) { + key->u.aes_gmac.replays++; + return RX_DROP_UNUSABLE; + } + + if (!(status->flag & RX_FLAG_DECRYPTED)) { + /* hardware didn't decrypt/verify MIC */ + bip_aad(skb, aad); + + memcpy(nonce, hdr->addr2, ETH_ALEN); + memcpy(nonce + ETH_ALEN, ipn, 6); + + if (ieee80211_aes_gmac(key->u.aes_gmac.tfm, aad, nonce, + skb->data + 24, skb->len - 24, + mic) < 0 || + memcmp(mic, mmie->mic, sizeof(mmie->mic)) != 0) { + key->u.aes_gmac.icverrors++; + return RX_DROP_UNUSABLE; + } + } + + memcpy(key->u.aes_gmac.rx_pn, ipn, 6); + + /* Remove MMIE */ + skb_trim(skb, skb->len - sizeof(*mmie)); + + return RX_CONTINUE; +} + ieee80211_tx_result ieee80211_crypto_hw_encrypt(struct 
ieee80211_tx_data *tx) { diff --git a/net/mac80211/wpa.h b/net/mac80211/wpa.h index 06b7f167a176..d98011ee8f55 100644 --- a/net/mac80211/wpa.h +++ b/net/mac80211/wpa.h @@ -39,6 +39,10 @@ ieee80211_crypto_aes_cmac_decrypt(struct ieee80211_rx_data *rx); ieee80211_rx_result ieee80211_crypto_aes_cmac_256_decrypt(struct ieee80211_rx_data *rx); ieee80211_tx_result +ieee80211_crypto_aes_gmac_encrypt(struct ieee80211_tx_data *tx); +ieee80211_rx_result +ieee80211_crypto_aes_gmac_decrypt(struct ieee80211_rx_data *rx); +ieee80211_tx_result ieee80211_crypto_hw_encrypt(struct ieee80211_tx_data *tx); ieee80211_rx_result ieee80211_crypto_hw_decrypt(struct ieee80211_rx_data *rx); -- cgit v1.2.3-70-g09d2 From fda7a49cb991e9da15f5955d1ea292f8ec74f27a Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 27 Jan 2015 01:18:11 +0100 Subject: NFC: hci: Change event_received handler gate parameter to pipe Several pipes may point to the same CLF gate, so getting the gate ID as an input is not enough. For example dual secure element may have 2 pipes (1 for uicc and 1 for eSE) pointing to the connectivity gate. As resolving gate and host IDs can be done from a pipe, we now pass the pipe ID to the event received handler. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/hci.h | 2 +- net/nfc/hci/core.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 14bd0e1c47fa..031c0be9fb32 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -51,7 +51,7 @@ struct nfc_hci_ops { int (*tm_send)(struct nfc_hci_dev *hdev, struct sk_buff *skb); int (*check_presence)(struct nfc_hci_dev *hdev, struct nfc_target *target); - int (*event_received)(struct nfc_hci_dev *hdev, u8 gate, u8 event, + int (*event_received)(struct nfc_hci_dev *hdev, u8 pipe, u8 event, struct sk_buff *skb); int (*fw_download)(struct nfc_hci_dev *hdev, const char *firmware_name); int (*discover_se)(struct nfc_hci_dev *dev); diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index ef50e7716c4a..12a9a4b956d2 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -338,7 +338,7 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, } if (hdev->ops->event_received) { - r = hdev->ops->event_received(hdev, gate, event, skb); + r = hdev->ops->event_received(hdev, pipe, event, skb); if (r <= 0) goto exit_noskb; } -- cgit v1.2.3-70-g09d2 From 118278f20aa89efe45fa1e2b1829f198d557f8fe Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 27 Jan 2015 01:18:12 +0100 Subject: NFC: hci: Add pipes table to reference them with a tuple {gate, host} In order to keep host source information on specific hci event (such as evt_connectivity or evt_transaction) and because 2 pipes can be connected to the same gate, it is necessary to add a table referencing every pipe with a {gate, host} tuple. 
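[Editor's note] A minimal userspace model of the pipe table this patch adds: every pipe id maps to a {gate, dest_host} tuple, so an event received on a pipe can be attributed to a specific host even when two pipes share the same gate (e.g. UICC and eSE both bound to the connectivity gate). The array size and invalid-value constants mirror hci.h; the gate and host ids in main() (0x41, 0x02, 0xc0) are illustrative values assumed from the st21nfca patch further down, not definitive API constants:

#include <stdint.h>
#include <stdio.h>

#define NFC_HCI_MAX_PIPES	127
#define NFC_HCI_INVALID_GATE	0xFF
#define NFC_HCI_INVALID_HOST	0x80

struct pipe_info {
	uint8_t gate;
	uint8_t dest_host;
};

static struct pipe_info pipes[NFC_HCI_MAX_PIPES];

static void reset_pipes(void)
{
	int i;

	for (i = 0; i < NFC_HCI_MAX_PIPES; i++) {
		pipes[i].gate = NFC_HCI_INVALID_GATE;
		pipes[i].dest_host = NFC_HCI_INVALID_HOST;
	}
}

static void reset_pipes_per_host(uint8_t host)
{
	int i;

	for (i = 0; i < NFC_HCI_MAX_PIPES; i++) {
		if (pipes[i].dest_host != host)
			continue;
		pipes[i].gate = NFC_HCI_INVALID_GATE;
		pipes[i].dest_host = NFC_HCI_INVALID_HOST;
	}
}

int main(void)
{
	const uint8_t connectivity_gate = 0x41;	/* example gate id */
	const uint8_t uicc = 0x02, ese = 0xc0;	/* example host ids */

	reset_pipes();
	/* two pipes bound to the same gate, but to different hosts */
	pipes[0x10] = (struct pipe_info){ connectivity_gate, uicc };
	pipes[0x11] = (struct pipe_info){ connectivity_gate, ese };

	/* event dispatch can now resolve both gate and source host */
	printf("pipe 0x11 -> gate %#x host %#x\n",
	       pipes[0x11].gate, pipes[0x11].dest_host);

	reset_pipes_per_host(ese);
	printf("after eSE reset: pipe 0x11 gate %#x\n", pipes[0x11].gate);
	return 0;
}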
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/hci.h | 13 ++++++++++++- net/nfc/hci/command.c | 6 ++++-- net/nfc/hci/core.c | 36 +++++++++++++++++++++++++++++++----- 3 files changed, 47 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 031c0be9fb32..5570f4a316d1 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -63,8 +63,10 @@ struct nfc_hci_ops { }; /* Pipes */ -#define NFC_HCI_INVALID_PIPE 0x80 #define NFC_HCI_DO_NOT_CREATE_PIPE 0x81 +#define NFC_HCI_INVALID_PIPE 0x80 +#define NFC_HCI_INVALID_GATE 0xFF +#define NFC_HCI_INVALID_HOST 0x80 #define NFC_HCI_LINK_MGMT_PIPE 0x00 #define NFC_HCI_ADMIN_PIPE 0x01 @@ -73,7 +75,13 @@ struct nfc_hci_gate { u8 pipe; }; +struct nfc_hci_pipe { + u8 gate; + u8 dest_host; +}; + #define NFC_HCI_MAX_CUSTOM_GATES 50 +#define NFC_HCI_MAX_PIPES 127 struct nfc_hci_init_data { u8 gate_count; struct nfc_hci_gate gates[NFC_HCI_MAX_CUSTOM_GATES]; @@ -125,6 +133,7 @@ struct nfc_hci_dev { void *clientdata; u8 gate2pipe[NFC_HCI_MAX_GATES]; + struct nfc_hci_pipe pipes[NFC_HCI_MAX_PIPES]; u8 sw_romlib; u8 sw_patch; @@ -167,6 +176,8 @@ void *nfc_hci_get_clientdata(struct nfc_hci_dev *hdev); void nfc_hci_driver_failure(struct nfc_hci_dev *hdev, int err); int nfc_hci_result_to_errno(u8 result); +void nfc_hci_reset_pipes(struct nfc_hci_dev *dev); +void nfc_hci_reset_pipes_per_host(struct nfc_hci_dev *hdev, u8 host); /* Host IDs */ #define NFC_HCI_HOST_CONTROLLER_ID 0x00 diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 91df487aa0a9..9acf586c98d4 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -331,7 +331,7 @@ int nfc_hci_disconnect_all_gates(struct nfc_hci_dev *hdev) if (r < 0) return r; - memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + nfc_hci_reset_pipes(hdev); return 0; } @@ -345,7 +345,7 @@ int nfc_hci_connect_gate(struct nfc_hci_dev *hdev, u8 dest_host, u8 dest_gate, pr_debug("\n"); - if (hdev->gate2pipe[dest_gate] == NFC_HCI_DO_NOT_CREATE_PIPE) + if (pipe == NFC_HCI_DO_NOT_CREATE_PIPE) return 0; if (hdev->gate2pipe[dest_gate] != NFC_HCI_INVALID_PIPE) @@ -380,6 +380,8 @@ open_pipe: return r; } + hdev->pipes[pipe].gate = dest_gate; + hdev->pipes[pipe].dest_host = dest_host; hdev->gate2pipe[dest_gate] = pipe; return 0; diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 12a9a4b956d2..8f8abfed7f65 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -46,6 +46,32 @@ int nfc_hci_result_to_errno(u8 result) } EXPORT_SYMBOL(nfc_hci_result_to_errno); +void nfc_hci_reset_pipes(struct nfc_hci_dev *hdev) +{ + int i = 0; + + for (i = 0; i < NFC_HCI_MAX_PIPES; i++) { + hdev->pipes[i].gate = NFC_HCI_INVALID_GATE; + hdev->pipes[i].dest_host = NFC_HCI_INVALID_HOST; + } + memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); +} +EXPORT_SYMBOL(nfc_hci_reset_pipes); + +void nfc_hci_reset_pipes_per_host(struct nfc_hci_dev *hdev, u8 host) +{ + int i = 0; + + for (i = 0; i < NFC_HCI_MAX_PIPES; i++) { + if (hdev->pipes[i].dest_host != host) + continue; + + hdev->pipes[i].gate = NFC_HCI_INVALID_GATE; + hdev->pipes[i].dest_host = NFC_HCI_INVALID_HOST; + } +} +EXPORT_SYMBOL(nfc_hci_reset_pipes_per_host); + static void nfc_hci_msg_tx_work(struct work_struct *work) { struct nfc_hci_dev *hdev = container_of(work, struct nfc_hci_dev, @@ -168,7 +194,7 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, struct sk_buff *skb) { int r = 0; - u8 gate = nfc_hci_pipe2gate(hdev, 
pipe); + u8 gate = hdev->pipes[pipe].gate; u8 local_gate, new_pipe; u8 gate_opened = 0x00; @@ -330,9 +356,9 @@ void nfc_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, struct sk_buff *skb) { int r = 0; - u8 gate = nfc_hci_pipe2gate(hdev, pipe); + u8 gate = hdev->pipes[pipe].gate; - if (gate == 0xff) { + if (gate == NFC_HCI_INVALID_GATE) { pr_err("Discarded event %x to unopened pipe %x\n", event, pipe); goto exit; } @@ -573,7 +599,7 @@ static int hci_dev_down(struct nfc_dev *nfc_dev) if (hdev->ops->close) hdev->ops->close(hdev); - memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + nfc_hci_reset_pipes(hdev); return 0; } @@ -932,7 +958,7 @@ struct nfc_hci_dev *nfc_hci_allocate_device(struct nfc_hci_ops *ops, nfc_set_drvdata(hdev->ndev, hdev); - memset(hdev->gate2pipe, NFC_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); + nfc_hci_reset_pipes(hdev); hdev->quirks = quirks; -- cgit v1.2.3-70-g09d2 From af77522320aa0e5b4b52dce615ad067d92e15fbf Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 27 Jan 2015 01:18:13 +0100 Subject: NFC: hci: Change nfc_hci_send_response gate parameter to pipe As there can be several pipes connected to the same gate, we need to know which pipe ID to use when sending an HCI response. A gate ID is not enough. Instead of changing the nfc_hci_send_response() API to something not aligned with the rest of the HCI API, we call nfc_hci_hcp_message_tx directly. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/hci.h | 2 -- net/nfc/hci/command.c | 17 ----------------- net/nfc/hci/core.c | 12 ++++++++---- 3 files changed, 8 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 5570f4a316d1..1d1fd2b98f1e 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -260,8 +260,6 @@ int nfc_hci_send_cmd(struct nfc_hci_dev *hdev, u8 gate, u8 cmd, int nfc_hci_send_cmd_async(struct nfc_hci_dev *hdev, u8 gate, u8 cmd, const u8 *param, size_t param_len, data_exchange_cb_t cb, void *cb_context); -int nfc_hci_send_response(struct nfc_hci_dev *hdev, u8 gate, u8 response, - const u8 *param, size_t param_len); int nfc_hci_send_event(struct nfc_hci_dev *hdev, u8 gate, u8 event, const u8 *param, size_t param_len); int nfc_hci_target_discovered(struct nfc_hci_dev *hdev, u8 gate); diff --git a/net/nfc/hci/command.c b/net/nfc/hci/command.c index 9acf586c98d4..844673cb7c18 100644 --- a/net/nfc/hci/command.c +++ b/net/nfc/hci/command.c @@ -116,23 +116,6 @@ int nfc_hci_send_event(struct nfc_hci_dev *hdev, u8 gate, u8 event, } EXPORT_SYMBOL(nfc_hci_send_event); -int nfc_hci_send_response(struct nfc_hci_dev *hdev, u8 gate, u8 response, - const u8 *param, size_t param_len) -{ - u8 pipe; - - pr_debug("\n"); - - pipe = hdev->gate2pipe[gate]; - if (pipe == NFC_HCI_INVALID_PIPE) - return -EADDRNOTAVAIL; - - return nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, - response, param, param_len, NULL, NULL, - 0); -} -EXPORT_SYMBOL(nfc_hci_send_response); - /* * Execute an hci command sent to gate. * skb will contain response data if success. 
skb can be NULL if you are not diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index 8f8abfed7f65..e351e94f8d40 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -209,7 +209,8 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, local_gate = skb->data[3]; new_pipe = skb->data[4]; - nfc_hci_send_response(hdev, gate, NFC_HCI_ANY_OK, NULL, 0); + nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, + NFC_HCI_ANY_OK, NULL, 0, NULL, NULL, 0); /* save the new created pipe and bind with local gate, * the description for skb->data[3] is destination gate id @@ -223,11 +224,14 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, * open it */ if (gate != 0xff) - nfc_hci_send_response(hdev, gate, NFC_HCI_ANY_OK, - &gate_opened, 1); + nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, + NFC_HCI_ANY_OK, &gate_opened, 1, + NULL, NULL, 0); break; case NFC_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED: - nfc_hci_send_response(hdev, gate, NFC_HCI_ANY_OK, NULL, 0); + nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, + NFC_HCI_ANY_OK, NULL, 0, NULL, NULL, 0); + break; default: pr_info("Discarded unknown cmd %x to gate %x\n", cmd, gate); -- cgit v1.2.3-70-g09d2 From 8409e4283c1ca62ce107564de7ff93b4dd476d41 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 27 Jan 2015 01:18:15 +0100 Subject: NFC: hci: Add cmd_received handler When a command is received, it is sometime needed to let the CLF driver do some additional operations. (ex: count remaining pipe notification...) Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/hci.h | 8 ++++++++ net/nfc/hci/core.c | 3 +++ 2 files changed, 11 insertions(+) (limited to 'include') diff --git a/include/net/nfc/hci.h b/include/net/nfc/hci.h index 1d1fd2b98f1e..ab672b537dd4 100644 --- a/include/net/nfc/hci.h +++ b/include/net/nfc/hci.h @@ -53,6 +53,8 @@ struct nfc_hci_ops { struct nfc_target *target); int (*event_received)(struct nfc_hci_dev *hdev, u8 pipe, u8 event, struct sk_buff *skb); + void (*cmd_received)(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, + struct sk_buff *skb); int (*fw_download)(struct nfc_hci_dev *hdev, const char *firmware_name); int (*discover_se)(struct nfc_hci_dev *dev); int (*enable_se)(struct nfc_hci_dev *dev, u32 se_idx); @@ -230,6 +232,12 @@ void nfc_hci_reset_pipes_per_host(struct nfc_hci_dev *hdev, u8 host); #define NFC_HCI_EVT_POST_DATA 0x02 #define NFC_HCI_EVT_HOT_PLUG 0x03 +/* Generic commands */ +#define NFC_HCI_ANY_SET_PARAMETER 0x01 +#define NFC_HCI_ANY_GET_PARAMETER 0x02 +#define NFC_HCI_ANY_OPEN_PIPE 0x03 +#define NFC_HCI_ANY_CLOSE_PIPE 0x04 + /* Reader RF gates events */ #define NFC_HCI_EVT_READER_REQUESTED 0x10 #define NFC_HCI_EVT_END_OPERATION 0x11 diff --git a/net/nfc/hci/core.c b/net/nfc/hci/core.c index a664a67dff1c..6e061da2258a 100644 --- a/net/nfc/hci/core.c +++ b/net/nfc/hci/core.c @@ -249,6 +249,9 @@ void nfc_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, break; } + if (hdev->ops->cmd_received) + hdev->ops->cmd_received(hdev, pipe, cmd, skb); + exit: nfc_hci_hcp_message_tx(hdev, pipe, NFC_HCI_HCP_RESPONSE, status, NULL, 0, NULL, NULL, 0); -- cgit v1.2.3-70-g09d2 From 2130fb97fecf9a51bb4a21da220cff3f72496a94 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 27 Jan 2015 01:18:19 +0100 Subject: NFC: st21nfca: Adding support for secure element st21nfca has 1 physical SWP line and can support up to 2 secure elements (UICC & eSE) thanks to an external switch managed with a gpio. 
The platform integrator needs to specify thanks to 2 initialization properties, uicc-present and ese-present, if it is suppose to have uicc and/or ese. Of course if the platform does not have an external switch, only one kind of secure element can be supported. Those parameters are under platform integrator responsibilities. During initialization, the white_list will be set according to those parameters. The discovery_se function will assume a secure element is physically present according to uicc-present and ese-present values and will add it to the secure element list. On ese activation, the atr is retrieved to calculate a command exchange timeout based on the first atr(TB) value. The se_io will allow to transfer data over SWP. 2 kind of events may appear after a data is sent over: - ST21NFCA_EVT_TRANSMIT_DATA when receiving an apdu answer - ST21NFCA_EVT_WTX_REQUEST when the secure element needs more time than expected to compute a command. If this timeout expired, a first recovery tentative consist to send a simple software reset proprietary command. If this tentative still fail, a second recovery tentative consist to send a hardware reset proprietary command. This function is only relevant for eSE like secure element. This patch also change the way a pipe is referenced. There can be different pipe connected to the same gate with different host destination (ex: CONNECTIVITY). In order to keep host information every pipe are reference with a tuple (gate, host). In order to reduce changes, we are keeping unchanged the way a gate is addressed on the Terminal Host. However, this is working because we consider the apdu reader gate is only present on the eSE slot also the connectivity gate cannot give a reliable value; it will give the latest stored pipe value. 
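[Editor's note] A small sketch of the timeout calculation described above: the first TB for T=1 in the eSE's ATR carries BWI in its upper nibble, the block waiting time is 2^BWI/10 seconds, and the driver doubles that for margin, i.e. (1 << BWI) * 200 ms (so the default BWI of 4 gives 3200 ms). The ATR walk mirrors the TA/TB presence bits used by st21nfca_se_get_bwi() in the diff below; the ATR bytes in main() are a made-up toy value, illustration only:

#include <stdint.h>
#include <stddef.h>
#include <stdio.h>

#define DEFAULT_BWI	0x04
#define TA_PRESENT(y)	((y) & 0x01)
#define TB_PRESENT(y)	((y) & 0x02)

static unsigned int bwi_to_timeout_ms(uint8_t bwi)
{
	/* WT = 2^BWI / 10 s = (1 << BWI) * 100 ms, doubled for margin */
	return (1u << bwi) * 200;
}

static uint8_t atr_get_bwi(const uint8_t *atr, size_t len)
{
	size_t i;
	uint8_t y;

	for (i = 1; i < len; i++) {
		y = atr[i] >> 4;	/* Y: which of TA/TB/... follow */
		if (TA_PRESENT(y))
			i++;		/* skip TA */
		if (TB_PRESENT(y)) {
			i++;
			return atr[i] >> 4;	/* BWI: TB's upper nibble */
		}
	}
	return DEFAULT_BWI;
}

int main(void)
{
	/* toy ATR: TD with TB present (Y = 0x2), then TB with BWI = 4 */
	const uint8_t atr[] = { 0x3b, 0x20, 0x45 };
	uint8_t bwi = atr_get_bwi(atr, sizeof(atr));

	printf("BWI %u -> timeout %u ms\n", bwi, bwi_to_timeout_ms(bwi));
	return 0;
}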
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- drivers/nfc/st21nfca/Makefile | 2 +- drivers/nfc/st21nfca/i2c.c | 17 +- drivers/nfc/st21nfca/st21nfca.c | 134 +++++++++-- drivers/nfc/st21nfca/st21nfca.h | 21 +- drivers/nfc/st21nfca/st21nfca_se.c | 390 +++++++++++++++++++++++++++++++++ drivers/nfc/st21nfca/st21nfca_se.h | 63 ++++++ include/linux/platform_data/st21nfca.h | 2 + 7 files changed, 608 insertions(+), 21 deletions(-) create mode 100644 drivers/nfc/st21nfca/st21nfca_se.c create mode 100644 drivers/nfc/st21nfca/st21nfca_se.h (limited to 'include') diff --git a/drivers/nfc/st21nfca/Makefile b/drivers/nfc/st21nfca/Makefile index 7d688f97aa27..97edab4bbdf8 100644 --- a/drivers/nfc/st21nfca/Makefile +++ b/drivers/nfc/st21nfca/Makefile @@ -2,7 +2,7 @@ # Makefile for ST21NFCA HCI based NFC driver # -st21nfca_hci-objs = st21nfca.o st21nfca_dep.o +st21nfca_hci-objs = st21nfca.o st21nfca_dep.o st21nfca_se.o obj-$(CONFIG_NFC_ST21NFCA) += st21nfca_hci.o st21nfca_i2c-objs = i2c.o diff --git a/drivers/nfc/st21nfca/i2c.c b/drivers/nfc/st21nfca/i2c.c index 82b82dbd2997..a32143951616 100644 --- a/drivers/nfc/st21nfca/i2c.c +++ b/drivers/nfc/st21nfca/i2c.c @@ -74,6 +74,8 @@ struct st21nfca_i2c_phy { unsigned int gpio_ena; unsigned int irq_polarity; + struct st21nfca_se_status se_status; + struct sk_buff *pending_skb; int current_read_len; /* @@ -537,6 +539,11 @@ static int st21nfca_hci_i2c_of_request_resources(struct i2c_client *client) phy->irq_polarity = irq_get_trigger_type(client->irq); + phy->se_status.is_ese_present = + of_property_read_bool(pp, "ese-present"); + phy->se_status.is_uicc_present = + of_property_read_bool(pp, "uicc-present"); + return 0; } #else @@ -571,6 +578,9 @@ static int st21nfca_hci_i2c_request_resources(struct i2c_client *client) } } + phy->se_status.is_ese_present = pdata->is_ese_present; + phy->se_status.is_uicc_present = pdata->is_uicc_present; + return 0; } @@ -638,8 +648,11 @@ static int st21nfca_hci_i2c_probe(struct i2c_client *client, } return st21nfca_hci_probe(phy, &i2c_phy_ops, LLC_SHDLC_NAME, - ST21NFCA_FRAME_HEADROOM, ST21NFCA_FRAME_TAILROOM, - ST21NFCA_HCI_LLC_MAX_PAYLOAD, &phy->hdev); + ST21NFCA_FRAME_HEADROOM, + ST21NFCA_FRAME_TAILROOM, + ST21NFCA_HCI_LLC_MAX_PAYLOAD, + &phy->hdev, + &phy->se_status); } static int st21nfca_hci_i2c_remove(struct i2c_client *client) diff --git a/drivers/nfc/st21nfca/st21nfca.c b/drivers/nfc/st21nfca/st21nfca.c index 7cb7ce47d2af..24d3d240d5f4 100644 --- a/drivers/nfc/st21nfca/st21nfca.c +++ b/drivers/nfc/st21nfca/st21nfca.c @@ -23,6 +23,7 @@ #include "st21nfca.h" #include "st21nfca_dep.h" +#include "st21nfca_se.h" #define DRIVER_DESC "HCI NFC driver for ST21NFCA" @@ -62,7 +63,6 @@ #define ST21NFCA_RF_CARD_F_DATARATE 0x08 #define ST21NFCA_RF_CARD_F_DATARATE_212_424 0x01 -#define ST21NFCA_DEVICE_MGNT_GATE 0x01 #define ST21NFCA_DEVICE_MGNT_PIPE 0x02 #define ST21NFCA_DM_GETINFO 0x13 @@ -78,6 +78,11 @@ #define ST21NFCA_NFC_MODE 0x03 /* NFC_MODE parameter*/ +#define ST21NFCA_EVT_HOT_PLUG 0x03 +#define ST21NFCA_EVT_HOT_PLUG_IS_INHIBITED(x) (x->data[0] & 0x80) + +#define ST21NFCA_SE_TO_PIPES 2000 + static DECLARE_BITMAP(dev_mask, ST21NFCA_NUM_DEVICES); static struct nfc_hci_gate st21nfca_gates[] = { @@ -92,6 +97,10 @@ static struct nfc_hci_gate st21nfca_gates[] = { {ST21NFCA_RF_READER_14443_3_A_GATE, NFC_HCI_INVALID_PIPE}, {ST21NFCA_RF_READER_ISO15693_GATE, NFC_HCI_INVALID_PIPE}, {ST21NFCA_RF_CARD_F_GATE, NFC_HCI_INVALID_PIPE}, + + /* Secure element pipes are created by secure element host */ + 
{ST21NFCA_CONNECTIVITY_GATE, NFC_HCI_DO_NOT_CREATE_PIPE}, + {ST21NFCA_APDU_READER_GATE, NFC_HCI_DO_NOT_CREATE_PIPE}, }; struct st21nfca_pipe_info { @@ -136,7 +145,8 @@ static int st21nfca_hci_load_session(struct nfc_hci_dev *hdev) * Pipe can be closed and need to be open. */ r = nfc_hci_connect_gate(hdev, NFC_HCI_HOST_CONTROLLER_ID, - ST21NFCA_DEVICE_MGNT_GATE, ST21NFCA_DEVICE_MGNT_PIPE); + ST21NFCA_DEVICE_MGNT_GATE, + ST21NFCA_DEVICE_MGNT_PIPE); if (r < 0) goto free_info; @@ -167,17 +177,28 @@ static int st21nfca_hci_load_session(struct nfc_hci_dev *hdev) * - destination gid (1byte) */ info = (struct st21nfca_pipe_info *) skb_pipe_info->data; + if (info->dst_gate_id == ST21NFCA_APDU_READER_GATE && + info->src_host_id != ST21NFCA_ESE_HOST_ID) { + pr_err("Unexpected apdu_reader pipe on host %x\n", + info->src_host_id); + continue; + } + for (j = 0; (j < ARRAY_SIZE(st21nfca_gates)) && - (st21nfca_gates[j].gate != info->dst_gate_id); - j++) + (st21nfca_gates[j].gate != info->dst_gate_id) ; j++) ; if (j < ARRAY_SIZE(st21nfca_gates) && st21nfca_gates[j].gate == info->dst_gate_id && ST21NFCA_DM_IS_PIPE_OPEN(info->pipe_state)) { st21nfca_gates[j].pipe = pipe_info[2]; + hdev->gate2pipe[st21nfca_gates[j].gate] = - st21nfca_gates[j].pipe; + st21nfca_gates[j].pipe; + hdev->pipes[st21nfca_gates[j].pipe].gate = + st21nfca_gates[j].gate; + hdev->pipes[st21nfca_gates[j].pipe].dest_host = + info->src_host_id; } } @@ -187,7 +208,7 @@ static int st21nfca_hci_load_session(struct nfc_hci_dev *hdev) */ if (skb_pipe_list->len + 3 < ARRAY_SIZE(st21nfca_gates)) { for (i = skb_pipe_list->len + 3; - i < ARRAY_SIZE(st21nfca_gates); i++) { + i < ARRAY_SIZE(st21nfca_gates) - 2; i++) { r = nfc_hci_connect_gate(hdev, NFC_HCI_HOST_CONTROLLER_ID, st21nfca_gates[i].gate, @@ -244,16 +265,33 @@ out: static int st21nfca_hci_ready(struct nfc_hci_dev *hdev) { + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); struct sk_buff *skb; u8 param; + u8 white_list[2]; + int wl_size = 0; int r; - param = NFC_HCI_UICC_HOST_ID; - r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE, - NFC_HCI_ADMIN_WHITELIST, ¶m, 1); - if (r < 0) - return r; + if (info->se_status->is_ese_present && + info->se_status->is_uicc_present) { + white_list[wl_size++] = NFC_HCI_UICC_HOST_ID; + white_list[wl_size++] = ST21NFCA_ESE_HOST_ID; + } else if (!info->se_status->is_ese_present && + info->se_status->is_uicc_present) { + white_list[wl_size++] = NFC_HCI_UICC_HOST_ID; + } else if (info->se_status->is_ese_present && + !info->se_status->is_uicc_present) { + white_list[wl_size++] = ST21NFCA_ESE_HOST_ID; + } + + if (wl_size) { + r = nfc_hci_set_param(hdev, NFC_HCI_ADMIN_GATE, + NFC_HCI_ADMIN_WHITELIST, + (u8 *) &white_list, wl_size); + if (r < 0) + return r; + } /* Set NFC_MODE in device management gate to enable */ r = nfc_hci_get_param(hdev, ST21NFCA_DEVICE_MGNT_GATE, @@ -821,19 +859,79 @@ static int st21nfca_hci_check_presence(struct nfc_hci_dev *hdev, } } +static void st21nfca_hci_cmd_received(struct nfc_hci_dev *hdev, u8 pipe, u8 cmd, + struct sk_buff *skb) +{ + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + u8 gate = hdev->pipes[pipe].gate; + + pr_debug("cmd: %x\n", cmd); + + switch (cmd) { + case NFC_HCI_ANY_OPEN_PIPE: + if (gate != ST21NFCA_APDU_READER_GATE && + hdev->pipes[pipe].dest_host != NFC_HCI_UICC_HOST_ID) + info->se_info.count_pipes++; + + if (info->se_info.count_pipes == info->se_info.expected_pipes) { + del_timer_sync(&info->se_info.se_active_timer); + info->se_info.se_active = false; + info->se_info.count_pipes = 0; + 
complete(&info->se_info.req_completion); + } + break; + } +} + +static int st21nfca_admin_event_received(struct nfc_hci_dev *hdev, u8 event, + struct sk_buff *skb) +{ + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + + pr_debug("admin event: %x\n", event); + + switch (event) { + case ST21NFCA_EVT_HOT_PLUG: + if (info->se_info.se_active) { + if (!ST21NFCA_EVT_HOT_PLUG_IS_INHIBITED(skb)) { + del_timer_sync(&info->se_info.se_active_timer); + info->se_info.se_active = false; + complete(&info->se_info.req_completion); + } else { + mod_timer(&info->se_info.se_active_timer, + jiffies + + msecs_to_jiffies(ST21NFCA_SE_TO_PIPES)); + } + } + break; + } + kfree_skb(skb); + return 0; +} + /* * Returns: * <= 0: driver handled the event, skb consumed * 1: driver does not handle the event, please do standard processing */ -static int st21nfca_hci_event_received(struct nfc_hci_dev *hdev, u8 gate, +static int st21nfca_hci_event_received(struct nfc_hci_dev *hdev, u8 pipe, u8 event, struct sk_buff *skb) { + u8 gate = hdev->pipes[pipe].gate; + u8 host = hdev->pipes[pipe].dest_host; + pr_debug("hci event: %d gate: %x\n", event, gate); switch (gate) { + case NFC_HCI_ADMIN_GATE: + return st21nfca_admin_event_received(hdev, event, skb); case ST21NFCA_RF_CARD_F_GATE: return st21nfca_dep_event_received(hdev, event, skb); + case ST21NFCA_CONNECTIVITY_GATE: + return st21nfca_connectivity_event_received(hdev, host, + event, skb); + case ST21NFCA_APDU_READER_GATE: + return st21nfca_apdu_reader_event_received(hdev, event, skb); default: return 1; } @@ -855,11 +953,17 @@ static struct nfc_hci_ops st21nfca_hci_ops = { .tm_send = st21nfca_hci_tm_send, .check_presence = st21nfca_hci_check_presence, .event_received = st21nfca_hci_event_received, + .cmd_received = st21nfca_hci_cmd_received, + .discover_se = st21nfca_hci_discover_se, + .enable_se = st21nfca_hci_enable_se, + .disable_se = st21nfca_hci_disable_se, + .se_io = st21nfca_hci_se_io, }; int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, int phy_headroom, int phy_tailroom, - int phy_payload, struct nfc_hci_dev **hdev) + int phy_payload, struct nfc_hci_dev **hdev, + struct st21nfca_se_status *se_status) { struct st21nfca_hci_info *info; int r = 0; @@ -919,6 +1023,8 @@ int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, goto err_alloc_hdev; } + info->se_status = se_status; + nfc_hci_set_clientdata(info->hdev, info); r = nfc_hci_register_device(info->hdev); @@ -927,6 +1033,7 @@ int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, *hdev = info->hdev; st21nfca_dep_init(info->hdev); + st21nfca_se_init(info->hdev); return 0; @@ -945,6 +1052,7 @@ void st21nfca_hci_remove(struct nfc_hci_dev *hdev) struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); st21nfca_dep_deinit(hdev); + st21nfca_se_deinit(hdev); nfc_hci_unregister_device(hdev); nfc_hci_free_device(hdev); kfree(info); diff --git a/drivers/nfc/st21nfca/st21nfca.h b/drivers/nfc/st21nfca/st21nfca.h index 7c2a85292230..15a78d330a9f 100644 --- a/drivers/nfc/st21nfca/st21nfca.h +++ b/drivers/nfc/st21nfca/st21nfca.h @@ -20,6 +20,7 @@ #include #include "st21nfca_dep.h" +#include "st21nfca_se.h" #define HCI_MODE 0 @@ -51,9 +52,15 @@ #define ST21NFCA_NUM_DEVICES 256 +struct st21nfca_se_status { + bool is_ese_present; + bool is_uicc_present; +}; + int st21nfca_hci_probe(void *phy_id, struct nfc_phy_ops *phy_ops, char *llc_name, int phy_headroom, int phy_tailroom, - int phy_payload, struct nfc_hci_dev **hdev); + int phy_payload, struct nfc_hci_dev 
**hdev, + struct st21nfca_se_status *se_status); void st21nfca_hci_remove(struct nfc_hci_dev *hdev); enum st21nfca_state { @@ -66,6 +73,7 @@ struct st21nfca_hci_info { void *phy_id; struct nfc_hci_dev *hdev; + struct st21nfca_se_status *se_status; enum st21nfca_state state; @@ -76,13 +84,16 @@ struct st21nfca_hci_info { void *async_cb_context; struct st21nfca_dep_info dep_info; + struct st21nfca_se_info se_info; }; /* Reader RF commands */ -#define ST21NFCA_WR_XCHG_DATA 0x10 - -#define ST21NFCA_RF_READER_F_GATE 0x14 +#define ST21NFCA_WR_XCHG_DATA 0x10 -#define ST21NFCA_RF_CARD_F_GATE 0x24 +#define ST21NFCA_DEVICE_MGNT_GATE 0x01 +#define ST21NFCA_RF_READER_F_GATE 0x14 +#define ST21NFCA_RF_CARD_F_GATE 0x24 +#define ST21NFCA_APDU_READER_GATE 0xf0 +#define ST21NFCA_CONNECTIVITY_GATE 0x41 #endif /* __LOCAL_ST21NFCA_H_ */ diff --git a/drivers/nfc/st21nfca/st21nfca_se.c b/drivers/nfc/st21nfca/st21nfca_se.c new file mode 100644 index 000000000000..9b93d3904ab5 --- /dev/null +++ b/drivers/nfc/st21nfca/st21nfca_se.c @@ -0,0 +1,390 @@ +/* + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#include + +#include "st21nfca.h" +#include "st21nfca_se.h" + +#define ST21NFCA_EVT_UICC_ACTIVATE 0x10 +#define ST21NFCA_EVT_UICC_DEACTIVATE 0x13 +#define ST21NFCA_EVT_SE_HARD_RESET 0x20 +#define ST21NFCA_EVT_SE_SOFT_RESET 0x11 +#define ST21NFCA_EVT_SE_END_OF_APDU_TRANSFER 0x21 +#define ST21NFCA_EVT_SE_ACTIVATE 0x22 +#define ST21NFCA_EVT_SE_DEACTIVATE 0x23 + +#define ST21NFCA_EVT_TRANSMIT_DATA 0x10 +#define ST21NFCA_EVT_WTX_REQUEST 0x11 + +#define ST21NFCA_EVT_CONNECTIVITY 0x10 +#define ST21NFCA_EVT_TRANSACTION 0x12 + +#define ST21NFCA_ESE_HOST_ID 0xc0 + +#define ST21NFCA_SE_TO_HOT_PLUG 1000 +/* Connectivity pipe only */ +#define ST21NFCA_SE_COUNT_PIPE_UICC 0x01 +/* Connectivity + APDU Reader pipe */ +#define ST21NFCA_SE_COUNT_PIPE_EMBEDDED 0x02 + +#define ST21NFCA_SE_MODE_OFF 0x00 +#define ST21NFCA_SE_MODE_ON 0x01 + +#define ST21NFCA_PARAM_ATR 0x01 +#define ST21NFCA_ATR_DEFAULT_BWI 0x04 + +/* + * WT = 2^BWI/10[s], convert into msecs and add a secure + * room by increasing by 2 this timeout + */ +#define ST21NFCA_BWI_TO_TIMEOUT(x) ((1 << x) * 200) +#define ST21NFCA_ATR_GET_Y_FROM_TD(x) (x >> 4) + +/* If TA is present bit 0 is set */ +#define ST21NFCA_ATR_TA_PRESENT(x) (x & 0x01) +/* If TB is present bit 1 is set */ +#define ST21NFCA_ATR_TB_PRESENT(x) (x & 0x02) + +static u8 st21nfca_se_get_bwi(struct nfc_hci_dev *hdev) +{ + int i; + u8 td; + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + + /* Bits 8 to 5 of the first TB for T=1 encode BWI from zero to nine */ + for (i = 1; i < ST21NFCA_ESE_MAX_LENGTH; i++) { + td = ST21NFCA_ATR_GET_Y_FROM_TD(info->se_info.atr[i]); + if (ST21NFCA_ATR_TA_PRESENT(td)) + i++; + if (ST21NFCA_ATR_TB_PRESENT(td)) { + i++; + return info->se_info.atr[i] >> 4; + } + } + return ST21NFCA_ATR_DEFAULT_BWI; +} + +static void st21nfca_se_get_atr(struct nfc_hci_dev *hdev) +{ + int r; + 
struct sk_buff *skb; + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + + r = nfc_hci_get_param(hdev, ST21NFCA_APDU_READER_GATE, + ST21NFCA_PARAM_ATR, &skb); + if (r < 0) + return; + + if (skb->len <= ST21NFCA_ESE_MAX_LENGTH) { + memcpy(info->se_info.atr, skb->data, skb->len); + info->se_info.wt_timeout = + ST21NFCA_BWI_TO_TIMEOUT(st21nfca_se_get_bwi(hdev)); + } + kfree_skb(skb); +} + +static int st21nfca_hci_control_se(struct nfc_hci_dev *hdev, u32 se_idx, + u8 state) +{ + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + int r; + struct sk_buff *sk_host_list; + u8 se_event, host_id; + + switch (se_idx) { + case NFC_HCI_UICC_HOST_ID: + se_event = (state == ST21NFCA_SE_MODE_ON ? + ST21NFCA_EVT_UICC_ACTIVATE : + ST21NFCA_EVT_UICC_DEACTIVATE); + + info->se_info.count_pipes = 0; + info->se_info.expected_pipes = ST21NFCA_SE_COUNT_PIPE_UICC; + break; + case ST21NFCA_ESE_HOST_ID: + se_event = (state == ST21NFCA_SE_MODE_ON ? + ST21NFCA_EVT_SE_ACTIVATE : + ST21NFCA_EVT_SE_DEACTIVATE); + + info->se_info.count_pipes = 0; + info->se_info.expected_pipes = ST21NFCA_SE_COUNT_PIPE_EMBEDDED; + break; + default: + return -EINVAL; + } + + /* + * Wait for an EVT_HOT_PLUG in order to + * retrieve a relevant host list. + */ + reinit_completion(&info->se_info.req_completion); + r = nfc_hci_send_event(hdev, ST21NFCA_DEVICE_MGNT_GATE, se_event, + NULL, 0); + if (r < 0) + return r; + + mod_timer(&info->se_info.se_active_timer, jiffies + + msecs_to_jiffies(ST21NFCA_SE_TO_HOT_PLUG)); + info->se_info.se_active = true; + + /* Ignore return value and check in any case the host_list */ + wait_for_completion_interruptible(&info->se_info.req_completion); + + r = nfc_hci_get_param(hdev, NFC_HCI_ADMIN_GATE, + NFC_HCI_ADMIN_HOST_LIST, + &sk_host_list); + if (r < 0) + return r; + + host_id = sk_host_list->data[sk_host_list->len - 1]; + kfree_skb(sk_host_list); + + if (state == ST21NFCA_SE_MODE_ON && host_id == se_idx) + return se_idx; + else if (state == ST21NFCA_SE_MODE_OFF && host_id != se_idx) + return se_idx; + + return -1; +} + +int st21nfca_hci_discover_se(struct nfc_hci_dev *hdev) +{ + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + int se_count = 0; + + if (info->se_status->is_uicc_present) { + nfc_add_se(hdev->ndev, NFC_HCI_UICC_HOST_ID, NFC_SE_UICC); + se_count++; + } + + if (info->se_status->is_ese_present) { + nfc_add_se(hdev->ndev, ST21NFCA_ESE_HOST_ID, NFC_SE_EMBEDDED); + se_count++; + } + + return !se_count; +} +EXPORT_SYMBOL(st21nfca_hci_discover_se); + +int st21nfca_hci_enable_se(struct nfc_hci_dev *hdev, u32 se_idx) +{ + int r; + + /* + * According to upper layer, se_idx == NFC_SE_UICC when + * info->se_status->is_uicc_enable is true should never happen. + * Same for eSE. + */ + r = st21nfca_hci_control_se(hdev, se_idx, ST21NFCA_SE_MODE_ON); + + if (r == ST21NFCA_ESE_HOST_ID) { + st21nfca_se_get_atr(hdev); + r = nfc_hci_send_event(hdev, ST21NFCA_APDU_READER_GATE, + ST21NFCA_EVT_SE_SOFT_RESET, NULL, 0); + if (r < 0) + return r; + } else if (r < 0) { + /* + * The activation tentative failed, the secure element + * is not connected. Remove from the list. + */ + nfc_remove_se(hdev->ndev, se_idx); + return r; + } + + return 0; +} +EXPORT_SYMBOL(st21nfca_hci_enable_se); + +int st21nfca_hci_disable_se(struct nfc_hci_dev *hdev, u32 se_idx) +{ + int r; + + /* + * According to upper layer, se_idx == NFC_SE_UICC when + * info->se_status->is_uicc_enable is true should never happen + * Same for eSE. 
+ */ + r = st21nfca_hci_control_se(hdev, se_idx, ST21NFCA_SE_MODE_OFF); + if (r < 0) + return r; + + return 0; +} +EXPORT_SYMBOL(st21nfca_hci_disable_se); + +int st21nfca_hci_se_io(struct nfc_hci_dev *hdev, u32 se_idx, + u8 *apdu, size_t apdu_length, + se_io_cb_t cb, void *cb_context) +{ + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + + pr_debug("se_io %x\n", se_idx); + + switch (se_idx) { + case ST21NFCA_ESE_HOST_ID: + info->se_info.cb = cb; + info->se_info.cb_context = cb_context; + mod_timer(&info->se_info.bwi_timer, jiffies + + msecs_to_jiffies(info->se_info.wt_timeout)); + info->se_info.bwi_active = true; + return nfc_hci_send_event(hdev, ST21NFCA_APDU_READER_GATE, + ST21NFCA_EVT_TRANSMIT_DATA, + apdu, apdu_length); + default: + return -ENODEV; + } +} +EXPORT_SYMBOL(st21nfca_hci_se_io); + +static void st21nfca_se_wt_timeout(unsigned long data) +{ + /* + * No answer from the secure element + * within the defined timeout. + * Let's send a reset request as recovery procedure. + * According to the situation, we first try to send a software reset + * to the secure element. If the next command is still not + * answering in time, we send to the CLF a secure element hardware + * reset request. + */ + /* hardware reset managed through VCC_UICC_OUT power supply */ + u8 param = 0x01; + struct st21nfca_hci_info *info = (struct st21nfca_hci_info *) data; + + pr_debug("\n"); + + info->se_info.bwi_active = false; + + if (!info->se_info.xch_error) { + info->se_info.xch_error = true; + nfc_hci_send_event(info->hdev, ST21NFCA_APDU_READER_GATE, + ST21NFCA_EVT_SE_SOFT_RESET, NULL, 0); + } else { + info->se_info.xch_error = false; + nfc_hci_send_event(info->hdev, ST21NFCA_DEVICE_MGNT_GATE, + ST21NFCA_EVT_SE_HARD_RESET, ¶m, 1); + } + info->se_info.cb(info->se_info.cb_context, NULL, 0, -ETIME); +} + +static void st21nfca_se_activation_timeout(unsigned long data) +{ + struct st21nfca_hci_info *info = (struct st21nfca_hci_info *) data; + + pr_debug("\n"); + + info->se_info.se_active = false; + + complete(&info->se_info.req_completion); +} + +/* + * Returns: + * <= 0: driver handled the event, skb consumed + * 1: driver does not handle the event, please do standard processing + */ +int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host, + u8 event, struct sk_buff *skb) +{ + int r = 0; + + pr_debug("connectivity gate event: %x\n", event); + + switch (event) { + case ST21NFCA_EVT_CONNECTIVITY: + break; + case ST21NFCA_EVT_TRANSACTION: + break; + default: + return 1; + } + kfree_skb(skb); + return r; +} +EXPORT_SYMBOL(st21nfca_connectivity_event_received); + +int st21nfca_apdu_reader_event_received(struct nfc_hci_dev *hdev, + u8 event, struct sk_buff *skb) +{ + int r = 0; + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + + pr_debug("apdu reader gate event: %x\n", event); + + switch (event) { + case ST21NFCA_EVT_TRANSMIT_DATA: + del_timer_sync(&info->se_info.bwi_timer); + info->se_info.bwi_active = false; + r = nfc_hci_send_event(hdev, ST21NFCA_DEVICE_MGNT_GATE, + ST21NFCA_EVT_SE_END_OF_APDU_TRANSFER, NULL, 0); + if (r < 0) + goto exit; + + info->se_info.cb(info->se_info.cb_context, + skb->data, skb->len, 0); + break; + case ST21NFCA_EVT_WTX_REQUEST: + mod_timer(&info->se_info.bwi_timer, jiffies + + msecs_to_jiffies(info->se_info.wt_timeout)); + break; + } + +exit: + kfree_skb(skb); + return r; +} +EXPORT_SYMBOL(st21nfca_apdu_reader_event_received); + +void st21nfca_se_init(struct nfc_hci_dev *hdev) +{ + struct st21nfca_hci_info *info = 
nfc_hci_get_clientdata(hdev); + + init_completion(&info->se_info.req_completion); + /* initialize timers */ + init_timer(&info->se_info.bwi_timer); + info->se_info.bwi_timer.data = (unsigned long)info; + info->se_info.bwi_timer.function = st21nfca_se_wt_timeout; + info->se_info.bwi_active = false; + + init_timer(&info->se_info.se_active_timer); + info->se_info.se_active_timer.data = (unsigned long)info; + info->se_info.se_active_timer.function = st21nfca_se_activation_timeout; + info->se_info.se_active = false; + + info->se_info.count_pipes = 0; + info->se_info.expected_pipes = 0; + + info->se_info.xch_error = false; + + info->se_info.wt_timeout = + ST21NFCA_BWI_TO_TIMEOUT(ST21NFCA_ATR_DEFAULT_BWI); +} +EXPORT_SYMBOL(st21nfca_se_init); + +void st21nfca_se_deinit(struct nfc_hci_dev *hdev) +{ + struct st21nfca_hci_info *info = nfc_hci_get_clientdata(hdev); + + if (info->se_info.bwi_active) + del_timer_sync(&info->se_info.bwi_timer); + if (info->se_info.se_active) + del_timer_sync(&info->se_info.se_active_timer); + + info->se_info.bwi_active = false; + info->se_info.se_active = false; +} +EXPORT_SYMBOL(st21nfca_se_deinit); diff --git a/drivers/nfc/st21nfca/st21nfca_se.h b/drivers/nfc/st21nfca/st21nfca_se.h new file mode 100644 index 000000000000..b172cfcaeb90 --- /dev/null +++ b/drivers/nfc/st21nfca/st21nfca_se.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + */ + +#ifndef __ST21NFCA_SE_H +#define __ST21NFCA_SE_H + +#include +#include + +/* + * ref ISO7816-3 chap 8.1. the initial character TS is followed by a + * sequence of at most 32 characters. 
+ */ +#define ST21NFCA_ESE_MAX_LENGTH 33 +#define ST21NFCA_ESE_HOST_ID 0xc0 + +struct st21nfca_se_info { + u8 atr[ST21NFCA_ESE_MAX_LENGTH]; + struct completion req_completion; + + struct timer_list bwi_timer; + int wt_timeout; /* in msecs */ + bool bwi_active; + + struct timer_list se_active_timer; + bool se_active; + int expected_pipes; + int count_pipes; + + bool xch_error; + + se_io_cb_t cb; + void *cb_context; +}; + +int st21nfca_connectivity_event_received(struct nfc_hci_dev *hdev, u8 host, + u8 event, struct sk_buff *skb); +int st21nfca_apdu_reader_event_received(struct nfc_hci_dev *hdev, + u8 event, struct sk_buff *skb); + +int st21nfca_hci_discover_se(struct nfc_hci_dev *hdev); +int st21nfca_hci_enable_se(struct nfc_hci_dev *hdev, u32 se_idx); +int st21nfca_hci_disable_se(struct nfc_hci_dev *hdev, u32 se_idx); +int st21nfca_hci_se_io(struct nfc_hci_dev *hdev, u32 se_idx, + u8 *apdu, size_t apdu_length, + se_io_cb_t cb, void *cb_context); + +void st21nfca_se_init(struct nfc_hci_dev *hdev); +void st21nfca_se_deinit(struct nfc_hci_dev *hdev); +#endif /* __ST21NFCA_SE_H */ diff --git a/include/linux/platform_data/st21nfca.h b/include/linux/platform_data/st21nfca.h index 5087fff96d86..cc2bdafb0c69 100644 --- a/include/linux/platform_data/st21nfca.h +++ b/include/linux/platform_data/st21nfca.h @@ -26,6 +26,8 @@ struct st21nfca_nfc_platform_data { unsigned int gpio_ena; unsigned int irq_polarity; + bool is_ese_present; + bool is_uicc_present; }; #endif /* _ST21NFCA_HCI_H_ */ -- cgit v1.2.3-70-g09d2 From 2477bc9a3db53540c64687c79efae9a7f1f60cef Mon Sep 17 00:00:00 2001 From: Jonathan Toppins Date: Mon, 26 Jan 2015 01:16:57 -0500 Subject: bonding: update bond carrier state when min_links option changes Cc: Andy Gospodarek Signed-off-by: Jonathan Toppins Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- drivers/net/bonding/bond_options.c | 1 + include/net/bonding.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index f47bc433407a..f83ace6bab2f 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -334,7 +334,7 @@ static int bond_vlan_rx_kill_vid(struct net_device *bond_dev, * * Returns zero if carrier state does not change, nonzero if it does. 
*/ -static int bond_set_carrier(struct bonding *bond) +int bond_set_carrier(struct bonding *bond) { struct list_head *iter; struct slave *slave; diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 9bd538d4474b..4df28943d222 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -1181,6 +1181,7 @@ static int bond_option_min_links_set(struct bonding *bond, netdev_info(bond->dev, "Setting min links value to %llu\n", newval->value); bond->params.min_links = newval->value; + bond_set_carrier(bond); return 0; } diff --git a/include/net/bonding.h b/include/net/bonding.h index 983a94b86b95..29f53eacac0a 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -525,6 +525,7 @@ void bond_sysfs_slave_del(struct slave *slave); int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev); int bond_release(struct net_device *bond_dev, struct net_device *slave_dev); u32 bond_xmit_hash(struct bonding *bond, struct sk_buff *skb); +int bond_set_carrier(struct bonding *bond); void bond_select_active_slave(struct bonding *bond); void bond_change_active_slave(struct bonding *bond, struct slave *new_active); void bond_create_debugfs(void); -- cgit v1.2.3-70-g09d2 From 303691042d2fc996125f479cf01bd5ead8b90a16 Mon Sep 17 00:00:00 2001 From: Jonathan Toppins Date: Mon, 26 Jan 2015 01:17:01 -0500 Subject: bonding: cleanup and remove dead code fix sparse warning about non-static function drivers/net/bonding/bond_main.c:3737:5: warning: symbol 'bond_3ad_xor_xmit' was not declared. Should it be static? Reviewed-by: Nikolay Aleksandrov Signed-off-by: Jonathan Toppins Signed-off-by: Jay Vosburgh Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 2 +- include/net/bond_3ad.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index beff00e7e110..e229a8657de8 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -3734,7 +3734,7 @@ out: * usable slave array is formed in the control path. The xmit function * just calculates hash and sends the packet out. */ -int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev) +static int bond_3ad_xor_xmit(struct sk_buff *skb, struct net_device *dev) { struct bonding *bond = netdev_priv(dev); struct slave *slave; diff --git a/include/net/bond_3ad.h b/include/net/bond_3ad.h index e01d903633ef..f04cdbb7848e 100644 --- a/include/net/bond_3ad.h +++ b/include/net/bond_3ad.h @@ -274,7 +274,6 @@ void bond_3ad_handle_link_change(struct slave *slave, char link); int bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info); int __bond_3ad_get_active_agg_info(struct bonding *bond, struct ad_info *ad_info); -int bond_3ad_xmit_xor(struct sk_buff *skb, struct net_device *dev); int bond_3ad_lacpdu_recv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave); int bond_3ad_set_carrier(struct bonding *bond); -- cgit v1.2.3-70-g09d2 From 4967082b469320eeba54ffbca632af1962858fb7 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 26 Jan 2015 14:10:53 +0100 Subject: vxlan: advertise link netns in fdb messages Previous commit is based on a wrong assumption, fdb messages are always sent into the netns where the interface stands (see vxlan_fdb_notify()). These fdb messages doesn't embed the rtnl attribute IFLA_LINK_NETNSID, thus we need to add it (useful to interpret NDA_IFINDEX or NDA_DST for example). 
Note also that vxlan_nlmsg_size() was not updated. Fixes: 193523bf9373 ("vxlan: advertise netns of vxlan dev in fdb msg") Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 5 +++-- include/uapi/linux/neighbour.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 87736e65cd15..31bac2a21ce3 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -341,8 +341,8 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan, ndm->ndm_type = RTN_UNICAST; if (!net_eq(dev_net(vxlan->dev), vxlan->net) && - nla_put_s32(skb, NDA_NDM_IFINDEX_NETNSID, - peernet2id(vxlan->net, dev_net(vxlan->dev)))) + nla_put_s32(skb, NDA_LINK_NETNSID, + peernet2id(dev_net(vxlan->dev), vxlan->net))) goto nla_put_failure; if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr)) @@ -385,6 +385,7 @@ static inline size_t vxlan_nlmsg_size(void) + nla_total_size(sizeof(__be16)) /* NDA_PORT */ + nla_total_size(sizeof(__be32)) /* NDA_VNI */ + nla_total_size(sizeof(__u32)) /* NDA_IFINDEX */ + + nla_total_size(sizeof(__s32)) /* NDA_LINK_NETNSID */ + nla_total_size(sizeof(struct nda_cacheinfo)); } diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h index 38f236853cc0..3873a35509aa 100644 --- a/include/uapi/linux/neighbour.h +++ b/include/uapi/linux/neighbour.h @@ -25,7 +25,7 @@ enum { NDA_VNI, NDA_IFINDEX, NDA_MASTER, - NDA_NDM_IFINDEX_NETNSID, + NDA_LINK_NETNSID, __NDA_MAX }; -- cgit v1.2.3-70-g09d2 From be6a6b43b597a37d96dbf74985f72045ccef0940 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 27 Jan 2015 15:57:59 +0200 Subject: net/mlx4_core: Add bad-cable event support If the firmware can detect a bad cable, allow it to generate an event, and print the problem in the log. Signed-off-by: Jack Morgenstein Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/eq.c | 22 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/fw.c | 9 ++++++++- include/linux/mlx4/device.h | 14 +++++++++++++- 3 files changed, 43 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/eq.c b/drivers/net/ethernet/mellanox/mlx4/eq.c index 2f2e6067426d..4df006d8afa4 100644 --- a/drivers/net/ethernet/mellanox/mlx4/eq.c +++ b/drivers/net/ethernet/mellanox/mlx4/eq.c @@ -88,6 +88,8 @@ static u64 get_async_ev_mask(struct mlx4_dev *dev) u64 async_ev_mask = MLX4_ASYNC_EVENT_MASK; if (dev->caps.flags & MLX4_DEV_CAP_FLAG_PORT_MNG_CHG_EV) async_ev_mask |= (1ull << MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT); + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT) + async_ev_mask |= (1ull << MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT); return async_ev_mask; } @@ -736,6 +738,26 @@ static int mlx4_eq_int(struct mlx4_dev *dev, struct mlx4_eq *eq) (unsigned long) eqe); break; + case MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT: + switch (eqe->subtype) { + case MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_BAD_CABLE: + mlx4_warn(dev, "Bad cable detected on port %u\n", + eqe->event.bad_cable.port); + break; + case MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_UNSUPPORTED_CABLE: + mlx4_warn(dev, "Unsupported cable detected\n"); + break; + default: + mlx4_dbg(dev, + "Unhandled recoverable error event detected: %02x(%02x) on EQ %d at index %u. owner=%x, nent=0x%x, ownership=%s\n", + eqe->type, eqe->subtype, eq->eqn, + eq->cons_index, eqe->owner, eq->nent, + !!(eqe->owner & 0x80) ^ + !!(eq->cons_index & eq->nent) ? 
"HW" : "SW"); + break; + } + break; + case MLX4_EVENT_TYPE_EEC_CATAS_ERROR: case MLX4_EVENT_TYPE_ECC_DETECT: default: diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 982861d1df44..2eadc2882e4f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -145,7 +145,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [16] = "CONFIG DEV support", [17] = "Asymmetric EQs support", [18] = "More than 80 VFs support", - [19] = "Performance optimized for limited rule configuration flow steering support" + [19] = "Performance optimized for limited rule configuration flow steering support", + [20] = "Recoverable error events support" }; int i; @@ -859,6 +860,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) MLX4_GET(field32, outbox, QUERY_DEV_CAP_ETH_BACKPL_OFFSET); if (field32 & (1 << 0)) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_ETH_BACKPL_AN_REP; + if (field32 & (1 << 7)) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT; MLX4_GET(field, outbox, QUERY_DEV_CAP_FW_REASSIGN_MAC); if (field & 1<<6) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_REASSIGN_MAC_EN; @@ -1562,6 +1565,7 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) #define INIT_HCA_VXLAN_OFFSET 0x0c #define INIT_HCA_CACHELINE_SZ_OFFSET 0x0e #define INIT_HCA_FLAGS_OFFSET 0x014 +#define INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET 0x018 #define INIT_HCA_QPC_OFFSET 0x020 #define INIT_HCA_QPC_BASE_OFFSET (INIT_HCA_QPC_OFFSET + 0x10) #define INIT_HCA_LOG_QP_OFFSET (INIT_HCA_QPC_OFFSET + 0x17) @@ -1668,6 +1672,9 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) dev->caps.userspace_caps |= MLX4_USER_DEV_CAP_LARGE_CQE; } + if (dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT) + *(inbox + INIT_HCA_RECOVERABLE_ERROR_EVENT_OFFSET / 4) |= cpu_to_be32(1 << 31); + /* QPC/EEC/CQC/EQC/RDMARC attributes */ MLX4_PUT(inbox, param->qpc_base, INIT_HCA_QPC_BASE_OFFSET); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 5ef54e145e4d..c95d659a39f2 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -200,7 +200,8 @@ enum { MLX4_DEV_CAP_FLAG2_CONFIG_DEV = 1LL << 16, MLX4_DEV_CAP_FLAG2_SYS_EQS = 1LL << 17, MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18, - MLX4_DEV_CAP_FLAG2_FS_A0 = 1LL << 19 + MLX4_DEV_CAP_FLAG2_FS_A0 = 1LL << 19, + MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20 }; enum { @@ -280,6 +281,7 @@ enum mlx4_event { MLX4_EVENT_TYPE_FATAL_WARNING = 0x1b, MLX4_EVENT_TYPE_FLR_EVENT = 0x1c, MLX4_EVENT_TYPE_PORT_MNG_CHG_EVENT = 0x1d, + MLX4_EVENT_TYPE_RECOVERABLE_ERROR_EVENT = 0x3e, MLX4_EVENT_TYPE_NONE = 0xff, }; @@ -288,6 +290,11 @@ enum { MLX4_PORT_CHANGE_SUBTYPE_ACTIVE = 4 }; +enum { + MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_BAD_CABLE = 1, + MLX4_RECOVERABLE_ERROR_EVENT_SUBTYPE_UNSUPPORTED_CABLE = 2, +}; + enum { MLX4_FATAL_WARNING_SUBTYPE_WARMING = 0, }; @@ -860,6 +867,11 @@ struct mlx4_eqe { } __packed tbl_change_info; } params; } __packed port_mgmt_change; + struct { + u8 reserved[3]; + u8 port; + u32 reserved1[5]; + } __packed bad_cable; } event; u8 slave_id; u8 reserved3[2]; -- cgit v1.2.3-70-g09d2 From 5a03108689c6f3e448a920b42af04e6d28401f80 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 27 Jan 2015 15:58:02 +0200 Subject: net/mlx4_core: Adjust command timeouts to conform to the firmware spec The firmware spec states that the timeout for all commands should be 60 seconds. 
In the past, the spec indicated that there were several classes of timeout (short, medium, and long). The driver has these different timeout classes. We leave the class differentiation in the driver as-is (to protect against any future spec changes), but set the timeout for all classes to be 60 seconds. In addition, we fix a few commands which had hard-coded numeric timeouts specified. Signed-off-by: Jack Morgenstein Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/fw.c | 19 ++++++++++--------- drivers/net/ethernet/mellanox/mlx4/mr.c | 4 ++-- include/linux/mlx4/cmd.h | 6 +++--- 3 files changed, 15 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index 2aa7c232d0b6..0c90d1072e47 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -1774,8 +1774,8 @@ int mlx4_INIT_HCA(struct mlx4_dev *dev, struct mlx4_init_hca_param *param) MLX4_PUT(inbox, parser_params, INIT_HCA_VXLAN_OFFSET); } - err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, 10000, - MLX4_CMD_NATIVE); + err = mlx4_cmd(dev, mailbox->dma, 0, 0, MLX4_CMD_INIT_HCA, + MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); if (err) mlx4_err(dev, "INIT_HCA returns %d\n", err); @@ -2029,7 +2029,7 @@ int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, if (dev->caps.port_mask[port] != MLX4_PORT_TYPE_IB) { if (priv->mfunc.master.init_port_ref[port] == 1) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, - 1000, MLX4_CMD_NATIVE); + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; } @@ -2040,7 +2040,7 @@ int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, if (!priv->mfunc.master.qp0_state[port].qp0_active && priv->mfunc.master.qp0_state[port].port_active) { err = mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, - 1000, MLX4_CMD_NATIVE); + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); if (err) return err; priv->mfunc.master.slave_state[slave].init_port_mask &= ~(1 << port); @@ -2055,15 +2055,15 @@ int mlx4_CLOSE_PORT_wrapper(struct mlx4_dev *dev, int slave, int mlx4_CLOSE_PORT(struct mlx4_dev *dev, int port) { - return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, 1000, - MLX4_CMD_WRAPPED); + return mlx4_cmd(dev, 0, port, 0, MLX4_CMD_CLOSE_PORT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_WRAPPED); } EXPORT_SYMBOL_GPL(mlx4_CLOSE_PORT); int mlx4_CLOSE_HCA(struct mlx4_dev *dev, int panic) { - return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, 1000, - MLX4_CMD_NATIVE); + return mlx4_cmd(dev, 0, 0, panic, MLX4_CMD_CLOSE_HCA, + MLX4_CMD_TIME_CLASS_C, MLX4_CMD_NATIVE); } struct mlx4_config_dev { @@ -2202,7 +2202,8 @@ int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages) int mlx4_NOP(struct mlx4_dev *dev) { /* Input modifier of 0x1f means "finish as soon as possible." 
*/ - return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, 100, MLX4_CMD_NATIVE); + return mlx4_cmd(dev, 0, 0x1f, 0, MLX4_CMD_NOP, MLX4_CMD_TIME_CLASS_A, + MLX4_CMD_NATIVE); } int mlx4_get_phys_port_id(struct mlx4_dev *dev) diff --git a/drivers/net/ethernet/mellanox/mlx4/mr.c b/drivers/net/ethernet/mellanox/mlx4/mr.c index 8dbdf1d29357..d21e884a0838 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mr.c +++ b/drivers/net/ethernet/mellanox/mlx4/mr.c @@ -1155,7 +1155,7 @@ EXPORT_SYMBOL_GPL(mlx4_fmr_free); int mlx4_SYNC_TPT(struct mlx4_dev *dev) { - return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, 1000, - MLX4_CMD_NATIVE); + return mlx4_cmd(dev, 0, 0, 0, MLX4_CMD_SYNC_TPT, + MLX4_CMD_TIME_CLASS_A, MLX4_CMD_NATIVE); } EXPORT_SYMBOL_GPL(mlx4_SYNC_TPT); diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index c989442ffc6a..ae95adc78509 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -165,9 +165,9 @@ enum { }; enum { - MLX4_CMD_TIME_CLASS_A = 10000, - MLX4_CMD_TIME_CLASS_B = 10000, - MLX4_CMD_TIME_CLASS_C = 10000, + MLX4_CMD_TIME_CLASS_A = 60000, + MLX4_CMD_TIME_CLASS_B = 60000, + MLX4_CMD_TIME_CLASS_C = 60000, }; enum { -- cgit v1.2.3-70-g09d2 From c7741d16a57cbf97eebe53f27e8216b1ff20e20c Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Wed, 28 Jan 2015 11:09:55 -0800 Subject: Bluetooth: Perform a power cycle when receiving hardware error event When receiving a HCI Hardware Error event, the controller should be assumed to be non-functional until issuing a HCI Reset command. The Bluetooth hardware errors are vendor specific and so add a new hdev->hw_error callback that drivers can provide to run extra code to handle the hardware error. After completing the vendor specific error handling perform a full reset of the Bluetooth stack by closing and re-opening the transport. 
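Illustrative sketch (not part of this patch): how a transport driver could hook the new hw_error callback. The driver name "exdrv" and its recovery step are assumptions; only the callback signature and the error message format are taken from the change that follows.

static void exdrv_hw_error(struct hci_dev *hdev, u8 code)
{
	/* vendor-specific recovery (e.g. toggling a reset line) would go here */
	BT_ERR("%s vendor hardware error 0x%2.2x", hdev->name, code);
}

	/* in the hypothetical driver's setup/probe path: */
	hdev->hw_error = exdrv_hw_error;

Since hci_error_reset() still closes and re-opens the transport afterwards, such a hook only needs to cover the vendor-specific part of the recovery.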
Based-on-patch-by: Johan Hedberg Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 3 +++ net/bluetooth/hci_core.c | 21 +++++++++++++++++++++ net/bluetooth/hci_event.c | 4 +++- 3 files changed, 27 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 0f5e59f1e3cb..1780f1681ecf 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -232,6 +232,7 @@ struct hci_dev { __u16 conn_info_min_age; __u16 conn_info_max_age; __u8 ssp_debug_mode; + __u8 hw_error_code; __u32 clock; __u16 devid_source; @@ -293,6 +294,7 @@ struct hci_dev { struct work_struct power_on; struct delayed_work power_off; + struct work_struct error_reset; __u16 discov_timeout; struct delayed_work discov_off; @@ -369,6 +371,7 @@ struct hci_dev { int (*setup)(struct hci_dev *hdev); int (*send)(struct hci_dev *hdev, struct sk_buff *skb); void (*notify)(struct hci_dev *hdev, unsigned int evt); + void (*hw_error)(struct hci_dev *hdev, u8 code); int (*set_bdaddr)(struct hci_dev *hdev, const bdaddr_t *bdaddr); }; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index d4c9152474a9..79693a9ef4eb 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2151,6 +2151,26 @@ static void hci_power_off(struct work_struct *work) smp_unregister(hdev); } +static void hci_error_reset(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, error_reset); + + BT_DBG("%s", hdev->name); + + if (hdev->hw_error) + hdev->hw_error(hdev, hdev->hw_error_code); + else + BT_ERR("%s hardware error 0x%2.2x", hdev->name, + hdev->hw_error_code); + + if (hci_dev_do_close(hdev)) + return; + + smp_unregister(hdev); + + hci_dev_do_open(hdev); +} + static void hci_discov_off(struct work_struct *work) { struct hci_dev *hdev; @@ -2943,6 +2963,7 @@ struct hci_dev *hci_alloc_dev(void) INIT_WORK(&hdev->cmd_work, hci_cmd_work); INIT_WORK(&hdev->tx_work, hci_tx_work); INIT_WORK(&hdev->power_on, hci_power_on); + INIT_WORK(&hdev->error_reset, hci_error_reset); INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 4175470ff48e..a72a5f50728d 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -3100,7 +3100,9 @@ static void hci_hardware_error_evt(struct hci_dev *hdev, struct sk_buff *skb) { struct hci_ev_hardware_error *ev = (void *) skb->data; - BT_ERR("%s hardware error 0x%2.2x", hdev->name, ev->code); + hdev->hw_error_code = ev->code; + + queue_work(hdev->req_workqueue, &hdev->error_reset); } static void hci_role_change_evt(struct hci_dev *hdev, struct sk_buff *skb) -- cgit v1.2.3-70-g09d2 From b8693877ae016ac525d674d5d7a84ea0ea68ba60 Mon Sep 17 00:00:00 2001 From: Jesse Gross Date: Wed, 28 Jan 2015 16:32:46 -0800 Subject: openvswitch: Add support for checksums on UDP tunnels. Currently, it isn't possible to request checksums on the outer UDP header of tunnels - the TUNNEL_CSUM flag is ignored. This adds support for requesting that UDP checksums be computed on transmit and properly reported if they are present on receive. Signed-off-by: Jesse Gross Signed-off-by: David S. 
Miller --- include/net/geneve.h | 2 +- net/ipv4/geneve.c | 6 +++--- net/openvswitch/vport-geneve.c | 2 +- net/openvswitch/vport-vxlan.c | 7 +++++-- 4 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/geneve.h b/include/net/geneve.h index 03aa2adb5bab..14fb8d3390b4 100644 --- a/include/net/geneve.h +++ b/include/net/geneve.h @@ -90,7 +90,7 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, - bool xnet); + bool csum, bool xnet); #endif /*ifdef CONFIG_INET */ #endif /*ifdef__NET_GENEVE_H */ diff --git a/net/ipv4/geneve.c b/net/ipv4/geneve.c index 93e51199e44b..5a4828ba05ad 100644 --- a/net/ipv4/geneve.c +++ b/net/ipv4/geneve.c @@ -107,13 +107,13 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, struct sk_buff *skb, __be32 src, __be32 dst, __u8 tos, __u8 ttl, __be16 df, __be16 src_port, __be16 dst_port, __be16 tun_flags, u8 vni[3], u8 opt_len, u8 *opt, - bool xnet) + bool csum, bool xnet) { struct genevehdr *gnvh; int min_headroom; int err; - skb = udp_tunnel_handle_offloads(skb, !gs->sock->sk->sk_no_check_tx); + skb = udp_tunnel_handle_offloads(skb, csum); if (IS_ERR(skb)) return PTR_ERR(skb); @@ -138,7 +138,7 @@ int geneve_xmit_skb(struct geneve_sock *gs, struct rtable *rt, return udp_tunnel_xmit_skb(rt, skb, src, dst, tos, ttl, df, src_port, dst_port, xnet, - gs->sock->sk->sk_no_check_tx); + !csum); } EXPORT_SYMBOL_GPL(geneve_xmit_skb); diff --git a/net/openvswitch/vport-geneve.c b/net/openvswitch/vport-geneve.c index 7ca3d454ff3b..bf02fd5808c9 100644 --- a/net/openvswitch/vport-geneve.c +++ b/net/openvswitch/vport-geneve.c @@ -212,7 +212,7 @@ static int geneve_tnl_send(struct vport *vport, struct sk_buff *skb) tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, sport, dport, tun_key->tun_flags, vni, opts_len, opts, - false); + !!(tun_key->tun_flags & TUNNEL_CSUM), false); if (err < 0) ip_rt_put(rt); return err; diff --git a/net/openvswitch/vport-vxlan.c b/net/openvswitch/vport-vxlan.c index 3cc983bf444a..ff07d4062d60 100644 --- a/net/openvswitch/vport-vxlan.c +++ b/net/openvswitch/vport-vxlan.c @@ -74,7 +74,7 @@ static void vxlan_rcv(struct vxlan_sock *vs, struct sk_buff *skb, __be64 key; __be16 flags; - flags = TUNNEL_KEY; + flags = TUNNEL_KEY | (udp_hdr(skb)->check != 0 ? TUNNEL_CSUM : 0); vxlan_port = vxlan_vport(vport); if (vxlan_port->exts & VXLAN_F_GBP) flags |= TUNNEL_VXLAN_OPT; @@ -230,6 +230,7 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) __be16 src_port; __be16 df; int err; + u32 vxflags; if (unlikely(!OVS_CB(skb)->egress_tun_info)) { err = -EINVAL; @@ -251,11 +252,13 @@ static int vxlan_tnl_send(struct vport *vport, struct sk_buff *skb) src_port = udp_flow_src_port(net, skb, 0, 0, true); md.vni = htonl(be64_to_cpu(tun_key->tun_id) << 8); md.gbp = vxlan_ext_gbp(skb); + vxflags = vxlan_port->exts | + (tun_key->tun_flags & TUNNEL_CSUM ? 
VXLAN_F_UDP_CSUM : 0); err = vxlan_xmit_skb(rt, skb, fl.saddr, tun_key->ipv4_dst, tun_key->ipv4_tos, tun_key->ipv4_ttl, df, src_port, dst_port, - &md, false, vxlan_port->exts); + &md, false, vxflags); if (err < 0) ip_rt_put(rt); return err; -- cgit v1.2.3-70-g09d2 From 7cc05662682da4b0e0a4fdf3c3f190577803ae81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Jan 2015 18:04:53 +0100 Subject: net: remove sock_iocb The sock_iocb structure is allocate on stack for each read/write-like operation on sockets, and contains various fields of which only the embedded msghdr and sometimes a pointer to the scm_cookie is ever used. Get rid of the sock_iocb and put a msghdr directly on the stack and pass the scm_cookie explicitly to netlink_mmap_sendmsg. Signed-off-by: Christoph Hellwig Signed-off-by: David S. Miller --- include/net/sock.h | 23 --------------- net/netlink/af_netlink.c | 28 +++++++------------ net/socket.c | 45 +++-------------------------- net/unix/af_unix.c | 73 +++++++++++++++++++----------------------------- 4 files changed, 43 insertions(+), 126 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 2210fec65669..15341499786c 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1374,29 +1374,6 @@ void sk_prot_clear_portaddr_nulls(struct sock *sk, int size); #define SOCK_BINDADDR_LOCK 4 #define SOCK_BINDPORT_LOCK 8 -/* sock_iocb: used to kick off async processing of socket ios */ -struct sock_iocb { - struct list_head list; - - int flags; - int size; - struct socket *sock; - struct sock *sk; - struct scm_cookie *scm; - struct msghdr *msg, async_msg; - struct kiocb *kiocb; -}; - -static inline struct sock_iocb *kiocb_to_siocb(struct kiocb *iocb) -{ - return (struct sock_iocb *)iocb->private; -} - -static inline struct kiocb *siocb_to_kiocb(struct sock_iocb *si) -{ - return si->kiocb; -} - struct socket_alloc { struct socket socket; struct inode vfs_inode; diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2197af00673a..a36777b7cfb6 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -695,7 +695,7 @@ static void netlink_ring_setup_skb(struct sk_buff *skb, struct sock *sk, static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, u32 dst_portid, u32 dst_group, - struct sock_iocb *siocb) + struct scm_cookie *scm) { struct netlink_sock *nlk = nlk_sk(sk); struct netlink_ring *ring; @@ -741,7 +741,7 @@ static int netlink_mmap_sendmsg(struct sock *sk, struct msghdr *msg, NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = siocb->scm->creds; + NETLINK_CB(skb).creds = scm->creds; err = security_netlink_send(sk, skb); if (err) { @@ -820,7 +820,7 @@ static void netlink_ring_set_copied(struct sock *sk, struct sk_buff *skb) #define netlink_tx_is_mmaped(sk) false #define netlink_mmap sock_no_mmap #define netlink_poll datagram_poll -#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, siocb) 0 +#define netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, scm) 0 #endif /* CONFIG_NETLINK_MMAP */ static void netlink_skb_destructor(struct sk_buff *skb) @@ -2259,7 +2259,6 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb) static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); DECLARE_SOCKADDR(struct sockaddr_nl *, addr, msg->msg_name); @@ 
-2273,10 +2272,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (msg->msg_flags&MSG_OOB) return -EOPNOTSUPP; - if (NULL == siocb->scm) - siocb->scm = &scm; - - err = scm_send(sock, msg, siocb->scm, true); + err = scm_send(sock, msg, &scm, true); if (err < 0) return err; @@ -2305,7 +2301,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (netlink_tx_is_mmaped(sk) && msg->msg_iter.iov->iov_base == NULL) { err = netlink_mmap_sendmsg(sk, msg, dst_portid, dst_group, - siocb); + &scm); goto out; } @@ -2319,7 +2315,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; - NETLINK_CB(skb).creds = siocb->scm->creds; + NETLINK_CB(skb).creds = scm.creds; NETLINK_CB(skb).flags = netlink_skb_flags; err = -EFAULT; @@ -2341,7 +2337,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT); out: - scm_destroy(siocb->scm); + scm_destroy(&scm); return err; } @@ -2349,7 +2345,6 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len, int flags) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct scm_cookie scm; struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); @@ -2412,11 +2407,8 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, if (nlk->flags & NETLINK_RECV_PKTINFO) netlink_cmsg_recv_pktinfo(msg, skb); - if (NULL == siocb->scm) { - memset(&scm, 0, sizeof(scm)); - siocb->scm = &scm; - } - siocb->scm->creds = *NETLINK_CREDS(skb); + memset(&scm, 0, sizeof(scm)); + scm.creds = *NETLINK_CREDS(skb); if (flags & MSG_TRUNC) copied = data_skb->len; @@ -2431,7 +2423,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, } } - scm_recv(sock, msg, siocb->scm, flags); + scm_recv(sock, msg, &scm, flags); out: netlink_rcv_wake(sk); return err ? : copied; diff --git a/net/socket.c b/net/socket.c index 3acd35f144d6..3326d67482ac 100644 --- a/net/socket.c +++ b/net/socket.c @@ -613,13 +613,6 @@ EXPORT_SYMBOL(__sock_tx_timestamp); static inline int __sock_sendmsg_nosec(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size) { - struct sock_iocb *si = kiocb_to_siocb(iocb); - - si->sock = sock; - si->scm = NULL; - si->msg = msg; - si->size = size; - return sock->ops->sendmsg(iocb, sock, msg, size); } @@ -635,11 +628,9 @@ static int do_sock_sendmsg(struct socket *sock, struct msghdr *msg, size_t size, bool nosec) { struct kiocb iocb; - struct sock_iocb siocb; int ret; init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; ret = nosec ? 
__sock_sendmsg_nosec(&iocb, sock, msg, size) : __sock_sendmsg(&iocb, sock, msg, size); if (-EIOCBQUEUED == ret) @@ -756,14 +747,6 @@ EXPORT_SYMBOL_GPL(__sock_recv_ts_and_drops); static inline int __sock_recvmsg_nosec(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - struct sock_iocb *si = kiocb_to_siocb(iocb); - - si->sock = sock; - si->scm = NULL; - si->msg = msg; - si->size = size; - si->flags = flags; - return sock->ops->recvmsg(iocb, sock, msg, size, flags); } @@ -779,11 +762,9 @@ int sock_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct kiocb iocb; - struct sock_iocb siocb; int ret; init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; ret = __sock_recvmsg(&iocb, sock, msg, size, flags); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&iocb); @@ -795,11 +776,9 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, size_t size, int flags) { struct kiocb iocb; - struct sock_iocb siocb; int ret; init_sync_kiocb(&iocb, NULL); - iocb.private = &siocb; ret = __sock_recvmsg_nosec(&iocb, sock, msg, size, flags); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&iocb); @@ -866,14 +845,6 @@ static ssize_t sock_splice_read(struct file *file, loff_t *ppos, return sock->ops->splice_read(sock, ppos, pipe, len, flags); } -static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, - struct sock_iocb *siocb) -{ - siocb->kiocb = iocb; - iocb->private = siocb; - return siocb; -} - static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, struct file *file, const struct iovec *iov, unsigned long nr_segs) @@ -893,7 +864,7 @@ static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct sock_iocb siocb, *x; + struct msghdr msg; if (pos != 0) return -ESPIPE; @@ -901,11 +872,7 @@ static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, if (iocb->ki_nbytes == 0) /* Match SYS5 behaviour */ return 0; - - x = alloc_sock_iocb(iocb, &siocb); - if (!x) - return -ENOMEM; - return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); + return do_sock_read(&msg, iocb, iocb->ki_filp, iov, nr_segs); } static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, @@ -929,16 +896,12 @@ static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, unsigned long nr_segs, loff_t pos) { - struct sock_iocb siocb, *x; + struct msghdr msg; if (pos != 0) return -ESPIPE; - x = alloc_sock_iocb(iocb, &siocb); - if (!x) - return -ENOMEM; - - return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); + return do_sock_write(&msg, iocb, iocb->ki_filp, iov, nr_segs); } /* diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 8e1b10274b02..526b6edab018 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1445,7 +1445,6 @@ static void maybe_add_creds(struct sk_buff *skb, const struct socket *sock, static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct net *net = sock_net(sk); struct unix_sock *u = unix_sk(sk); @@ -1456,14 +1455,12 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, unsigned int hash; struct sk_buff *skb; long timeo; - struct scm_cookie tmp_scm; + struct scm_cookie scm; int max_level; int data_len = 
0; - if (NULL == siocb->scm) - siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm, false); + err = scm_send(sock, msg, &scm, false); if (err < 0) return err; @@ -1507,11 +1504,11 @@ static int unix_dgram_sendmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; - err = unix_scm_to_skb(siocb->scm, skb, true); + err = unix_scm_to_skb(&scm, skb, true); if (err < 0) goto out_free; max_level = err + 1; - unix_get_secdata(siocb->scm, skb); + unix_get_secdata(&scm, skb); skb_put(skb, len - data_len); skb->data_len = data_len; @@ -1606,7 +1603,7 @@ restart: unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); - scm_destroy(siocb->scm); + scm_destroy(&scm); return len; out_unlock: @@ -1616,7 +1613,7 @@ out_free: out: if (other) sock_put(other); - scm_destroy(siocb->scm); + scm_destroy(&scm); return err; } @@ -1628,21 +1625,18 @@ out: static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, struct msghdr *msg, size_t len) { - struct sock_iocb *siocb = kiocb_to_siocb(kiocb); struct sock *sk = sock->sk; struct sock *other = NULL; int err, size; struct sk_buff *skb; int sent = 0; - struct scm_cookie tmp_scm; + struct scm_cookie scm; bool fds_sent = false; int max_level; int data_len; - if (NULL == siocb->scm) - siocb->scm = &tmp_scm; wait_for_unix_gc(); - err = scm_send(sock, msg, siocb->scm, false); + err = scm_send(sock, msg, &scm, false); if (err < 0) return err; @@ -1683,7 +1677,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, goto out_err; /* Only send the fds in the first buffer */ - err = unix_scm_to_skb(siocb->scm, skb, !fds_sent); + err = unix_scm_to_skb(&scm, skb, !fds_sent); if (err < 0) { kfree_skb(skb); goto out_err; @@ -1715,8 +1709,7 @@ static int unix_stream_sendmsg(struct kiocb *kiocb, struct socket *sock, sent += size; } - scm_destroy(siocb->scm); - siocb->scm = NULL; + scm_destroy(&scm); return sent; @@ -1728,8 +1721,7 @@ pipe_err: send_sig(SIGPIPE, current, 0); err = -EPIPE; out_err: - scm_destroy(siocb->scm); - siocb->scm = NULL; + scm_destroy(&scm); return sent ? : err; } @@ -1778,8 +1770,7 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - struct sock_iocb *siocb = kiocb_to_siocb(iocb); - struct scm_cookie tmp_scm; + struct scm_cookie scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); int noblock = flags & MSG_DONTWAIT; @@ -1831,16 +1822,14 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, if (sock_flag(sk, SOCK_RCVTSTAMP)) __sock_recv_timestamp(msg, sk, skb); - if (!siocb->scm) { - siocb->scm = &tmp_scm; - memset(&tmp_scm, 0, sizeof(tmp_scm)); - } - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); - unix_set_secdata(siocb->scm, skb); + memset(&scm, 0, sizeof(scm)); + + scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); + unix_set_secdata(&scm, skb); if (!(flags & MSG_PEEK)) { if (UNIXCB(skb).fp) - unix_detach_fds(siocb->scm, skb); + unix_detach_fds(&scm, skb); sk_peek_offset_bwd(sk, skb->len); } else { @@ -1860,11 +1849,11 @@ static int unix_dgram_recvmsg(struct kiocb *iocb, struct socket *sock, sk_peek_offset_fwd(sk, size); if (UNIXCB(skb).fp) - siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); + scm.fp = scm_fp_dup(UNIXCB(skb).fp); } err = (flags & MSG_TRUNC) ? 
skb->len - skip : size; - scm_recv(sock, msg, siocb->scm, flags); + scm_recv(sock, msg, &scm, flags); out_free: skb_free_datagram(sk, skb); @@ -1915,8 +1904,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t size, int flags) { - struct sock_iocb *siocb = kiocb_to_siocb(iocb); - struct scm_cookie tmp_scm; + struct scm_cookie scm; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); @@ -1943,10 +1931,7 @@ static int unix_stream_recvmsg(struct kiocb *iocb, struct socket *sock, * while sleeps in memcpy_tomsg */ - if (!siocb->scm) { - siocb->scm = &tmp_scm; - memset(&tmp_scm, 0, sizeof(tmp_scm)); - } + memset(&scm, 0, sizeof(scm)); err = mutex_lock_interruptible(&u->readlock); if (unlikely(err)) { @@ -2012,13 +1997,13 @@ again: if (check_creds) { /* Never glue messages from different writers */ - if ((UNIXCB(skb).pid != siocb->scm->pid) || - !uid_eq(UNIXCB(skb).uid, siocb->scm->creds.uid) || - !gid_eq(UNIXCB(skb).gid, siocb->scm->creds.gid)) + if ((UNIXCB(skb).pid != scm.pid) || + !uid_eq(UNIXCB(skb).uid, scm.creds.uid) || + !gid_eq(UNIXCB(skb).gid, scm.creds.gid)) break; } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ - scm_set_cred(siocb->scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); + scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); check_creds = 1; } @@ -2045,7 +2030,7 @@ again: sk_peek_offset_bwd(sk, chunk); if (UNIXCB(skb).fp) - unix_detach_fds(siocb->scm, skb); + unix_detach_fds(&scm, skb); if (unix_skb_len(skb)) break; @@ -2053,13 +2038,13 @@ again: skb_unlink(skb, &sk->sk_receive_queue); consume_skb(skb); - if (siocb->scm->fp) + if (scm.fp) break; } else { /* It is questionable, see note in unix_dgram_recvmsg. */ if (UNIXCB(skb).fp) - siocb->scm->fp = scm_fp_dup(UNIXCB(skb).fp); + scm.fp = scm_fp_dup(UNIXCB(skb).fp); sk_peek_offset_fwd(sk, chunk); @@ -2068,7 +2053,7 @@ again: } while (size); mutex_unlock(&u->readlock); - scm_recv(sock, msg, siocb->scm, flags); + scm_recv(sock, msg, &scm, flags); out: return copied ? : err; } -- cgit v1.2.3-70-g09d2 From 3c313161353ad527de1a6ecde0f0d41700858fd6 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Sat, 24 Jan 2015 18:47:20 +0100 Subject: bcma: detect SPROM revision 11 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Extracting values from it is still unsupported, but at least we'll display some meaningful error now. 
Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/sprom.c | 3 ++- include/linux/ssb/ssb_regs.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/bcma/sprom.c b/drivers/bcma/sprom.c index efb037f9c98a..206edd3ba668 100644 --- a/drivers/bcma/sprom.c +++ b/drivers/bcma/sprom.c @@ -579,7 +579,8 @@ int bcma_sprom_get(struct bcma_bus *bus) u16 offset = BCMA_CC_SPROM; u16 *sprom; size_t sprom_sizes[] = { SSB_SPROMSIZE_WORDS_R4, - SSB_SPROMSIZE_WORDS_R10, }; + SSB_SPROMSIZE_WORDS_R10, + SSB_SPROMSIZE_WORDS_R11, }; int i, err = 0; if (!bus->drv_cc.core) diff --git a/include/linux/ssb/ssb_regs.h b/include/linux/ssb/ssb_regs.h index f7b9100686c3..c0f707ac192b 100644 --- a/include/linux/ssb/ssb_regs.h +++ b/include/linux/ssb/ssb_regs.h @@ -173,6 +173,7 @@ #define SSB_SPROMSIZE_BYTES_R123 (SSB_SPROMSIZE_WORDS_R123 * sizeof(u16)) #define SSB_SPROMSIZE_BYTES_R4 (SSB_SPROMSIZE_WORDS_R4 * sizeof(u16)) #define SSB_SPROMSIZE_WORDS_R10 230 +#define SSB_SPROMSIZE_WORDS_R11 234 #define SSB_SPROM_BASE1 0x1000 #define SSB_SPROM_BASE31 0x0800 #define SSB_SPROM_REVISION 0x007E -- cgit v1.2.3-70-g09d2 From b504075f5903b969a54ef3a6ae994c0872edb259 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Sun, 25 Jan 2015 11:11:14 +0100 Subject: bcma: add early_init function for PCIe core and move some fix into it MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There are some PCIe core fixes that need to be applied before accessing SPROM, otherwise reading it may fail. Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/driver_pci.c | 66 ++++++++++++++++++++++++------------ drivers/bcma/main.c | 7 ++++ include/linux/bcma/bcma_driver_pci.h | 2 ++ 3 files changed, 53 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/bcma/driver_pci.c b/drivers/bcma/driver_pci.c index b85a505603e6..786666488a2d 100644 --- a/drivers/bcma/driver_pci.c +++ b/drivers/bcma/driver_pci.c @@ -144,6 +144,47 @@ static u16 bcma_pcie_mdio_writeread(struct bcma_drv_pci *pc, u16 device, return bcma_pcie_mdio_read(pc, device, address); } +/************************************************** + * Early init. + **************************************************/ + +static void bcma_core_pci_fixcfg(struct bcma_drv_pci *pc) +{ + struct bcma_device *core = pc->core; + u16 val16, core_index; + uint regoff; + + regoff = BCMA_CORE_PCI_SPROM(BCMA_CORE_PCI_SPROM_PI_OFFSET); + core_index = (u16)core->core_index; + + val16 = pcicore_read16(pc, regoff); + if (((val16 & BCMA_CORE_PCI_SPROM_PI_MASK) >> BCMA_CORE_PCI_SPROM_PI_SHIFT) + != core_index) { + val16 = (core_index << BCMA_CORE_PCI_SPROM_PI_SHIFT) | + (val16 & ~BCMA_CORE_PCI_SPROM_PI_MASK); + pcicore_write16(pc, regoff, val16); + } +} + +/* + * Apply some early fixes required before accessing SPROM. + * See also si_pci_fixcfg. + */ +void bcma_core_pci_early_init(struct bcma_drv_pci *pc) +{ + if (pc->early_setup_done) + return; + + pc->hostmode = bcma_core_pci_is_in_hostmode(pc); + if (pc->hostmode) + goto out; + + bcma_core_pci_fixcfg(pc); + +out: + pc->early_setup_done = true; +} + /************************************************** * Workarounds. 
**************************************************/ @@ -175,24 +216,6 @@ static void bcma_pcicore_serdes_workaround(struct bcma_drv_pci *pc) tmp & ~BCMA_CORE_PCI_PLL_CTRL_FREQDET_EN); } -static void bcma_core_pci_fixcfg(struct bcma_drv_pci *pc) -{ - struct bcma_device *core = pc->core; - u16 val16, core_index; - uint regoff; - - regoff = BCMA_CORE_PCI_SPROM(BCMA_CORE_PCI_SPROM_PI_OFFSET); - core_index = (u16)core->core_index; - - val16 = pcicore_read16(pc, regoff); - if (((val16 & BCMA_CORE_PCI_SPROM_PI_MASK) >> BCMA_CORE_PCI_SPROM_PI_SHIFT) - != core_index) { - val16 = (core_index << BCMA_CORE_PCI_SPROM_PI_SHIFT) | - (val16 & ~BCMA_CORE_PCI_SPROM_PI_MASK); - pcicore_write16(pc, regoff, val16); - } -} - /* Fix MISC config to allow coming out of L2/L3-Ready state w/o PRST */ /* Needs to happen when coming out of 'standby'/'hibernate' */ static void bcma_core_pci_config_fixup(struct bcma_drv_pci *pc) @@ -216,7 +239,6 @@ static void bcma_core_pci_config_fixup(struct bcma_drv_pci *pc) static void bcma_core_pci_clientmode_init(struct bcma_drv_pci *pc) { - bcma_core_pci_fixcfg(pc); bcma_pcicore_serdes_workaround(pc); bcma_core_pci_config_fixup(pc); } @@ -226,11 +248,11 @@ void bcma_core_pci_init(struct bcma_drv_pci *pc) if (pc->setup_done) return; - pc->hostmode = bcma_core_pci_is_in_hostmode(pc); + bcma_core_pci_early_init(pc); + if (pc->hostmode) bcma_core_pci_hostmode_init(pc); - - if (!pc->hostmode) + else bcma_core_pci_clientmode_init(pc); } diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 73b2ee3de972..38bde6eab8a4 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -402,6 +402,13 @@ int bcma_bus_register(struct bcma_bus *bus) bcma_core_chipcommon_early_init(&bus->drv_cc); } + /* Early init PCIE core */ + core = bcma_find_core(bus, BCMA_CORE_PCIE); + if (core) { + bus->drv_pci[0].core = core; + bcma_core_pci_early_init(&bus->drv_pci[0]); + } + /* Cores providing flash access go before SPROM init */ list_for_each_entry(core, &bus->cores, list) { if (bcma_is_core_needed_early(core->id.id)) diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 0333e605ea0d..3f809ae372c4 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -223,6 +223,7 @@ struct bcma_drv_pci_host { struct bcma_drv_pci { struct bcma_device *core; + u8 early_setup_done:1; u8 setup_done:1; u8 hostmode:1; @@ -237,6 +238,7 @@ struct bcma_drv_pci { #define pcicore_write16(pc, offset, val) bcma_write16((pc)->core, offset, val) #define pcicore_write32(pc, offset, val) bcma_write32((pc)->core, offset, val) +extern void bcma_core_pci_early_init(struct bcma_drv_pci *pc); extern void bcma_core_pci_init(struct bcma_drv_pci *pc); extern int bcma_core_pci_irq_ctl(struct bcma_drv_pci *pc, struct bcma_device *core, bool enable); -- cgit v1.2.3-70-g09d2 From 8be08a39d498d5d93ff5149276e34ccb4ec3757f Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Sun, 25 Jan 2015 13:41:19 +0100 Subject: bcma: implement host code support for PCIe Gen 2 devices MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is stil incomplete, so we don't add PCI IDs of new devices yet. Purpose of this patch is to allow testing & adjusting rest of the code. 
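What is left out, as noted, is the probe side: nothing sets the new host_is_pcie2 flag yet because no PCI device IDs are being added. A sketch of what that will eventually look like, using a purely made-up device ID:

#include <linux/pci.h>
#include <linux/bcma/bcma.h>

/* Illustration only: marking a bus as PCIe Gen 2 at probe time so that
 * bcma_host_pci_switch_core() picks BCMA_PCIE2_BAR0_WIN2. The ID below
 * is a placeholder, not a real supported device. */
static void bcma_host_pci_mark_pcie2(struct bcma_bus *bus,
                                     struct pci_dev *pdev)
{
        switch (pdev->device) {
        case 0x4360:                    /* placeholder device ID */
                bus->host_is_pcie2 = true;
                break;
        default:
                bus->host_is_pcie2 = false;
                break;
        }
}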
Signed-off-by: Rafał Miłecki Signed-off-by: Kalle Valo --- drivers/bcma/host_pci.c | 6 ++++-- include/linux/bcma/bcma.h | 1 + include/linux/bcma/bcma_regs.h | 2 ++ 3 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c index cd9161a8b3a1..53c6a8a58859 100644 --- a/drivers/bcma/host_pci.c +++ b/drivers/bcma/host_pci.c @@ -13,10 +13,12 @@ static void bcma_host_pci_switch_core(struct bcma_device *core) { + int win2 = core->bus->host_is_pcie2 ? + BCMA_PCIE2_BAR0_WIN2 : BCMA_PCI_BAR0_WIN2; + pci_write_config_dword(core->bus->host_pci, BCMA_PCI_BAR0_WIN, core->addr); - pci_write_config_dword(core->bus->host_pci, BCMA_PCI_BAR0_WIN2, - core->wrap); + pci_write_config_dword(core->bus->host_pci, win2, core->wrap); core->bus->mapped_core = core; bcma_debug(core->bus, "Switched to core: 0x%X\n", core->id.id); } diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index eb1c6a47b67f..994739da827f 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -318,6 +318,7 @@ struct bcma_bus { const struct bcma_host_ops *ops; enum bcma_hosttype hosttype; + bool host_is_pcie2; /* Used for BCMA_HOSTTYPE_PCI only */ union { /* Pointer to the PCI bus (only for BCMA_HOSTTYPE_PCI) */ struct pci_dev *host_pci; diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h index e64ae7bf80a1..ebd5c1fcdea4 100644 --- a/include/linux/bcma/bcma_regs.h +++ b/include/linux/bcma/bcma_regs.h @@ -64,6 +64,8 @@ #define BCMA_PCI_GPIO_XTAL 0x40 /* PCI config space GPIO 14 for Xtal powerup */ #define BCMA_PCI_GPIO_PLL 0x80 /* PCI config space GPIO 15 for PLL powerdown */ +#define BCMA_PCIE2_BAR0_WIN2 0x70 + /* SiliconBackplane Address Map. * All regions may not exist on all chips. */ -- cgit v1.2.3-70-g09d2 From a12c6b861fab9229f002dc2eddc0aee988170e2b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 29 Jan 2015 16:52:16 +0100 Subject: nl80211: don't document per-wiphy interface dump Such a feature doesn't exist and isn't really needed since you probably won't have enough interfaces to make it worthwhile, so just remove that from the documentation. Reported-by: booto [on IRC] Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 1cbc3aae425c..68b294e83944 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -180,8 +180,8 @@ * %NL80211_ATTR_WIPHY and %NL80211_ATTR_WIPHY_NAME. * * @NL80211_CMD_GET_INTERFACE: Request an interface's configuration; - * either a dump request on a %NL80211_ATTR_WIPHY or a specific get - * on an %NL80211_ATTR_IFINDEX is supported. + * either a dump request for all interfaces or a specific get with a + * single %NL80211_ATTR_IFINDEX is supported. * @NL80211_CMD_SET_INTERFACE: Set type of a virtual interface, requires * %NL80211_ATTR_IFINDEX and %NL80211_ATTR_IFTYPE. * @NL80211_CMD_NEW_INTERFACE: Newly created virtual interface or response -- cgit v1.2.3-70-g09d2 From 7866a621043fbaca3d7389e9b9f69dd1a2e5a855 Mon Sep 17 00:00:00 2001 From: Salam Noureddine Date: Tue, 27 Jan 2015 11:35:48 -0800 Subject: dev: add per net_device packet type chains When many pf_packet listeners are created on a lot of interfaces the current implementation using global packet type lists scales poorly. This patch adds per net_device packet type lists to fix this problem. 
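As a usage sketch (not part of the patch): a tap bound to a single interface by setting packet_type.dev before registration now lands on that device's private ptype_all chain instead of the global list, so unrelated interfaces never walk it:

#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/if_ether.h>

static int tap_rcv(struct sk_buff *skb, struct net_device *dev,
                   struct packet_type *pt, struct net_device *orig_dev)
{
        /* Consume every frame seen on the bound device. */
        kfree_skb(skb);
        return 0;
}

static struct packet_type tap __read_mostly = {
        .type = cpu_to_be16(ETH_P_ALL), /* all protocols, rx and tx */
        .func = tap_rcv,
};

/* At setup time, holding a valid reference to the interface of interest: */
static void tap_attach(struct net_device *dev)
{
        tap.dev = dev;          /* device-bound -> per-device chain */
        dev_add_pack(&tap);
}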
The patch was originally written by Eric Biederman for linux-2.6.29. Tested on linux-3.16. Signed-off-by: "Eric W. Biederman" Signed-off-by: Salam Noureddine Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 + net/core/dev.c | 132 +++++++++++++++++++++++++++++----------------- 2 files changed, 86 insertions(+), 48 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 642d426a668f..3d37c6eb1732 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1514,6 +1514,8 @@ struct net_device { struct list_head napi_list; struct list_head unreg_list; struct list_head close_list; + struct list_head ptype_all; + struct list_head ptype_specific; struct { struct list_head upper; diff --git a/net/core/dev.c b/net/core/dev.c index 7f028d441e98..1d564d68e31a 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -371,9 +371,10 @@ static inline void netdev_set_addr_lockdep_class(struct net_device *dev) static inline struct list_head *ptype_head(const struct packet_type *pt) { if (pt->type == htons(ETH_P_ALL)) - return &ptype_all; + return pt->dev ? &pt->dev->ptype_all : &ptype_all; else - return &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; + return pt->dev ? &pt->dev->ptype_specific : + &ptype_base[ntohs(pt->type) & PTYPE_HASH_MASK]; } /** @@ -1734,6 +1735,23 @@ static inline int deliver_skb(struct sk_buff *skb, return pt_prev->func(skb, skb->dev, pt_prev, orig_dev); } +static inline void deliver_ptype_list_skb(struct sk_buff *skb, + struct packet_type **pt, + struct net_device *dev, __be16 type, + struct list_head *ptype_list) +{ + struct packet_type *ptype, *pt_prev = *pt; + + list_for_each_entry_rcu(ptype, ptype_list, list) { + if (ptype->type != type) + continue; + if (pt_prev) + deliver_skb(skb, pt_prev, dev); + pt_prev = ptype; + } + *pt = pt_prev; +} + static inline bool skb_loop_sk(struct packet_type *ptype, struct sk_buff *skb) { if (!ptype->af_packet_priv || !skb->sk) @@ -1757,45 +1775,54 @@ static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev) struct packet_type *ptype; struct sk_buff *skb2 = NULL; struct packet_type *pt_prev = NULL; + struct list_head *ptype_list = &ptype_all; rcu_read_lock(); - list_for_each_entry_rcu(ptype, &ptype_all, list) { +again: + list_for_each_entry_rcu(ptype, ptype_list, list) { /* Never send packets back to the socket * they originated from - MvS (miquels@drinkel.ow.org) */ - if ((ptype->dev == dev || !ptype->dev) && - (!skb_loop_sk(ptype, skb))) { - if (pt_prev) { - deliver_skb(skb2, pt_prev, skb->dev); - pt_prev = ptype; - continue; - } + if (skb_loop_sk(ptype, skb)) + continue; - skb2 = skb_clone(skb, GFP_ATOMIC); - if (!skb2) - break; + if (pt_prev) { + deliver_skb(skb2, pt_prev, skb->dev); + pt_prev = ptype; + continue; + } - net_timestamp_set(skb2); + /* need to clone skb, done only once */ + skb2 = skb_clone(skb, GFP_ATOMIC); + if (!skb2) + goto out_unlock; - /* skb->nh should be correctly - set by sender, so that the second statement is - just protection against buggy protocols. 
- */ - skb_reset_mac_header(skb2); - - if (skb_network_header(skb2) < skb2->data || - skb_network_header(skb2) > skb_tail_pointer(skb2)) { - net_crit_ratelimited("protocol %04x is buggy, dev %s\n", - ntohs(skb2->protocol), - dev->name); - skb_reset_network_header(skb2); - } + net_timestamp_set(skb2); - skb2->transport_header = skb2->network_header; - skb2->pkt_type = PACKET_OUTGOING; - pt_prev = ptype; + /* skb->nh should be correctly + * set by sender, so that the second statement is + * just protection against buggy protocols. + */ + skb_reset_mac_header(skb2); + + if (skb_network_header(skb2) < skb2->data || + skb_network_header(skb2) > skb_tail_pointer(skb2)) { + net_crit_ratelimited("protocol %04x is buggy, dev %s\n", + ntohs(skb2->protocol), + dev->name); + skb_reset_network_header(skb2); } + + skb2->transport_header = skb2->network_header; + skb2->pkt_type = PACKET_OUTGOING; + pt_prev = ptype; + } + + if (ptype_list == &ptype_all) { + ptype_list = &dev->ptype_all; + goto again; } +out_unlock: if (pt_prev) pt_prev->func(skb2, skb->dev, pt_prev, skb->dev); rcu_read_unlock(); @@ -2617,7 +2644,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, unsigned int len; int rc; - if (!list_empty(&ptype_all)) + if (!list_empty(&ptype_all) || !list_empty(&dev->ptype_all)) dev_queue_xmit_nit(skb, dev); len = skb->len; @@ -3615,7 +3642,6 @@ static int __netif_receive_skb_core(struct sk_buff *skb, bool pfmemalloc) struct packet_type *ptype, *pt_prev; rx_handler_func_t *rx_handler; struct net_device *orig_dev; - struct net_device *null_or_dev; bool deliver_exact = false; int ret = NET_RX_DROP; __be16 type; @@ -3658,11 +3684,15 @@ another_round: goto skip_taps; list_for_each_entry_rcu(ptype, &ptype_all, list) { - if (!ptype->dev || ptype->dev == skb->dev) { - if (pt_prev) - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = ptype; - } + if (pt_prev) + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = ptype; + } + + list_for_each_entry_rcu(ptype, &skb->dev->ptype_all, list) { + if (pt_prev) + ret = deliver_skb(skb, pt_prev, orig_dev); + pt_prev = ptype; } skip_taps: @@ -3718,19 +3748,21 @@ ncls: skb->vlan_tci = 0; } + type = skb->protocol; + /* deliver only exact match when indicated */ - null_or_dev = deliver_exact ? 
skb->dev : NULL; + if (likely(!deliver_exact)) { + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &ptype_base[ntohs(type) & + PTYPE_HASH_MASK]); + } - type = skb->protocol; - list_for_each_entry_rcu(ptype, - &ptype_base[ntohs(type) & PTYPE_HASH_MASK], list) { - if (ptype->type == type && - (ptype->dev == null_or_dev || ptype->dev == skb->dev || - ptype->dev == orig_dev)) { - if (pt_prev) - ret = deliver_skb(skb, pt_prev, orig_dev); - pt_prev = ptype; - } + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &orig_dev->ptype_specific); + + if (unlikely(skb->dev != orig_dev)) { + deliver_ptype_list_skb(skb, &pt_prev, orig_dev, type, + &skb->dev->ptype_specific); } if (pt_prev) { @@ -6579,6 +6611,8 @@ void netdev_run_todo(void) /* paranoia */ BUG_ON(netdev_refcnt_read(dev)); + BUG_ON(!list_empty(&dev->ptype_all)); + BUG_ON(!list_empty(&dev->ptype_specific)); WARN_ON(rcu_access_pointer(dev->ip_ptr)); WARN_ON(rcu_access_pointer(dev->ip6_ptr)); WARN_ON(dev->dn_ptr); @@ -6761,6 +6795,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, INIT_LIST_HEAD(&dev->adj_list.lower); INIT_LIST_HEAD(&dev->all_adj_list.upper); INIT_LIST_HEAD(&dev->all_adj_list.lower); + INIT_LIST_HEAD(&dev->ptype_all); + INIT_LIST_HEAD(&dev->ptype_specific); dev->priv_flags = IFF_XMIT_DST_RELEASE | IFF_XMIT_DST_RELEASE_PERM; setup(dev); -- cgit v1.2.3-70-g09d2 From f7697b1602d13ef80779caf23d13fa1511193144 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Fri, 30 Jan 2015 23:20:55 -0800 Subject: Bluetooth: Store OOB data present value for each set of remote OOB data Instead of doing complex calculation every time the OOB data is used, just calculate the OOB data present value and store it with the OOB data raw values. Signed-off-by: Marcel Holtmann Signed-off-by: Johan Hedberg --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/hci_core.c | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 1780f1681ecf..a37e10f4e2b3 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -145,6 +145,7 @@ struct oob_data { struct list_head list; bdaddr_t bdaddr; u8 bdaddr_type; + u8 present; u8 hash192[16]; u8 rand192[16]; u8 hash256[16]; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 5d4ac3fbbc08..f045c062f8f0 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -2581,9 +2581,15 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, if (hash192 && rand192) { memcpy(data->hash192, hash192, sizeof(data->hash192)); memcpy(data->rand192, rand192, sizeof(data->rand192)); + if (hash256 && rand256) + data->present = 0x03; } else { memset(data->hash192, 0, sizeof(data->hash192)); memset(data->rand192, 0, sizeof(data->rand192)); + if (hash256 && rand256) + data->present = 0x02; + else + data->present = 0x00; } if (hash256 && rand256) { @@ -2592,6 +2598,8 @@ int hci_add_remote_oob_data(struct hci_dev *hdev, bdaddr_t *bdaddr, } else { memset(data->hash256, 0, sizeof(data->hash256)); memset(data->rand256, 0, sizeof(data->rand256)); + if (hash192 && rand192) + data->present = 0x01; } BT_DBG("%s for %pMR", hdev->name, bdaddr); -- cgit v1.2.3-70-g09d2 From 349c9e3c7341bbab6efbea39acfadeba9ab19f61 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 29 Jan 2015 15:58:09 -0800 Subject: ipv4: icmp: use percpu allocation Get rid of nr_cpu_ids and use modern percpu allocation. 
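In isolation, the allocation idiom being switched to looks like this; a generic sketch of the per-CPU pointer pattern rather than the icmp.c code itself:

#include <linux/percpu.h>
#include <linux/cpumask.h>
#include <net/sock.h>

static struct sock * __percpu *sk_pcpu;

static int sk_pcpu_init(void)
{
        int cpu;

        sk_pcpu = alloc_percpu(struct sock *);  /* one slot per possible CPU */
        if (!sk_pcpu)
                return -ENOMEM;

        for_each_possible_cpu(cpu)
                *per_cpu_ptr(sk_pcpu, cpu) = NULL;      /* sockets created later */

        return 0;
}

/* Fast path, with preemption already disabled by the caller: */
static struct sock *sk_pcpu_get(void)
{
        return *this_cpu_ptr(sk_pcpu);
}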
Note that the sockets themselves are not yet allocated using NUMA affinity. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/netns/ipv4.h | 3 ++- net/ipv4/icmp.c | 17 ++++++++--------- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 24945cefc4fd..7283f4d39ae2 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -48,7 +48,8 @@ struct netns_ipv4 { struct hlist_head *fib_table_hash; struct sock *fibnl; - struct sock **icmp_sk; + struct sock * __percpu *icmp_sk; + struct inet_peer_base *peers; struct tcpm_hash_bucket *tcp_metrics_hash; unsigned int tcp_metrics_hash_log; diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 36f5584d93c5..5e564014a0b7 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -205,7 +205,7 @@ static const struct icmp_control icmp_pointers[NR_ICMP_TYPES+1]; */ static struct sock *icmp_sk(struct net *net) { - return net->ipv4.icmp_sk[smp_processor_id()]; + return *this_cpu_ptr(net->ipv4.icmp_sk); } static inline struct sock *icmp_xmit_lock(struct net *net) @@ -1140,8 +1140,8 @@ static void __net_exit icmp_sk_exit(struct net *net) int i; for_each_possible_cpu(i) - inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); - kfree(net->ipv4.icmp_sk); + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i)); + free_percpu(net->ipv4.icmp_sk); net->ipv4.icmp_sk = NULL; } @@ -1149,9 +1149,8 @@ static int __net_init icmp_sk_init(struct net *net) { int i, err; - net->ipv4.icmp_sk = - kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); - if (net->ipv4.icmp_sk == NULL) + net->ipv4.icmp_sk = alloc_percpu(struct sock *); + if (!net->ipv4.icmp_sk) return -ENOMEM; for_each_possible_cpu(i) { @@ -1162,7 +1161,7 @@ static int __net_init icmp_sk_init(struct net *net) if (err < 0) goto fail; - net->ipv4.icmp_sk[i] = sk; + *per_cpu_ptr(net->ipv4.icmp_sk, i) = sk; /* Enough space for 2 64K ICMP packets, including * sk_buff/skb_shared_info struct overhead. @@ -1203,8 +1202,8 @@ static int __net_init icmp_sk_init(struct net *net) fail: for_each_possible_cpu(i) - inet_ctl_sock_destroy(net->ipv4.icmp_sk[i]); - kfree(net->ipv4.icmp_sk); + inet_ctl_sock_destroy(*per_cpu_ptr(net->ipv4.icmp_sk, i)); + free_percpu(net->ipv4.icmp_sk); return err; } -- cgit v1.2.3-70-g09d2 From aafb3e98b27977148c8c86499684f8f5c3decfbb Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 29 Jan 2015 22:40:11 -0800 Subject: netdev: introduce new NETIF_F_HW_SWITCH_OFFLOAD feature flag for switch device offloads This is a high level feature flag for all switch asic offloads switch drivers set this flag on switch ports. Logical devices like bridge, bonds, vxlans can inherit this flag from their slaves/ports. The patch also adds the flag to NETIF_F_ONE_FOR_ALL, so that it gets propagated to the upperdevices (bridges and bonds). Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- include/linux/netdev_features.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 8e30685affeb..7d59dc6ab789 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -66,6 +66,7 @@ enum { NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */ NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ NETIF_F_BUSY_POLL_BIT, /* Busy poll */ + NETIF_F_HW_SWITCH_OFFLOAD_BIT, /* HW switch offload */ /* * Add your fresh new feature above and remember to update @@ -124,6 +125,7 @@ enum { #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) +#define NETIF_F_HW_SWITCH_OFFLOAD __NETIF_F(HW_SWITCH_OFFLOAD) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ @@ -159,7 +161,9 @@ enum { */ #define NETIF_F_ONE_FOR_ALL (NETIF_F_GSO_SOFTWARE | NETIF_F_GSO_ROBUST | \ NETIF_F_SG | NETIF_F_HIGHDMA | \ - NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED) + NETIF_F_FRAGLIST | NETIF_F_VLAN_CHALLENGED | \ + NETIF_F_HW_SWITCH_OFFLOAD) + /* * If one device doesn't support one of these features, then disable it * for all in netdev_increment_features. -- cgit v1.2.3-70-g09d2 From add511b38266aa10c1079f9248854e6a415c4dc2 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 29 Jan 2015 22:40:12 -0800 Subject: bridge: add flags argument to ndo_bridge_setlink and ndo_bridge_dellink bridge flags are needed inside ndo_bridge_setlink/dellink handlers to avoid another call to parse IFLA_AF_SPEC inside these handlers This is used later in this series Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- drivers/net/ethernet/emulex/benet/be_main.c | 3 ++- drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 2 +- drivers/net/ethernet/rocker/rocker.c | 2 +- include/linux/netdevice.h | 6 ++++-- net/bridge/br_netlink.c | 4 ++-- net/bridge/br_private.h | 4 ++-- net/core/rtnetlink.c | 10 ++++++---- 7 files changed, 18 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/emulex/benet/be_main.c b/drivers/net/ethernet/emulex/benet/be_main.c index 598c5070c629..efed92c7b731 100644 --- a/drivers/net/ethernet/emulex/benet/be_main.c +++ b/drivers/net/ethernet/emulex/benet/be_main.c @@ -4327,7 +4327,8 @@ fw_exit: return status; } -static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh) +static int be_ndo_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, + u16 flags) { struct be_adapter *adapter = netdev_priv(dev); struct nlattr *attr, *br_spec; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 7bb421bfd84e..e4086fea4be2 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -7786,7 +7786,7 @@ static int ixgbe_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], } static int ixgbe_ndo_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh) + struct nlmsghdr *nlh, u16 flags) { struct ixgbe_adapter *adapter = netdev_priv(dev); struct nlattr *attr, *br_spec; diff --git a/drivers/net/ethernet/rocker/rocker.c b/drivers/net/ethernet/rocker/rocker.c index 11f4ffcc113d..f0d607ca5e73 100644 --- a/drivers/net/ethernet/rocker/rocker.c +++ b/drivers/net/ethernet/rocker/rocker.c @@ -3722,7 +3722,7 @@ skip: } static int rocker_port_bridge_setlink(struct net_device *dev, - struct nlmsghdr *nlh) + struct nlmsghdr *nlh, u16 flags) { struct rocker_port *rocker_port = netdev_priv(dev); struct nlattr *protinfo; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3d37c6eb1732..16251e96e6aa 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1154,13 +1154,15 @@ struct net_device_ops { int idx); int (*ndo_bridge_setlink)(struct net_device *dev, - struct nlmsghdr *nlh); + struct nlmsghdr *nlh, + u16 flags); int (*ndo_bridge_getlink)(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask); int (*ndo_bridge_dellink)(struct net_device *dev, - struct nlmsghdr *nlh); + struct nlmsghdr *nlh, + u16 flags); int (*ndo_change_carrier)(struct net_device *dev, bool new_carrier); int (*ndo_get_phys_port_id)(struct net_device *dev, diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index e08b260f33fe..088e80203845 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -494,7 +494,7 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[]) } /* Change state and parameters on port. 
*/ -int br_setlink(struct net_device *dev, struct nlmsghdr *nlh) +int br_setlink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { struct nlattr *protinfo; struct nlattr *afspec; @@ -550,7 +550,7 @@ out: } /* Delete port information */ -int br_dellink(struct net_device *dev, struct nlmsghdr *nlh) +int br_dellink(struct net_device *dev, struct nlmsghdr *nlh, u16 flags) { struct nlattr *afspec; struct net_bridge_port *p; diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index e8e3f3681680..de0919975a25 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -819,8 +819,8 @@ extern struct rtnl_link_ops br_link_ops; int br_netlink_init(void); void br_netlink_fini(void); void br_ifinfo_notify(int event, struct net_bridge_port *port); -int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg); -int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg); +int br_setlink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); +int br_dellink(struct net_device *dev, struct nlmsghdr *nlmsg, u16 flags); int br_getlink(struct sk_buff *skb, u32 pid, u32 seq, struct net_device *dev, u32 filter_mask); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index fedd7ab4085a..673cb4c6f391 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2991,7 +2991,7 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) goto out; } - err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + err = br_dev->netdev_ops->ndo_bridge_setlink(dev, nlh, flags); if (err) goto out; @@ -3002,7 +3002,8 @@ static int rtnl_bridge_setlink(struct sk_buff *skb, struct nlmsghdr *nlh) if (!dev->netdev_ops->ndo_bridge_setlink) err = -EOPNOTSUPP; else - err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh); + err = dev->netdev_ops->ndo_bridge_setlink(dev, nlh, + flags); if (!err) { flags &= ~BRIDGE_FLAGS_SELF; @@ -3064,7 +3065,7 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) goto out; } - err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + err = br_dev->netdev_ops->ndo_bridge_dellink(dev, nlh, flags); if (err) goto out; @@ -3075,7 +3076,8 @@ static int rtnl_bridge_dellink(struct sk_buff *skb, struct nlmsghdr *nlh) if (!dev->netdev_ops->ndo_bridge_dellink) err = -EOPNOTSUPP; else - err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh); + err = dev->netdev_ops->ndo_bridge_dellink(dev, nlh, + flags); if (!err) { flags &= ~BRIDGE_FLAGS_SELF; -- cgit v1.2.3-70-g09d2 From 8a44dbb202617aa66968ba74fdabf1b654dfe661 Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Thu, 29 Jan 2015 22:40:13 -0800 Subject: swdevice: add new apis to set and del bridge port attributes This patch adds two new api's netdev_switch_port_bridge_setlink and netdev_switch_port_bridge_dellink to offload bridge port attributes to switch port (The names of the apis look odd with 'switch_port_bridge', but am more inclined to change the prefix of the api to something else. Will take any suggestions). The api's look at the NETIF_F_HW_SWITCH_OFFLOAD feature flag to pass bridge port attributes to the port device. Signed-off-by: Roopa Prabhu Signed-off-by: David S. 
Miller --- include/net/switchdev.h | 37 +++++++++++++++- net/switchdev/switchdev.c | 110 ++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 146 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index 205e63698da9..cfcdac2e5d25 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -43,7 +43,14 @@ int register_netdev_switch_notifier(struct notifier_block *nb); int unregister_netdev_switch_notifier(struct notifier_block *nb); int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, struct netdev_switch_notifier_info *info); - +int netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); +int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags); #else static inline int netdev_switch_parent_id_get(struct net_device *dev, @@ -74,6 +81,34 @@ static inline int call_netdev_switch_notifiers(unsigned long val, struct net_dev return NOTIFY_DONE; } +static inline int netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) +{ + return -EOPNOTSUPP; +} + +static inline int netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) +{ + return -EOPNOTSUPP; +} + +static inline int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) +{ + return 0; +} + +static inline int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) +{ + return 0; +} + #endif #endif /* _LINUX_SWITCHDEV_H_ */ diff --git a/net/switchdev/switchdev.c b/net/switchdev/switchdev.c index 22e02f4edd99..8c1e558db118 100644 --- a/net/switchdev/switchdev.c +++ b/net/switchdev/switchdev.c @@ -115,3 +115,113 @@ int call_netdev_switch_notifiers(unsigned long val, struct net_device *dev, return err; } EXPORT_SYMBOL(call_netdev_switch_notifiers); + +/** + * netdev_switch_port_bridge_setlink - Notify switch device port of bridge + * port attributes + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge setlink flags + * + * Notify switch device port of bridge port attributes + */ +int netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return 0; + + if (!ops->ndo_bridge_setlink) + return -EOPNOTSUPP; + + return ops->ndo_bridge_setlink(dev, nlh, flags); +} +EXPORT_SYMBOL(netdev_switch_port_bridge_setlink); + +/** + * netdev_switch_port_bridge_dellink - Notify switch device port of bridge + * port attribute delete + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge setlink flags + * + * Notify switch device port of bridge port attribute delete + */ +int netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return 0; + + if (!ops->ndo_bridge_dellink) + return -EOPNOTSUPP; + + return ops->ndo_bridge_dellink(dev, nlh, flags); +} +EXPORT_SYMBOL(netdev_switch_port_bridge_dellink); + +/** + * 
ndo_dflt_netdev_switch_port_bridge_setlink - default ndo bridge setlink + * op for master devices + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge setlink flags + * + * Notify master device slaves of bridge port attributes + */ +int ndo_dflt_netdev_switch_port_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + struct net_device *lower_dev; + struct list_head *iter; + int ret = 0, err = 0; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return ret; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = netdev_switch_port_bridge_setlink(lower_dev, nlh, flags); + if (err && err != -EOPNOTSUPP) + ret = err; + } + + return ret; +} +EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_setlink); + +/** + * ndo_dflt_netdev_switch_port_bridge_dellink - default ndo bridge dellink + * op for master devices + * + * @dev: port device + * @nlh: netlink msg with bridge port attributes + * @flags: bridge dellink flags + * + * Notify master device slaves of bridge port attribute deletes + */ +int ndo_dflt_netdev_switch_port_bridge_dellink(struct net_device *dev, + struct nlmsghdr *nlh, u16 flags) +{ + struct net_device *lower_dev; + struct list_head *iter; + int ret = 0, err = 0; + + if (!(dev->features & NETIF_F_HW_SWITCH_OFFLOAD)) + return ret; + + netdev_for_each_lower_dev(dev, lower_dev, iter) { + err = netdev_switch_port_bridge_dellink(lower_dev, nlh, flags); + if (err && err != -EOPNOTSUPP) + ret = err; + } + + return ret; +} +EXPORT_SYMBOL(ndo_dflt_netdev_switch_port_bridge_dellink); -- cgit v1.2.3-70-g09d2 From 2d28cfe7aada495f87bb439151e9bcc86998fb6d Mon Sep 17 00:00:00 2001 From: Jakub Pawlowski Date: Sun, 1 Feb 2015 23:07:54 -0800 Subject: Bluetooth: Add le_scan_restart work for LE scan restarting Currently there is no way to restart le scan, and it's needed in service scan method. The way it work: it disable, and then enable le scan on controller. During the restart, we must remember when the scan was started, and it's duration, to later re-schedule the le_scan_disable work, that was stopped during the stop scan phase. 
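This machinery only comes into play on controllers that filter duplicate advertising reports in hardware, which a driver declares through the HCI_QUIRK_STRICT_DUPLICATE_FILTER quirk checked throughout the code below. A sketch of the driver side, with a hypothetical setup callback name:

#include <net/bluetooth/hci_core.h>

/* Hypothetical vendor setup hook; setting the quirk bit is the point. */
static int example_vendor_setup(struct hci_dev *hdev)
{
        /* This controller reports each advertiser only once per scan, so
         * service discovery has to restart scanning (the work item added
         * here) to ever see an updated RSSI for a known device. */
        set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
        return 0;
}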
Signed-off-by: Jakub Pawlowski Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 5 +++ net/bluetooth/hci_core.c | 74 ++++++++++++++++++++++++++++++++++++++++ net/bluetooth/mgmt.c | 19 ++++++++++- 3 files changed, 97 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index a37e10f4e2b3..d3a232be9d9b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -79,6 +79,8 @@ struct discovery_state { s8 rssi; u16 uuid_count; u8 (*uuids)[16]; + unsigned long scan_start; + unsigned long scan_duration; }; struct hci_conn_hash { @@ -354,6 +356,7 @@ struct hci_dev { unsigned long dev_flags; struct delayed_work le_scan_disable; + struct delayed_work le_scan_restart; __s8 adv_tx_power; __u8 adv_data[HCI_MAX_AD_LENGTH]; @@ -531,6 +534,8 @@ static inline void hci_discovery_filter_clear(struct hci_dev *hdev) hdev->discovery.uuid_count = 0; kfree(hdev->discovery.uuids); hdev->discovery.uuids = NULL; + hdev->discovery.scan_start = 0; + hdev->discovery.scan_duration = 0; } bool hci_discovery_active(struct hci_dev *hdev); diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index f045c062f8f0..3322d3f4c85a 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -1617,6 +1617,7 @@ static int hci_dev_do_close(struct hci_dev *hdev) cancel_delayed_work(&hdev->service_cache); cancel_delayed_work_sync(&hdev->le_scan_disable); + cancel_delayed_work_sync(&hdev->le_scan_restart); if (test_bit(HCI_MGMT, &hdev->dev_flags)) cancel_delayed_work_sync(&hdev->rpa_expired); @@ -2830,6 +2831,8 @@ static void le_scan_disable_work_complete(struct hci_dev *hdev, u8 status, return; } + hdev->discovery.scan_start = 0; + switch (hdev->discovery.type) { case DISCOV_TYPE_LE: hci_dev_lock(hdev); @@ -2869,6 +2872,8 @@ static void le_scan_disable_work(struct work_struct *work) BT_DBG("%s", hdev->name); + cancel_delayed_work_sync(&hdev->le_scan_restart); + hci_req_init(&req, hdev); hci_req_add_le_scan_disable(&req); @@ -2878,6 +2883,74 @@ static void le_scan_disable_work(struct work_struct *work) BT_ERR("Disable LE scanning request failed: err %d", err); } +static void le_scan_restart_work_complete(struct hci_dev *hdev, u8 status, + u16 opcode) +{ + unsigned long timeout, duration, scan_start, now; + + BT_DBG("%s", hdev->name); + + if (status) { + BT_ERR("Failed to restart LE scan: status %d", status); + return; + } + + if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks) || + !hdev->discovery.scan_start) + return; + + /* When the scan was started, hdev->le_scan_disable has been queued + * after duration from scan_start. During scan restart this job + * has been canceled, and we need to queue it again after proper + * timeout, to make sure that scan does not run indefinitely. 
+ */ + duration = hdev->discovery.scan_duration; + scan_start = hdev->discovery.scan_start; + now = jiffies; + if (now - scan_start <= duration) { + int elapsed; + + if (now >= scan_start) + elapsed = now - scan_start; + else + elapsed = ULONG_MAX - scan_start + now; + + timeout = duration - elapsed; + } else { + timeout = 0; + } + queue_delayed_work(hdev->workqueue, + &hdev->le_scan_disable, timeout); +} + +static void le_scan_restart_work(struct work_struct *work) +{ + struct hci_dev *hdev = container_of(work, struct hci_dev, + le_scan_restart.work); + struct hci_request req; + struct hci_cp_le_set_scan_enable cp; + int err; + + BT_DBG("%s", hdev->name); + + /* If controller is not scanning we are done. */ + if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + return; + + hci_req_init(&req, hdev); + + hci_req_add_le_scan_disable(&req); + + memset(&cp, 0, sizeof(cp)); + cp.enable = LE_SCAN_ENABLE; + cp.filter_dup = LE_SCAN_FILTER_DUP_ENABLE; + hci_req_add(&req, HCI_OP_LE_SET_SCAN_ENABLE, sizeof(cp), &cp); + + err = hci_req_run(&req, le_scan_restart_work_complete); + if (err) + BT_ERR("Restart LE scan request failed: err %d", err); +} + /* Copy the Identity Address of the controller. * * If the controller has a public BD_ADDR, then by default use that one. @@ -2974,6 +3047,7 @@ struct hci_dev *hci_alloc_dev(void) INIT_DELAYED_WORK(&hdev->power_off, hci_power_off); INIT_DELAYED_WORK(&hdev->discov_off, hci_discov_off); INIT_DELAYED_WORK(&hdev->le_scan_disable, le_scan_disable_work); + INIT_DELAYED_WORK(&hdev->le_scan_restart, le_scan_restart_work); skb_queue_head_init(&hdev->rx_q); skb_queue_head_init(&hdev->cmd_q); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index ba3b4a5820b1..8c2520a7f386 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -3896,6 +3896,9 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status, hci_discovery_set_state(hdev, DISCOVERY_FINDING); + /* If the scan involves LE scan, pick proper timeout to schedule + * hdev->le_scan_disable that will stop it. + */ switch (hdev->discovery.type) { case DISCOV_TYPE_LE: timeout = msecs_to_jiffies(DISCOV_LE_TIMEOUT); @@ -3912,9 +3915,23 @@ static void start_discovery_complete(struct hci_dev *hdev, u8 status, break; } - if (timeout) + if (timeout) { + /* When service discovery is used and the controller has + * a strict duplicate filter, it is important to remember + * the start and duration of the scan. This is required + * for restarting scanning during the discovery phase. + */ + if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, + &hdev->quirks) && + (hdev->discovery.uuid_count > 0 || + hdev->discovery.rssi != HCI_RSSI_INVALID)) { + hdev->discovery.scan_start = jiffies; + hdev->discovery.scan_duration = timeout; + } + queue_delayed_work(hdev->workqueue, &hdev->le_scan_disable, timeout); + } unlock: hci_dev_unlock(hdev); -- cgit v1.2.3-70-g09d2 From 4b0e0ceddf085a89173805cace44cd3c4c9d9d77 Mon Sep 17 00:00:00 2001 From: Jakub Pawlowski Date: Sun, 1 Feb 2015 23:07:55 -0800 Subject: Bluetooth: Add restarting to service discovery When using LE_SCAN_FILTER_DUP_ENABLE, some controllers would send advertising report from each LE device only once. That means that we don't get any updates on RSSI value, and makes Service Discovery very slow. This patch adds restarting scan when in Service Discovery, and device with filtered uuid is found, but it's not in RSSI range to send event yet. This way if device moves into range, we will quickly get RSSI update. 
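Condensed into a single predicate, the restart decision added to mgmt_device_found() is roughly the following; a restatement for readability, not the literal mgmt.c code:

#include <net/bluetooth/hci_core.h>

/* Restart the LE scan for a report whose EIR or scan response matched the
 * UUID filter, but only when the controller's strict duplicate filter would
 * otherwise hide any later report carrying a fresher RSSI. Whether this
 * particular report reaches userspace is still decided separately against
 * hdev->discovery.rssi. */
static bool service_discovery_wants_restart(struct hci_dev *hdev,
                                            bool uuid_match)
{
        if (!test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks))
                return false;
        if (!hdev->discovery.uuid_count)
                return false;
        return uuid_match;
}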
Signed-off-by: Jakub Pawlowski Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 1 + net/bluetooth/mgmt.c | 53 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 49 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d3a232be9d9b..52863c3e0b13 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -1334,6 +1334,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event); #define DISCOV_INTERLEAVED_TIMEOUT 5120 /* msec */ #define DISCOV_INTERLEAVED_INQUIRY_LEN 0x04 #define DISCOV_BREDR_INQUIRY_LEN 0x08 +#define DISCOV_LE_RESTART_DELAY msecs_to_jiffies(200) /* msec */ int mgmt_control(struct sock *sk, struct msghdr *msg, size_t len); int mgmt_new_settings(struct hci_dev *hdev); diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 8c2520a7f386..9e50b5c09b02 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7262,6 +7262,21 @@ static bool eir_has_uuids(u8 *eir, u16 eir_len, u16 uuid_count, u8 (*uuids)[16]) return false; } +static void restart_le_scan(struct hci_dev *hdev) +{ + /* If controller is not scanning we are done. */ + if (!test_bit(HCI_LE_SCAN, &hdev->dev_flags)) + return; + + if (time_after(jiffies + DISCOV_LE_RESTART_DELAY, + hdev->discovery.scan_start + + hdev->discovery.scan_duration)) + return; + + queue_delayed_work(hdev->workqueue, &hdev->le_scan_restart, + DISCOV_LE_RESTART_DELAY); +} + void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, u8 addr_type, u8 *dev_class, s8 rssi, u32 flags, u8 *eir, u16 eir_len, u8 *scan_rsp, u8 scan_rsp_len) @@ -7284,14 +7299,18 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, /* When using service discovery with a RSSI threshold, then check * if such a RSSI threshold is specified. If a RSSI threshold has - * been specified, then all results with a RSSI smaller than the - * RSSI threshold will be dropped. + * been specified, and HCI_QUIRK_STRICT_DUPLICATE_FILTER is not set, + * then all results with a RSSI smaller than the RSSI threshold will be + * dropped. If the quirk is set, let it through for further processing, + * as we might need to restart the scan. * * For BR/EDR devices (pre 1.2) providing no RSSI during inquiry, * the results are also dropped. */ if (hdev->discovery.rssi != HCI_RSSI_INVALID && - (rssi < hdev->discovery.rssi || rssi == HCI_RSSI_INVALID)) + (rssi == HCI_RSSI_INVALID || + (rssi < hdev->discovery.rssi && + !test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks)))) return; /* Make sure that the buffer is big enough. The 5 extra bytes @@ -7326,12 +7345,20 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, * kept and checking possible scan response data * will be skipped. */ - if (hdev->discovery.uuid_count > 0) + if (hdev->discovery.uuid_count > 0) { match = eir_has_uuids(eir, eir_len, hdev->discovery.uuid_count, hdev->discovery.uuids); - else + /* If duplicate filtering does not report RSSI changes, + * then restart scanning to ensure updated result with + * updated RSSI values. 
+ */ + if (match && test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, + &hdev->quirks)) + restart_le_scan(hdev); + } else { match = true; + } if (!match && !scan_rsp_len) return; @@ -7364,6 +7391,14 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, hdev->discovery.uuid_count, hdev->discovery.uuids)) return; + + /* If duplicate filtering does not report RSSI changes, + * then restart scanning to ensure updated result with + * updated RSSI values. + */ + if (test_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, + &hdev->quirks)) + restart_le_scan(hdev); } /* Append scan response data to event */ @@ -7377,6 +7412,14 @@ void mgmt_device_found(struct hci_dev *hdev, bdaddr_t *bdaddr, u8 link_type, return; } + /* Validate the reported RSSI value against the RSSI threshold once more + * incase HCI_QUIRK_STRICT_DUPLICATE_FILTER forced a restart of LE + * scanning. + */ + if (hdev->discovery.rssi != HCI_RSSI_INVALID && + rssi < hdev->discovery.rssi) + return; + ev->eir_len = cpu_to_le16(eir_len + scan_rsp_len); ev_size = sizeof(*ev) + eir_len + scan_rsp_len; -- cgit v1.2.3-70-g09d2 From 66f096f79166bcd56fe3c3607a51fb6aeff857b1 Mon Sep 17 00:00:00 2001 From: Johan Hedberg Date: Mon, 2 Feb 2015 13:23:42 +0200 Subject: Bluetooth: Remove mgmt_rp_read_local_oob_ext_data struct This extended return parameters struct conflicts with the new Read Local OOB Extended Data command definition. To avoid the conflict simply rename the old "extended" version to the normal one and update the code appropriately to take into account the two possible response PDU sizes. Signed-off-by: Johan Hedberg Signed-off-by: Marcel Holtmann --- include/net/bluetooth/mgmt.h | 4 ---- net/bluetooth/mgmt.c | 25 +++++++++---------------- 2 files changed, 9 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h index 95c34d5180fa..e218a30f2061 100644 --- a/include/net/bluetooth/mgmt.h +++ b/include/net/bluetooth/mgmt.h @@ -301,10 +301,6 @@ struct mgmt_cp_user_passkey_neg_reply { #define MGMT_OP_READ_LOCAL_OOB_DATA 0x0020 #define MGMT_READ_LOCAL_OOB_DATA_SIZE 0 struct mgmt_rp_read_local_oob_data { - __u8 hash[16]; - __u8 rand[16]; -} __packed; -struct mgmt_rp_read_local_oob_ext_data { __u8 hash192[16]; __u8 rand192[16]; __u8 hash256[16]; diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c index 9e50b5c09b02..9ec5390c85eb 100644 --- a/net/bluetooth/mgmt.c +++ b/net/bluetooth/mgmt.c @@ -7168,28 +7168,21 @@ void mgmt_read_local_oob_data_complete(struct hci_dev *hdev, u8 *hash192, cmd_status(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, mgmt_status(status)); } else { - if (bredr_sc_enabled(hdev) && hash256 && rand256) { - struct mgmt_rp_read_local_oob_ext_data rp; + struct mgmt_rp_read_local_oob_data rp; + size_t rp_size = sizeof(rp); - memcpy(rp.hash192, hash192, sizeof(rp.hash192)); - memcpy(rp.rand192, rand192, sizeof(rp.rand192)); + memcpy(rp.hash192, hash192, sizeof(rp.hash192)); + memcpy(rp.rand192, rand192, sizeof(rp.rand192)); + if (bredr_sc_enabled(hdev) && hash256 && rand256) { memcpy(rp.hash256, hash256, sizeof(rp.hash256)); memcpy(rp.rand256, rand256, sizeof(rp.rand256)); - - cmd_complete(cmd->sk, hdev->id, - MGMT_OP_READ_LOCAL_OOB_DATA, 0, - &rp, sizeof(rp)); } else { - struct mgmt_rp_read_local_oob_data rp; - - memcpy(rp.hash, hash192, sizeof(rp.hash)); - memcpy(rp.rand, rand192, sizeof(rp.rand)); - - cmd_complete(cmd->sk, hdev->id, - MGMT_OP_READ_LOCAL_OOB_DATA, 0, - &rp, sizeof(rp)); + rp_size -= sizeof(rp.hash256) + 
sizeof(rp.rand256); } + + cmd_complete(cmd->sk, hdev->id, MGMT_OP_READ_LOCAL_OOB_DATA, 0, + &rp, rp_size); } mgmt_pending_remove(cmd); -- cgit v1.2.3-70-g09d2 From 4aeee6871e8c3b043ef02996db8ac70a1af8be92 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 1 Feb 2015 22:26:08 +0100 Subject: NFC: nci: Add dynamic logical connections support The current NCI core only support the RF static connection. For other NFC features such as Secure Element communication, we may need to create logical connections to the NFCEE (Execution Environment. In order to track each logical connection ID dynamically, we add a linked list of connection info pointers to the nci_dev structure. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci_core.h | 32 ++++++++++++++++++++------ net/nfc/nci/core.c | 47 ++++++++++++++++++++++++++++++++------ net/nfc/nci/data.c | 56 +++++++++++++++++++++++++++++++++------------- net/nfc/nci/ntf.c | 37 +++++++++++++++++++++--------- net/nfc/nci/rsp.c | 20 ++++++++++++++++- 5 files changed, 152 insertions(+), 40 deletions(-) (limited to 'include') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 9e51bb4d841e..5e508741f208 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -82,6 +82,23 @@ struct nci_ops { #define NCI_MAX_SUPPORTED_RF_INTERFACES 4 #define NCI_MAX_DISCOVERED_TARGETS 10 +#define NCI_MAX_NUM_NFCEE 255 +#define NCI_MAX_CONN_ID 7 + +struct nci_conn_info { + struct list_head list; + __u8 id; /* can be an RF Discovery ID or an NFCEE ID */ + __u8 conn_id; + __u8 max_pkt_payload_len; + + atomic_t credits_cnt; + __u8 initial_num_credits; + + data_exchange_cb_t data_exchange_cb; + void *data_exchange_cb_context; + + struct sk_buff *rx_skb; +}; /* NCI Core structures */ struct nci_dev { @@ -95,7 +112,9 @@ struct nci_dev { unsigned long flags; atomic_t cmd_cnt; - atomic_t credits_cnt; + __u8 cur_conn_id; + + struct list_head conn_info_list; struct timer_list cmd_timer; struct timer_list data_timer; @@ -141,13 +160,10 @@ struct nci_dev { __u8 manufact_id; __u32 manufact_specific_info; - /* received during NCI_OP_RF_INTF_ACTIVATED_NTF */ - __u8 max_data_pkt_payload_size; - __u8 initial_num_credits; + /* Save RF Discovery ID or NFCEE ID under conn_create */ + __u8 cur_id; /* stored during nci_data_exchange */ - data_exchange_cb_t data_exchange_cb; - void *data_exchange_cb_context; struct sk_buff *rx_data_reassembly; /* stored during intf_activated_ntf */ @@ -200,7 +216,7 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb); int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload); int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb); void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, - int err); + __u8 conn_id, int err); void nci_clear_target_list(struct nci_dev *ndev); /* ----- NCI requests ----- */ @@ -209,6 +225,8 @@ void nci_clear_target_list(struct nci_dev *ndev); #define NCI_REQ_CANCELED 2 void nci_req_complete(struct nci_dev *ndev, int result); +struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, + int conn_id); /* ----- NCI status code ----- */ int nci_to_errno(__u8 code); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 51feb5e63008..eb607970bd56 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -45,6 +45,19 @@ static void nci_cmd_work(struct work_struct *work); static void nci_rx_work(struct work_struct *work); static void nci_tx_work(struct work_struct *work); 
+struct nci_conn_info *nci_get_conn_info_by_conn_id(struct nci_dev *ndev, + int conn_id) +{ + struct nci_conn_info *conn_info; + + list_for_each_entry(conn_info, &ndev->conn_info_list, list) { + if (conn_info->conn_id == conn_id) + return conn_info; + } + + return NULL; +} + /* ---- NCI requests ---- */ void nci_req_complete(struct nci_dev *ndev, int result) @@ -712,6 +725,11 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); int rc; + struct nci_conn_info *conn_info; + + conn_info = nci_get_conn_info_by_conn_id(ndev, NCI_STATIC_RF_CONN_ID); + if (!conn_info) + return -EPROTO; pr_debug("target_idx %d, len %d\n", target->idx, skb->len); @@ -724,8 +742,8 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, return -EBUSY; /* store cb and context to be used on receiving data */ - ndev->data_exchange_cb = cb; - ndev->data_exchange_cb_context = cb_context; + conn_info->data_exchange_cb = cb; + conn_info->data_exchange_cb_context = cb_context; rc = nci_send_data(ndev, NCI_STATIC_RF_CONN_ID, skb); if (rc) @@ -913,6 +931,7 @@ int nci_register_device(struct nci_dev *ndev) (unsigned long) ndev); mutex_init(&ndev->req_lock); + INIT_LIST_HEAD(&ndev->conn_info_list); rc = nfc_register_device(ndev->nfc_dev); if (rc) @@ -938,12 +957,19 @@ EXPORT_SYMBOL(nci_register_device); */ void nci_unregister_device(struct nci_dev *ndev) { + struct nci_conn_info *conn_info, *n; + nci_close_device(ndev); destroy_workqueue(ndev->cmd_wq); destroy_workqueue(ndev->rx_wq); destroy_workqueue(ndev->tx_wq); + list_for_each_entry_safe(conn_info, n, &ndev->conn_info_list, list) { + list_del(&conn_info->list); + /* conn_info is allocated with devm_kzalloc */ + } + nfc_unregister_device(ndev->nfc_dev); } EXPORT_SYMBOL(nci_unregister_device); @@ -1027,20 +1053,25 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload) static void nci_tx_work(struct work_struct *work) { struct nci_dev *ndev = container_of(work, struct nci_dev, tx_work); + struct nci_conn_info *conn_info; struct sk_buff *skb; - pr_debug("credits_cnt %d\n", atomic_read(&ndev->credits_cnt)); + conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_conn_id); + if (!conn_info) + return; + + pr_debug("credits_cnt %d\n", atomic_read(&conn_info->credits_cnt)); /* Send queued tx data */ - while (atomic_read(&ndev->credits_cnt)) { + while (atomic_read(&conn_info->credits_cnt)) { skb = skb_dequeue(&ndev->tx_q); if (!skb) return; /* Check if data flow control is used */ - if (atomic_read(&ndev->credits_cnt) != + if (atomic_read(&conn_info->credits_cnt) != NCI_DATA_FLOW_CONTROL_NOT_USED) - atomic_dec(&ndev->credits_cnt); + atomic_dec(&conn_info->credits_cnt); pr_debug("NCI TX: MT=data, PBF=%d, conn_id=%d, plen=%d\n", nci_pbf(skb->data), @@ -1092,7 +1123,9 @@ static void nci_rx_work(struct work_struct *work) if (test_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags)) { /* complete the data exchange transaction, if exists */ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) - nci_data_exchange_complete(ndev, NULL, -ETIMEDOUT); + nci_data_exchange_complete(ndev, NULL, + ndev->cur_conn_id, + -ETIMEDOUT); clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); } diff --git a/net/nfc/nci/data.c b/net/nfc/nci/data.c index a2de2a8cb00e..566466d90048 100644 --- a/net/nfc/nci/data.c +++ b/net/nfc/nci/data.c @@ -36,10 +36,20 @@ /* Complete data exchange transaction and forward skb to nfc core */ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, - int 
err) + __u8 conn_id, int err) { - data_exchange_cb_t cb = ndev->data_exchange_cb; - void *cb_context = ndev->data_exchange_cb_context; + struct nci_conn_info *conn_info; + data_exchange_cb_t cb; + void *cb_context; + + conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); + if (!conn_info) { + kfree_skb(skb); + goto exit; + } + + cb = conn_info->data_exchange_cb; + cb_context = conn_info->data_exchange_cb_context; pr_debug("len %d, err %d\n", skb ? skb->len : 0, err); @@ -48,9 +58,6 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, clear_bit(NCI_DATA_EXCHANGE_TO, &ndev->flags); if (cb) { - ndev->data_exchange_cb = NULL; - ndev->data_exchange_cb_context = NULL; - /* forward skb to nfc core */ cb(cb_context, skb, err); } else if (skb) { @@ -60,6 +67,7 @@ void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, kfree_skb(skb); } +exit: clear_bit(NCI_DATA_EXCHANGE, &ndev->flags); } @@ -85,6 +93,7 @@ static inline void nci_push_data_hdr(struct nci_dev *ndev, static int nci_queue_tx_data_frags(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) { + struct nci_conn_info *conn_info; int total_len = skb->len; unsigned char *data = skb->data; unsigned long flags; @@ -95,11 +104,17 @@ static int nci_queue_tx_data_frags(struct nci_dev *ndev, pr_debug("conn_id 0x%x, total_len %d\n", conn_id, total_len); + conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); + if (!conn_info) { + rc = -EPROTO; + goto free_exit; + } + __skb_queue_head_init(&frags_q); while (total_len) { frag_len = - min_t(int, total_len, ndev->max_data_pkt_payload_size); + min_t(int, total_len, conn_info->max_pkt_payload_len); skb_frag = nci_skb_alloc(ndev, (NCI_DATA_HDR_SIZE + frag_len), @@ -151,12 +166,19 @@ exit: /* Send NCI data */ int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) { + struct nci_conn_info *conn_info; int rc = 0; pr_debug("conn_id 0x%x, plen %d\n", conn_id, skb->len); + conn_info = nci_get_conn_info_by_conn_id(ndev, conn_id); + if (!conn_info) { + rc = -EPROTO; + goto free_exit; + } + /* check if the packet need to be fragmented */ - if (skb->len <= ndev->max_data_pkt_payload_size) { + if (skb->len <= conn_info->max_pkt_payload_len) { /* no need to fragment packet */ nci_push_data_hdr(ndev, conn_id, skb, NCI_PBF_LAST); @@ -170,6 +192,7 @@ int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb) } } + ndev->cur_conn_id = conn_id; queue_work(ndev->tx_wq, &ndev->tx_work); goto exit; @@ -185,7 +208,7 @@ exit: static void nci_add_rx_data_frag(struct nci_dev *ndev, struct sk_buff *skb, - __u8 pbf, __u8 status) + __u8 pbf, __u8 conn_id, __u8 status) { int reassembly_len; int err = 0; @@ -229,16 +252,13 @@ static void nci_add_rx_data_frag(struct nci_dev *ndev, } exit: - if (ndev->nfc_dev->rf_mode == NFC_RF_INITIATOR) { - nci_data_exchange_complete(ndev, skb, err); - } else if (ndev->nfc_dev->rf_mode == NFC_RF_TARGET) { + if (ndev->nfc_dev->rf_mode == NFC_RF_TARGET) { /* Data received in Target mode, forward to nfc core */ err = nfc_tm_data_received(ndev->nfc_dev, skb); if (err) pr_err("unable to handle received data\n"); } else { - pr_err("rf mode unknown\n"); - kfree_skb(skb); + nci_data_exchange_complete(ndev, skb, conn_id, err); } } @@ -247,6 +267,8 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u8 pbf = nci_pbf(skb->data); __u8 status = 0; + __u8 conn_id = nci_conn_id(skb->data); + struct nci_conn_info *conn_info; pr_debug("len %d\n", skb->len); @@ -255,6 +277,10 @@ void nci_rx_data_packet(struct 
nci_dev *ndev, struct sk_buff *skb) nci_conn_id(skb->data), nci_plen(skb->data)); + conn_info = nci_get_conn_info_by_conn_id(ndev, nci_conn_id(skb->data)); + if (!conn_info) + return; + /* strip the nci data header */ skb_pull(skb, NCI_DATA_HDR_SIZE); @@ -268,5 +294,5 @@ void nci_rx_data_packet(struct nci_dev *ndev, struct sk_buff *skb) skb_trim(skb, (skb->len - 1)); } - nci_add_rx_data_frag(ndev, skb, pbf, nci_to_errno(status)); + nci_add_rx_data_frag(ndev, skb, pbf, conn_id, nci_to_errno(status)); } diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 22e453cb787d..28fdbe234bd4 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -43,6 +43,7 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { struct nci_core_conn_credit_ntf *ntf = (void *) skb->data; + struct nci_conn_info *conn_info; int i; pr_debug("num_entries %d\n", ntf->num_entries); @@ -59,11 +60,13 @@ static void nci_core_conn_credits_ntf_packet(struct nci_dev *ndev, i, ntf->conn_entries[i].conn_id, ntf->conn_entries[i].credits); - if (ntf->conn_entries[i].conn_id == NCI_STATIC_RF_CONN_ID) { - /* found static rf connection */ - atomic_add(ntf->conn_entries[i].credits, - &ndev->credits_cnt); - } + conn_info = nci_get_conn_info_by_conn_id(ndev, + ntf->conn_entries[i].conn_id); + if (!conn_info) + return; + + atomic_add(ntf->conn_entries[i].credits, + &conn_info->credits_cnt); } /* trigger the next tx */ @@ -96,7 +99,7 @@ static void nci_core_conn_intf_error_ntf_packet(struct nci_dev *ndev, /* complete the data exchange transaction, if exists */ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) - nci_data_exchange_complete(ndev, NULL, -EIO); + nci_data_exchange_complete(ndev, NULL, ntf->conn_id, -EIO); } static __u8 *nci_extract_rf_params_nfca_passive_poll(struct nci_dev *ndev, @@ -513,6 +516,7 @@ static int nci_store_general_bytes_nfc_dep(struct nci_dev *ndev, static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { + struct nci_conn_info *conn_info; struct nci_rf_intf_activated_ntf ntf; __u8 *data = skb->data; int err = NCI_STATUS_OK; @@ -614,11 +618,17 @@ static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, exit: if (err == NCI_STATUS_OK) { - ndev->max_data_pkt_payload_size = ntf.max_data_pkt_payload_size; - ndev->initial_num_credits = ntf.initial_num_credits; + conn_info = nci_get_conn_info_by_conn_id(ndev, + NCI_STATIC_RF_CONN_ID); + if (!conn_info) + return; + + conn_info->max_pkt_payload_len = ntf.max_data_pkt_payload_size; + conn_info->initial_num_credits = ntf.initial_num_credits; /* set the available credits to initial value */ - atomic_set(&ndev->credits_cnt, ndev->initial_num_credits); + atomic_set(&conn_info->credits_cnt, + conn_info->initial_num_credits); /* store general bytes to be reported later in dep_link_up */ if (ntf.rf_interface == NCI_RF_INTERFACE_NFC_DEP) { @@ -661,10 +671,16 @@ exit: static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { + struct nci_conn_info *conn_info; struct nci_rf_deactivate_ntf *ntf = (void *) skb->data; pr_debug("entry, type 0x%x, reason 0x%x\n", ntf->type, ntf->reason); + conn_info = + nci_get_conn_info_by_conn_id(ndev, NCI_STATIC_RF_CONN_ID); + if (!conn_info) + return; + /* drop tx data queue */ skb_queue_purge(&ndev->tx_q); @@ -676,7 +692,8 @@ static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, /* complete the data exchange transaction, if exists */ if (test_bit(NCI_DATA_EXCHANGE, &ndev->flags)) - nci_data_exchange_complete(ndev, NULL, -EIO); + 
nci_data_exchange_complete(ndev, NULL, NCI_STATIC_RF_CONN_ID, + -EIO); switch (ntf->type) { case NCI_DEACTIVATE_TYPE_IDLE_MODE: diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 041de51ccdbe..93b914937263 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -140,13 +140,31 @@ static void nci_rf_disc_map_rsp_packet(struct nci_dev *ndev, static void nci_rf_disc_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { + struct nci_conn_info *conn_info; __u8 status = skb->data[0]; pr_debug("status 0x%x\n", status); - if (status == NCI_STATUS_OK) + if (status == NCI_STATUS_OK) { atomic_set(&ndev->state, NCI_DISCOVERY); + conn_info = nci_get_conn_info_by_conn_id(ndev, + NCI_STATIC_RF_CONN_ID); + if (!conn_info) { + conn_info = devm_kzalloc(&ndev->nfc_dev->dev, + sizeof(struct nci_conn_info), + GFP_KERNEL); + if (!conn_info) { + status = NCI_STATUS_REJECTED; + goto exit; + } + conn_info->conn_id = NCI_STATIC_RF_CONN_ID; + INIT_LIST_HEAD(&conn_info->list); + list_add(&conn_info->list, &ndev->conn_info_list); + } + } + +exit: nci_req_complete(ndev, status); } -- cgit v1.2.3-70-g09d2 From 8277f6937ae97c51ced5b54faa4934613c76999c Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 1 Feb 2015 22:26:09 +0100 Subject: NFC: nci: Add NCI NFCEE constants Add NFCEE NCI constant for: - NFCEE Interface/Protocols - Destination type - Destination-specific parameters type - NFCEE Discovery Action Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci.h | 48 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) (limited to 'include') diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index e7257a4653b4..6d99e8f79835 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -62,6 +62,25 @@ #define NCI_STATUS_NFCEE_PROTOCOL_ERROR 0xc2 #define NCI_STATUS_NFCEE_TIMEOUT_ERROR 0xc3 +/* NFCEE Interface/Protocols */ +#define NCI_NFCEE_INTERFACE_APDU 0x00 +#define NCI_NFCEE_INTERFACE_HCI_ACCESS 0x01 +#define NCI_NFCEE_INTERFACE_TYPE3_CMD_SET 0x02 +#define NCI_NFCEE_INTERFACE_TRANSPARENT 0x03 + +/* Destination type */ +#define NCI_DESTINATION_NFCC_LOOPBACK 0x01 +#define NCI_DESTINATION_REMOTE_NFC_ENDPOINT 0x02 +#define NCI_DESTINATION_NFCEE 0x03 + +/* Destination-specific parameters type */ +#define NCI_DESTINATION_SPECIFIC_PARAM_RF_TYPE 0x00 +#define NCI_DESTINATION_SPECIFIC_PARAM_NFCEE_TYPE 0x01 + +/* NFCEE Discovery Action */ +#define NCI_NFCEE_DISCOVERY_ACTION_DISABLE 0x00 +#define NCI_NFCEE_DISCOVERY_ACTION_ENABLE 0x01 + /* NCI RF Technology and Mode */ #define NCI_NFC_A_PASSIVE_POLL_MODE 0x00 #define NCI_NFC_B_PASSIVE_POLL_MODE 0x01 @@ -260,6 +279,11 @@ struct nci_rf_deactivate_cmd { __u8 type; } __packed; +#define NCI_OP_NFCEE_DISCOVER_CMD nci_opcode_pack(NCI_GID_NFCEE_MGMT, 0x00) +struct nci_nfcee_discover_cmd { + __u8 discovery_action; +} __packed; + /* ----------------------- */ /* ---- NCI Responses ---- */ /* ----------------------- */ @@ -303,6 +327,12 @@ struct nci_core_set_config_rsp { #define NCI_OP_RF_DEACTIVATE_RSP nci_opcode_pack(NCI_GID_RF_MGMT, 0x06) +#define NCI_OP_NFCEE_DISCOVER_RSP nci_opcode_pack(NCI_GID_NFCEE_MGMT, 0x00) +struct nci_nfcee_discover_rsp { + __u8 status; + __u8 num_nfcee; +} __packed; + /* --------------------------- */ /* ---- NCI Notifications ---- */ /* --------------------------- */ @@ -430,4 +460,22 @@ struct nci_rf_deactivate_ntf { __u8 reason; } __packed; +#define NCI_OP_NFCEE_DISCOVER_NTF nci_opcode_pack(NCI_GID_NFCEE_MGMT, 0x00) +struct nci_nfcee_supported_protocol { + __u8 
num_protocol; + __u8 supported_protocol[0]; +} __packed; + +struct nci_nfcee_information_tlv { + __u8 num_tlv; + __u8 information_tlv[0]; +} __packed; + +struct nci_nfcee_discover_ntf { + __u8 nfcee_id; + __u8 nfcee_status; + struct nci_nfcee_supported_protocol supported_protocols; + struct nci_nfcee_information_tlv information_tlv; +} __packed; + #endif /* __NCI_H */ -- cgit v1.2.3-70-g09d2 From af9c8aa67d07adcd3b41fb2934af7af056eabecf Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 1 Feb 2015 22:26:10 +0100 Subject: NFC: nci: Add NFCEE discover support NFCEEs (NFC Execution Environment) have to be explicitly discovered by sending the NCI_OP_NFCEE_DISCOVER_CMD command. The NFCC will respond to this command by telling us how many NFCEEs are connected to it. Then the NFCC sends a notification command for each and every NFCEE connected. Here we implement support for sending NCI_OP_NFCEE_DISCOVER_CMD command, receiving the response and the potential notifications. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci_core.h | 4 ++++ net/nfc/nci/core.c | 17 +++++++++++++++++ net/nfc/nci/ntf.c | 30 ++++++++++++++++++++++++++++++ net/nfc/nci/rsp.c | 21 +++++++++++++++++++++ 4 files changed, 72 insertions(+) (limited to 'include') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 5e508741f208..31ad795aa4b5 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -100,6 +100,8 @@ struct nci_conn_info { struct sk_buff *rx_skb; }; +#define NCI_INVALID_CONN_ID 0x80 + /* NCI Core structures */ struct nci_dev { struct nfc_dev *nfc_dev; @@ -182,6 +184,8 @@ void nci_unregister_device(struct nci_dev *ndev); int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb); int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); +int nci_nfcee_discover(struct nci_dev *ndev, u8 action); + static inline struct sk_buff *nci_skb_alloc(struct nci_dev *ndev, unsigned int len, gfp_t how) diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index eb607970bd56..a25857548524 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -469,6 +469,23 @@ int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val) } EXPORT_SYMBOL(nci_set_config); +static void nci_nfcee_discover_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_nfcee_discover_cmd cmd; + __u8 action = opt; + + cmd.discovery_action = action; + + nci_send_cmd(ndev, NCI_OP_NFCEE_DISCOVER_CMD, 1, &cmd); +} + +int nci_nfcee_discover(struct nci_dev *ndev, u8 action) +{ + return nci_request(ndev, nci_nfcee_discover_req, action, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_nfcee_discover); + static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 28fdbe234bd4..4c0be7e82d29 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -713,6 +713,33 @@ static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, nci_req_complete(ndev, NCI_STATUS_OK); } +static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + u8 status = NCI_STATUS_OK; + struct nci_conn_info *conn_info; + struct nci_nfcee_discover_ntf *nfcee_ntf = + (struct nci_nfcee_discover_ntf *)skb->data; + + pr_debug("\n"); + + conn_info = devm_kzalloc(&ndev->nfc_dev->dev, + sizeof(struct nci_conn_info), GFP_KERNEL); + if (!conn_info) { + status = NCI_STATUS_REJECTED; + goto exit; + } + + conn_info->id = nfcee_ntf->nfcee_id; 
+ conn_info->conn_id = NCI_INVALID_CONN_ID; + + INIT_LIST_HEAD(&conn_info->list); + list_add(&conn_info->list, &ndev->conn_info_list); + +exit: + nci_req_complete(ndev, status); +} + void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 ntf_opcode = nci_opcode(skb->data); @@ -751,6 +778,9 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_rf_deactivate_ntf_packet(ndev, skb); break; + case NCI_OP_NFCEE_DISCOVER_NTF: + nci_nfcee_discover_ntf_packet(ndev, skb); + break; default: pr_err("unknown ntf opcode 0x%x\n", ntf_opcode); break; diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 93b914937263..ee094dfab2ed 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -196,6 +196,23 @@ static void nci_rf_deactivate_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { } } +static void nci_nfcee_discover_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + struct nci_nfcee_discover_rsp *discover_rsp; + + if (skb->len != 2) { + nci_req_complete(ndev, NCI_STATUS_NFCEE_PROTOCOL_ERROR); + return; + } + + discover_rsp = (struct nci_nfcee_discover_rsp *)skb->data; + + if (discover_rsp->status != NCI_STATUS_OK || + discover_rsp->num_nfcee == 0) + nci_req_complete(ndev, discover_rsp->status); +} + void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 rsp_opcode = nci_opcode(skb->data); @@ -241,6 +258,10 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_rf_deactivate_rsp_packet(ndev, skb); break; + case NCI_OP_NFCEE_DISCOVER_RSP: + nci_nfcee_discover_rsp_packet(ndev, skb); + break; + default: pr_err("unknown rsp opcode 0x%x\n", rsp_opcode); break; -- cgit v1.2.3-70-g09d2 From f7f793f31378d5e83276871339c2a8374b0e8657 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 1 Feb 2015 22:26:11 +0100 Subject: NFC: nci: Add NFCEE enabling and disabling support NFCEEs can be enabled or disabled by sending the NCI_OP_NFCEE_MODE_SET_CMD command to the NFCC. This patch provides an API for drivers to enable and disable, for example, their NCI-discovered secure elements.
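For illustration only, a minimal sketch of how a driver holding a struct nci_dev might drive the NFCEE discovery and mode-set calls added by these patches. The helper name below is hypothetical, and a real driver would take the nfcee_id from the NFCEE_DISCOVER_NTF it is handed rather than passing an arbitrary value:

#include <net/nfc/nci.h>
#include <net/nfc/nci_core.h>

/* Hypothetical driver helper: start NFCEE discovery, then switch one
 * discovered NFCEE on. Both calls go through nci_request(), so each
 * blocks until the matching response arrives or the command times out.
 */
static int example_enable_nfcee(struct nci_dev *ndev, u8 nfcee_id)
{
        int r;

        r = nci_nfcee_discover(ndev, NCI_NFCEE_DISCOVERY_ACTION_ENABLE);
        if (r < 0)
                return r;

        return nci_nfcee_mode_set(ndev, nfcee_id, NCI_NFCEE_ENABLE);
}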
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci.h | 9 +++++++++ include/net/nfc/nci_core.h | 1 + net/nfc/nci/core.c | 21 +++++++++++++++++++++ net/nfc/nci/rsp.c | 13 +++++++++++++ 4 files changed, 44 insertions(+) (limited to 'include') diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index 6d99e8f79835..230f227bb319 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -284,6 +284,14 @@ struct nci_nfcee_discover_cmd { __u8 discovery_action; } __packed; +#define NCI_OP_NFCEE_MODE_SET_CMD nci_opcode_pack(NCI_GID_NFCEE_MGMT, 0x01) +#define NCI_NFCEE_DISABLE 0x00 +#define NCI_NFCEE_ENABLE 0x01 +struct nci_nfcee_mode_set_cmd { + __u8 nfcee_id; + __u8 nfcee_mode; +} __packed; + /* ----------------------- */ /* ---- NCI Responses ---- */ /* ----------------------- */ @@ -333,6 +341,7 @@ struct nci_nfcee_discover_rsp { __u8 num_nfcee; } __packed; +#define NCI_OP_NFCEE_MODE_SET_RSP nci_opcode_pack(NCI_GID_NFCEE_MGMT, 0x01) /* --------------------------- */ /* ---- NCI Notifications ---- */ /* --------------------------- */ diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 31ad795aa4b5..6cf6ee2b696d 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -185,6 +185,7 @@ int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb); int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); int nci_nfcee_discover(struct nci_dev *ndev, u8 action); +int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode); static inline struct sk_buff *nci_skb_alloc(struct nci_dev *ndev, unsigned int len, diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index a25857548524..e5fb8c8eed94 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -486,6 +486,27 @@ int nci_nfcee_discover(struct nci_dev *ndev, u8 action) } EXPORT_SYMBOL(nci_nfcee_discover); +static void nci_nfcee_mode_set_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_nfcee_mode_set_cmd *cmd = + (struct nci_nfcee_mode_set_cmd *)opt; + + nci_send_cmd(ndev, NCI_OP_NFCEE_MODE_SET_CMD, + sizeof(struct nci_nfcee_mode_set_cmd), cmd); +} + +int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode) +{ + struct nci_nfcee_mode_set_cmd cmd; + + cmd.nfcee_id = nfcee_id; + cmd.nfcee_mode = nfcee_mode; + + return nci_request(ndev, nci_nfcee_mode_set_req, (unsigned long)&cmd, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_nfcee_mode_set); + static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index ee094dfab2ed..0a3e98240dd6 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -213,6 +213,15 @@ static void nci_nfcee_discover_rsp_packet(struct nci_dev *ndev, nci_req_complete(ndev, discover_rsp->status); } +static void nci_nfcee_mode_set_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + __u8 status = skb->data[0]; + + pr_debug("status 0x%x\n", status); + nci_req_complete(ndev, status); +} + void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 rsp_opcode = nci_opcode(skb->data); @@ -262,6 +271,10 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_nfcee_discover_rsp_packet(ndev, skb); break; + case NCI_OP_NFCEE_MODE_SET_RSP: + nci_nfcee_mode_set_rsp_packet(ndev, skb); + break; + default: pr_err("unknown rsp opcode 0x%x\n", rsp_opcode); break; -- cgit v1.2.3-70-g09d2 From 736bb9577407d3556d81c3c3cd57581cd3ae10ea Mon Sep 17 00:00:00 
2001 From: Christophe Ricard Date: Sun, 1 Feb 2015 22:26:12 +0100 Subject: NFC: nci: Support logical connections management In order to communicate with an NFCEE, we need to open a logical connection to it, by sending the NCI_OP_CORE_CONN_CREATE_CMD command to the NFCC. It's left up to the drivers to decide when to close an already opened logical connection. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci.h | 30 +++++++++++++++++++++++++++++ include/net/nfc/nci_core.h | 3 +++ net/nfc/nci/core.c | 38 +++++++++++++++++++++++++++++++++++++ net/nfc/nci/rsp.c | 47 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 118 insertions(+) (limited to 'include') diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index 230f227bb319..deac78b9a53c 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -243,6 +243,26 @@ struct nci_core_set_config_cmd { struct set_config_param param; /* support 1 param per cmd is enough */ } __packed; +#define NCI_OP_CORE_CONN_CREATE_CMD nci_opcode_pack(NCI_GID_CORE, 0x04) +struct dest_spec_params { + __u8 id; + __u8 protocol; +} __packed; + +struct core_conn_create_dest_spec_params { + __u8 type; + __u8 length; + struct dest_spec_params value; +} __packed; + +struct nci_core_conn_create_cmd { + __u8 destination_type; + __u8 number_destination_params; + struct core_conn_create_dest_spec_params params; +} __packed; + +#define NCI_OP_CORE_CONN_CLOSE_CMD nci_opcode_pack(NCI_GID_CORE, 0x05) + #define NCI_OP_RF_DISCOVER_MAP_CMD nci_opcode_pack(NCI_GID_RF_MGMT, 0x00) struct disc_map_config { __u8 rf_protocol; @@ -327,6 +347,16 @@ struct nci_core_set_config_rsp { __u8 params_id[0]; /* variable size array */ } __packed; +#define NCI_OP_CORE_CONN_CREATE_RSP nci_opcode_pack(NCI_GID_CORE, 0x04) +struct nci_core_conn_create_rsp { + __u8 status; + __u8 max_ctrl_pkt_payload_len; + __u8 credits; + __u8 conn_id; +} __packed; + +#define NCI_OP_CORE_CONN_CLOSE_RSP nci_opcode_pack(NCI_GID_CORE, 0x05) + #define NCI_OP_RF_DISCOVER_MAP_RSP nci_opcode_pack(NCI_GID_RF_MGMT, 0x00) #define NCI_OP_RF_DISCOVER_RSP nci_opcode_pack(NCI_GID_RF_MGMT, 0x03) diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 6cf6ee2b696d..8ba3e38e4167 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -186,6 +186,9 @@ int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); int nci_nfcee_discover(struct nci_dev *ndev, u8 action); int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode); +int nci_core_conn_create(struct nci_dev *ndev, + struct core_conn_create_dest_spec_params *params); +int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id); static inline struct sk_buff *nci_skb_alloc(struct nci_dev *ndev, unsigned int len, diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index e5fb8c8eed94..2a96ed68c7bb 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -507,6 +507,44 @@ int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode) } EXPORT_SYMBOL(nci_nfcee_mode_set); +static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_core_conn_create_cmd cmd; + struct core_conn_create_dest_spec_params *params = + (struct core_conn_create_dest_spec_params *)opt; + + cmd.destination_type = NCI_DESTINATION_NFCEE; + cmd.number_destination_params = 1; + memcpy(&cmd.params.type, params, + sizeof(struct core_conn_create_dest_spec_params)); + nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, + sizeof(struct 
nci_core_conn_create_cmd), &cmd); +} + +int nci_core_conn_create(struct nci_dev *ndev, + struct core_conn_create_dest_spec_params *params) +{ + ndev->cur_id = params->value.id; + return nci_request(ndev, nci_core_conn_create_req, + (unsigned long)params, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_core_conn_create); + +static void nci_core_conn_close_req(struct nci_dev *ndev, unsigned long opt) +{ + __u8 conn_id = opt; + + nci_send_cmd(ndev, NCI_OP_CORE_CONN_CLOSE_CMD, 1, &conn_id); +} + +int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id) +{ + return nci_request(ndev, nci_core_conn_close_req, conn_id, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); +} +EXPORT_SYMBOL(nci_core_conn_close); + static int nci_set_local_general_bytes(struct nfc_dev *nfc_dev) { struct nci_dev *ndev = nfc_get_drvdata(nfc_dev); diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 0a3e98240dd6..31ccf7d05e82 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -222,6 +222,45 @@ static void nci_nfcee_mode_set_rsp_packet(struct nci_dev *ndev, nci_req_complete(ndev, status); } +static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + __u8 status = skb->data[0]; + struct nci_conn_info *conn_info; + struct nci_core_conn_create_rsp *rsp; + + pr_debug("status 0x%x\n", status); + + if (status == NCI_STATUS_OK) { + rsp = (struct nci_core_conn_create_rsp *)skb->data; + list_for_each_entry(conn_info, &ndev->conn_info_list, list) { + if (conn_info->id == ndev->cur_id) + break; + } + + if (!conn_info || conn_info->id != ndev->cur_id) { + status = NCI_STATUS_REJECTED; + goto exit; + } + + conn_info->conn_id = rsp->conn_id; + conn_info->max_pkt_payload_len = rsp->max_ctrl_pkt_payload_len; + atomic_set(&conn_info->credits_cnt, rsp->credits); + } + +exit: + nci_req_complete(ndev, status); +} + +static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + __u8 status = skb->data[0]; + + pr_debug("status 0x%x\n", status); + nci_req_complete(ndev, status); +} + void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 rsp_opcode = nci_opcode(skb->data); @@ -251,6 +290,14 @@ void nci_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) nci_core_set_config_rsp_packet(ndev, skb); break; + case NCI_OP_CORE_CONN_CREATE_RSP: + nci_core_conn_create_rsp_packet(ndev, skb); + break; + + case NCI_OP_CORE_CONN_CLOSE_RSP: + nci_core_conn_close_rsp_packet(ndev, skb); + break; + case NCI_OP_RF_DISCOVER_MAP_RSP: nci_rf_disc_map_rsp_packet(ndev, skb); break; -- cgit v1.2.3-70-g09d2 From 11f54f228643d0248ec00ce8c9fb8d872f87e7b8 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 1 Feb 2015 22:26:14 +0100 Subject: NFC: nci: Add HCI over NCI protocol support According to the NCI specification, one can use HCI over NCI to talk with specific NFCEE. The HCI network is viewed as one logical NFCEE. This is needed to support secure element running HCI only firmwares embedded on an NCI capable chipset, like e.g. the st21nfcb. There is some duplication between this piece of code and the HCI core code, but the latter would need to be abstracted even more to be able to use NCI as a logical transport for HCP packets. 
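For illustration only, a rough sketch of how an NCI driver might use this HCI plumbing once the NFCC has reported its HCI-access NFCEE and the logical connection to it has been created. It assumes the driver has filled in ndev->hci_dev->init_data (admin gate first, plus a session-id string); the registry index used below mirrors the session-identity index that hci.c keeps private, so it is defined locally here just to make the example concrete:

#include <linux/skbuff.h>
#include <net/nfc/nci.h>
#include <net/nfc/nci_core.h>

/* Assumed registry index on the admin gate (the real define,
 * NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, is local to hci.c).
 */
#define EXAMPLE_SESSION_IDENTITY_IDX    0x01

static int example_hci_bring_up(struct nci_dev *ndev)
{
        struct sk_buff *skb;
        int r;

        /* Connect the gates listed in init_data and create or restore
         * the HCI session (the admin gate must be the first entry).
         */
        r = nci_hci_dev_session_init(ndev);
        if (r < 0)
                return r;

        /* Read one registry entry back over the admin pipe. */
        r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE,
                              EXAMPLE_SESSION_IDENTITY_IDX, &skb);
        if (r != NCI_STATUS_OK)
                return r;

        kfree_skb(skb);
        return 0;
}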
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci_core.h | 93 ++++++ net/nfc/nci/Makefile | 2 +- net/nfc/nci/core.c | 20 +- net/nfc/nci/hci.c | 686 +++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nci/ntf.c | 32 ++- 5 files changed, 815 insertions(+), 18 deletions(-) create mode 100644 net/nfc/nci/hci.c (limited to 'include') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 8ba3e38e4167..be858870dace 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -78,6 +78,11 @@ struct nci_ops { int (*se_io)(struct nci_dev *ndev, u32 se_idx, u8 *apdu, size_t apdu_length, se_io_cb_t cb, void *cb_context); + int (*hci_load_session)(struct nci_dev *ndev); + void (*hci_event_received)(struct nci_dev *ndev, u8 pipe, u8 event, + struct sk_buff *skb); + void (*hci_cmd_received)(struct nci_dev *ndev, u8 pipe, u8 cmd, + struct sk_buff *skb); }; #define NCI_MAX_SUPPORTED_RF_INTERFACES 4 @@ -102,10 +107,77 @@ struct nci_conn_info { #define NCI_INVALID_CONN_ID 0x80 +#define NCI_HCI_ANY_OPEN_PIPE 0x03 + +/* Gates */ +#define NCI_HCI_ADMIN_GATE 0x00 +#define NCI_HCI_LINK_MGMT_GATE 0x06 + +/* Pipes */ +#define NCI_HCI_LINK_MGMT_PIPE 0x00 +#define NCI_HCI_ADMIN_PIPE 0x01 + +/* Generic responses */ +#define NCI_HCI_ANY_OK 0x00 +#define NCI_HCI_ANY_E_NOT_CONNECTED 0x01 +#define NCI_HCI_ANY_E_CMD_PAR_UNKNOWN 0x02 +#define NCI_HCI_ANY_E_NOK 0x03 +#define NCI_HCI_ANY_E_PIPES_FULL 0x04 +#define NCI_HCI_ANY_E_REG_PAR_UNKNOWN 0x05 +#define NCI_HCI_ANY_E_PIPE_NOT_OPENED 0x06 +#define NCI_HCI_ANY_E_CMD_NOT_SUPPORTED 0x07 +#define NCI_HCI_ANY_E_INHIBITED 0x08 +#define NCI_HCI_ANY_E_TIMEOUT 0x09 +#define NCI_HCI_ANY_E_REG_ACCESS_DENIED 0x0a +#define NCI_HCI_ANY_E_PIPE_ACCESS_DENIED 0x0b + +#define NCI_HCI_DO_NOT_OPEN_PIPE 0x81 +#define NCI_HCI_INVALID_PIPE 0x80 +#define NCI_HCI_INVALID_GATE 0xFF +#define NCI_HCI_INVALID_HOST 0x80 + +#define NCI_HCI_MAX_CUSTOM_GATES 50 +#define NCI_HCI_MAX_PIPES 127 + +struct nci_hci_gate { + u8 gate; + u8 pipe; + u8 dest_host; +} __packed; + +struct nci_hci_pipe { + u8 gate; + u8 host; +} __packed; + +struct nci_hci_init_data { + u8 gate_count; + struct nci_hci_gate gates[NCI_HCI_MAX_CUSTOM_GATES]; + char session_id[9]; +}; + +#define NCI_HCI_MAX_GATES 256 + +struct nci_hci_dev { + struct nci_dev *ndev; + struct nci_conn_info *conn_info; + + struct nci_hci_init_data init_data; + struct nci_hci_pipe pipes[NCI_HCI_MAX_PIPES]; + u8 gate2pipe[NCI_HCI_MAX_GATES]; + int expected_pipes; + int count_pipes; + + struct sk_buff_head rx_hcp_frags; + struct work_struct msg_rx_work; + struct sk_buff_head msg_rx_queue; +}; + /* NCI Core structures */ struct nci_dev { struct nfc_dev *nfc_dev; struct nci_ops *ops; + struct nci_hci_dev *hci_dev; int tx_headroom; int tx_tailroom; @@ -181,6 +253,10 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, void nci_free_device(struct nci_dev *ndev); int nci_register_device(struct nci_dev *ndev); void nci_unregister_device(struct nci_dev *ndev); +int nci_request(struct nci_dev *ndev, + void (*req)(struct nci_dev *ndev, + unsigned long opt), + unsigned long opt, __u32 timeout); int nci_recv_frame(struct nci_dev *ndev, struct sk_buff *skb); int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); @@ -190,6 +266,21 @@ int nci_core_conn_create(struct nci_dev *ndev, struct core_conn_create_dest_spec_params *params); int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id); +struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev); +int nci_hci_send_event(struct 
nci_dev *ndev, u8 gate, u8 event, + const u8 *param, size_t param_len); +int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, + u8 cmd, const u8 *param, size_t param_len, + struct sk_buff **skb); +int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe); +int nci_hci_connect_gate(struct nci_dev *ndev, u8 dest_host, + u8 dest_gate, u8 pipe); +int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, + const u8 *param, size_t param_len); +int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, + struct sk_buff **skb); +int nci_hci_dev_session_init(struct nci_dev *ndev); + static inline struct sk_buff *nci_skb_alloc(struct nci_dev *ndev, unsigned int len, gfp_t how) @@ -225,6 +316,8 @@ int nci_send_cmd(struct nci_dev *ndev, __u16 opcode, __u8 plen, void *payload); int nci_send_data(struct nci_dev *ndev, __u8 conn_id, struct sk_buff *skb); void nci_data_exchange_complete(struct nci_dev *ndev, struct sk_buff *skb, __u8 conn_id, int err); +void nci_hci_data_received_cb(void *context, struct sk_buff *skb, int err); + void nci_clear_target_list(struct nci_dev *ndev); /* ----- NCI requests ----- */ diff --git a/net/nfc/nci/Makefile b/net/nfc/nci/Makefile index 7aeedc43187d..7ed8949266cc 100644 --- a/net/nfc/nci/Makefile +++ b/net/nfc/nci/Makefile @@ -4,6 +4,6 @@ obj-$(CONFIG_NFC_NCI) += nci.o -nci-objs := core.o data.o lib.o ntf.o rsp.o +nci-objs := core.o data.o lib.o ntf.o rsp.o hci.o nci-$(CONFIG_NFC_NCI_SPI) += spi.o diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 2a96ed68c7bb..f74d420e2ead 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -122,10 +122,10 @@ static int __nci_request(struct nci_dev *ndev, return rc; } -static inline int nci_request(struct nci_dev *ndev, - void (*req)(struct nci_dev *ndev, - unsigned long opt), - unsigned long opt, __u32 timeout) +inline int nci_request(struct nci_dev *ndev, + void (*req)(struct nci_dev *ndev, + unsigned long opt), + unsigned long opt, __u32 timeout) { int rc; @@ -901,7 +901,6 @@ static struct nfc_ops nci_nfc_ops = { }; /* ---- Interface to NCI drivers ---- */ - /** * nci_allocate_device - allocate a new nci device * @@ -936,13 +935,20 @@ struct nci_dev *nci_allocate_device(struct nci_ops *ops, tx_headroom + NCI_DATA_HDR_SIZE, tx_tailroom); if (!ndev->nfc_dev) - goto free_exit; + goto free_nci; + + ndev->hci_dev = nci_hci_allocate(ndev); + if (!ndev->hci_dev) + goto free_nfc; nfc_set_drvdata(ndev->nfc_dev, ndev); return ndev; -free_exit: +free_nfc: + kfree(ndev->nfc_dev); + +free_nci: kfree(ndev); return NULL; } diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c new file mode 100644 index 000000000000..ecf253942606 --- /dev/null +++ b/net/nfc/nci/hci.c @@ -0,0 +1,686 @@ +/* + * The NFC Controller Interface is the communication protocol between an + * NFC Controller (NFCC) and a Device Host (DH). + * This is the HCI over NCI implementation, as specified in the 10.2 + * section of the NCI 1.1 specification. + * + * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see . + * + */ + +#include + +#include "../nfc.h" +#include +#include +#include + +struct nci_data { + u8 conn_id; + u8 pipe; + u8 cmd; + const u8 *data; + u32 data_len; +} __packed; + +struct nci_hci_create_pipe_params { + u8 src_gate; + u8 dest_host; + u8 dest_gate; +} __packed; + +struct nci_hci_create_pipe_resp { + u8 src_host; + u8 src_gate; + u8 dest_host; + u8 dest_gate; + u8 pipe; +} __packed; + +struct nci_hci_delete_pipe_noti { + u8 pipe; +} __packed; + +struct nci_hci_all_pipe_cleared_noti { + u8 host; +} __packed; + +struct nci_hcp_message { + u8 header; /* type -cmd,evt,rsp- + instruction */ + u8 data[]; +} __packed; + +struct nci_hcp_packet { + u8 header; /* cbit+pipe */ + struct nci_hcp_message message; +} __packed; + +#define NCI_HCI_ANY_SET_PARAMETER 0x01 +#define NCI_HCI_ANY_GET_PARAMETER 0x02 +#define NCI_HCI_ANY_CLOSE_PIPE 0x04 + +#define NCI_HFP_NO_CHAINING 0x80 + +#define NCI_NFCEE_ID_HCI 0x80 + +#define NCI_EVT_HOT_PLUG 0x03 + +#define NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY 0x01 + +/* HCP headers */ +#define NCI_HCI_HCP_PACKET_HEADER_LEN 1 +#define NCI_HCI_HCP_MESSAGE_HEADER_LEN 1 +#define NCI_HCI_HCP_HEADER_LEN 2 + +/* HCP types */ +#define NCI_HCI_HCP_COMMAND 0x00 +#define NCI_HCI_HCP_EVENT 0x01 +#define NCI_HCI_HCP_RESPONSE 0x02 + +#define NCI_HCI_ADM_NOTIFY_PIPE_CREATED 0x12 +#define NCI_HCI_ADM_NOTIFY_PIPE_DELETED 0x13 +#define NCI_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED 0x15 + +#define NCI_HCI_FRAGMENT 0x7f +#define NCI_HCP_HEADER(type, instr) ((((type) & 0x03) << 6) |\ + ((instr) & 0x3f)) + +#define NCI_HCP_MSG_GET_TYPE(header) ((header & 0xc0) >> 6) +#define NCI_HCP_MSG_GET_CMD(header) (header & 0x3f) +#define NCI_HCP_MSG_GET_PIPE(header) (header & 0x7f) + +/* HCI core */ +static void nci_hci_reset_pipes(struct nci_hci_dev *hdev) +{ + int i; + + for (i = 0; i < NCI_HCI_MAX_PIPES; i++) { + hdev->pipes[i].gate = NCI_HCI_INVALID_GATE; + hdev->pipes[i].host = NCI_HCI_INVALID_HOST; + } + memset(hdev->gate2pipe, NCI_HCI_INVALID_PIPE, sizeof(hdev->gate2pipe)); +} + +static void nci_hci_reset_pipes_per_host(struct nci_dev *ndev, u8 host) +{ + int i; + + for (i = 0; i < NCI_HCI_MAX_PIPES; i++) { + if (ndev->hci_dev->pipes[i].host == host) { + ndev->hci_dev->pipes[i].gate = NCI_HCI_INVALID_GATE; + ndev->hci_dev->pipes[i].host = NCI_HCI_INVALID_HOST; + } + } +} + +/* Fragment HCI data over NCI packet. + * NFC Forum NCI 10.2.2 Data Exchange: + * The payload of the Data Packets sent on the Logical Connection SHALL be + * valid HCP packets, as defined within [ETSI_102622]. Each Data Packet SHALL + * contain a single HCP packet. NCI Segmentation and Reassembly SHALL NOT be + * applied to Data Messages in either direction. The HCI fragmentation mechanism + * is used if required. 
+ */ +static int nci_hci_send_data(struct nci_dev *ndev, u8 pipe, + const u8 data_type, const u8 *data, + size_t data_len) +{ + struct nci_conn_info *conn_info; + struct sk_buff *skb; + int len, i, r; + u8 cb = pipe; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + skb = nci_skb_alloc(ndev, 2 + conn_info->max_pkt_payload_len + + NCI_DATA_HDR_SIZE, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, 2 + NCI_DATA_HDR_SIZE); + *skb_push(skb, 1) = data_type; + + i = 0; + len = conn_info->max_pkt_payload_len; + + do { + /* If last packet add NCI_HFP_NO_CHAINING */ + if (i + conn_info->max_pkt_payload_len - + (skb->len + 1) >= data_len) { + cb |= NCI_HFP_NO_CHAINING; + len = data_len - i; + } else { + len = conn_info->max_pkt_payload_len - skb->len - 1; + } + + *skb_push(skb, 1) = cb; + + if (len > 0) + memcpy(skb_put(skb, len), data + i, len); + + r = nci_send_data(ndev, conn_info->conn_id, skb); + if (r < 0) + return r; + + i += len; + if (i < data_len) { + skb_trim(skb, 0); + skb_pull(skb, len); + } + } while (i < data_len); + + return i; +} + +static void nci_hci_send_data_req(struct nci_dev *ndev, unsigned long opt) +{ + struct nci_data *data = (struct nci_data *)opt; + + nci_hci_send_data(ndev, data->pipe, data->cmd, + data->data, data->data_len); +} + +int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event, + const u8 *param, size_t param_len) +{ + u8 pipe = ndev->hci_dev->gate2pipe[gate]; + + if (pipe == NCI_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + return nci_hci_send_data(ndev, pipe, + NCI_HCP_HEADER(NCI_HCI_HCP_EVENT, event), + param, param_len); +} +EXPORT_SYMBOL(nci_hci_send_event); + +int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, u8 cmd, + const u8 *param, size_t param_len, + struct sk_buff **skb) +{ + struct nci_conn_info *conn_info; + struct nci_data data; + int r; + u8 pipe = ndev->hci_dev->gate2pipe[gate]; + + if (pipe == NCI_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + data.conn_id = conn_info->conn_id; + data.pipe = pipe; + data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, cmd); + data.data = param; + data.data_len = param_len; + + r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); + + if (r == NCI_STATUS_OK) + *skb = conn_info->rx_skb; + + return r; +} +EXPORT_SYMBOL(nci_hci_send_cmd); + +static void nci_hci_event_received(struct nci_dev *ndev, u8 pipe, + u8 event, struct sk_buff *skb) +{ + if (ndev->ops->hci_event_received) + ndev->ops->hci_event_received(ndev, pipe, event, skb); +} + +static void nci_hci_cmd_received(struct nci_dev *ndev, u8 pipe, + u8 cmd, struct sk_buff *skb) +{ + u8 gate = ndev->hci_dev->pipes[pipe].gate; + u8 status = NCI_HCI_ANY_OK | ~NCI_HCI_FRAGMENT; + u8 dest_gate, new_pipe; + struct nci_hci_create_pipe_resp *create_info; + struct nci_hci_delete_pipe_noti *delete_info; + struct nci_hci_all_pipe_cleared_noti *cleared_info; + + pr_debug("from gate %x pipe %x cmd %x\n", gate, pipe, cmd); + + switch (cmd) { + case NCI_HCI_ADM_NOTIFY_PIPE_CREATED: + if (skb->len != 5) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } + create_info = (struct nci_hci_create_pipe_resp *)skb->data; + dest_gate = create_info->dest_gate; + new_pipe = create_info->pipe; + + /* Save the new created pipe and bind with local gate, + * the description for skb->data[3] is destination gate id + * but since we received this cmd from host controller, we + * are the destination and it is our 
local gate + */ + ndev->hci_dev->gate2pipe[dest_gate] = new_pipe; + ndev->hci_dev->pipes[new_pipe].gate = dest_gate; + ndev->hci_dev->pipes[new_pipe].host = + create_info->src_host; + break; + case NCI_HCI_ANY_OPEN_PIPE: + /* If the pipe is not created report an error */ + if (gate == NCI_HCI_INVALID_GATE) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } + break; + case NCI_HCI_ADM_NOTIFY_PIPE_DELETED: + if (skb->len != 1) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } + delete_info = (struct nci_hci_delete_pipe_noti *)skb->data; + + ndev->hci_dev->pipes[delete_info->pipe].gate = + NCI_HCI_INVALID_GATE; + ndev->hci_dev->pipes[delete_info->pipe].host = + NCI_HCI_INVALID_HOST; + break; + case NCI_HCI_ADM_NOTIFY_ALL_PIPE_CLEARED: + if (skb->len != 1) { + status = NCI_HCI_ANY_E_NOK; + goto exit; + } + + cleared_info = + (struct nci_hci_all_pipe_cleared_noti *)skb->data; + nci_hci_reset_pipes_per_host(ndev, cleared_info->host); + break; + default: + pr_debug("Discarded unknown cmd %x to gate %x\n", cmd, gate); + break; + } + + if (ndev->ops->hci_cmd_received) + ndev->ops->hci_cmd_received(ndev, pipe, cmd, skb); + +exit: + nci_hci_send_data(ndev, pipe, status, NULL, 0); + + kfree_skb(skb); +} + +static void nci_hci_resp_received(struct nci_dev *ndev, u8 pipe, + u8 result, struct sk_buff *skb) +{ + struct nci_conn_info *conn_info; + u8 status = result; + + if (result != NCI_HCI_ANY_OK) + goto exit; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) { + status = NCI_STATUS_REJECTED; + goto exit; + } + + conn_info->rx_skb = skb; + +exit: + nci_req_complete(ndev, status); +} + +/* Receive hcp message for pipe, with type and cmd. + * skb contains optional message data only. + */ +static void nci_hci_hcp_message_rx(struct nci_dev *ndev, u8 pipe, + u8 type, u8 instruction, struct sk_buff *skb) +{ + switch (type) { + case NCI_HCI_HCP_RESPONSE: + nci_hci_resp_received(ndev, pipe, instruction, skb); + break; + case NCI_HCI_HCP_COMMAND: + nci_hci_cmd_received(ndev, pipe, instruction, skb); + break; + case NCI_HCI_HCP_EVENT: + nci_hci_event_received(ndev, pipe, instruction, skb); + break; + default: + pr_err("UNKNOWN MSG Type %d, instruction=%d\n", + type, instruction); + kfree_skb(skb); + break; + } + + nci_req_complete(ndev, 0); +} + +static void nci_hci_msg_rx_work(struct work_struct *work) +{ + struct nci_hci_dev *hdev = + container_of(work, struct nci_hci_dev, msg_rx_work); + struct sk_buff *skb; + struct nci_hcp_message *message; + u8 pipe, type, instruction; + + while ((skb = skb_dequeue(&hdev->msg_rx_queue)) != NULL) { + pipe = skb->data[0]; + skb_pull(skb, NCI_HCI_HCP_PACKET_HEADER_LEN); + message = (struct nci_hcp_message *)skb->data; + type = NCI_HCP_MSG_GET_TYPE(message->header); + instruction = NCI_HCP_MSG_GET_CMD(message->header); + skb_pull(skb, NCI_HCI_HCP_MESSAGE_HEADER_LEN); + + nci_hci_hcp_message_rx(hdev->ndev, pipe, + type, instruction, skb); + } +} + +void nci_hci_data_received_cb(void *context, + struct sk_buff *skb, int err) +{ + struct nci_dev *ndev = (struct nci_dev *)context; + struct nci_hcp_packet *packet; + u8 pipe, type, instruction; + struct sk_buff *hcp_skb; + struct sk_buff *frag_skb; + int msg_len; + + pr_debug("\n"); + + if (err) { + nci_req_complete(ndev, err); + return; + } + + packet = (struct nci_hcp_packet *)skb->data; + if ((packet->header & ~NCI_HCI_FRAGMENT) == 0) { + skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); + return; + } + + /* it's the last fragment. Does it need re-aggregation? 
*/ + if (skb_queue_len(&ndev->hci_dev->rx_hcp_frags)) { + pipe = packet->header & NCI_HCI_FRAGMENT; + skb_queue_tail(&ndev->hci_dev->rx_hcp_frags, skb); + + msg_len = 0; + skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { + msg_len += (frag_skb->len - + NCI_HCI_HCP_PACKET_HEADER_LEN); + } + + hcp_skb = nfc_alloc_recv_skb(NCI_HCI_HCP_PACKET_HEADER_LEN + + msg_len, GFP_KERNEL); + if (!hcp_skb) { + nci_req_complete(ndev, -ENOMEM); + return; + } + + *skb_put(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN) = pipe; + + skb_queue_walk(&ndev->hci_dev->rx_hcp_frags, frag_skb) { + msg_len = frag_skb->len - NCI_HCI_HCP_PACKET_HEADER_LEN; + memcpy(skb_put(hcp_skb, msg_len), frag_skb->data + + NCI_HCI_HCP_PACKET_HEADER_LEN, msg_len); + } + + skb_queue_purge(&ndev->hci_dev->rx_hcp_frags); + } else { + packet->header &= NCI_HCI_FRAGMENT; + hcp_skb = skb; + } + + /* if this is a response, dispatch immediately to + * unblock waiting cmd context. Otherwise, enqueue to dispatch + * in separate context where handler can also execute command. + */ + packet = (struct nci_hcp_packet *)hcp_skb->data; + type = NCI_HCP_MSG_GET_TYPE(packet->message.header); + if (type == NCI_HCI_HCP_RESPONSE) { + pipe = packet->header; + instruction = NCI_HCP_MSG_GET_CMD(packet->message.header); + skb_pull(hcp_skb, NCI_HCI_HCP_PACKET_HEADER_LEN + + NCI_HCI_HCP_MESSAGE_HEADER_LEN); + nci_hci_hcp_message_rx(ndev, pipe, type, instruction, hcp_skb); + } else { + skb_queue_tail(&ndev->hci_dev->msg_rx_queue, hcp_skb); + schedule_work(&ndev->hci_dev->msg_rx_work); + } +} + +int nci_hci_open_pipe(struct nci_dev *ndev, u8 pipe) +{ + struct nci_data data; + struct nci_conn_info *conn_info; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + data.conn_id = conn_info->conn_id; + data.pipe = pipe; + data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, + NCI_HCI_ANY_OPEN_PIPE); + data.data = NULL; + data.data_len = 0; + + return nci_request(ndev, nci_hci_send_data_req, + (unsigned long)&data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); +} +EXPORT_SYMBOL(nci_hci_open_pipe); + +int nci_hci_set_param(struct nci_dev *ndev, u8 gate, u8 idx, + const u8 *param, size_t param_len) +{ + struct nci_conn_info *conn_info; + struct nci_data data; + int r; + u8 *tmp; + u8 pipe = ndev->hci_dev->gate2pipe[gate]; + + pr_debug("idx=%d to gate %d\n", idx, gate); + + if (pipe == NCI_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + tmp = kmalloc(1 + param_len, GFP_KERNEL); + if (!tmp) + return -ENOMEM; + + *tmp = idx; + memcpy(tmp + 1, param, param_len); + + data.conn_id = conn_info->conn_id; + data.pipe = pipe; + data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, + NCI_HCI_ANY_SET_PARAMETER); + data.data = tmp; + data.data_len = param_len + 1; + + r = nci_request(ndev, nci_hci_send_data_req, + (unsigned long)&data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); + + kfree(tmp); + return r; +} +EXPORT_SYMBOL(nci_hci_set_param); + +int nci_hci_get_param(struct nci_dev *ndev, u8 gate, u8 idx, + struct sk_buff **skb) +{ + struct nci_conn_info *conn_info; + struct nci_data data; + int r; + u8 pipe = ndev->hci_dev->gate2pipe[gate]; + + pr_debug("idx=%d to gate %d\n", idx, gate); + + if (pipe == NCI_HCI_INVALID_PIPE) + return -EADDRNOTAVAIL; + + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + data.conn_id = conn_info->conn_id; + data.pipe = pipe; + data.cmd = NCI_HCP_HEADER(NCI_HCI_HCP_COMMAND, + NCI_HCI_ANY_GET_PARAMETER); + data.data = &idx; + data.data_len = 1; 
+ + r = nci_request(ndev, nci_hci_send_data_req, (unsigned long)&data, + msecs_to_jiffies(NCI_DATA_TIMEOUT)); + + if (r == NCI_STATUS_OK) + *skb = conn_info->rx_skb; + + return r; +} +EXPORT_SYMBOL(nci_hci_get_param); + +int nci_hci_connect_gate(struct nci_dev *ndev, + u8 dest_host, u8 dest_gate, u8 pipe) +{ + int r; + + if (pipe == NCI_HCI_DO_NOT_OPEN_PIPE) + return 0; + + if (ndev->hci_dev->gate2pipe[dest_gate] != NCI_HCI_INVALID_PIPE) + return -EADDRINUSE; + + if (pipe != NCI_HCI_INVALID_PIPE) + goto open_pipe; + + switch (dest_gate) { + case NCI_HCI_LINK_MGMT_GATE: + pipe = NCI_HCI_LINK_MGMT_PIPE; + break; + case NCI_HCI_ADMIN_GATE: + pipe = NCI_HCI_ADMIN_PIPE; + break; + } + +open_pipe: + r = nci_hci_open_pipe(ndev, pipe); + if (r < 0) + return r; + + ndev->hci_dev->pipes[pipe].gate = dest_gate; + ndev->hci_dev->pipes[pipe].host = dest_host; + ndev->hci_dev->gate2pipe[dest_gate] = pipe; + + return 0; +} +EXPORT_SYMBOL(nci_hci_connect_gate); + +static int nci_hci_dev_connect_gates(struct nci_dev *ndev, + u8 gate_count, + struct nci_hci_gate *gates) +{ + int r; + + while (gate_count--) { + r = nci_hci_connect_gate(ndev, gates->dest_host, + gates->gate, gates->pipe); + if (r < 0) + return r; + gates++; + } + + return 0; +} + +int nci_hci_dev_session_init(struct nci_dev *ndev) +{ + struct sk_buff *skb; + int r; + + ndev->hci_dev->count_pipes = 0; + ndev->hci_dev->expected_pipes = 0; + + nci_hci_reset_pipes(ndev->hci_dev); + + if (ndev->hci_dev->init_data.gates[0].gate != NCI_HCI_ADMIN_GATE) + return -EPROTO; + + r = nci_hci_connect_gate(ndev, + ndev->hci_dev->init_data.gates[0].dest_host, + ndev->hci_dev->init_data.gates[0].gate, + ndev->hci_dev->init_data.gates[0].pipe); + if (r < 0) + goto exit; + + r = nci_hci_get_param(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, &skb); + if (r < 0) + goto exit; + + if (skb->len && + skb->len == strlen(ndev->hci_dev->init_data.session_id) && + memcmp(ndev->hci_dev->init_data.session_id, + skb->data, skb->len) == 0 && + ndev->ops->hci_load_session) { + /* Restore gate<->pipe table from some proprietary location. 
*/ + r = ndev->ops->hci_load_session(ndev); + if (r < 0) + goto exit; + } else { + r = nci_hci_dev_connect_gates(ndev, + ndev->hci_dev->init_data.gate_count, + ndev->hci_dev->init_data.gates); + if (r < 0) + goto exit; + + r = nci_hci_set_param(ndev, NCI_HCI_ADMIN_GATE, + NCI_HCI_ADMIN_PARAM_SESSION_IDENTITY, + ndev->hci_dev->init_data.session_id, + strlen(ndev->hci_dev->init_data.session_id)); + } + if (r == 0) + goto exit; + +exit: + kfree_skb(skb); + + return r; +} +EXPORT_SYMBOL(nci_hci_dev_session_init); + +struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev) +{ + struct nci_hci_dev *hdev; + + hdev = kzalloc(sizeof(*hdev), GFP_KERNEL); + if (!hdev) + return NULL; + + skb_queue_head_init(&hdev->rx_hcp_frags); + INIT_WORK(&hdev->msg_rx_work, nci_hci_msg_rx_work); + skb_queue_head_init(&hdev->msg_rx_queue); + hdev->ndev = ndev; + + return hdev; +} diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 4c0be7e82d29..6e041ac49e17 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -723,18 +723,30 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, pr_debug("\n"); - conn_info = devm_kzalloc(&ndev->nfc_dev->dev, - sizeof(struct nci_conn_info), GFP_KERNEL); - if (!conn_info) { - status = NCI_STATUS_REJECTED; - goto exit; - } + /* NFCForum NCI 9.2.1 HCI Network Specific Handling + * If the NFCC supports the HCI Network, it SHALL return one, + * and only one, NFCEE_DISCOVER_NTF with a Protocol type of + * “HCI Access”, even if the HCI Network contains multiple NFCEEs. + */ + if (!ndev->hci_dev->conn_info) { + conn_info = devm_kzalloc(&ndev->nfc_dev->dev, + sizeof(*conn_info), GFP_KERNEL); + if (!conn_info) { + status = NCI_STATUS_REJECTED; + goto exit; + } - conn_info->id = nfcee_ntf->nfcee_id; - conn_info->conn_id = NCI_INVALID_CONN_ID; + conn_info->id = nfcee_ntf->nfcee_id; + conn_info->conn_id = NCI_INVALID_CONN_ID; - INIT_LIST_HEAD(&conn_info->list); - list_add(&conn_info->list, &ndev->conn_info_list); + conn_info->data_exchange_cb = nci_hci_data_received_cb; + conn_info->data_exchange_cb_context = ndev; + + INIT_LIST_HEAD(&conn_info->list); + list_add(&conn_info->list, &ndev->conn_info_list); + + ndev->hci_dev->conn_info = conn_info; + } exit: nci_req_complete(ndev, status); -- cgit v1.2.3-70-g09d2 From 447b27c4f29b510b98e99395120d635f009ed563 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 1 Feb 2015 22:26:16 +0100 Subject: NFC: Forward NFC_EVT_TRANSACTION to user space NFC_EVT_TRANSACTION is sent through netlink in order for a specific application running on a secure element to notify userspace of an event. Typically the secure element application counterpart on the host could interpret that event and act upon it. Forwarded information contains: - SE host generating the event - Application IDentifier doing the operation - Applications parameters Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nfc.h | 27 +++++++++++++++++++++++++++ include/uapi/linux/nfc.h | 1 + net/nfc/core.c | 21 +++++++++++++++++++++ net/nfc/netlink.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ net/nfc/nfc.h | 2 ++ 5 files changed, 98 insertions(+) (limited to 'include') diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 12adb817c27a..73190e65d5c1 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -135,6 +135,31 @@ struct nfc_se { u16 state; }; +/** + * nfc_evt_transaction - A struct for NFC secure element event transaction. 
+ * + * @aid: The application identifier triggering the event + * + * @aid_len: The application identifier length [5:16] + * + * @params: The application parameters transmitted during the transaction + * + * @params_len: The applications parameters length [0:255] + * + */ +#define NFC_MIN_AID_LENGTH 5 +#define NFC_MAX_AID_LENGTH 16 +#define NFC_MAX_PARAMS_LENGTH 255 + +#define NFC_EVT_TRANSACTION_AID_TAG 0x81 +#define NFC_EVT_TRANSACTION_PARAMS_TAG 0x82 +struct nfc_evt_transaction { + u32 aid_len; + u8 aid[NFC_MAX_AID_LENGTH]; + u8 params_len; + u8 params[NFC_MAX_PARAMS_LENGTH]; +} __packed; + struct nfc_genl_data { u32 poll_req_portid; struct mutex genl_data_mutex; @@ -262,6 +287,8 @@ int nfc_tm_data_received(struct nfc_dev *dev, struct sk_buff *skb); void nfc_driver_failure(struct nfc_dev *dev, int err); +int nfc_se_transaction(struct nfc_dev *dev, u8 se_idx, + struct nfc_evt_transaction *evt_transaction); int nfc_add_se(struct nfc_dev *dev, u32 se_idx, u16 type); int nfc_remove_se(struct nfc_dev *dev, u32 se_idx); struct nfc_se *nfc_find_se(struct nfc_dev *dev, u32 se_idx); diff --git a/include/uapi/linux/nfc.h b/include/uapi/linux/nfc.h index 8119255feae4..c1e2e63cf9b5 100644 --- a/include/uapi/linux/nfc.h +++ b/include/uapi/linux/nfc.h @@ -183,6 +183,7 @@ enum nfc_attrs { NFC_ATTR_SE_APDU, NFC_ATTR_TARGET_ISO15693_DSFID, NFC_ATTR_TARGET_ISO15693_UID, + NFC_ATTR_SE_PARAMS, /* private: internal use only */ __NFC_ATTR_AFTER_LAST }; diff --git a/net/nfc/core.c b/net/nfc/core.c index 7f1b6351755c..cff3f1614ad4 100644 --- a/net/nfc/core.c +++ b/net/nfc/core.c @@ -932,6 +932,27 @@ int nfc_remove_se(struct nfc_dev *dev, u32 se_idx) } EXPORT_SYMBOL(nfc_remove_se); +int nfc_se_transaction(struct nfc_dev *dev, u8 se_idx, + struct nfc_evt_transaction *evt_transaction) +{ + int rc; + + pr_debug("transaction: %x\n", se_idx); + + device_lock(&dev->dev); + + if (!evt_transaction) { + rc = -EPROTO; + goto out; + } + + rc = nfc_genl_se_transaction(dev, se_idx, evt_transaction); +out: + device_unlock(&dev->dev); + return rc; +} +EXPORT_SYMBOL(nfc_se_transaction); + static void nfc_release(struct device *d) { struct nfc_dev *dev = to_nfc_dev(d); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index be387e6219a0..14a2d11581da 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -497,6 +497,53 @@ free_msg: return -EMSGSIZE; } +int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx, + struct nfc_evt_transaction *evt_transaction) +{ + struct nfc_se *se; + struct sk_buff *msg; + void *hdr; + + msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); + if (!msg) + return -ENOMEM; + + hdr = genlmsg_put(msg, 0, 0, &nfc_genl_family, 0, + NFC_EVENT_SE_TRANSACTION); + if (!hdr) + goto free_msg; + + se = nfc_find_se(dev, se_idx); + if (!se) + goto free_msg; + + if (nla_put_u32(msg, NFC_ATTR_DEVICE_INDEX, dev->idx) || + nla_put_u32(msg, NFC_ATTR_SE_INDEX, se_idx) || + nla_put_u8(msg, NFC_ATTR_SE_TYPE, se->type) || + nla_put(msg, NFC_ATTR_SE_AID, evt_transaction->aid_len, + evt_transaction->aid) || + nla_put(msg, NFC_ATTR_SE_PARAMS, evt_transaction->params_len, + evt_transaction->params)) + goto nla_put_failure; + + /* evt_transaction is no more used */ + devm_kfree(&dev->dev, evt_transaction); + + genlmsg_end(msg, hdr); + + genlmsg_multicast(&nfc_genl_family, msg, 0, 0, GFP_KERNEL); + + return 0; + +nla_put_failure: + genlmsg_cancel(msg, hdr); +free_msg: + /* evt_transaction is no more used */ + devm_kfree(&dev->dev, evt_transaction); + nlmsg_free(msg); + return -EMSGSIZE; +} + static int 
nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, u32 portid, u32 seq, struct netlink_callback *cb, diff --git a/net/nfc/nfc.h b/net/nfc/nfc.h index 88d60064890e..a8ce80b47720 100644 --- a/net/nfc/nfc.h +++ b/net/nfc/nfc.h @@ -100,6 +100,8 @@ int nfc_genl_llc_send_sdres(struct nfc_dev *dev, struct hlist_head *sdres_list); int nfc_genl_se_added(struct nfc_dev *dev, u32 se_idx, u16 type); int nfc_genl_se_removed(struct nfc_dev *dev, u32 se_idx); +int nfc_genl_se_transaction(struct nfc_dev *dev, u8 se_idx, + struct nfc_evt_transaction *evt_transaction); struct nfc_dev *nfc_get_device(unsigned int idx); -- cgit v1.2.3-70-g09d2 From a41bb8448ebaebe1d0d9a268d340fad73c247e09 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 1 Feb 2015 22:26:17 +0100 Subject: NFC: nci: Add RF NFCEE action notification support The NFCC sends an NCI_OP_RF_NFCEE_ACTION_NTF notification to the host (DH) to let it know that for example an RF transaction with a payment reader is done. For now the notification handler is empty. Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci.h | 8 ++++++++ net/nfc/nci/ntf.c | 11 +++++++++++ 2 files changed, 19 insertions(+) (limited to 'include') diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index deac78b9a53c..6c1beb2704b1 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -499,6 +499,14 @@ struct nci_rf_deactivate_ntf { __u8 reason; } __packed; +#define NCI_OP_RF_NFCEE_ACTION_NTF nci_opcode_pack(NCI_GID_RF_MGMT, 0x09) +struct nci_rf_nfcee_action_ntf { + __u8 nfcee_id; + __u8 trigger; + __u8 supported_data_length; + __u8 supported_data[0]; +} __packed; + #define NCI_OP_NFCEE_DISCOVER_NTF nci_opcode_pack(NCI_GID_NFCEE_MGMT, 0x00) struct nci_nfcee_supported_protocol { __u8 num_protocol; diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 6e041ac49e17..5924b812fb6a 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -752,6 +752,12 @@ exit: nci_req_complete(ndev, status); } +static void nci_nfcee_action_ntf_packet(struct nci_dev *ndev, + struct sk_buff *skb) +{ + pr_debug("\n"); +} + void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { __u16 ntf_opcode = nci_opcode(skb->data); @@ -793,6 +799,11 @@ void nci_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) case NCI_OP_NFCEE_DISCOVER_NTF: nci_nfcee_discover_ntf_packet(ndev, skb); break; + + case NCI_OP_RF_NFCEE_ACTION_NTF: + nci_nfcee_action_ntf_packet(ndev, skb); + break; + default: pr_err("unknown ntf opcode 0x%x\n", ntf_opcode); break; -- cgit v1.2.3-70-g09d2 From 49ca0d8bfaf3bc46d5eef60ce67b00eb195bd392 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 30 Jan 2015 13:29:31 -0500 Subject: net-timestamp: no-payload option Add timestamping option SOF_TIMESTAMPING_OPT_TSONLY. For transmit timestamps, this loops timestamps on top of empty packets. Doing so reduces the pressure on SO_RCVBUF. Payload inspection and cmsg reception (aside from timestamps) are no longer possible. This works together with a follow on patch that allows administrators to only allow tx timestamping if it does not loop payload or metadata. Signed-off-by: Willem de Bruijn ---- Changes (rfc -> v1) - add documentation - remove unnecessary skb->len test (thanks to Richard Cochran) Signed-off-by: David S. 
Miller --- Documentation/networking/timestamping.txt | 21 +++++++++++++++++++++ include/uapi/linux/net_tstamp.h | 3 ++- net/core/skbuff.c | 19 ++++++++++++++----- net/ipv4/ip_sockglue.c | 7 ++++--- net/ipv6/datagram.c | 5 ++--- net/rxrpc/ar-error.c | 5 +++++ 6 files changed, 48 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index a5c784c89312..5f0922613f1a 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -162,6 +162,27 @@ SOF_TIMESTAMPING_OPT_CMSG: option IP_PKTINFO simultaneously. +SOF_TIMESTAMPING_OPT_TSONLY: + + Applies to transmit timestamps only. Makes the kernel return the + timestamp as a cmsg alongside an empty packet, as opposed to + alongside the original packet. This reduces the amount of memory + charged to the socket's receive budget (SO_RCVBUF) and delivers + the timestamp even if sysctl net.core.tstamp_allow_data is 0. + This option disables SOF_TIMESTAMPING_OPT_CMSG. + + +New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to +disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate +regardless of the setting of sysctl net.core.tstamp_allow_data. + +An exception is when a process needs additional cmsg data, for +instance SOL_IP/IP_PKTINFO to detect the egress network interface. +Then pass option SOF_TIMESTAMPING_OPT_CMSG. This option depends on +having access to the contents of the original packet, so cannot be +combined with SOF_TIMESTAMPING_OPT_TSONLY. + + 1.4 Bytestream Timestamps The SO_TIMESTAMPING interface supports timestamping of bytes in a diff --git a/include/uapi/linux/net_tstamp.h b/include/uapi/linux/net_tstamp.h index edbc888ceb51..6d1abea9746e 100644 --- a/include/uapi/linux/net_tstamp.h +++ b/include/uapi/linux/net_tstamp.h @@ -24,8 +24,9 @@ enum { SOF_TIMESTAMPING_TX_SCHED = (1<<8), SOF_TIMESTAMPING_TX_ACK = (1<<9), SOF_TIMESTAMPING_OPT_CMSG = (1<<10), + SOF_TIMESTAMPING_OPT_TSONLY = (1<<11), - SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_CMSG, + SOF_TIMESTAMPING_LAST = SOF_TIMESTAMPING_OPT_TSONLY, SOF_TIMESTAMPING_MASK = (SOF_TIMESTAMPING_LAST - 1) | SOF_TIMESTAMPING_LAST }; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 56db472e9b86..65a3798f43e6 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3710,19 +3710,28 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, struct sock *sk, int tstype) { struct sk_buff *skb; + bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; if (!sk) return; - if (hwtstamps) - *skb_hwtstamps(orig_skb) = *hwtstamps; + if (tsonly) + skb = alloc_skb(0, GFP_ATOMIC); else - orig_skb->tstamp = ktime_get_real(); - - skb = skb_clone(orig_skb, GFP_ATOMIC); + skb = skb_clone(orig_skb, GFP_ATOMIC); if (!skb) return; + if (tsonly) { + skb_shinfo(skb)->tx_flags = skb_shinfo(orig_skb)->tx_flags; + skb_shinfo(skb)->tskey = skb_shinfo(orig_skb)->tskey; + } + + if (hwtstamps) + *skb_hwtstamps(skb) = *hwtstamps; + else + skb->tstamp = ktime_get_real(); + __skb_complete_tx_timestamp(skb, sk, tstype); } EXPORT_SYMBOL_GPL(__skb_tstamp_tx); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index db5e0f81ce0a..31d8c71986b4 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -483,7 +483,7 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin) { + if (sin && skb->len) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = *(__be32 *)(skb_network_header(skb) + 
serr->addr_offset); @@ -496,8 +496,9 @@ int ip_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) sin = &errhdr.offender; memset(sin, 0, sizeof(*sin)); - if (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || - ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin)) { + if (skb->len && + (serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + ipv4_pktinfo_prepare_errqueue(sk, skb, serr->ee.ee_origin))) { sin->sin_family = AF_INET; sin->sin_addr.s_addr = ip_hdr(skb)->saddr; if (inet_sk(sk)->cmsg_flags) diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 49f5e73db122..c215be70cac0 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -369,7 +369,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin) { + if (sin && skb->len) { const unsigned char *nh = skb_network_header(skb); sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; @@ -394,8 +394,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) memcpy(&errhdr.ee, &serr->ee, sizeof(struct sock_extended_err)); sin = &errhdr.offender; memset(sin, 0, sizeof(*sin)); - - if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL) { + if (serr->ee.ee_origin != SO_EE_ORIGIN_LOCAL && skb->len) { sin->sin6_family = AF_INET6; if (np->rxopt.all) { if (serr->ee.ee_origin != SO_EE_ORIGIN_ICMP && diff --git a/net/rxrpc/ar-error.c b/net/rxrpc/ar-error.c index 74c0fcd36838..5394b6be46ec 100644 --- a/net/rxrpc/ar-error.c +++ b/net/rxrpc/ar-error.c @@ -42,6 +42,11 @@ void rxrpc_UDP_error_report(struct sock *sk) _leave("UDP socket errqueue empty"); return; } + if (!skb->len) { + _leave("UDP empty message"); + kfree_skb(skb); + return; + } rxrpc_new_skb(skb); -- cgit v1.2.3-70-g09d2 From b245be1f4db1a0394e4b6eb66059814b46670ac3 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Fri, 30 Jan 2015 13:29:32 -0500 Subject: net-timestamp: no-payload only sysctl Tx timestamps are looped onto the error queue on top of an skb. This mechanism leaks packet headers to processes unless the no-payload options SOF_TIMESTAMPING_OPT_TSONLY is set. Add a sysctl that optionally drops looped timestamp with data. This only affects processes without CAP_NET_RAW. The policy is checked when timestamps are generated in the stack. It is possible for timestamps with data to be reported after the sysctl is set, if these were queued internally earlier. No vulnerability is immediately known that exploits knowledge gleaned from packet headers, but it may still be preferable to allow administrators to lock down this path at the cost of possible breakage of legacy applications. Signed-off-by: Willem de Bruijn ---- Changes (v1 -> v2) - test socket CAP_NET_RAW instead of capable(CAP_NET_RAW) (rfc -> v1) - document the sysctl in Documentation/sysctl/net.txt - fix access control race: read .._OPT_TSONLY only once, use same value for permission check and skb generation. Signed-off-by: David S. Miller --- Documentation/sysctl/net.txt | 8 ++++++++ include/net/sock.h | 1 + net/core/skbuff.c | 21 ++++++++++++++++++++- net/core/sock.c | 3 +++ net/core/sysctl_net_core.c | 9 +++++++++ 5 files changed, 41 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index 666594b43cff..6294b5186ae5 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -97,6 +97,14 @@ rmem_max The maximum receive socket buffer size in bytes. 
+tstamp_allow_data +----------------- +Allow processes to receive tx timestamps looped together with the original +packet contents. If disabled, transmit timestamp requests from unprivileged +processes are dropped unless socket option SOF_TIMESTAMPING_OPT_TSONLY is set. +Default: 1 (on) + + wmem_default ------------ diff --git a/include/net/sock.h b/include/net/sock.h index 15341499786c..511ef7c8889b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2239,6 +2239,7 @@ bool sk_net_capable(const struct sock *sk, int cap); extern __u32 sysctl_wmem_max; extern __u32 sysctl_rmem_max; +extern int sysctl_tstamp_allow_data; extern int sysctl_optmem_max; extern __u32 sysctl_wmem_default; diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 65a3798f43e6..a5bff2767f15 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -74,6 +74,8 @@ #include #include #include +#include +#include struct kmem_cache *skbuff_head_cache __read_mostly; static struct kmem_cache *skbuff_fclone_cache __read_mostly; @@ -3690,11 +3692,28 @@ static void __skb_complete_tx_timestamp(struct sk_buff *skb, kfree_skb(skb); } +static bool skb_may_tx_timestamp(struct sock *sk, bool tsonly) +{ + bool ret; + + if (likely(sysctl_tstamp_allow_data || tsonly)) + return true; + + read_lock_bh(&sk->sk_callback_lock); + ret = sk->sk_socket && sk->sk_socket->file && + file_ns_capable(sk->sk_socket->file, &init_user_ns, CAP_NET_RAW); + read_unlock_bh(&sk->sk_callback_lock); + return ret; +} + void skb_complete_tx_timestamp(struct sk_buff *skb, struct skb_shared_hwtstamps *hwtstamps) { struct sock *sk = skb->sk; + if (!skb_may_tx_timestamp(sk, false)) + return; + /* take a reference to prevent skb_orphan() from freeing the socket */ sock_hold(sk); @@ -3712,7 +3731,7 @@ void __skb_tstamp_tx(struct sk_buff *orig_skb, struct sk_buff *skb; bool tsonly = sk->sk_tsflags & SOF_TIMESTAMPING_OPT_TSONLY; - if (!sk) + if (!sk || !skb_may_tx_timestamp(sk, tsonly)) return; if (tsonly) diff --git a/net/core/sock.c b/net/core/sock.c index 1c7a33db1314..93c8b20c91e4 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -325,6 +325,8 @@ __u32 sysctl_rmem_default __read_mostly = SK_RMEM_MAX; int sysctl_optmem_max __read_mostly = sizeof(unsigned long)*(2*UIO_MAXIOV+512); EXPORT_SYMBOL(sysctl_optmem_max); +int sysctl_tstamp_allow_data __read_mostly = 1; + struct static_key memalloc_socks = STATIC_KEY_INIT_FALSE; EXPORT_SYMBOL_GPL(memalloc_socks); @@ -840,6 +842,7 @@ set_rcvbuf: ret = -EINVAL; break; } + if (val & SOF_TIMESTAMPING_OPT_ID && !(sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)) { if (sk->sk_protocol == IPPROTO_TCP) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 31baba2a71ce..fde21d19e61b 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -321,6 +321,15 @@ static struct ctl_table net_core_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "tstamp_allow_data", + .data = &sysctl_tstamp_allow_data, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one + }, #ifdef CONFIG_RPS { .procname = "rps_sock_flow_entries", -- cgit v1.2.3-70-g09d2 From 366e41d9774d7010cb63112b6db2fce6dc7809c0 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 31 Jan 2015 10:40:13 -0500 Subject: ipv6: pull cork initialization into its own function. Pull IPv6 cork initialization into its own function that can be re-used. IPv6 specific cork data did not have an explicit data structure. 
This patch creates one so that just the ipv6 cork data can be passed as arguments. Also, since IPv6 tries to save the flow label into the inet_cork_full structure, pass the full cork. Adjust ip6_cork_release() to take cork data structures. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- include/linux/ipv6.h | 12 ++-- net/ipv6/ip6_output.c | 158 ++++++++++++++++++++++++++++---------------------- 2 files changed, 96 insertions(+), 74 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 2805062c013f..4d5169f5d7d1 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -125,6 +125,12 @@ struct ipv6_mc_socklist; struct ipv6_ac_socklist; struct ipv6_fl_socklist; +struct inet6_cork { + struct ipv6_txoptions *opt; + u8 hop_limit; + u8 tclass; +}; + /** * struct ipv6_pinfo - ipv6 private area * @@ -217,11 +223,7 @@ struct ipv6_pinfo { struct ipv6_txoptions *opt; struct sk_buff *pktoptions; struct sk_buff *rxpmtu; - struct { - struct ipv6_txoptions *opt; - u8 hop_limit; - u8 tclass; - } cork; + struct inet6_cork cork; }; /* WARNING: don't change the layout of the members in {raw,udp,tcp}6_sock! */ diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index ce69a12ae48c..f9f08c43c6e1 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1135,6 +1135,74 @@ static void ip6_append_data_mtu(unsigned int *mtu, } } +static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, + struct inet6_cork *v6_cork, + int hlimit, int tclass, struct ipv6_txoptions *opt, + struct rt6_info *rt, struct flowi6 *fl6) +{ + struct ipv6_pinfo *np = inet6_sk(sk); + unsigned int mtu; + + /* + * setup for corking + */ + if (opt) { + if (WARN_ON(v6_cork->opt)) + return -EINVAL; + + v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); + if (unlikely(v6_cork->opt == NULL)) + return -ENOBUFS; + + v6_cork->opt->tot_len = opt->tot_len; + v6_cork->opt->opt_flen = opt->opt_flen; + v6_cork->opt->opt_nflen = opt->opt_nflen; + + v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt, + sk->sk_allocation); + if (opt->dst0opt && !v6_cork->opt->dst0opt) + return -ENOBUFS; + + v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt, + sk->sk_allocation); + if (opt->dst1opt && !v6_cork->opt->dst1opt) + return -ENOBUFS; + + v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt, + sk->sk_allocation); + if (opt->hopopt && !v6_cork->opt->hopopt) + return -ENOBUFS; + + v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt, + sk->sk_allocation); + if (opt->srcrt && !v6_cork->opt->srcrt) + return -ENOBUFS; + + /* need source address above miyazawa*/ + } + dst_hold(&rt->dst); + cork->base.dst = &rt->dst; + cork->fl.u.ip6 = *fl6; + v6_cork->hop_limit = hlimit; + v6_cork->tclass = tclass; + if (rt->dst.flags & DST_XFRM_TUNNEL) + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? + rt->dst.dev->mtu : dst_mtu(&rt->dst); + else + mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
+ rt->dst.dev->mtu : dst_mtu(rt->dst.path); + if (np->frag_size < mtu) { + if (np->frag_size) + mtu = np->frag_size; + } + cork->base.fragsize = mtu; + if (dst_allfrag(rt->dst.path)) + cork->base.flags |= IPCORK_ALLFRAG; + cork->base.length = 0; + + return 0; +} + int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, @@ -1162,59 +1230,10 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, /* * setup for corking */ - if (opt) { - if (WARN_ON(np->cork.opt)) - return -EINVAL; - - np->cork.opt = kzalloc(opt->tot_len, sk->sk_allocation); - if (unlikely(np->cork.opt == NULL)) - return -ENOBUFS; - - np->cork.opt->tot_len = opt->tot_len; - np->cork.opt->opt_flen = opt->opt_flen; - np->cork.opt->opt_nflen = opt->opt_nflen; - - np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt, - sk->sk_allocation); - if (opt->dst0opt && !np->cork.opt->dst0opt) - return -ENOBUFS; - - np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt, - sk->sk_allocation); - if (opt->dst1opt && !np->cork.opt->dst1opt) - return -ENOBUFS; - - np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt, - sk->sk_allocation); - if (opt->hopopt && !np->cork.opt->hopopt) - return -ENOBUFS; - - np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt, - sk->sk_allocation); - if (opt->srcrt && !np->cork.opt->srcrt) - return -ENOBUFS; - - /* need source address above miyazawa*/ - } - dst_hold(&rt->dst); - cork->dst = &rt->dst; - inet->cork.fl.u.ip6 = *fl6; - np->cork.hop_limit = hlimit; - np->cork.tclass = tclass; - if (rt->dst.flags & DST_XFRM_TUNNEL) - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(&rt->dst); - else - mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ? - rt->dst.dev->mtu : dst_mtu(rt->dst.path); - if (np->frag_size < mtu) { - if (np->frag_size) - mtu = np->frag_size; - } - cork->fragsize = mtu; - if (dst_allfrag(rt->dst.path)) - cork->flags |= IPCORK_ALLFRAG; - cork->length = 0; + err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit, + tclass, opt, rt, fl6); + if (err) + return err; exthdrlen = (opt ? 
opt->opt_flen : 0); length += exthdrlen; transhdrlen += exthdrlen; @@ -1226,8 +1245,8 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, transhdrlen = 0; exthdrlen = 0; dst_exthdrlen = 0; - mtu = cork->fragsize; } + mtu = cork->fragsize; orig_mtu = mtu; hh_len = LL_RESERVED_SPACE(rt->dst.dev); @@ -1503,23 +1522,24 @@ error: } EXPORT_SYMBOL_GPL(ip6_append_data); -static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np) +static void ip6_cork_release(struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { - if (np->cork.opt) { - kfree(np->cork.opt->dst0opt); - kfree(np->cork.opt->dst1opt); - kfree(np->cork.opt->hopopt); - kfree(np->cork.opt->srcrt); - kfree(np->cork.opt); - np->cork.opt = NULL; + if (v6_cork->opt) { + kfree(v6_cork->opt->dst0opt); + kfree(v6_cork->opt->dst1opt); + kfree(v6_cork->opt->hopopt); + kfree(v6_cork->opt->srcrt); + kfree(v6_cork->opt); + v6_cork->opt = NULL; } - if (inet->cork.base.dst) { - dst_release(inet->cork.base.dst); - inet->cork.base.dst = NULL; - inet->cork.base.flags &= ~IPCORK_ALLFRAG; + if (cork->base.dst) { + dst_release(cork->base.dst); + cork->base.dst = NULL; + cork->base.flags &= ~IPCORK_ALLFRAG; } - memset(&inet->cork.fl, 0, sizeof(inet->cork.fl)); + memset(&cork->fl, 0, sizeof(cork->fl)); } int ip6_push_pending_frames(struct sock *sk) @@ -1599,7 +1619,7 @@ int ip6_push_pending_frames(struct sock *sk) } out: - ip6_cork_release(inet, np); + ip6_cork_release(&inet->cork, &np->cork); return err; error: IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); @@ -1618,6 +1638,6 @@ void ip6_flush_pending_frames(struct sock *sk) kfree_skb(skb); } - ip6_cork_release(inet_sk(sk), inet6_sk(sk)); + ip6_cork_release(&inet_sk(sk)->cork, &inet6_sk(sk)->cork); } EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); -- cgit v1.2.3-70-g09d2 From 6422398c2ab09268a55112f98cbf96bbf0184328 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Sat, 31 Jan 2015 10:40:15 -0500 Subject: ipv6: introduce ipv6_make_skb This commit is very similar to commit 1c32c5ad6fac8cee1a77449f5abf211e911ff830 Author: Herbert Xu Date: Tue Mar 1 02:36:47 2011 +0000 inet: Add ip_make_skb and ip_finish_skb It adds IPv6 versions of the helpers ip6_make_skb and ip6_finish_skb. The job of ip6_make_skb is to collect messages into an ipv6 packet and populate the ipv6 header. The job of ip6_finish_skb is to transmit the generated skb. Together they replicate the job of ip6_push_pending_frames() while also providing the capability to be called independently. This will be needed to add lockless UDP sendmsg support. Signed-off-by: Vladislav Yasevich Signed-off-by: David S.
Miller --- include/net/ipv6.h | 19 ++++++++++ net/ipv6/ip6_output.c | 103 ++++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 103 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 4292929392b0..8027ca53e31f 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -788,6 +788,25 @@ int ip6_push_pending_frames(struct sock *sk); void ip6_flush_pending_frames(struct sock *sk); +int ip6_send_skb(struct sk_buff *skb); + +struct sk_buff *__ip6_make_skb(struct sock *sk, struct sk_buff_head *queue, + struct inet_cork_full *cork, + struct inet6_cork *v6_cork); +struct sk_buff *ip6_make_skb(struct sock *sk, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + int hlimit, int tclass, struct ipv6_txoptions *opt, + struct flowi6 *fl6, struct rt6_info *rt, + unsigned int flags, int dontfrag); + +static inline struct sk_buff *ip6_finish_skb(struct sock *sk) +{ + return __ip6_make_skb(sk, &sk->sk_write_queue, &inet_sk(sk)->cork, + &inet6_sk(sk)->cork); +} + int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6); struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1b66453a695d..b89d3c27dac7 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1564,22 +1564,23 @@ static void ip6_cork_release(struct inet_cork_full *cork, memset(&cork->fl, 0, sizeof(cork->fl)); } -int ip6_push_pending_frames(struct sock *sk) +struct sk_buff *__ip6_make_skb(struct sock *sk, + struct sk_buff_head *queue, + struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { struct sk_buff *skb, *tmp_skb; struct sk_buff **tail_skb; struct in6_addr final_dst_buf, *final_dst = &final_dst_buf; - struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); struct net *net = sock_net(sk); struct ipv6hdr *hdr; - struct ipv6_txoptions *opt = np->cork.opt; - struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst; - struct flowi6 *fl6 = &inet->cork.fl.u.ip6; + struct ipv6_txoptions *opt = v6_cork->opt; + struct rt6_info *rt = (struct rt6_info *)cork->base.dst; + struct flowi6 *fl6 = &cork->fl.u.ip6; unsigned char proto = fl6->flowi6_proto; - int err = 0; - skb = __skb_dequeue(&sk->sk_write_queue); + skb = __skb_dequeue(queue); if (skb == NULL) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); @@ -1587,7 +1588,7 @@ int ip6_push_pending_frames(struct sock *sk) /* move skb->data to ip header from ext header */ if (skb->data < skb_network_header(skb)) __skb_pull(skb, skb_network_offset(skb)); - while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) { + while ((tmp_skb = __skb_dequeue(queue)) != NULL) { __skb_pull(tmp_skb, skb_network_header_len(skb)); *tail_skb = tmp_skb; tail_skb = &(tmp_skb->next); @@ -1612,10 +1613,10 @@ int ip6_push_pending_frames(struct sock *sk) skb_reset_network_header(skb); hdr = ipv6_hdr(skb); - ip6_flow_hdr(hdr, np->cork.tclass, + ip6_flow_hdr(hdr, v6_cork->tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel, np->autoflowlabel)); - hdr->hop_limit = np->cork.hop_limit; + hdr->hop_limit = v6_cork->hop_limit; hdr->nexthdr = proto; hdr->saddr = fl6->saddr; hdr->daddr = *final_dst; @@ -1632,25 +1633,45 @@ int ip6_push_pending_frames(struct sock *sk) ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); } + ip6_cork_release(cork, v6_cork); +out: + return skb; +} + +int 
ip6_send_skb(struct sk_buff *skb) +{ + struct net *net = sock_net(skb->sk); + struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + int err; + err = ip6_local_out(skb); if (err) { if (err > 0) err = net_xmit_errno(err); if (err) - goto error; + IP6_INC_STATS(net, rt->rt6i_idev, + IPSTATS_MIB_OUTDISCARDS); } -out: - ip6_cork_release(&inet->cork, &np->cork); return err; -error: - IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); - goto out; +} + +int ip6_push_pending_frames(struct sock *sk) +{ + struct sk_buff *skb; + + skb = ip6_finish_skb(sk); + if (!skb) + return 0; + + return ip6_send_skb(skb); } EXPORT_SYMBOL_GPL(ip6_push_pending_frames); static void __ip6_flush_pending_frames(struct sock *sk, - struct sk_buff_head *queue) + struct sk_buff_head *queue, + struct inet_cork_full *cork, + struct inet6_cork *v6_cork) { struct sk_buff *skb; @@ -1661,11 +1682,55 @@ static void __ip6_flush_pending_frames(struct sock *sk, kfree_skb(skb); } - ip6_cork_release(&inet_sk(sk)->cork, &inet6_sk(sk)->cork); + ip6_cork_release(cork, v6_cork); } void ip6_flush_pending_frames(struct sock *sk) { - __ip6_flush_pending_frames(sk, &sk->sk_write_queue); + __ip6_flush_pending_frames(sk, &sk->sk_write_queue, + &inet_sk(sk)->cork, &inet6_sk(sk)->cork); } EXPORT_SYMBOL_GPL(ip6_flush_pending_frames); + +struct sk_buff *ip6_make_skb(struct sock *sk, + int getfrag(void *from, char *to, int offset, + int len, int odd, struct sk_buff *skb), + void *from, int length, int transhdrlen, + int hlimit, int tclass, + struct ipv6_txoptions *opt, struct flowi6 *fl6, + struct rt6_info *rt, unsigned int flags, + int dontfrag) +{ + struct inet_cork_full cork; + struct inet6_cork v6_cork; + struct sk_buff_head queue; + int exthdrlen = (opt ? opt->opt_flen : 0); + int err; + + if (flags & MSG_PROBE) + return NULL; + + __skb_queue_head_init(&queue); + + cork.base.flags = 0; + cork.base.addr = 0; + cork.base.opt = NULL; + v6_cork.opt = NULL; + err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6); + if (err) + return ERR_PTR(err); + + if (dontfrag < 0) + dontfrag = inet6_sk(sk)->dontfrag; + + err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork, + ¤t->task_frag, getfrag, from, + length + exthdrlen, transhdrlen + exthdrlen, + flags, dontfrag); + if (err) { + __ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork); + return ERR_PTR(err); + } + + return __ip6_make_skb(sk, &queue, &cork, &v6_cork); +} -- cgit v1.2.3-70-g09d2 From 4c946d9c11d173c2ea6b9081b248f8072e6b46f1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 19:52:04 -0500 Subject: vmci: propagate msghdr all way down to __qp_memcpy_to_queue() Switch from passing msg->iov_iter.iov to passing msg itself Signed-off-by: Al Viro --- drivers/misc/vmw_vmci/vmci_queue_pair.c | 16 ++++++++-------- include/linux/vmw_vmci_api.h | 2 +- net/vmw_vsock/vmci_transport.c | 3 +-- 3 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/misc/vmw_vmci/vmci_queue_pair.c b/drivers/misc/vmw_vmci/vmci_queue_pair.c index 7aaaf51e1596..35f19a683822 100644 --- a/drivers/misc/vmw_vmci/vmci_queue_pair.c +++ b/drivers/misc/vmw_vmci/vmci_queue_pair.c @@ -370,12 +370,12 @@ static int __qp_memcpy_to_queue(struct vmci_queue *queue, to_copy = size - bytes_copied; if (is_iovec) { - struct iovec *iov = (struct iovec *)src; + struct msghdr *msg = (struct msghdr *)src; int err; /* The iovec will track bytes_copied internally. 
*/ - err = memcpy_fromiovec((u8 *)va + page_offset, - iov, to_copy); + err = memcpy_from_msg((u8 *)va + page_offset, + msg, to_copy); if (err != 0) { if (kernel_if->host) kunmap(kernel_if->u.h.page[page_index]); @@ -580,7 +580,7 @@ static int qp_memcpy_from_queue(void *dest, */ static int qp_memcpy_to_queue_iov(struct vmci_queue *queue, u64 queue_offset, - const void *src, + const void *msg, size_t src_offset, size_t size) { @@ -588,7 +588,7 @@ static int qp_memcpy_to_queue_iov(struct vmci_queue *queue, * We ignore src_offset because src is really a struct iovec * and will * maintain offset internally. */ - return __qp_memcpy_to_queue(queue, queue_offset, src, size, true); + return __qp_memcpy_to_queue(queue, queue_offset, msg, size, true); } /* @@ -3223,13 +3223,13 @@ EXPORT_SYMBOL_GPL(vmci_qpair_peek); * of bytes enqueued or < 0 on error. */ ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, - void *iov, + struct msghdr *msg, size_t iov_size, int buf_type) { ssize_t result; - if (!qpair || !iov) + if (!qpair) return VMCI_ERROR_INVALID_ARGS; qp_lock(qpair); @@ -3238,7 +3238,7 @@ ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, result = qp_enqueue_locked(qpair->produce_q, qpair->consume_q, qpair->produce_q_size, - iov, iov_size, + msg, iov_size, qp_memcpy_to_queue_iov); if (result == VMCI_ERROR_QUEUEPAIR_NOT_READY && diff --git a/include/linux/vmw_vmci_api.h b/include/linux/vmw_vmci_api.h index 5691f752ce8f..63df3a2a8ce5 100644 --- a/include/linux/vmw_vmci_api.h +++ b/include/linux/vmw_vmci_api.h @@ -74,7 +74,7 @@ ssize_t vmci_qpair_dequeue(struct vmci_qp *qpair, ssize_t vmci_qpair_peek(struct vmci_qp *qpair, void *buf, size_t buf_size, int mode); ssize_t vmci_qpair_enquev(struct vmci_qp *qpair, - void *iov, size_t iov_size, int mode); + struct msghdr *msg, size_t iov_size, int mode); ssize_t vmci_qpair_dequev(struct vmci_qp *qpair, struct msghdr *msg, size_t iov_size, int mode); ssize_t vmci_qpair_peekv(struct vmci_qp *qpair, struct msghdr *msg, size_t iov_size, diff --git a/net/vmw_vsock/vmci_transport.c b/net/vmw_vsock/vmci_transport.c index 02d2e5229240..7f3255084a6c 100644 --- a/net/vmw_vsock/vmci_transport.c +++ b/net/vmw_vsock/vmci_transport.c @@ -1850,8 +1850,7 @@ static ssize_t vmci_transport_stream_enqueue( struct msghdr *msg, size_t len) { - /* XXX: stripping const */ - return vmci_qpair_enquev(vmci_trans(vsk)->qpair, (struct iovec *)msg->msg_iter.iov, len, 0); + return vmci_qpair_enquev(vmci_trans(vsk)->qpair, msg, len, 0); } static s64 vmci_transport_stream_has_data(struct vsock_sock *vsk) -- cgit v1.2.3-70-g09d2 From af2b040e470b470bfc881981db3c796072853eae Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 21:44:24 -0500 Subject: rxrpc: switch rxrpc_send_data() to iov_iter primitives Convert skb_add_data() to iov_iter; allows to get rid of the explicit messing with iovec in its only caller - skb_add_data() will keep advancing ->msg_iter for us, so there's no need to similate that manually. 
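
To make the pattern concrete, here is a minimal sketch (not part of the patch; fill_one_skb() is a made-up helper, while iov_iter_count(), copy_from_iter(), skb_put(), skb_tailroom() and min_t() are real kernel APIs) of the caller shape the conversion moves to: the iterator tracks its own position, so the per-segment iov_base/iov_len/segment bookkeeping simply disappears.

#include <linux/kernel.h>
#include <linux/socket.h>
#include <linux/skbuff.h>
#include <linux/uio.h>

/* Sketch only: copy whatever fits into the skb tail from the message's
 * iov_iter.  copy_from_iter() advances msg->msg_iter by the bytes it
 * copied, so the caller never touches iov_base/iov_len or segment
 * counters by hand. */
static int fill_one_skb(struct sk_buff *skb, struct msghdr *msg)
{
	size_t want = min_t(size_t, skb_tailroom(skb),
			    iov_iter_count(&msg->msg_iter));

	if (copy_from_iter(skb_put(skb, want), want, &msg->msg_iter) != want)
		return -EFAULT;
	return 0;
}

The same loop shape (while (iov_iter_count(&msg->msg_iter)) { ... }) reappears in the tcp_sendmsg() conversion later in this series.
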
Signed-off-by: Al Viro --- include/linux/skbuff.h | 11 +++++------ net/rxrpc/ar-output.c | 43 ++++++++++--------------------------------- 2 files changed, 15 insertions(+), 39 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 85ab7d72b54c..9a8bafee1b67 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2484,19 +2484,18 @@ static inline int skb_put_padto(struct sk_buff *skb, unsigned int len) } static inline int skb_add_data(struct sk_buff *skb, - char __user *from, int copy) + struct iov_iter *from, int copy) { const int off = skb->len; if (skb->ip_summed == CHECKSUM_NONE) { - int err = 0; - __wsum csum = csum_and_copy_from_user(from, skb_put(skb, copy), - copy, 0, &err); - if (!err) { + __wsum csum = 0; + if (csum_and_copy_from_iter(skb_put(skb, copy), copy, + &csum, from) == copy) { skb->csum = csum_block_add(skb->csum, csum, off); return 0; } - } else if (!copy_from_user(skb_put(skb, copy), from, copy)) + } else if (copy_from_iter(skb_put(skb, copy), copy, from) == copy) return 0; __skb_trim(skb, off); diff --git a/net/rxrpc/ar-output.c b/net/rxrpc/ar-output.c index e1a9373e5979..963a5b91f3e8 100644 --- a/net/rxrpc/ar-output.c +++ b/net/rxrpc/ar-output.c @@ -529,13 +529,11 @@ static int rxrpc_send_data(struct kiocb *iocb, struct msghdr *msg, size_t len) { struct rxrpc_skb_priv *sp; - unsigned char __user *from; struct sk_buff *skb; - const struct iovec *iov; struct sock *sk = &rx->sk; long timeo; bool more; - int ret, ioc, segment, copied; + int ret, copied; timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT); @@ -545,25 +543,17 @@ static int rxrpc_send_data(struct kiocb *iocb, if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) return -EPIPE; - iov = msg->msg_iter.iov; - ioc = msg->msg_iter.nr_segs - 1; - from = iov->iov_base; - segment = iov->iov_len; - iov++; more = msg->msg_flags & MSG_MORE; skb = call->tx_pending; call->tx_pending = NULL; copied = 0; - do { + if (len > iov_iter_count(&msg->msg_iter)) + len = iov_iter_count(&msg->msg_iter); + while (len) { int copy; - if (segment > len) - segment = len; - - _debug("SEGMENT %d @%p", segment, from); - if (!skb) { size_t size, chunk, max, space; @@ -631,13 +621,13 @@ static int rxrpc_send_data(struct kiocb *iocb, /* append next segment of data to the current buffer */ copy = skb_tailroom(skb); ASSERTCMP(copy, >, 0); - if (copy > segment) - copy = segment; + if (copy > len) + copy = len; if (copy > sp->remain) copy = sp->remain; _debug("add"); - ret = skb_add_data(skb, from, copy); + ret = skb_add_data(skb, &msg->msg_iter, copy); _debug("added"); if (ret < 0) goto efault; @@ -646,18 +636,6 @@ static int rxrpc_send_data(struct kiocb *iocb, copied += copy; len -= copy; - segment -= copy; - from += copy; - while (segment == 0 && ioc > 0) { - from = iov->iov_base; - segment = iov->iov_len; - iov++; - ioc--; - } - if (len == 0) { - segment = 0; - ioc = 0; - } /* check for the far side aborting the call or a network error * occurring */ @@ -665,7 +643,7 @@ static int rxrpc_send_data(struct kiocb *iocb, goto call_aborted; /* add the packet to the send queue if it's now full */ - if (sp->remain <= 0 || (segment == 0 && !more)) { + if (sp->remain <= 0 || (!len && !more)) { struct rxrpc_connection *conn = call->conn; uint32_t seq; size_t pad; @@ -711,11 +689,10 @@ static int rxrpc_send_data(struct kiocb *iocb, memcpy(skb->head, &sp->hdr, sizeof(struct rxrpc_header)); - rxrpc_queue_packet(call, skb, segment == 0 && !more); + rxrpc_queue_packet(call, skb, 
!iov_iter_count(&msg->msg_iter) && !more); skb = NULL; } - - } while (segment > 0); + } success: ret = copied; -- cgit v1.2.3-70-g09d2 From cacdc7d2f9fa42e29b650e2879df42ea7d7833c1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Nov 2014 20:34:16 -0500 Subject: ip: stash a pointer to msghdr in struct ping_fakehdr ... instead of storing its ->mgs_iter.iov there Signed-off-by: Al Viro --- include/net/ping.h | 2 +- net/ipv4/ping.c | 7 +++---- net/ipv6/ping.c | 3 +-- 3 files changed, 5 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/ping.h b/include/net/ping.h index f074060bc5de..cc16d413f681 100644 --- a/include/net/ping.h +++ b/include/net/ping.h @@ -59,7 +59,7 @@ extern struct pingv6_ops pingv6_ops; struct pingfakehdr { struct icmphdr icmph; - struct iovec *iov; + struct msghdr *msg; sa_family_t family; __wsum wcheck; }; diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 2a3720fb5a5f..9e15ba701401 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -602,14 +602,14 @@ int ping_getfrag(void *from, char *to, if (fraglen < sizeof(struct icmphdr)) BUG(); if (csum_partial_copy_fromiovecend(to + sizeof(struct icmphdr), - pfh->iov, 0, fraglen - sizeof(struct icmphdr), + pfh->msg->msg_iter.iov, 0, fraglen - sizeof(struct icmphdr), &pfh->wcheck)) return -EFAULT; } else if (offset < sizeof(struct icmphdr)) { BUG(); } else { if (csum_partial_copy_fromiovecend - (to, pfh->iov, offset - sizeof(struct icmphdr), + (to, pfh->msg->msg_iter.iov, offset - sizeof(struct icmphdr), fraglen, &pfh->wcheck)) return -EFAULT; } @@ -811,8 +811,7 @@ back_from_confirm: pfh.icmph.checksum = 0; pfh.icmph.un.echo.id = inet->inet_sport; pfh.icmph.un.echo.sequence = user_icmph.un.echo.sequence; - /* XXX: stripping const */ - pfh.iov = (struct iovec *)msg->msg_iter.iov; + pfh.msg = msg; pfh.wcheck = 0; pfh.family = AF_INET; diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 2d3148378a1f..bd46f736f61d 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -163,8 +163,7 @@ int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, pfh.icmph.checksum = 0; pfh.icmph.un.echo.id = inet->inet_sport; pfh.icmph.un.echo.sequence = user_icmph.icmp6_sequence; - /* XXX: stripping const */ - pfh.iov = (struct iovec *)msg->msg_iter.iov; + pfh.msg = msg; pfh.wcheck = 0; pfh.family = AF_INET6; -- cgit v1.2.3-70-g09d2 From 57be5bdad759b9dde8b0d0cc630782a1a4ac4b9f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 13:40:20 -0500 Subject: ip: convert tcp_sendmsg() to iov_iter primitives patch is actually smaller than it seems to be - most of it is unindenting the inner loop body in tcp_sendmsg() itself... the bit in tcp_input.c is going to get reverted very soon - that's what memcpy_from_msg() will become, but not in this commit; let's keep it reasonably contained... There's one potentially subtle change here: in case of short copy from userland, mainline tcp_send_syn_data() discards the skb it has allocated and falls back to normal path, where we'll send as much as possible after rereading the same data again. This patch trims SYN+data skb instead - that way we don't need to copy from the same place twice. 
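
The short-copy handling described above boils down to a small sketch (illustrative only; copy_and_trim() is not a function introduced by this patch, and only copy_from_iter(), skb_put() and skb_trim() are real APIs): instead of discarding the skb when userland yields fewer bytes than expected, shrink it back to the bytes that actually arrived and continue.

#include <linux/types.h>
#include <linux/skbuff.h>
#include <linux/uio.h>

/* Sketch only: reserve 'space' bytes at the skb tail, copy as much as
 * the iterator actually provides, and trim off the tail that was never
 * filled rather than throwing the skb away. */
static ssize_t copy_and_trim(struct sk_buff *skb, struct iov_iter *from,
			     size_t space)
{
	size_t copied = copy_from_iter(skb_put(skb, space), space, from);

	if (!copied)
		return -EFAULT;		/* nothing copied: caller falls back */
	if (copied != space)
		skb_trim(skb, skb->len - (space - copied));
	return copied;
}

The hunk above does the equivalent directly with skb_trim(syn_data, copied) and then treats the copied length as the new space.
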
Signed-off-by: Al Viro --- include/net/sock.h | 18 ++-- net/ipv4/tcp.c | 233 +++++++++++++++++++++++--------------------------- net/ipv4/tcp_input.c | 2 +- net/ipv4/tcp_output.c | 11 ++- 4 files changed, 123 insertions(+), 141 deletions(-) (limited to 'include') diff --git a/include/net/sock.h b/include/net/sock.h index 15341499786c..1e45e599a3ab 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1803,27 +1803,25 @@ static inline void sk_nocaps_add(struct sock *sk, netdev_features_t flags) } static inline int skb_do_copy_data_nocache(struct sock *sk, struct sk_buff *skb, - char __user *from, char *to, + struct iov_iter *from, char *to, int copy, int offset) { if (skb->ip_summed == CHECKSUM_NONE) { - int err = 0; - __wsum csum = csum_and_copy_from_user(from, to, copy, 0, &err); - if (err) - return err; + __wsum csum = 0; + if (csum_and_copy_from_iter(to, copy, &csum, from) != copy) + return -EFAULT; skb->csum = csum_block_add(skb->csum, csum, offset); } else if (sk->sk_route_caps & NETIF_F_NOCACHE_COPY) { - if (!access_ok(VERIFY_READ, from, copy) || - __copy_from_user_nocache(to, from, copy)) + if (copy_from_iter_nocache(to, copy, from) != copy) return -EFAULT; - } else if (copy_from_user(to, from, copy)) + } else if (copy_from_iter(to, copy, from) != copy) return -EFAULT; return 0; } static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb, - char __user *from, int copy) + struct iov_iter *from, int copy) { int err, offset = skb->len; @@ -1835,7 +1833,7 @@ static inline int skb_add_data_nocache(struct sock *sk, struct sk_buff *skb, return err; } -static inline int skb_copy_to_page_nocache(struct sock *sk, char __user *from, +static inline int skb_copy_to_page_nocache(struct sock *sk, struct iov_iter *from, struct sk_buff *skb, struct page *page, int off, int copy) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3075723c729b..9d72a0fcd928 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1067,11 +1067,10 @@ static int tcp_sendmsg_fastopen(struct sock *sk, struct msghdr *msg, int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t size) { - const struct iovec *iov; struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb; - int iovlen, flags, err, copied = 0; - int mss_now = 0, size_goal, copied_syn = 0, offset = 0; + int flags, err, copied = 0; + int mss_now = 0, size_goal, copied_syn = 0; bool sg; long timeo; @@ -1084,7 +1083,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, goto out; else if (err) goto out_err; - offset = copied_syn; } timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); @@ -1118,8 +1116,6 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, mss_now = tcp_send_mss(sk, &size_goal, flags); /* Ok commence sending. 
*/ - iovlen = msg->msg_iter.nr_segs; - iov = msg->msg_iter.iov; copied = 0; err = -EPIPE; @@ -1128,151 +1124,134 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, sg = !!(sk->sk_route_caps & NETIF_F_SG); - while (--iovlen >= 0) { - size_t seglen = iov->iov_len; - unsigned char __user *from = iov->iov_base; + while (iov_iter_count(&msg->msg_iter)) { + int copy = 0; + int max = size_goal; - iov++; - if (unlikely(offset > 0)) { /* Skip bytes copied in SYN */ - if (offset >= seglen) { - offset -= seglen; - continue; - } - seglen -= offset; - from += offset; - offset = 0; + skb = tcp_write_queue_tail(sk); + if (tcp_send_head(sk)) { + if (skb->ip_summed == CHECKSUM_NONE) + max = mss_now; + copy = max - skb->len; } - while (seglen > 0) { - int copy = 0; - int max = size_goal; - - skb = tcp_write_queue_tail(sk); - if (tcp_send_head(sk)) { - if (skb->ip_summed == CHECKSUM_NONE) - max = mss_now; - copy = max - skb->len; - } - - if (copy <= 0) { + if (copy <= 0) { new_segment: - /* Allocate new segment. If the interface is SG, - * allocate skb fitting to single page. - */ - if (!sk_stream_memory_free(sk)) - goto wait_for_sndbuf; + /* Allocate new segment. If the interface is SG, + * allocate skb fitting to single page. + */ + if (!sk_stream_memory_free(sk)) + goto wait_for_sndbuf; - skb = sk_stream_alloc_skb(sk, - select_size(sk, sg), - sk->sk_allocation); - if (!skb) - goto wait_for_memory; + skb = sk_stream_alloc_skb(sk, + select_size(sk, sg), + sk->sk_allocation); + if (!skb) + goto wait_for_memory; - /* - * Check whether we can use HW checksum. - */ - if (sk->sk_route_caps & NETIF_F_ALL_CSUM) - skb->ip_summed = CHECKSUM_PARTIAL; + /* + * Check whether we can use HW checksum. + */ + if (sk->sk_route_caps & NETIF_F_ALL_CSUM) + skb->ip_summed = CHECKSUM_PARTIAL; - skb_entail(sk, skb); - copy = size_goal; - max = size_goal; + skb_entail(sk, skb); + copy = size_goal; + max = size_goal; - /* All packets are restored as if they have - * already been sent. skb_mstamp isn't set to - * avoid wrong rtt estimation. - */ - if (tp->repair) - TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED; - } + /* All packets are restored as if they have + * already been sent. skb_mstamp isn't set to + * avoid wrong rtt estimation. + */ + if (tp->repair) + TCP_SKB_CB(skb)->sacked |= TCPCB_REPAIRED; + } - /* Try to append data to the end of skb. */ - if (copy > seglen) - copy = seglen; - - /* Where to copy to? */ - if (skb_availroom(skb) > 0) { - /* We have some space in skb head. Superb! */ - copy = min_t(int, copy, skb_availroom(skb)); - err = skb_add_data_nocache(sk, skb, from, copy); - if (err) - goto do_fault; - } else { - bool merge = true; - int i = skb_shinfo(skb)->nr_frags; - struct page_frag *pfrag = sk_page_frag(sk); - - if (!sk_page_frag_refill(sk, pfrag)) - goto wait_for_memory; - - if (!skb_can_coalesce(skb, i, pfrag->page, - pfrag->offset)) { - if (i == MAX_SKB_FRAGS || !sg) { - tcp_mark_push(tp, skb); - goto new_segment; - } - merge = false; - } + /* Try to append data to the end of skb. */ + if (copy > iov_iter_count(&msg->msg_iter)) + copy = iov_iter_count(&msg->msg_iter); + + /* Where to copy to? */ + if (skb_availroom(skb) > 0) { + /* We have some space in skb head. Superb! 
*/ + copy = min_t(int, copy, skb_availroom(skb)); + err = skb_add_data_nocache(sk, skb, &msg->msg_iter, copy); + if (err) + goto do_fault; + } else { + bool merge = true; + int i = skb_shinfo(skb)->nr_frags; + struct page_frag *pfrag = sk_page_frag(sk); + + if (!sk_page_frag_refill(sk, pfrag)) + goto wait_for_memory; - copy = min_t(int, copy, pfrag->size - pfrag->offset); - - if (!sk_wmem_schedule(sk, copy)) - goto wait_for_memory; - - err = skb_copy_to_page_nocache(sk, from, skb, - pfrag->page, - pfrag->offset, - copy); - if (err) - goto do_error; - - /* Update the skb. */ - if (merge) { - skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); - } else { - skb_fill_page_desc(skb, i, pfrag->page, - pfrag->offset, copy); - get_page(pfrag->page); + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { + if (i == MAX_SKB_FRAGS || !sg) { + tcp_mark_push(tp, skb); + goto new_segment; } - pfrag->offset += copy; + merge = false; } - if (!copied) - TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; + copy = min_t(int, copy, pfrag->size - pfrag->offset); - tp->write_seq += copy; - TCP_SKB_CB(skb)->end_seq += copy; - tcp_skb_pcount_set(skb, 0); + if (!sk_wmem_schedule(sk, copy)) + goto wait_for_memory; - from += copy; - copied += copy; - if ((seglen -= copy) == 0 && iovlen == 0) { - tcp_tx_timestamp(sk, skb); - goto out; + err = skb_copy_to_page_nocache(sk, &msg->msg_iter, skb, + pfrag->page, + pfrag->offset, + copy); + if (err) + goto do_error; + + /* Update the skb. */ + if (merge) { + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); + } else { + skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, copy); + get_page(pfrag->page); } + pfrag->offset += copy; + } - if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) - continue; + if (!copied) + TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_PSH; + + tp->write_seq += copy; + TCP_SKB_CB(skb)->end_seq += copy; + tcp_skb_pcount_set(skb, 0); + + copied += copy; + if (!iov_iter_count(&msg->msg_iter)) { + tcp_tx_timestamp(sk, skb); + goto out; + } - if (forced_push(tp)) { - tcp_mark_push(tp, skb); - __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); - } else if (skb == tcp_send_head(sk)) - tcp_push_one(sk, mss_now); + if (skb->len < max || (flags & MSG_OOB) || unlikely(tp->repair)) continue; + if (forced_push(tp)) { + tcp_mark_push(tp, skb); + __tcp_push_pending_frames(sk, mss_now, TCP_NAGLE_PUSH); + } else if (skb == tcp_send_head(sk)) + tcp_push_one(sk, mss_now); + continue; + wait_for_sndbuf: - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); wait_for_memory: - if (copied) - tcp_push(sk, flags & ~MSG_MORE, mss_now, - TCP_NAGLE_PUSH, size_goal); + if (copied) + tcp_push(sk, flags & ~MSG_MORE, mss_now, + TCP_NAGLE_PUSH, size_goal); - if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) - goto do_error; + if ((err = sk_stream_wait_memory(sk, &timeo)) != 0) + goto do_error; - mss_now = tcp_send_mss(sk, &size_goal, flags); - } + mss_now = tcp_send_mss(sk, &size_goal, flags); } out: diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 71fb37c70581..93c74829cbce 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4368,7 +4368,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) goto err_free; - if (memcpy_from_msg(skb_put(skb, size), msg, size)) + if (copy_from_iter(skb_put(skb, size), size, &msg->msg_iter) != size) goto err_free; TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; diff --git 
a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 20ab06b228ac..722c8bceaf9a 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -3055,7 +3055,7 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) { struct tcp_sock *tp = tcp_sk(sk); struct tcp_fastopen_request *fo = tp->fastopen_req; - int syn_loss = 0, space, err = 0; + int syn_loss = 0, space, err = 0, copied; unsigned long last_syn_loss = 0; struct sk_buff *syn_data; @@ -3093,11 +3093,16 @@ static int tcp_send_syn_data(struct sock *sk, struct sk_buff *syn) goto fallback; syn_data->ip_summed = CHECKSUM_PARTIAL; memcpy(syn_data->cb, syn->cb, sizeof(syn->cb)); - if (unlikely(memcpy_fromiovecend(skb_put(syn_data, space), - fo->data->msg_iter.iov, 0, space))) { + copied = copy_from_iter(skb_put(syn_data, space), space, + &fo->data->msg_iter); + if (unlikely(!copied)) { kfree_skb(syn_data); goto fallback; } + if (copied != space) { + skb_trim(syn_data, copied); + space = copied; + } /* No more data pending in inet_wait_for_connect() */ if (space == fo->size) -- cgit v1.2.3-70-g09d2 From 21226abb4e9f14d88238964d89b279e461ddc30c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 15:48:29 -0500 Subject: net: switch memcpy_fromiovec()/memcpy_fromiovecend() users to copy_from_iter() That takes care of the majority of ->sendmsg() instances - most of them via memcpy_to_msg() or assorted getfrag() callbacks. One place where we still keep memcpy_fromiovecend() is tipc - there we potentially read the same data over and over; separate patch, that... Signed-off-by: Al Viro --- include/linux/skbuff.h | 3 +-- include/net/udplite.h | 3 +-- net/ipv4/ip_output.c | 6 ++---- net/ipv4/ping.c | 14 +++++++------- net/ipv4/raw.c | 2 +- net/ipv4/tcp_input.c | 2 +- net/ipv6/raw.c | 2 +- 7 files changed, 14 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9a8bafee1b67..b349c96dc80a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2692,8 +2692,7 @@ int skb_vlan_push(struct sk_buff *skb, __be16 vlan_proto, u16 vlan_tci); static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) { - /* XXX: stripping const */ - return memcpy_fromiovec(data, (struct iovec *)msg->msg_iter.iov, len); + return copy_from_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT; } static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) diff --git a/include/net/udplite.h b/include/net/udplite.h index ae7c8d1fbcad..80761938b9a7 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -20,8 +20,7 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb) { struct msghdr *msg = from; - /* XXX: stripping const */ - return memcpy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len); + return copy_from_iter(to, len, &msg->msg_iter) != len ? 
-EFAULT : 0; } /* Designate sk as UDP-Lite socket */ diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index b50861b22b6b..f998bc87ae38 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -755,13 +755,11 @@ ip_generic_getfrag(void *from, char *to, int offset, int len, int odd, struct sk struct msghdr *msg = from; if (skb->ip_summed == CHECKSUM_PARTIAL) { - /* XXX: stripping const */ - if (memcpy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len) < 0) + if (copy_from_iter(to, len, &msg->msg_iter) != len) return -EFAULT; } else { __wsum csum = 0; - /* XXX: stripping const */ - if (csum_partial_copy_fromiovecend(to, (struct iovec *)msg->msg_iter.iov, offset, len, &csum) < 0) + if (csum_and_copy_from_iter(to, len, &csum, &msg->msg_iter) != len) return -EFAULT; skb->csum = csum_block_add(skb->csum, csum, odd); } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 9e15ba701401..e9f66e1cda50 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -599,18 +599,18 @@ int ping_getfrag(void *from, char *to, struct pingfakehdr *pfh = (struct pingfakehdr *)from; if (offset == 0) { - if (fraglen < sizeof(struct icmphdr)) + fraglen -= sizeof(struct icmphdr); + if (fraglen < 0) BUG(); - if (csum_partial_copy_fromiovecend(to + sizeof(struct icmphdr), - pfh->msg->msg_iter.iov, 0, fraglen - sizeof(struct icmphdr), - &pfh->wcheck)) + if (csum_and_copy_from_iter(to + sizeof(struct icmphdr), + fraglen, &pfh->wcheck, + &pfh->msg->msg_iter) != fraglen) return -EFAULT; } else if (offset < sizeof(struct icmphdr)) { BUG(); } else { - if (csum_partial_copy_fromiovecend - (to, pfh->msg->msg_iter.iov, offset - sizeof(struct icmphdr), - fraglen, &pfh->wcheck)) + if (csum_and_copy_from_iter(to, fraglen, &pfh->wcheck, + &pfh->msg->msg_iter) != fraglen) return -EFAULT; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 2c9d252072a2..f027a708b7e0 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -382,7 +382,7 @@ static int raw_send_hdrinc(struct sock *sk, struct flowi4 *fl4, skb->transport_header = skb->network_header; err = -EFAULT; - if (memcpy_fromiovecend((void *)iph, msg->msg_iter.iov, 0, length)) + if (memcpy_from_msg(iph, msg, length)) goto error_free; iphlen = iph->ihl * 4; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 93c74829cbce..71fb37c70581 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -4368,7 +4368,7 @@ int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size) if (tcp_try_rmem_schedule(sk, skb, skb->truesize)) goto err_free; - if (copy_from_iter(skb_put(skb, size), size, &msg->msg_iter) != size) + if (memcpy_from_msg(skb_put(skb, size), msg, size)) goto err_free; TCP_SKB_CB(skb)->seq = tcp_sk(sk)->rcv_nxt; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 0dbb328fa688..dae7f1a1e464 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -648,7 +648,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb->ip_summed = CHECKSUM_NONE; skb->transport_header = skb->network_header; - err = memcpy_fromiovecend((void *)iph, msg->msg_iter.iov, 0, length); + err = memcpy_from_msg(iph, msg, length); if (err) goto error_fault; -- cgit v1.2.3-70-g09d2 From 31a25fae85956e3a9c778141d29e5e803fb0b124 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 15:53:57 -0500 Subject: net: bury net/core/iovec.c - nothing in there is used anymore Signed-off-by: Al Viro --- include/linux/socket.h | 7 --- net/core/Makefile | 2 +- net/core/iovec.c | 137 ------------------------------------------------- 3 files changed, 1 
insertion(+), 145 deletions(-) delete mode 100644 net/core/iovec.c (limited to 'include') diff --git a/include/linux/socket.h b/include/linux/socket.h index 6e49a14365dc..5c19cba34dce 100644 --- a/include/linux/socket.h +++ b/include/linux/socket.h @@ -318,13 +318,6 @@ struct ucred { /* IPX options */ #define IPX_TYPE 1 -extern int csum_partial_copy_fromiovecend(unsigned char *kdata, - struct iovec *iov, - int offset, - unsigned int len, __wsum *csump); -extern unsigned long iov_pages(const struct iovec *iov, int offset, - unsigned long nr_segs); - extern int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr_storage *kaddr); extern int put_cmsg(struct msghdr*, int level, int type, int len, void *data); diff --git a/net/core/Makefile b/net/core/Makefile index 235e6c50708d..fec0856dd6c0 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -2,7 +2,7 @@ # Makefile for the Linux networking core. # -obj-y := sock.o request_sock.o skbuff.o iovec.o datagram.o stream.o scm.o \ +obj-y := sock.o request_sock.o skbuff.o datagram.o stream.o scm.o \ gen_stats.o gen_estimator.o net_namespace.o secure_seq.o flow_dissector.o obj-$(CONFIG_SYSCTL) += sysctl_net_core.o diff --git a/net/core/iovec.c b/net/core/iovec.c deleted file mode 100644 index dcbe98b3726a..000000000000 --- a/net/core/iovec.c +++ /dev/null @@ -1,137 +0,0 @@ -/* - * iovec manipulation routines. - * - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * Fixes: - * Andrew Lunn : Errors in iovec copying. - * Pedro Roque : Added memcpy_fromiovecend and - * csum_..._fromiovecend. - * Andi Kleen : fixed error handling for 2.1 - * Alexey Kuznetsov: 2.1 optimisations - * Andi Kleen : Fix csum*fromiovecend for IPv6. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -/* - * And now for the all-in-one: copy and checksum from a user iovec - * directly to a datagram - * Calls to csum_partial but the last must be in 32 bit chunks - * - * ip_build_xmit must ensure that when fragmenting only the last - * call to this function will be unaligned also. - */ -int csum_partial_copy_fromiovecend(unsigned char *kdata, struct iovec *iov, - int offset, unsigned int len, __wsum *csump) -{ - __wsum csum = *csump; - int partial_cnt = 0, err = 0; - - /* Skip over the finished iovecs */ - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - } - - while (len > 0) { - u8 __user *base = iov->iov_base + offset; - int copy = min_t(unsigned int, len, iov->iov_len - offset); - - offset = 0; - - /* There is a remnant from previous iov. */ - if (partial_cnt) { - int par_len = 4 - partial_cnt; - - /* iov component is too short ... 
*/ - if (par_len > copy) { - if (copy_from_user(kdata, base, copy)) - goto out_fault; - kdata += copy; - base += copy; - partial_cnt += copy; - len -= copy; - iov++; - if (len) - continue; - *csump = csum_partial(kdata - partial_cnt, - partial_cnt, csum); - goto out; - } - if (copy_from_user(kdata, base, par_len)) - goto out_fault; - csum = csum_partial(kdata - partial_cnt, 4, csum); - kdata += par_len; - base += par_len; - copy -= par_len; - len -= par_len; - partial_cnt = 0; - } - - if (len > copy) { - partial_cnt = copy % 4; - if (partial_cnt) { - copy -= partial_cnt; - if (copy_from_user(kdata + copy, base + copy, - partial_cnt)) - goto out_fault; - } - } - - if (copy) { - csum = csum_and_copy_from_user(base, kdata, copy, - csum, &err); - if (err) - goto out; - } - len -= copy + partial_cnt; - kdata += copy + partial_cnt; - iov++; - } - *csump = csum; -out: - return err; - -out_fault: - err = -EFAULT; - goto out; -} -EXPORT_SYMBOL(csum_partial_copy_fromiovecend); - -unsigned long iov_pages(const struct iovec *iov, int offset, - unsigned long nr_segs) -{ - unsigned long seg, base; - int pages = 0, len, size; - - while (nr_segs && (offset >= iov->iov_len)) { - offset -= iov->iov_len; - ++iov; - --nr_segs; - } - - for (seg = 0; seg < nr_segs; seg++) { - base = (unsigned long)iov[seg].iov_base + offset; - len = iov[seg].iov_len - offset; - size = ((base & ~PAGE_MASK) + len + ~PAGE_MASK) >> PAGE_SHIFT; - pages += size; - offset = 0; - } - - return pages; -} -EXPORT_SYMBOL(iov_pages); -- cgit v1.2.3-70-g09d2 From 1d10eb2f156f5fc83cf6c7ce60441592e66eadb3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 28 Nov 2014 16:39:25 -0500 Subject: crypto: switch af_alg_make_sg() to iov_iter With that, all ->sendmsg() instances are converted to iov_iter primitives and are agnostic wrt the kind of iov_iter they are working with. So's the last remaining ->recvmsg() instance that wasn't kind-agnostic yet. All ->sendmsg() and ->recvmsg() advance ->msg_iter by the amount actually copied and none of them modifies the underlying iovec, etc. Cc: linux-crypto@vger.kernel.org Signed-off-by: Al Viro --- crypto/af_alg.c | 40 ++++++++------------------ crypto/algif_hash.c | 45 ++++++++++++------------------ crypto/algif_skcipher.c | 74 ++++++++++++++++++++++--------------------------- include/crypto/if_alg.h | 3 +- 4 files changed, 62 insertions(+), 100 deletions(-) (limited to 'include') diff --git a/crypto/af_alg.c b/crypto/af_alg.c index 4665b79c729a..eb78fe8a60c8 100644 --- a/crypto/af_alg.c +++ b/crypto/af_alg.c @@ -338,49 +338,31 @@ static const struct net_proto_family alg_family = { .owner = THIS_MODULE, }; -int af_alg_make_sg(struct af_alg_sgl *sgl, void __user *addr, int len, - int write) +int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len) { - unsigned long from = (unsigned long)addr; - unsigned long npages; - unsigned off; - int err; - int i; - - err = -EFAULT; - if (!access_ok(write ? 
VERIFY_READ : VERIFY_WRITE, addr, len)) - goto out; - - off = from & ~PAGE_MASK; - npages = (off + len + PAGE_SIZE - 1) >> PAGE_SHIFT; - if (npages > ALG_MAX_PAGES) - npages = ALG_MAX_PAGES; + size_t off; + ssize_t n; + int npages, i; - err = get_user_pages_fast(from, npages, write, sgl->pages); - if (err < 0) - goto out; + n = iov_iter_get_pages(iter, sgl->pages, len, ALG_MAX_PAGES, &off); + if (n < 0) + return n; - npages = err; - err = -EINVAL; + npages = PAGE_ALIGN(off + n); if (WARN_ON(npages == 0)) - goto out; - - err = 0; + return -EINVAL; sg_init_table(sgl->sg, npages); - for (i = 0; i < npages; i++) { + for (i = 0, len = n; i < npages; i++) { int plen = min_t(int, len, PAGE_SIZE - off); sg_set_page(sgl->sg + i, sgl->pages[i], plen, off); off = 0; len -= plen; - err += plen; } - -out: - return err; + return n; } EXPORT_SYMBOL_GPL(af_alg_make_sg); diff --git a/crypto/algif_hash.c b/crypto/algif_hash.c index 01f56eb7816e..01da360bdb55 100644 --- a/crypto/algif_hash.c +++ b/crypto/algif_hash.c @@ -41,8 +41,6 @@ static int hash_sendmsg(struct kiocb *unused, struct socket *sock, struct sock *sk = sock->sk; struct alg_sock *ask = alg_sk(sk); struct hash_ctx *ctx = ask->private; - unsigned long iovlen; - const struct iovec *iov; long copied = 0; int err; @@ -58,37 +56,28 @@ static int hash_sendmsg(struct kiocb *unused, struct socket *sock, ctx->more = 0; - for (iov = msg->msg_iter.iov, iovlen = msg->msg_iter.nr_segs; iovlen > 0; - iovlen--, iov++) { - unsigned long seglen = iov->iov_len; - char __user *from = iov->iov_base; + while (iov_iter_count(&msg->msg_iter)) { + int len = iov_iter_count(&msg->msg_iter); - while (seglen) { - int len = min_t(unsigned long, seglen, limit); - int newlen; + if (len > limit) + len = limit; - newlen = af_alg_make_sg(&ctx->sgl, from, len, 0); - if (newlen < 0) { - err = copied ? 0 : newlen; - goto unlock; - } - - ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, NULL, - newlen); - - err = af_alg_wait_for_completion( - crypto_ahash_update(&ctx->req), - &ctx->completion); + len = af_alg_make_sg(&ctx->sgl, &msg->msg_iter, len); + if (len < 0) { + err = copied ? 
0 : len; + goto unlock; + } - af_alg_free_sg(&ctx->sgl); + ahash_request_set_crypt(&ctx->req, ctx->sgl.sg, NULL, len); - if (err) - goto unlock; + err = af_alg_wait_for_completion(crypto_ahash_update(&ctx->req), + &ctx->completion); + af_alg_free_sg(&ctx->sgl); + if (err) + goto unlock; - seglen -= newlen; - from += newlen; - copied += newlen; - } + copied += len; + iov_iter_advance(&msg->msg_iter, len); } err = 0; diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index c12207c8dde9..37110fd68adf 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -426,67 +426,59 @@ static int skcipher_recvmsg(struct kiocb *unused, struct socket *sock, &ctx->req)); struct skcipher_sg_list *sgl; struct scatterlist *sg; - unsigned long iovlen; - const struct iovec *iov; int err = -EAGAIN; int used; long copied = 0; lock_sock(sk); - for (iov = msg->msg_iter.iov, iovlen = msg->msg_iter.nr_segs; iovlen > 0; - iovlen--, iov++) { - unsigned long seglen = iov->iov_len; - char __user *from = iov->iov_base; - - while (seglen) { - sgl = list_first_entry(&ctx->tsgl, - struct skcipher_sg_list, list); - sg = sgl->sg; - - while (!sg->length) - sg++; - - if (!ctx->used) { - err = skcipher_wait_for_data(sk, flags); - if (err) - goto unlock; - } + while (iov_iter_count(&msg->msg_iter)) { + sgl = list_first_entry(&ctx->tsgl, + struct skcipher_sg_list, list); + sg = sgl->sg; - used = min_t(unsigned long, ctx->used, seglen); + while (!sg->length) + sg++; - used = af_alg_make_sg(&ctx->rsgl, from, used, 1); - err = used; - if (err < 0) + used = ctx->used; + if (!used) { + err = skcipher_wait_for_data(sk, flags); + if (err) goto unlock; + } + + used = min_t(unsigned long, used, iov_iter_count(&msg->msg_iter)); + + used = af_alg_make_sg(&ctx->rsgl, &msg->msg_iter, used); + err = used; + if (err < 0) + goto unlock; - if (ctx->more || used < ctx->used) - used -= used % bs; + if (ctx->more || used < ctx->used) + used -= used % bs; - err = -EINVAL; - if (!used) - goto free; + err = -EINVAL; + if (!used) + goto free; - ablkcipher_request_set_crypt(&ctx->req, sg, - ctx->rsgl.sg, used, - ctx->iv); + ablkcipher_request_set_crypt(&ctx->req, sg, + ctx->rsgl.sg, used, + ctx->iv); - err = af_alg_wait_for_completion( + err = af_alg_wait_for_completion( ctx->enc ? crypto_ablkcipher_encrypt(&ctx->req) : crypto_ablkcipher_decrypt(&ctx->req), &ctx->completion); free: - af_alg_free_sg(&ctx->rsgl); + af_alg_free_sg(&ctx->rsgl); - if (err) - goto unlock; + if (err) + goto unlock; - copied += used; - from += used; - seglen -= used; - skcipher_pull_sgl(sk, used); - } + copied += used; + skcipher_pull_sgl(sk, used); + iov_iter_advance(&msg->msg_iter, used); } err = 0; diff --git a/include/crypto/if_alg.h b/include/crypto/if_alg.h index cd62bf4289e9..88ea64e9a91c 100644 --- a/include/crypto/if_alg.h +++ b/include/crypto/if_alg.h @@ -67,8 +67,7 @@ int af_alg_unregister_type(const struct af_alg_type *type); int af_alg_release(struct socket *sock); int af_alg_accept(struct sock *sk, struct socket *newsock); -int af_alg_make_sg(struct af_alg_sgl *sgl, void __user *addr, int len, - int write); +int af_alg_make_sg(struct af_alg_sgl *sgl, struct iov_iter *iter, int len); void af_alg_free_sg(struct af_alg_sgl *sgl); int af_alg_cmsg_send(struct msghdr *msg, struct af_alg_control *con); -- cgit v1.2.3-70-g09d2 From aad9a1cec7dcd1d45809b64643fce37061b17788 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2014 14:49:01 -0500 Subject: vhost: switch vhost get_indirect() to iov_iter, kill memcpy_fromiovec() Cc: Michael S. 
Tsirkin Cc: kvm@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Signed-off-by: Al Viro --- drivers/vhost/vhost.c | 6 ++++-- include/linux/uio.h | 1 - lib/iovec.c | 25 ------------------------- 3 files changed, 4 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index cb807d0ea498..2ee28266fd07 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -1125,6 +1125,7 @@ static int get_indirect(struct vhost_virtqueue *vq, struct vring_desc desc; unsigned int i = 0, count, found = 0; u32 len = vhost32_to_cpu(vq, indirect->len); + struct iov_iter from; int ret; /* Sanity check */ @@ -1142,6 +1143,7 @@ static int get_indirect(struct vhost_virtqueue *vq, vq_err(vq, "Translation failure %d in indirect.\n", ret); return ret; } + iov_iter_init(&from, READ, vq->indirect, ret, len); /* We will use the result as an address to read from, so most * architectures only need a compiler barrier here. */ @@ -1164,8 +1166,8 @@ static int get_indirect(struct vhost_virtqueue *vq, i, count); return -EINVAL; } - if (unlikely(memcpy_fromiovec((unsigned char *)&desc, - vq->indirect, sizeof desc))) { + if (unlikely(copy_from_iter(&desc, sizeof(desc), &from) != + sizeof(desc))) { vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", i, (size_t)vhost64_to_cpu(vq, indirect->addr) + i * sizeof desc); return -EINVAL; diff --git a/include/linux/uio.h b/include/linux/uio.h index 1c5e453f7ea9..af3439f4ebf2 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -135,7 +135,6 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); -int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len); int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, int offset, int len); int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata, diff --git a/lib/iovec.c b/lib/iovec.c index 2d99cb4a5006..4a90875c64ae 100644 --- a/lib/iovec.c +++ b/lib/iovec.c @@ -2,31 +2,6 @@ #include #include -/* - * Copy iovec to kernel. Returns -EFAULT on error. - * - * Note: this modifies the original iovec. - */ - -int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) -{ - while (len > 0) { - if (iov->iov_len) { - int copy = min_t(unsigned int, len, iov->iov_len); - if (copy_from_user(kdata, iov->iov_base, copy)) - return -EFAULT; - len -= copy; - kdata += copy; - iov->iov_base += copy; - iov->iov_len -= copy; - } - iov++; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_fromiovec); - /* * Copy kernel to iovec. Returns -EFAULT on error. */ -- cgit v1.2.3-70-g09d2 From ba7438aed924133df54a60e4cd5499d359bcf2a8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2014 15:51:28 -0500 Subject: vhost: don't bother copying iovecs in handle_rx(), kill memcpy_toiovecend() Cc: Michael S. 
Tsirkin Cc: kvm@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Signed-off-by: Al Viro --- drivers/vhost/net.c | 82 +++++++++++++++-------------------------------------- include/linux/uio.h | 3 -- lib/iovec.c | 26 ----------------- 3 files changed, 23 insertions(+), 88 deletions(-) (limited to 'include') diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index d86cc9bb9ea4..e022cc40303d 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -84,10 +84,6 @@ struct vhost_net_ubuf_ref { struct vhost_net_virtqueue { struct vhost_virtqueue vq; - /* hdr is used to store the virtio header. - * Since each iovec has >= 1 byte length, we never need more than - * header length entries to store the header. */ - struct iovec hdr[sizeof(struct virtio_net_hdr_mrg_rxbuf)]; size_t vhost_hlen; size_t sock_hlen; /* vhost zerocopy support fields below: */ @@ -235,44 +231,6 @@ static bool vhost_sock_zcopy(struct socket *sock) sock_flag(sock->sk, SOCK_ZEROCOPY); } -/* Pop first len bytes from iovec. Return number of segments used. */ -static int move_iovec_hdr(struct iovec *from, struct iovec *to, - size_t len, int iov_count) -{ - int seg = 0; - size_t size; - - while (len && seg < iov_count) { - size = min(from->iov_len, len); - to->iov_base = from->iov_base; - to->iov_len = size; - from->iov_len -= size; - from->iov_base += size; - len -= size; - ++from; - ++to; - ++seg; - } - return seg; -} -/* Copy iovec entries for len bytes from iovec. */ -static void copy_iovec_hdr(const struct iovec *from, struct iovec *to, - size_t len, int iovcount) -{ - int seg = 0; - size_t size; - - while (len && seg < iovcount) { - size = min(from->iov_len, len); - to->iov_base = from->iov_base; - to->iov_len = size; - len -= size; - ++from; - ++to; - ++seg; - } -} - /* In case of DMA done not in order in lower device driver for some reason. * upend_idx is used to track end of used idx, done_idx is used to track head * of used idx. Once lower device DMA done contiguously, we will signal KVM @@ -570,9 +528,9 @@ static void handle_rx(struct vhost_net *net) .msg_controllen = 0, .msg_flags = MSG_DONTWAIT, }; - struct virtio_net_hdr_mrg_rxbuf hdr = { - .hdr.flags = 0, - .hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE + struct virtio_net_hdr hdr = { + .flags = 0, + .gso_type = VIRTIO_NET_HDR_GSO_NONE }; size_t total_len = 0; int err, mergeable; @@ -580,6 +538,7 @@ static void handle_rx(struct vhost_net *net) size_t vhost_hlen, sock_hlen; size_t vhost_len, sock_len; struct socket *sock; + struct iov_iter fixup; mutex_lock(&vq->mutex); sock = vq->private_data; @@ -624,14 +583,19 @@ static void handle_rx(struct vhost_net *net) break; } /* We don't need to be notified again. */ - if (unlikely((vhost_hlen))) - /* Skip header. TODO: support TSO. */ - move_iovec_hdr(vq->iov, nvq->hdr, vhost_hlen, in); - else - /* Copy the header for use in VIRTIO_NET_F_MRG_RXBUF: - * needed because recvmsg can modify msg_iov. */ - copy_iovec_hdr(vq->iov, nvq->hdr, sock_hlen, in); - iov_iter_init(&msg.msg_iter, READ, vq->iov, in, sock_len); + iov_iter_init(&msg.msg_iter, READ, vq->iov, in, vhost_len); + fixup = msg.msg_iter; + if (unlikely((vhost_hlen))) { + /* We will supply the header ourselves + * TODO: support TSO. 
+ */ + iov_iter_advance(&msg.msg_iter, vhost_hlen); + } else { + /* It'll come from socket; we'll need to patch + * ->num_buffers over if VIRTIO_NET_F_MRG_RXBUF + */ + iov_iter_advance(&fixup, sizeof(hdr)); + } err = sock->ops->recvmsg(NULL, sock, &msg, sock_len, MSG_DONTWAIT | MSG_TRUNC); /* Userspace might have consumed the packet meanwhile: @@ -643,18 +607,18 @@ static void handle_rx(struct vhost_net *net) vhost_discard_vq_desc(vq, headcount); continue; } + /* Supply virtio_net_hdr if VHOST_NET_F_VIRTIO_NET_HDR */ if (unlikely(vhost_hlen) && - memcpy_toiovecend(nvq->hdr, (unsigned char *)&hdr, 0, - vhost_hlen)) { + copy_to_iter(&hdr, sizeof(hdr), &fixup) != sizeof(hdr)) { vq_err(vq, "Unable to write vnet_hdr at addr %p\n", vq->iov->iov_base); break; } - /* TODO: Should check and handle checksum. */ + /* Supply (or replace) ->num_buffers if VIRTIO_NET_F_MRG_RXBUF + * TODO: Should check and handle checksum. + */ if (likely(mergeable) && - memcpy_toiovecend(nvq->hdr, (unsigned char *)&headcount, - offsetof(typeof(hdr), num_buffers), - sizeof hdr.num_buffers)) { + copy_to_iter(&headcount, 2, &fixup) != 2) { vq_err(vq, "Failed num_buffers write"); vhost_discard_vq_desc(vq, headcount); break; diff --git a/include/linux/uio.h b/include/linux/uio.h index af3439f4ebf2..02bd8a92038a 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -137,7 +137,4 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct io int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, int offset, int len); -int memcpy_toiovecend(const struct iovec *v, unsigned char *kdata, - int offset, int len); - #endif diff --git a/lib/iovec.c b/lib/iovec.c index 4a90875c64ae..d8f17a9b1ccf 100644 --- a/lib/iovec.c +++ b/lib/iovec.c @@ -2,32 +2,6 @@ #include #include -/* - * Copy kernel to iovec. Returns -EFAULT on error. - */ - -int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata, - int offset, int len) -{ - int copy; - for (; len > 0; ++iov) { - /* Skip over the finished iovecs */ - if (unlikely(offset >= iov->iov_len)) { - offset -= iov->iov_len; - continue; - } - copy = min_t(unsigned int, iov->iov_len - offset, len); - if (copy_to_user(iov->iov_base + offset, kdata, copy)) - return -EFAULT; - offset = 0; - kdata += copy; - len -= copy; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_toiovecend); - /* * Copy iovec to kernel. Returns -EFAULT on error. */ -- cgit v1.2.3-70-g09d2 From 57dd8a0735aabff4862025cf64ad94da3d80e620 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 10 Dec 2014 16:03:43 -0500 Subject: vhost: vhost_scsi_handle_vq() should just use copy_from_user() it has just verified that it asks no more than the length of the first segment of iovec. And with that the last user of stuff in lib/iovec.c is gone. RIP. Cc: Michael S. Tsirkin Cc: Nicholas A. 
Bellinger Cc: kvm@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Signed-off-by: Al Viro --- drivers/vhost/scsi.c | 2 +- include/linux/uio.h | 2 -- lib/Makefile | 2 +- lib/iovec.c | 36 ------------------------------------ 4 files changed, 2 insertions(+), 40 deletions(-) delete mode 100644 lib/iovec.c (limited to 'include') diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index d695b1673ae5..dc78d87e0fc2 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -1079,7 +1079,7 @@ vhost_scsi_handle_vq(struct vhost_scsi *vs, struct vhost_virtqueue *vq) req_size, vq->iov[0].iov_len); break; } - ret = memcpy_fromiovecend(req, &vq->iov[0], 0, req_size); + ret = copy_from_user(req, vq->iov[0].iov_base, req_size); if (unlikely(ret)) { vq_err(vq, "Faulted on virtio_scsi_cmd_req\n"); break; diff --git a/include/linux/uio.h b/include/linux/uio.h index 02bd8a92038a..3e0cb4ea3905 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -135,6 +135,4 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); -int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, - int offset, int len); #endif diff --git a/lib/Makefile b/lib/Makefile index 3c3b30b9e020..1071d06398c3 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -24,7 +24,7 @@ obj-y += lockref.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ + gcd.o lcm.o list_sort.o uuid.o flex_array.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o obj-y += string_helpers.o diff --git a/lib/iovec.c b/lib/iovec.c deleted file mode 100644 index d8f17a9b1ccf..000000000000 --- a/lib/iovec.c +++ /dev/null @@ -1,36 +0,0 @@ -#include -#include -#include - -/* - * Copy iovec to kernel. Returns -EFAULT on error. - */ - -int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, - int offset, int len) -{ - /* No data? Done! */ - if (len == 0) - return 0; - - /* Skip over the finished iovecs */ - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - } - - while (len > 0) { - u8 __user *base = iov->iov_base + offset; - int copy = min_t(unsigned int, len, iov->iov_len - offset); - - offset = 0; - if (copy_from_user(kdata, base, copy)) - return -EFAULT; - len -= copy; - kdata += copy; - iov++; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_fromiovecend); -- cgit v1.2.3-70-g09d2 From 12bdf27d46c9d5e490fa164551642e065105db78 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 3 Feb 2015 19:48:04 +0100 Subject: NFC: nci: Add reference to the RF logical connection The NCI_STATIC_RF_CONN_ID logical connection is the most used connection. Keeping it directly accessible in the nci_dev structure will simplify and optimize the access. 
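Editor's sketch (not part of the original patch): the gain is simply replacing a per-call list walk with a cached pointer. Assuming the fields shown in the diff below, the hot-path access changes roughly as follows:

        /* Before: look the static RF connection up on every transceive/notification */
        struct nci_conn_info *conn_info;

        conn_info = nci_get_conn_info_by_conn_id(ndev, NCI_STATIC_RF_CONN_ID);
        if (!conn_info)
                return -EPROTO;

        /* After: the pointer is stored once when the connection is set up
         * (see nci_rf_disc_rsp_packet below) and read directly afterwards */
        conn_info = ndev->rf_conn_info;
        if (!conn_info)
                return -EPROTO;
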
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci_core.h | 1 + net/nfc/nci/core.c | 2 +- net/nfc/nci/ntf.c | 6 ++---- net/nfc/nci/rsp.c | 4 ++-- 4 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index be858870dace..731fa5be9989 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -189,6 +189,7 @@ struct nci_dev { __u8 cur_conn_id; struct list_head conn_info_list; + struct nci_conn_info *rf_conn_info; struct timer_list cmd_timer; struct timer_list data_timer; diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index f74d420e2ead..17ff5f83393c 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -803,7 +803,7 @@ static int nci_transceive(struct nfc_dev *nfc_dev, struct nfc_target *target, int rc; struct nci_conn_info *conn_info; - conn_info = nci_get_conn_info_by_conn_id(ndev, NCI_STATIC_RF_CONN_ID); + conn_info = ndev->rf_conn_info; if (!conn_info) return -EPROTO; diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 33f5f00ecf4c..6bbbf6fdacc0 100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -625,8 +625,7 @@ static void nci_rf_intf_activated_ntf_packet(struct nci_dev *ndev, exit: if (err == NCI_STATUS_OK) { - conn_info = nci_get_conn_info_by_conn_id(ndev, - NCI_STATIC_RF_CONN_ID); + conn_info = ndev->rf_conn_info; if (!conn_info) return; @@ -684,8 +683,7 @@ static void nci_rf_deactivate_ntf_packet(struct nci_dev *ndev, pr_debug("entry, type 0x%x, reason 0x%x\n", ntf->type, ntf->reason); - conn_info = - nci_get_conn_info_by_conn_id(ndev, NCI_STATIC_RF_CONN_ID); + conn_info = ndev->rf_conn_info; if (!conn_info) return; diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 31ccf7d05e82..05268eb473df 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -148,8 +148,7 @@ static void nci_rf_disc_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) if (status == NCI_STATUS_OK) { atomic_set(&ndev->state, NCI_DISCOVERY); - conn_info = nci_get_conn_info_by_conn_id(ndev, - NCI_STATIC_RF_CONN_ID); + conn_info = ndev->rf_conn_info; if (!conn_info) { conn_info = devm_kzalloc(&ndev->nfc_dev->dev, sizeof(struct nci_conn_info), @@ -161,6 +160,7 @@ static void nci_rf_disc_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) conn_info->conn_id = NCI_STATIC_RF_CONN_ID; INIT_LIST_HEAD(&conn_info->list); list_add(&conn_info->list, &ndev->conn_info_list); + ndev->rf_conn_info = conn_info; } } -- cgit v1.2.3-70-g09d2 From b16ae7160a836c4a1e443ea6efca31421e86bae1 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 3 Feb 2015 19:48:05 +0100 Subject: NFC: nci: Support all destinations type when creating a connection The current implementation limits nci_core_conn_create_req() to only manage NCI_DESTINATION_NFCEE. Add new parameters to nci_core_conn_create() to support all destination types described in the NCI specification. Because there are some parameters with variable size dynamic buffer allocation is needed. 
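Editor's sketch (not part of the original patch): with the new signature the caller owns the variably sized parameter buffer and passes its total length explicitly. A minimal NFCEE example, mirroring the st21nfcb conversion in the diff below (ndev and nfcee_id are assumed to be in scope):

        struct core_conn_create_dest_spec_params *params;
        struct dest_spec_params spec;
        size_t params_len = sizeof(*params) + sizeof(spec);
        int r;

        params = kzalloc(params_len, GFP_KERNEL);
        if (!params)
                return -ENOMEM;

        params->type = NCI_DESTINATION_SPECIFIC_PARAM_NFCEE_TYPE;
        params->length = sizeof(spec);
        spec.id = nfcee_id;                     /* assumed caller-provided */
        spec.protocol = NCI_NFCEE_INTERFACE_HCI_ACCESS;
        memcpy(params->value, &spec, sizeof(spec));

        r = nci_core_conn_create(ndev, NCI_DESTINATION_NFCEE, 1,
                                 params_len, params);
        kfree(params);
        return r;
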
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- drivers/nfc/st21nfcb/st21nfcb_se.c | 38 ++++++++++++++++++++++-------- include/net/nfc/nci.h | 18 +++++++------- include/net/nfc/nci_core.h | 4 +++- net/nfc/nci/core.c | 48 ++++++++++++++++++++++++++------------ 4 files changed, 74 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/drivers/nfc/st21nfcb/st21nfcb_se.c b/drivers/nfc/st21nfcb/st21nfcb_se.c index 9f4d8b744f32..3b465e4c85e3 100644 --- a/drivers/nfc/st21nfcb/st21nfcb_se.c +++ b/drivers/nfc/st21nfcb/st21nfcb_se.c @@ -494,7 +494,8 @@ EXPORT_SYMBOL_GPL(st21nfcb_nci_enable_se); static int st21nfcb_hci_network_init(struct nci_dev *ndev) { - struct core_conn_create_dest_spec_params dest_params; + struct core_conn_create_dest_spec_params *dest_params; + struct dest_spec_params spec_params; struct nci_conn_info *conn_info; int r, dev_num; @@ -502,17 +503,29 @@ static int st21nfcb_hci_network_init(struct nci_dev *ndev) if (r != NCI_STATUS_OK) goto exit; - dest_params.type = NCI_DESTINATION_SPECIFIC_PARAM_NFCEE_TYPE; - dest_params.length = sizeof(struct dest_spec_params); - dest_params.value.id = ndev->hci_dev->conn_info->id; - dest_params.value.protocol = NCI_NFCEE_INTERFACE_HCI_ACCESS; - r = nci_core_conn_create(ndev, &dest_params); - if (r != NCI_STATUS_OK) + dest_params = + kzalloc(sizeof(struct core_conn_create_dest_spec_params) + + sizeof(struct dest_spec_params), GFP_KERNEL); + if (dest_params == NULL) { + r = -ENOMEM; goto exit; + } + + dest_params->type = NCI_DESTINATION_SPECIFIC_PARAM_NFCEE_TYPE; + dest_params->length = sizeof(struct dest_spec_params); + spec_params.id = ndev->hci_dev->conn_info->id; + spec_params.protocol = NCI_NFCEE_INTERFACE_HCI_ACCESS; + memcpy(dest_params->value, &spec_params, sizeof(struct dest_spec_params)); + r = nci_core_conn_create(ndev, NCI_DESTINATION_NFCEE, 1, + sizeof(struct core_conn_create_dest_spec_params) + + sizeof(struct dest_spec_params), + dest_params); + if (r != NCI_STATUS_OK) + goto free_dest_params; conn_info = ndev->hci_dev->conn_info; if (!conn_info) - goto exit; + goto free_dest_params; memcpy(ndev->hci_dev->init_data.gates, st21nfcb_gates, sizeof(st21nfcb_gates)); @@ -522,8 +535,10 @@ static int st21nfcb_hci_network_init(struct nci_dev *ndev) * persistent info to discriminate 2 identical chips */ dev_num = find_first_zero_bit(dev_mask, ST21NFCB_NUM_DEVICES); - if (dev_num >= ST21NFCB_NUM_DEVICES) - return -ENODEV; + if (dev_num >= ST21NFCB_NUM_DEVICES) { + r = -ENODEV; + goto free_dest_params; + } scnprintf(ndev->hci_dev->init_data.session_id, sizeof(ndev->hci_dev->init_data.session_id), @@ -540,6 +555,9 @@ static int st21nfcb_hci_network_init(struct nci_dev *ndev) return 0; +free_dest_params: + kfree(dest_params); + exit: return r; } diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index 6c1beb2704b1..695d33cb75e8 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -244,21 +244,23 @@ struct nci_core_set_config_cmd { } __packed; #define NCI_OP_CORE_CONN_CREATE_CMD nci_opcode_pack(NCI_GID_CORE, 0x04) +#define DEST_SPEC_PARAMS_ID_INDEX 0 +#define DEST_SPEC_PARAMS_PROTOCOL_INDEX 1 struct dest_spec_params { - __u8 id; - __u8 protocol; + __u8 id; + __u8 protocol; } __packed; struct core_conn_create_dest_spec_params { - __u8 type; - __u8 length; - struct dest_spec_params value; + __u8 type; + __u8 length; + __u8 value[0]; } __packed; struct nci_core_conn_create_cmd { - __u8 destination_type; - __u8 number_destination_params; - struct core_conn_create_dest_spec_params params; + __u8 
destination_type; + __u8 number_destination_params; + struct core_conn_create_dest_spec_params params[0]; } __packed; #define NCI_OP_CORE_CONN_CLOSE_CMD nci_opcode_pack(NCI_GID_CORE, 0x05) diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index 731fa5be9989..d34c1b2295d7 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -263,7 +263,9 @@ int nci_set_config(struct nci_dev *ndev, __u8 id, size_t len, __u8 *val); int nci_nfcee_discover(struct nci_dev *ndev, u8 action); int nci_nfcee_mode_set(struct nci_dev *ndev, u8 nfcee_id, u8 nfcee_mode); -int nci_core_conn_create(struct nci_dev *ndev, +int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, + u8 number_destination_params, + size_t params_len, struct core_conn_create_dest_spec_params *params); int nci_core_conn_close(struct nci_dev *ndev, u8 conn_id); diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 17ff5f83393c..ddfe91e43c88 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -41,6 +41,11 @@ #include #include +struct core_conn_create_data { + int length; + struct nci_core_conn_create_cmd *cmd; +}; + static void nci_cmd_work(struct work_struct *work); static void nci_rx_work(struct work_struct *work); static void nci_tx_work(struct work_struct *work); @@ -509,25 +514,38 @@ EXPORT_SYMBOL(nci_nfcee_mode_set); static void nci_core_conn_create_req(struct nci_dev *ndev, unsigned long opt) { - struct nci_core_conn_create_cmd cmd; - struct core_conn_create_dest_spec_params *params = - (struct core_conn_create_dest_spec_params *)opt; - - cmd.destination_type = NCI_DESTINATION_NFCEE; - cmd.number_destination_params = 1; - memcpy(&cmd.params.type, params, - sizeof(struct core_conn_create_dest_spec_params)); - nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, - sizeof(struct nci_core_conn_create_cmd), &cmd); + struct core_conn_create_data *data = + (struct core_conn_create_data *)opt; + + nci_send_cmd(ndev, NCI_OP_CORE_CONN_CREATE_CMD, data->length, data->cmd); } -int nci_core_conn_create(struct nci_dev *ndev, +int nci_core_conn_create(struct nci_dev *ndev, u8 destination_type, + u8 number_destination_params, + size_t params_len, struct core_conn_create_dest_spec_params *params) { - ndev->cur_id = params->value.id; - return nci_request(ndev, nci_core_conn_create_req, - (unsigned long)params, - msecs_to_jiffies(NCI_CMD_TIMEOUT)); + int r; + struct nci_core_conn_create_cmd *cmd; + struct core_conn_create_data data; + + data.length = params_len + sizeof(struct nci_core_conn_create_cmd); + cmd = kzalloc(data.length, GFP_KERNEL); + if (!cmd) + return -ENOMEM; + + cmd->destination_type = destination_type; + cmd->number_destination_params = number_destination_params; + memcpy(cmd->params, params, params_len); + + data.cmd = cmd; + ndev->cur_id = params->value[DEST_SPEC_PARAMS_ID_INDEX]; + + r = __nci_request(ndev, nci_core_conn_create_req, + (unsigned long)&data, + msecs_to_jiffies(NCI_CMD_TIMEOUT)); + kfree(cmd); + return r; } EXPORT_SYMBOL(nci_core_conn_create); -- cgit v1.2.3-70-g09d2 From 3ba5c8466b320c3fd5d5861b34aa8a31dd0cf6b3 Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 3 Feb 2015 19:48:06 +0100 Subject: NFC: nci: Change credits field to credits_cnt For consistency sake change nci_core_conn_create_rsp structure credits field to credits_cnt. 
Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/net/nfc/nci.h | 2 +- net/nfc/nci/rsp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/nfc/nci.h b/include/net/nfc/nci.h index 695d33cb75e8..a2f2f3d3196d 100644 --- a/include/net/nfc/nci.h +++ b/include/net/nfc/nci.h @@ -353,7 +353,7 @@ struct nci_core_set_config_rsp { struct nci_core_conn_create_rsp { __u8 status; __u8 max_ctrl_pkt_payload_len; - __u8 credits; + __u8 credits_cnt; __u8 conn_id; } __packed; diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index 05268eb473df..b419fed77ea3 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -245,7 +245,7 @@ static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev, conn_info->conn_id = rsp->conn_id; conn_info->max_pkt_payload_len = rsp->max_ctrl_pkt_payload_len; - atomic_set(&conn_info->credits_cnt, rsp->credits); + atomic_set(&conn_info->credits_cnt, rsp->credits_cnt); } exit: -- cgit v1.2.3-70-g09d2 From 15d4a8da0e440faf589a26346c8287e1ed0abe6c Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Tue, 3 Feb 2015 19:48:07 +0100 Subject: NFC: nci: Move logical connection structure allocation conn_info is currently allocated only after nfcee_discovery_ntf which is not generic enough for logical connection other than NFCEE. The corresponding conn_info is now created in nci_core_conn_create_rsp(). Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- drivers/nfc/st21nfcb/st21nfcb_se.c | 2 +- include/net/nfc/nci_core.h | 1 + net/nfc/nci/hci.c | 8 ++++++++ net/nfc/nci/ntf.c | 23 ++--------------------- net/nfc/nci/rsp.c | 29 ++++++++++++++++++++++++----- 5 files changed, 36 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/drivers/nfc/st21nfcb/st21nfcb_se.c b/drivers/nfc/st21nfcb/st21nfcb_se.c index 3b465e4c85e3..d23e8f27c4aa 100644 --- a/drivers/nfc/st21nfcb/st21nfcb_se.c +++ b/drivers/nfc/st21nfcb/st21nfcb_se.c @@ -513,7 +513,7 @@ static int st21nfcb_hci_network_init(struct nci_dev *ndev) dest_params->type = NCI_DESTINATION_SPECIFIC_PARAM_NFCEE_TYPE; dest_params->length = sizeof(struct dest_spec_params); - spec_params.id = ndev->hci_dev->conn_info->id; + spec_params.id = ndev->hci_dev->nfcee_id; spec_params.protocol = NCI_NFCEE_INTERFACE_HCI_ACCESS; memcpy(dest_params->value, &spec_params, sizeof(struct dest_spec_params)); r = nci_core_conn_create(ndev, NCI_DESTINATION_NFCEE, 1, diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index d34c1b2295d7..ff87f8611fa3 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -159,6 +159,7 @@ struct nci_hci_init_data { #define NCI_HCI_MAX_GATES 256 struct nci_hci_dev { + u8 nfcee_id; struct nci_dev *ndev; struct nci_conn_info *conn_info; diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index ecf253942606..ed54ec533836 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -615,12 +615,20 @@ static int nci_hci_dev_connect_gates(struct nci_dev *ndev, int nci_hci_dev_session_init(struct nci_dev *ndev) { + struct nci_conn_info *conn_info; struct sk_buff *skb; int r; ndev->hci_dev->count_pipes = 0; ndev->hci_dev->expected_pipes = 0; + conn_info = ndev->hci_dev->conn_info; + if (!conn_info) + return -EPROTO; + + conn_info->data_exchange_cb = nci_hci_data_received_cb; + conn_info->data_exchange_cb_context = ndev; + nci_hci_reset_pipes(ndev->hci_dev); if (ndev->hci_dev->init_data.gates[0].gate != NCI_HCI_ADMIN_GATE) diff --git a/net/nfc/nci/ntf.c b/net/nfc/nci/ntf.c index 6bbbf6fdacc0..3218071072ac 
100644 --- a/net/nfc/nci/ntf.c +++ b/net/nfc/nci/ntf.c @@ -723,7 +723,6 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, struct sk_buff *skb) { u8 status = NCI_STATUS_OK; - struct nci_conn_info *conn_info; struct nci_nfcee_discover_ntf *nfcee_ntf = (struct nci_nfcee_discover_ntf *)skb->data; @@ -734,27 +733,9 @@ static void nci_nfcee_discover_ntf_packet(struct nci_dev *ndev, * and only one, NFCEE_DISCOVER_NTF with a Protocol type of * “HCI Access”, even if the HCI Network contains multiple NFCEEs. */ - if (!ndev->hci_dev->conn_info) { - conn_info = devm_kzalloc(&ndev->nfc_dev->dev, - sizeof(*conn_info), GFP_KERNEL); - if (!conn_info) { - status = NCI_STATUS_REJECTED; - goto exit; - } - - conn_info->id = nfcee_ntf->nfcee_id; - conn_info->conn_id = NCI_INVALID_CONN_ID; - - conn_info->data_exchange_cb = nci_hci_data_received_cb; - conn_info->data_exchange_cb_context = ndev; + ndev->hci_dev->nfcee_id = nfcee_ntf->nfcee_id; + ndev->cur_id = nfcee_ntf->nfcee_id; - INIT_LIST_HEAD(&conn_info->list); - list_add(&conn_info->list, &ndev->conn_info_list); - - ndev->hci_dev->conn_info = conn_info; - } - -exit: nci_req_complete(ndev, status); } diff --git a/net/nfc/nci/rsp.c b/net/nfc/nci/rsp.c index b419fed77ea3..02486bc2ceea 100644 --- a/net/nfc/nci/rsp.c +++ b/net/nfc/nci/rsp.c @@ -233,16 +233,27 @@ static void nci_core_conn_create_rsp_packet(struct nci_dev *ndev, if (status == NCI_STATUS_OK) { rsp = (struct nci_core_conn_create_rsp *)skb->data; - list_for_each_entry(conn_info, &ndev->conn_info_list, list) { - if (conn_info->id == ndev->cur_id) - break; - } - if (!conn_info || conn_info->id != ndev->cur_id) { + conn_info = devm_kzalloc(&ndev->nfc_dev->dev, + sizeof(*conn_info), GFP_KERNEL); + if (!conn_info) { status = NCI_STATUS_REJECTED; goto exit; } + conn_info->id = ndev->cur_id; + conn_info->conn_id = rsp->conn_id; + + /* Note: data_exchange_cb and data_exchange_cb_context need to + * be specify out of nci_core_conn_create_rsp_packet + */ + + INIT_LIST_HEAD(&conn_info->list); + list_add(&conn_info->list, &ndev->conn_info_list); + + if (ndev->cur_id == ndev->hci_dev->nfcee_id) + ndev->hci_dev->conn_info = conn_info; + conn_info->conn_id = rsp->conn_id; conn_info->max_pkt_payload_len = rsp->max_ctrl_pkt_payload_len; atomic_set(&conn_info->credits_cnt, rsp->credits_cnt); @@ -255,9 +266,17 @@ exit: static void nci_core_conn_close_rsp_packet(struct nci_dev *ndev, struct sk_buff *skb) { + struct nci_conn_info *conn_info; __u8 status = skb->data[0]; pr_debug("status 0x%x\n", status); + if (status == NCI_STATUS_OK) { + conn_info = nci_get_conn_info_by_conn_id(ndev, ndev->cur_id); + if (conn_info) { + list_del(&conn_info->list); + devm_kfree(&ndev->nfc_dev->dev, conn_info); + } + } nci_req_complete(ndev, status); } -- cgit v1.2.3-70-g09d2 From 2bd82484bb4c5db1d5dc983ac7c409b2782e0154 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Feb 2015 23:48:24 -0800 Subject: xps: fix xps for stacked devices A typical qdisc setup is the following : bond0 : bonding device, using HTB hierarchy eth1/eth2 : slaves, multiqueue NIC, using MQ + FQ qdisc XPS allows to spread packets on specific tx queues, based on the cpu doing the send. Problem is that dequeues from bond0 qdisc can happen on random cpus, due to the fact that qdisc_run() can dequeue a batch of packets. 
CPUA -> queue packet P1 on bond0 qdisc, P1->ooo_okay=1 CPUA -> queue packet P2 on bond0 qdisc, P2->ooo_okay=0 CPUB -> dequeue packet P1 from bond0 enqueue packet on eth1/eth2 CPUC -> dequeue packet P2 from bond0 enqueue packet on eth1/eth2 using sk cache (ooo_okay is 0) get_xps_queue() then might select wrong queue for P1, since current cpu might be different than CPUA. P2 might be sent on the old queue (stored in sk->sk_tx_queue_mapping), if CPUC runs a bit faster (or CPUB spins a bit on qdisc lock) Effect of this bug is TCP reorders, and more generally not optimal TX queue placement. (A victim bulk flow can be migrated to the wrong TX queue for a while) To fix this, we have to record sender cpu number the first time dev_queue_xmit() is called for one tx skb. We can union napi_id (used on receive path) and sender_cpu, granted we clear sender_cpu in skb_scrub_packet() (credit to Willem for this union idea) Signed-off-by: Eric Dumazet Cc: Willem de Bruijn Cc: Nandita Dukkipati Cc: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/skbuff.h | 7 +++++-- net/core/flow_dissector.c | 7 ++++++- net/core/skbuff.c | 4 ++++ 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 85ab7d72b54c..2748ff639144 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -626,8 +626,11 @@ struct sk_buff { __u32 hash; __be16 vlan_proto; __u16 vlan_tci; -#ifdef CONFIG_NET_RX_BUSY_POLL - unsigned int napi_id; +#if defined(CONFIG_NET_RX_BUSY_POLL) || defined(CONFIG_XPS) + union { + unsigned int napi_id; + unsigned int sender_cpu; + }; #endif #ifdef CONFIG_NETWORK_SECMARK __u32 secmark; diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index beb83d1ac1c6..2c35c02a931e 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -422,7 +422,7 @@ static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) dev_maps = rcu_dereference(dev->xps_maps); if (dev_maps) { map = rcu_dereference( - dev_maps->cpu_map[raw_smp_processor_id()]); + dev_maps->cpu_map[skb->sender_cpu - 1]); if (map) { if (map->len == 1) queue_index = map->queues[0]; @@ -468,6 +468,11 @@ struct netdev_queue *netdev_pick_tx(struct net_device *dev, { int queue_index = 0; +#ifdef CONFIG_XPS + if (skb->sender_cpu == 0) + skb->sender_cpu = raw_smp_processor_id() + 1; +#endif + if (dev->real_num_tx_queues != 1) { const struct net_device_ops *ops = dev->netdev_ops; if (ops->ndo_select_queue) diff --git a/net/core/skbuff.c b/net/core/skbuff.c index a5bff2767f15..88c613eab142 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -825,6 +825,9 @@ static void __copy_skb_header(struct sk_buff *new, const struct sk_buff *old) #ifdef CONFIG_NET_RX_BUSY_POLL CHECK_SKB_FIELD(napi_id); #endif +#ifdef CONFIG_XPS + CHECK_SKB_FIELD(sender_cpu); +#endif #ifdef CONFIG_NET_SCHED CHECK_SKB_FIELD(tc_index); #ifdef CONFIG_NET_CLS_ACT @@ -4169,6 +4172,7 @@ void skb_scrub_packet(struct sk_buff *skb, bool xnet) skb->ignore_df = 0; skb_dst_drop(skb); skb->mark = 0; + skb->sender_cpu = 0; skb_init_secmark(skb); secpath_reset(skb); nf_reset(skb); -- cgit v1.2.3-70-g09d2 From dcdc8994697faa789669c3fdaca1a8bc27a8f356 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Mon, 2 Feb 2015 16:07:34 -0800 Subject: net: add skb functions to process remote checksum offload This patch adds skb_remcsum_process and skb_gro_remcsum_process to perform the appropriate adjustments to the skb when receiving remote checksum offload. 
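Editor's sketch (not part of the original patch): callers now pass only the header pointer plus the start/offset taken from the remote checksum offload option; the helpers hide the CHECKSUM_COMPLETE handling and the csum fix-ups. The two call sites, following the vxlan hunks below, look roughly like:

        /* Non-GRO receive path: vh points at the vxlan header, hdrlen past it */
        skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset);

        /* GRO path: same arguments, but the checksum state lives in NAPI_GRO_CB(skb) */
        skb_gro_remcsum_process(skb, (void *)vh + hdrlen, start, offset);
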
Updated vxlan and gue to use these functions. Tested: Ran TCP_RR and TCP_STREAM netperf for VXLAN and GUE, did not see any change in performance. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 18 ++---------------- include/linux/netdevice.h | 15 +++++++++++++++ include/linux/skbuff.h | 21 +++++++++++++++++++++ net/ipv4/fou.c | 18 ++---------------- 4 files changed, 40 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index 31bac2a21ce3..c184717e8b28 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -558,7 +558,6 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, u32 data) { size_t start, offset, plen; - __wsum delta; if (skb->remcsum_offload) return vh; @@ -580,12 +579,7 @@ static struct vxlanhdr *vxlan_gro_remcsum(struct sk_buff *skb, return NULL; } - delta = remcsum_adjust((void *)vh + hdrlen, - NAPI_GRO_CB(skb)->csum, start, offset); - - /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); - NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); + skb_gro_remcsum_process(skb, (void *)vh + hdrlen, start, offset); skb->remcsum_offload = 1; @@ -1159,7 +1153,6 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, size_t hdrlen, u32 data) { size_t start, offset, plen; - __wsum delta; if (skb->remcsum_offload) { /* Already processed in GRO path */ @@ -1179,14 +1172,7 @@ static struct vxlanhdr *vxlan_remcsum(struct sk_buff *skb, struct vxlanhdr *vh, vh = (struct vxlanhdr *)(udp_hdr(skb) + 1); - if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) - __skb_checksum_complete(skb); - - delta = remcsum_adjust((void *)vh + hdrlen, - skb->csum, start, offset); - - /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); + skb_remcsum_process(skb, (void *)vh + hdrlen, start, offset); return vh; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 16251e96e6aa..1347ac50d2af 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2318,6 +2318,21 @@ do { \ compute_pseudo(skb, proto)); \ } while (0) +static inline void skb_gro_remcsum_process(struct sk_buff *skb, void *ptr, + int start, int offset) +{ + __wsum delta; + + BUG_ON(!NAPI_GRO_CB(skb)->csum_valid); + + delta = remcsum_adjust(ptr, NAPI_GRO_CB(skb)->csum, start, offset); + + /* Adjust skb->csum since we changed the packet */ + skb->csum = csum_add(skb->csum, delta); + NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); +} + + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2748ff639144..5405dfe02572 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3099,6 +3099,27 @@ do { \ compute_pseudo(skb, proto)); \ } while (0) +/* Update skbuf and packet to reflect the remote checksum offload operation. + * When called, ptr indicates the starting point for skb->csum when + * ip_summed is CHECKSUM_COMPLETE. If we need create checksum complete + * here, skb_postpull_rcsum is done so skb->csum start is ptr. 
+ */ +static inline void skb_remcsum_process(struct sk_buff *skb, void *ptr, + int start, int offset) +{ + __wsum delta; + + if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) { + __skb_checksum_complete(skb); + skb_postpull_rcsum(skb, skb->data, ptr - (void *)skb->data); + } + + delta = remcsum_adjust(ptr, skb->csum, start, offset); + + /* Adjust skb->csum since we changed the packet */ + skb->csum = csum_add(skb->csum, delta); +} + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) diff --git a/net/ipv4/fou.c b/net/ipv4/fou.c index 3bc0cf07661c..92ddea1e6457 100644 --- a/net/ipv4/fou.c +++ b/net/ipv4/fou.c @@ -70,7 +70,6 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); - __wsum delta; if (skb->remcsum_offload) { /* Already processed in GRO path */ @@ -82,14 +81,7 @@ static struct guehdr *gue_remcsum(struct sk_buff *skb, struct guehdr *guehdr, return NULL; guehdr = (struct guehdr *)&udp_hdr(skb)[1]; - if (unlikely(skb->ip_summed != CHECKSUM_COMPLETE)) - __skb_checksum_complete(skb); - - delta = remcsum_adjust((void *)guehdr + hdrlen, - skb->csum, start, offset); - - /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); + skb_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset); return guehdr; } @@ -228,7 +220,6 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, size_t start = ntohs(pd[0]); size_t offset = ntohs(pd[1]); size_t plen = hdrlen + max_t(size_t, offset + sizeof(u16), start); - __wsum delta; if (skb->remcsum_offload) return guehdr; @@ -243,12 +234,7 @@ static struct guehdr *gue_gro_remcsum(struct sk_buff *skb, unsigned int off, return NULL; } - delta = remcsum_adjust((void *)guehdr + hdrlen, - NAPI_GRO_CB(skb)->csum, start, offset); - - /* Adjust skb->csum since we changed the packet */ - skb->csum = csum_add(skb->csum, delta); - NAPI_GRO_CB(skb)->csum = csum_add(NAPI_GRO_CB(skb)->csum, delta); + skb_gro_remcsum_process(skb, (void *)guehdr + hdrlen, start, offset); skb->remcsum_offload = 1; -- cgit v1.2.3-70-g09d2 From 61bd3857ff2c7daf756d49b41e6277bbdaa8f789 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:29 +0200 Subject: net/core: Add event for a change in slave state Add event which provides an indication on a change in the state of a bonding slave. The event handler should cast the pointer to the appropriate type (struct netdev_bonding_info) in order to get the full info about the slave. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 15 +++++++++++++++ net/core/dev.c | 20 ++++++++++++++++++++ net/core/rtnetlink.c | 1 + 3 files changed, 36 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1347ac50d2af..ce784d5018e0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -51,6 +51,7 @@ #include #include #include +#include struct netpoll_info; struct device; @@ -2056,6 +2057,7 @@ struct pcpu_sw_netstats { #define NETDEV_RESEND_IGMP 0x0016 #define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */ #define NETDEV_CHANGEINFODATA 0x0018 +#define NETDEV_BONDING_INFO 0x0019 int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); @@ -3494,6 +3496,19 @@ struct sk_buff *__skb_gso_segment(struct sk_buff *skb, struct sk_buff *skb_mac_gso_segment(struct sk_buff *skb, netdev_features_t features); +struct netdev_bonding_info { + ifslave slave; + ifbond master; +}; + +struct netdev_notifier_bonding_info { + struct netdev_notifier_info info; /* must be first */ + struct netdev_bonding_info bonding_info; +}; + +void netdev_bonding_info_change(struct net_device *dev, + struct netdev_bonding_info *bonding_info); + static inline struct sk_buff *skb_gso_segment(struct sk_buff *skb, netdev_features_t features) { diff --git a/net/core/dev.c b/net/core/dev.c index 1d564d68e31a..ede0b161b115 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5355,6 +5355,26 @@ void netdev_upper_dev_unlink(struct net_device *dev, } EXPORT_SYMBOL(netdev_upper_dev_unlink); +/** + * netdev_bonding_info_change - Dispatch event about slave change + * @dev: device + * @netdev_bonding_info: info to dispatch + * + * Send NETDEV_BONDING_INFO to netdev notifiers with info. + * The caller must hold the RTNL lock. + */ +void netdev_bonding_info_change(struct net_device *dev, + struct netdev_bonding_info *bonding_info) +{ + struct netdev_notifier_bonding_info info; + + memcpy(&info.bonding_info, bonding_info, + sizeof(struct netdev_bonding_info)); + call_netdevice_notifiers_info(NETDEV_BONDING_INFO, dev, + &info.info); +} +EXPORT_SYMBOL(netdev_bonding_info_change); + void netdev_adjacent_add_links(struct net_device *dev) { struct netdev_adjacent *iter; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 673cb4c6f391..4cd5e350d129 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3180,6 +3180,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_UNREGISTER_FINAL: case NETDEV_RELEASE: case NETDEV_JOIN: + case NETDEV_BONDING_INFO: break; default: rtmsg_ifinfo(RTM_NEWLINK, dev, 0, GFP_KERNEL); -- cgit v1.2.3-70-g09d2 From 69a2338e05995b10225b2a131f7540d1305980e4 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:30 +0200 Subject: net/bonding: Move slave state changes to a helper function Move slave state changes to a helper function, this is a pre-step for adding functionality of dispatching an event when this helper is called. This commit doesn't add new functionality. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 64 ++++++++++++++++++++++++----------------- include/net/bonding.h | 5 ++++ 2 files changed, 43 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index c9e519cb9214..92fe3a1bf52b 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -790,7 +790,7 @@ void bond_change_active_slave(struct bonding *bond, struct slave *new_active) } new_active->delay = 0; - new_active->link = BOND_LINK_UP; + bond_set_slave_link_state(new_active, BOND_LINK_UP); if (BOND_MODE(bond) == BOND_MODE_8023AD) bond_3ad_handle_link_change(new_active, BOND_LINK_UP); @@ -1181,6 +1181,21 @@ static void bond_free_slave(struct slave *slave) kfree(slave); } +static void bond_fill_ifbond(struct bonding *bond, struct ifbond *info) +{ + info->bond_mode = BOND_MODE(bond); + info->miimon = bond->params.miimon; + info->num_slaves = bond->slave_cnt; +} + +static void bond_fill_ifslave(struct slave *slave, struct ifslave *info) +{ + strcpy(info->slave_name, slave->dev->name); + info->link = slave->link; + info->state = bond_slave_state(slave); + info->link_failure_count = slave->link_failure_count; +} + /* enslave device to bond device */ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) { @@ -1444,19 +1459,22 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) if (bond->params.miimon) { if (bond_check_dev_link(bond, slave_dev, 0) == BMSR_LSTATUS) { if (bond->params.updelay) { - new_slave->link = BOND_LINK_BACK; + bond_set_slave_link_state(new_slave, + BOND_LINK_BACK); new_slave->delay = bond->params.updelay; } else { - new_slave->link = BOND_LINK_UP; + bond_set_slave_link_state(new_slave, + BOND_LINK_UP); } } else { - new_slave->link = BOND_LINK_DOWN; + bond_set_slave_link_state(new_slave, BOND_LINK_DOWN); } } else if (bond->params.arp_interval) { - new_slave->link = (netif_carrier_ok(slave_dev) ? - BOND_LINK_UP : BOND_LINK_DOWN); + bond_set_slave_link_state(new_slave, + (netif_carrier_ok(slave_dev) ? 
+ BOND_LINK_UP : BOND_LINK_DOWN)); } else { - new_slave->link = BOND_LINK_UP; + bond_set_slave_link_state(new_slave, BOND_LINK_UP); } if (new_slave->link != BOND_LINK_DOWN) @@ -1821,11 +1839,7 @@ static int bond_release_and_destroy(struct net_device *bond_dev, static int bond_info_query(struct net_device *bond_dev, struct ifbond *info) { struct bonding *bond = netdev_priv(bond_dev); - - info->bond_mode = BOND_MODE(bond); - info->miimon = bond->params.miimon; - info->num_slaves = bond->slave_cnt; - + bond_fill_ifbond(bond, info); return 0; } @@ -1839,10 +1853,7 @@ static int bond_slave_info_query(struct net_device *bond_dev, struct ifslave *in bond_for_each_slave(bond, slave, iter) { if (i++ == (int)info->slave_id) { res = 0; - strcpy(info->slave_name, slave->dev->name); - info->link = slave->link; - info->state = bond_slave_state(slave); - info->link_failure_count = slave->link_failure_count; + bond_fill_ifslave(slave, info); break; } } @@ -1872,7 +1883,7 @@ static int bond_miimon_inspect(struct bonding *bond) if (link_state) continue; - slave->link = BOND_LINK_FAIL; + bond_set_slave_link_state(slave, BOND_LINK_FAIL); slave->delay = bond->params.downdelay; if (slave->delay) { netdev_info(bond->dev, "link status down for %sinterface %s, disabling it in %d ms\n", @@ -1887,7 +1898,7 @@ static int bond_miimon_inspect(struct bonding *bond) case BOND_LINK_FAIL: if (link_state) { /* recovered before downdelay expired */ - slave->link = BOND_LINK_UP; + bond_set_slave_link_state(slave, BOND_LINK_UP); slave->last_link_up = jiffies; netdev_info(bond->dev, "link status up again after %d ms for interface %s\n", (bond->params.downdelay - slave->delay) * @@ -1909,7 +1920,7 @@ static int bond_miimon_inspect(struct bonding *bond) if (!link_state) continue; - slave->link = BOND_LINK_BACK; + bond_set_slave_link_state(slave, BOND_LINK_BACK); slave->delay = bond->params.updelay; if (slave->delay) { @@ -1922,7 +1933,8 @@ static int bond_miimon_inspect(struct bonding *bond) /*FALLTHRU*/ case BOND_LINK_BACK: if (!link_state) { - slave->link = BOND_LINK_DOWN; + bond_set_slave_link_state(slave, + BOND_LINK_DOWN); netdev_info(bond->dev, "link status down again after %d ms for interface %s\n", (bond->params.updelay - slave->delay) * bond->params.miimon, @@ -1960,7 +1972,7 @@ static void bond_miimon_commit(struct bonding *bond) continue; case BOND_LINK_UP: - slave->link = BOND_LINK_UP; + bond_set_slave_link_state(slave, BOND_LINK_UP); slave->last_link_up = jiffies; primary = rtnl_dereference(bond->primary_slave); @@ -2000,7 +2012,7 @@ static void bond_miimon_commit(struct bonding *bond) if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; - slave->link = BOND_LINK_DOWN; + bond_set_slave_link_state(slave, BOND_LINK_DOWN); if (BOND_MODE(bond) == BOND_MODE_ACTIVEBACKUP || BOND_MODE(bond) == BOND_MODE_8023AD) @@ -2583,7 +2595,7 @@ static void bond_ab_arp_commit(struct bonding *bond) struct slave *current_arp_slave; current_arp_slave = rtnl_dereference(bond->current_arp_slave); - slave->link = BOND_LINK_UP; + bond_set_slave_link_state(slave, BOND_LINK_UP); if (current_arp_slave) { bond_set_slave_inactive_flags( current_arp_slave, @@ -2606,7 +2618,7 @@ static void bond_ab_arp_commit(struct bonding *bond) if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; - slave->link = BOND_LINK_DOWN; + bond_set_slave_link_state(slave, BOND_LINK_DOWN); bond_set_slave_inactive_flags(slave, BOND_SLAVE_NOTIFY_NOW); @@ -2685,7 +2697,7 @@ static bool bond_ab_arp_probe(struct bonding *bond) * up when it is 
actually down */ if (!bond_slave_is_up(slave) && slave->link == BOND_LINK_UP) { - slave->link = BOND_LINK_DOWN; + bond_set_slave_link_state(slave, BOND_LINK_DOWN); if (slave->link_failure_count < UINT_MAX) slave->link_failure_count++; @@ -2705,7 +2717,7 @@ static bool bond_ab_arp_probe(struct bonding *bond) if (!new_slave) goto check_state; - new_slave->link = BOND_LINK_BACK; + bond_set_slave_link_state(new_slave, BOND_LINK_BACK); bond_set_slave_active_flags(new_slave, BOND_SLAVE_NOTIFY_LATER); bond_arp_send_all(bond, new_slave); new_slave->last_link_up = jiffies; diff --git a/include/net/bonding.h b/include/net/bonding.h index 29f53eacac0a..d1367ec74933 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -490,6 +490,11 @@ static inline bool bond_is_slave_inactive(struct slave *slave) return slave->inactive; } +static inline void bond_set_slave_link_state(struct slave *slave, int state) +{ + slave->link = state; +} + static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local) { struct in_device *in_dev; -- cgit v1.2.3-70-g09d2 From 69e6113343cfe983511904ffca0d7a1466460b67 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:31 +0200 Subject: net/bonding: Notify state change on slaves Use notifier chain to dispatch an event upon a change in slave state. Event is dispatched with slave specific info. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 42 +++++++++++++++++++++++++++++++++++++++++ include/net/bonding.h | 12 ++++++++++++ 2 files changed, 54 insertions(+) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 92fe3a1bf52b..679ef00d6b16 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1196,6 +1196,47 @@ static void bond_fill_ifslave(struct slave *slave, struct ifslave *info) info->link_failure_count = slave->link_failure_count; } +static void bond_netdev_notify(struct slave *slave, struct net_device *dev) +{ + struct bonding *bond = slave->bond; + struct netdev_bonding_info bonding_info; + + rtnl_lock(); + /* make sure that slave is still valid */ + if (dev->priv_flags & IFF_BONDING) { + bond_fill_ifslave(slave, &bonding_info.slave); + bond_fill_ifbond(bond, &bonding_info.master); + netdev_bonding_info_change(slave->dev, &bonding_info); + } + rtnl_unlock(); +} + +static void bond_netdev_notify_work(struct work_struct *_work) +{ + struct netdev_notify_work *w = + container_of(_work, struct netdev_notify_work, work.work); + + bond_netdev_notify(w->slave, w->dev); + dev_put(w->dev); +} + +void bond_queue_slave_event(struct slave *slave) +{ + struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC); + + if (!nnw) + return; + + INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work); + nnw->slave = slave; + nnw->dev = slave->dev; + + if (queue_delayed_work(slave->bond->wq, &nnw->work, 0)) + dev_hold(slave->dev); + else + kfree(nnw); +} + /* enslave device to bond device */ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) { @@ -1590,6 +1631,7 @@ int bond_enslave(struct net_device *bond_dev, struct net_device *slave_dev) new_slave->link != BOND_LINK_DOWN ? 
"an up" : "a down"); /* enslave is successful */ + bond_queue_slave_event(new_slave); return 0; /* Undo stages on error */ diff --git a/include/net/bonding.h b/include/net/bonding.h index d1367ec74933..4e17095ad46a 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -150,6 +150,12 @@ struct bond_parm_tbl { int mode; }; +struct netdev_notify_work { + struct delayed_work work; + struct slave *slave; + struct net_device *dev; +}; + struct slave { struct net_device *dev; /* first - useful for panic debug */ struct bonding *bond; /* our master */ @@ -243,6 +249,8 @@ struct bonding { #define bond_slave_get_rtnl(dev) \ ((struct slave *) rtnl_dereference(dev->rx_handler_data)) +void bond_queue_slave_event(struct slave *slave); + struct bond_vlan_tag { __be16 vlan_proto; unsigned short vlan_id; @@ -315,6 +323,7 @@ static inline void bond_set_active_slave(struct slave *slave) { if (slave->backup) { slave->backup = 0; + bond_queue_slave_event(slave); rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -323,6 +332,7 @@ static inline void bond_set_backup_slave(struct slave *slave) { if (!slave->backup) { slave->backup = 1; + bond_queue_slave_event(slave); rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); } } @@ -336,6 +346,7 @@ static inline void bond_set_slave_state(struct slave *slave, slave->backup = slave_state; if (notify) { rtmsg_ifinfo(RTM_NEWLINK, slave->dev, 0, GFP_ATOMIC); + bond_queue_slave_event(slave); slave->should_notify = 0; } else { if (slave->should_notify) @@ -493,6 +504,7 @@ static inline bool bond_is_slave_inactive(struct slave *slave) static inline void bond_set_slave_link_state(struct slave *slave, int state) { slave->link = state; + bond_queue_slave_event(slave); } static inline __be32 bond_confirm_addr(struct net_device *dev, __be32 dst, __be32 local) -- cgit v1.2.3-70-g09d2 From 59e14e325066be49b49b6c2503337c69a9ee29fc Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:32 +0200 Subject: net/mlx4_core: Port aggregation low level interface Implement the hardware interface required for port aggregation. 1. Disable RX port check on receive - don't perform a validity check that matches to QP's port and the port where the packet is received. 2. Virtual to physical port remap - configure virtual to physical port mapping. Port remap capability for virtual functions. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. 
Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 9 +++++ drivers/net/ethernet/mellanox/mlx4/fw.c | 56 +++++++++++++++++++++++++++++--- include/linux/mlx4/cmd.h | 7 ++++ include/linux/mlx4/device.h | 10 +++++- include/linux/mlx4/qp.h | 1 + 5 files changed, 77 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 154effbfd8be..a681d7c0bb9f 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -1583,6 +1583,15 @@ static struct mlx4_cmd_info cmd_info[] = { .verify = NULL, .wrapper = mlx4_CMD_EPERM_wrapper }, + { + .opcode = MLX4_CMD_VIRT_PORT_MAP, + .has_inbox = false, + .has_outbox = false, + .out_is_imm = false, + .encode_slave_id = false, + .verify = NULL, + .wrapper = mlx4_CMD_EPERM_wrapper + }, }; static int mlx4_master_process_vhcr(struct mlx4_dev *dev, int slave, diff --git a/drivers/net/ethernet/mellanox/mlx4/fw.c b/drivers/net/ethernet/mellanox/mlx4/fw.c index dbabfae3a3de..4b08a393ebcb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/fw.c +++ b/drivers/net/ethernet/mellanox/mlx4/fw.c @@ -142,7 +142,8 @@ static void dump_dev_cap_flags2(struct mlx4_dev *dev, u64 flags) [17] = "Asymmetric EQs support", [18] = "More than 80 VFs support", [19] = "Performance optimized for limited rule configuration flow steering support", - [20] = "Recoverable error events support" + [20] = "Recoverable error events support", + [21] = "Port Remap support" }; int i; @@ -863,6 +864,8 @@ int mlx4_QUERY_DEV_CAP(struct mlx4_dev *dev, struct mlx4_dev_cap *dev_cap) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_EQE_STRIDE; MLX4_GET(dev_cap->bmme_flags, outbox, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); + if (dev_cap->bmme_flags & MLX4_FLAG_PORT_REMAP) + dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_PORT_REMAP; MLX4_GET(field, outbox, QUERY_DEV_CAP_CONFIG_DEV_OFFSET); if (field & 0x20) dev_cap->flags2 |= MLX4_DEV_CAP_FLAG2_CONFIG_DEV; @@ -1120,9 +1123,10 @@ int mlx4_QUERY_DEV_CAP_wrapper(struct mlx4_dev *dev, int slave, field &= 0x7f; MLX4_PUT(outbox->buf, field, QUERY_DEV_CAP_BF_OFFSET); - /* For guests, disable mw type 2 */ + /* For guests, disable mw type 2 and port remap*/ MLX4_GET(bmme_flags, outbox->buf, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); bmme_flags &= ~MLX4_BMME_FLAG_TYPE_2_WIN; + bmme_flags &= ~MLX4_FLAG_PORT_REMAP; MLX4_PUT(outbox->buf, bmme_flags, QUERY_DEV_CAP_BMME_FLAGS_OFFSET); /* turn off device-managed steering capability if not enabled */ @@ -2100,13 +2104,16 @@ struct mlx4_config_dev { __be32 rsvd1[3]; __be16 vxlan_udp_dport; __be16 rsvd2; - __be32 rsvd3[27]; - __be16 rsvd4; - u8 rsvd5; + __be32 rsvd3; + __be32 roce_flags; + __be32 rsvd4[25]; + __be16 rsvd5; + u8 rsvd6; u8 rx_checksum_val; }; #define MLX4_VXLAN_UDP_DPORT (1 << 0) +#define MLX4_DISABLE_RX_PORT BIT(18) static int mlx4_CONFIG_DEV_set(struct mlx4_dev *dev, struct mlx4_config_dev *config_dev) { @@ -2209,6 +2216,45 @@ int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port) } EXPORT_SYMBOL_GPL(mlx4_config_vxlan_port); +#define CONFIG_DISABLE_RX_PORT BIT(15) +int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis) +{ + struct mlx4_config_dev config_dev; + + memset(&config_dev, 0, sizeof(config_dev)); + config_dev.update_flags = cpu_to_be32(MLX4_DISABLE_RX_PORT); + if (dis) + config_dev.roce_flags = + cpu_to_be32(CONFIG_DISABLE_RX_PORT); + + return mlx4_CONFIG_DEV_set(dev, &config_dev); +} + +int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2) +{ + struct mlx4_cmd_mailbox 
*mailbox; + struct { + __be32 v_port1; + __be32 v_port2; + } *v2p; + int err; + + mailbox = mlx4_alloc_cmd_mailbox(dev); + if (IS_ERR(mailbox)) + return -ENOMEM; + + v2p = mailbox->buf; + v2p->v_port1 = cpu_to_be32(port1); + v2p->v_port2 = cpu_to_be32(port2); + + err = mlx4_cmd(dev, mailbox->dma, 0, + MLX4_SET_PORT_VIRT2PHY, MLX4_CMD_VIRT_PORT_MAP, + MLX4_CMD_TIME_CLASS_B, MLX4_CMD_NATIVE); + + mlx4_free_cmd_mailbox(dev, mailbox); + return err; +} + int mlx4_SET_ICM_SIZE(struct mlx4_dev *dev, u64 icm_size, u64 *aux_pages) { diff --git a/include/linux/mlx4/cmd.h b/include/linux/mlx4/cmd.h index ae95adc78509..7b6d4e9ff603 100644 --- a/include/linux/mlx4/cmd.h +++ b/include/linux/mlx4/cmd.h @@ -71,6 +71,7 @@ enum { /*master notify fw on finish for slave's flr*/ MLX4_CMD_INFORM_FLR_DONE = 0x5b, + MLX4_CMD_VIRT_PORT_MAP = 0x5c, MLX4_CMD_GET_OP_REQ = 0x59, /* TPT commands */ @@ -170,6 +171,12 @@ enum { MLX4_CMD_TIME_CLASS_C = 60000, }; +enum { + /* virtual to physical port mapping opcode modifiers */ + MLX4_GET_PORT_VIRT2PHY = 0x0, + MLX4_SET_PORT_VIRT2PHY = 0x1, +}; + enum { MLX4_MAILBOX_SIZE = 4096, MLX4_ACCESS_MEM_ALIGN = 256, diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c95d659a39f2..d9afd99dde39 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -201,7 +201,8 @@ enum { MLX4_DEV_CAP_FLAG2_SYS_EQS = 1LL << 17, MLX4_DEV_CAP_FLAG2_80_VFS = 1LL << 18, MLX4_DEV_CAP_FLAG2_FS_A0 = 1LL << 19, - MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20 + MLX4_DEV_CAP_FLAG2_RECOVERABLE_ERROR_EVENT = 1LL << 20, + MLX4_DEV_CAP_FLAG2_PORT_REMAP = 1LL << 21 }; enum { @@ -253,9 +254,14 @@ enum { MLX4_BMME_FLAG_TYPE_2_WIN = 1 << 9, MLX4_BMME_FLAG_RESERVED_LKEY = 1 << 10, MLX4_BMME_FLAG_FAST_REG_WR = 1 << 11, + MLX4_BMME_FLAG_PORT_REMAP = 1 << 24, MLX4_BMME_FLAG_VSD_INIT2RTR = 1 << 28, }; +enum { + MLX4_FLAG_PORT_REMAP = MLX4_BMME_FLAG_PORT_REMAP +}; + enum mlx4_event { MLX4_EVENT_TYPE_COMP = 0x00, MLX4_EVENT_TYPE_PATH_MIG = 0x01, @@ -1378,6 +1384,8 @@ int mlx4_phys_to_slave_port(struct mlx4_dev *dev, int slave, int port); int mlx4_get_base_gid_ix(struct mlx4_dev *dev, int slave, int port); int mlx4_config_vxlan_port(struct mlx4_dev *dev, __be16 udp_port); +int mlx4_disable_rx_port_check(struct mlx4_dev *dev, bool dis); +int mlx4_virt2phy_port_map(struct mlx4_dev *dev, u32 port1, u32 port2); int mlx4_vf_smi_enabled(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_get_enable_smi_admin(struct mlx4_dev *dev, int slave, int port); int mlx4_vf_set_enable_smi_admin(struct mlx4_dev *dev, int slave, int port, diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 467ccdf94c98..2bbc62aa818a 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -96,6 +96,7 @@ enum { MLX4_QP_BIT_RRE = 1 << 15, MLX4_QP_BIT_RWE = 1 << 14, MLX4_QP_BIT_RAE = 1 << 13, + MLX4_QP_BIT_FPP = 1 << 3, MLX4_QP_BIT_RIC = 1 << 4, }; -- cgit v1.2.3-70-g09d2 From 53f33ae295a5098f12218da1400f55ad7df7447c Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Tue, 3 Feb 2015 16:48:33 +0200 Subject: net/mlx4_core: Port aggregation upper layer interface Supply interface functions to bond and unbond ports of a mlx4 internal interfaces. Example for such an interface is the one registered by the mlx4 IB driver under RoCE. There are 1. Functions to go in/out to/from bonded mode 2. Function to remap virtual ports to physical ports The bond_mutex prevents simultaneous access to data that keep status of the device in bonded mode. 
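Editor's sketch (not part of the original patch): a rough illustration of how a bonded consumer (an interface registered with the MLX4_INTFF_BONDING flag discussed next) might drive the new entry points. The mapping policy, the mdev variable and the error handling are assumptions; only mlx4_bond(), mlx4_unbond(), mlx4_port_map_set() and struct mlx4_port_map come from the patch:

        /* Switch the device into bonded mode and steer both virtual ports
         * to physical port 1, e.g. after a bonding failover notification. */
        struct mlx4_port_map v2p = { .port1 = 1, .port2 = 1 };
        int err;

        err = mlx4_bond(mdev);          /* re-creates MLX4_INTFF_BONDING consumers */
        if (err)
                return err;

        err = mlx4_port_map_set(mdev, &v2p);
        if (err) {
                mlx4_unbond(mdev);
                return err;
        }
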
The upper mlx4 interface marks to the mlx4 core module that they want to be subject for such bonding by setting the MLX4_INTFF_BONDING flag. Interface which goes to/from bonded mode is re-created. The mlx4 Ethernet driver does not set this flag when registering the interface, the IB driver does. Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/en_resources.c | 8 +- drivers/net/ethernet/mellanox/mlx4/intf.c | 54 +++++++++++++ drivers/net/ethernet/mellanox/mlx4/main.c | 89 ++++++++++++++++++++++ drivers/net/ethernet/mellanox/mlx4/mlx4.h | 3 + drivers/net/ethernet/mellanox/mlx4/qp.c | 2 + .../net/ethernet/mellanox/mlx4/resource_tracker.c | 3 + include/linux/mlx4/device.h | 1 + include/linux/mlx4/driver.h | 19 +++++ 8 files changed, 177 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx4/en_resources.c b/drivers/net/ethernet/mellanox/mlx4/en_resources.c index f1a5500ff72d..34f2fdf4fe5d 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_resources.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_resources.c @@ -50,10 +50,14 @@ void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, context->mtu_msgmax = 0xff; if (!is_tx && !rss) context->rq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4); - if (is_tx) + if (is_tx) { context->sq_size_stride = ilog2(size) << 3 | (ilog2(stride) - 4); - else + if (mdev->dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP) + context->params2 |= MLX4_QP_BIT_FPP; + + } else { context->sq_size_stride = ilog2(TXBB_SIZE) - 4; + } context->usr_page = cpu_to_be32(mdev->priv_uar.index); context->local_qpn = cpu_to_be32(qpn); context->pri_path.ackto = 1 & 0x07; diff --git a/drivers/net/ethernet/mellanox/mlx4/intf.c b/drivers/net/ethernet/mellanox/mlx4/intf.c index 68d2bad325d5..6fce58718837 100644 --- a/drivers/net/ethernet/mellanox/mlx4/intf.c +++ b/drivers/net/ethernet/mellanox/mlx4/intf.c @@ -33,11 +33,13 @@ #include #include +#include #include "mlx4.h" struct mlx4_device_context { struct list_head list; + struct list_head bond_list; struct mlx4_interface *intf; void *context; }; @@ -115,6 +117,58 @@ void mlx4_unregister_interface(struct mlx4_interface *intf) } EXPORT_SYMBOL_GPL(mlx4_unregister_interface); +int mlx4_do_bond(struct mlx4_dev *dev, bool enable) +{ + struct mlx4_priv *priv = mlx4_priv(dev); + struct mlx4_device_context *dev_ctx = NULL, *temp_dev_ctx; + unsigned long flags; + int ret; + LIST_HEAD(bond_list); + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)) + return -ENOTSUPP; + + ret = mlx4_disable_rx_port_check(dev, enable); + if (ret) { + mlx4_err(dev, "Fail to %s rx port check\n", + enable ? 
"enable" : "disable"); + return ret; + } + if (enable) { + dev->flags |= MLX4_FLAG_BONDED; + } else { + ret = mlx4_virt2phy_port_map(dev, 1, 2); + if (ret) { + mlx4_err(dev, "Fail to reset port map\n"); + return ret; + } + dev->flags &= ~MLX4_FLAG_BONDED; + } + + spin_lock_irqsave(&priv->ctx_lock, flags); + list_for_each_entry_safe(dev_ctx, temp_dev_ctx, &priv->ctx_list, list) { + if (dev_ctx->intf->flags & MLX4_INTFF_BONDING) { + list_add_tail(&dev_ctx->bond_list, &bond_list); + list_del(&dev_ctx->list); + } + } + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + list_for_each_entry(dev_ctx, &bond_list, bond_list) { + dev_ctx->intf->remove(dev, dev_ctx->context); + dev_ctx->context = dev_ctx->intf->add(dev); + + spin_lock_irqsave(&priv->ctx_lock, flags); + list_add_tail(&dev_ctx->list, &priv->ctx_list); + spin_unlock_irqrestore(&priv->ctx_lock, flags); + + mlx4_dbg(dev, "Inrerface for protocol %d restarted with when bonded mode is %s\n", + dev_ctx->intf->protocol, enable ? + "enabled" : "disabled"); + } + return 0; +} + void mlx4_dispatch_event(struct mlx4_dev *dev, enum mlx4_dev_event type, unsigned long param) { diff --git a/drivers/net/ethernet/mellanox/mlx4/main.c b/drivers/net/ethernet/mellanox/mlx4/main.c index cc9f48439244..f3245fe0f442 100644 --- a/drivers/net/ethernet/mellanox/mlx4/main.c +++ b/drivers/net/ethernet/mellanox/mlx4/main.c @@ -1160,6 +1160,91 @@ err_set_port: return err ? err : count; } +int mlx4_bond(struct mlx4_dev *dev) +{ + int ret = 0; + struct mlx4_priv *priv = mlx4_priv(dev); + + mutex_lock(&priv->bond_mutex); + + if (!mlx4_is_bonded(dev)) + ret = mlx4_do_bond(dev, true); + else + ret = 0; + + mutex_unlock(&priv->bond_mutex); + if (ret) + mlx4_err(dev, "Failed to bond device: %d\n", ret); + else + mlx4_dbg(dev, "Device is bonded\n"); + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_bond); + +int mlx4_unbond(struct mlx4_dev *dev) +{ + int ret = 0; + struct mlx4_priv *priv = mlx4_priv(dev); + + mutex_lock(&priv->bond_mutex); + + if (mlx4_is_bonded(dev)) + ret = mlx4_do_bond(dev, false); + + mutex_unlock(&priv->bond_mutex); + if (ret) + mlx4_err(dev, "Failed to unbond device: %d\n", ret); + else + mlx4_dbg(dev, "Device is unbonded\n"); + return ret; +} +EXPORT_SYMBOL_GPL(mlx4_unbond); + + +int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p) +{ + u8 port1 = v2p->port1; + u8 port2 = v2p->port2; + struct mlx4_priv *priv = mlx4_priv(dev); + int err; + + if (!(dev->caps.flags2 & MLX4_DEV_CAP_FLAG2_PORT_REMAP)) + return -ENOTSUPP; + + mutex_lock(&priv->bond_mutex); + + /* zero means keep current mapping for this port */ + if (port1 == 0) + port1 = priv->v2p.port1; + if (port2 == 0) + port2 = priv->v2p.port2; + + if ((port1 < 1) || (port1 > MLX4_MAX_PORTS) || + (port2 < 1) || (port2 > MLX4_MAX_PORTS) || + (port1 == 2 && port2 == 1)) { + /* besides boundary checks cross mapping makes + * no sense and therefore not allowed */ + err = -EINVAL; + } else if ((port1 == priv->v2p.port1) && + (port2 == priv->v2p.port2)) { + err = 0; + } else { + err = mlx4_virt2phy_port_map(dev, port1, port2); + if (!err) { + mlx4_dbg(dev, "port map changed: [%d][%d]\n", + port1, port2); + priv->v2p.port1 = port1; + priv->v2p.port2 = port2; + } else { + mlx4_err(dev, "Failed to change port mape: %d\n", err); + } + } + + mutex_unlock(&priv->bond_mutex); + return err; +} +EXPORT_SYMBOL_GPL(mlx4_port_map_set); + static int mlx4_load_fw(struct mlx4_dev *dev) { struct mlx4_priv *priv = mlx4_priv(dev); @@ -2638,6 +2723,7 @@ static int mlx4_load_one(struct pci_dev *pdev, int pci_dev_data, 
spin_lock_init(&priv->ctx_lock); mutex_init(&priv->port_mutex); + mutex_init(&priv->bond_mutex); INIT_LIST_HEAD(&priv->pgdir_list); mutex_init(&priv->pgdir_mutex); @@ -2934,6 +3020,9 @@ slave_start: goto err_port; } + priv->v2p.port1 = 1; + priv->v2p.port2 = 2; + err = mlx4_register_device(dev); if (err) goto err_port; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4.h b/drivers/net/ethernet/mellanox/mlx4/mlx4.h index 148dc0945aab..803f17653da7 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4.h @@ -885,6 +885,8 @@ struct mlx4_priv { int reserved_mtts; int fs_hash_mode; u8 virt2phys_pkey[MLX4_MFUNC_MAX][MLX4_MAX_PORTS][MLX4_MAX_PORT_PKEYS]; + struct mlx4_port_map v2p; /* cached port mapping configuration */ + struct mutex bond_mutex; /* for bond mode */ __be64 slave_node_guids[MLX4_MFUNC_MAX]; atomic_t opreq_count; @@ -1364,6 +1366,7 @@ int mlx4_get_slave_num_gids(struct mlx4_dev *dev, int slave, int port); /* Returns the VF index of slave */ int mlx4_get_vf_indx(struct mlx4_dev *dev, int slave); int mlx4_config_mad_demux(struct mlx4_dev *dev); +int mlx4_do_bond(struct mlx4_dev *dev, bool enable); enum mlx4_zone_flags { MLX4_ZONE_ALLOW_ALLOC_FROM_LOWER_PRIO = 1UL << 0, diff --git a/drivers/net/ethernet/mellanox/mlx4/qp.c b/drivers/net/ethernet/mellanox/mlx4/qp.c index 1586ecce13c7..2bb8553bd905 100644 --- a/drivers/net/ethernet/mellanox/mlx4/qp.c +++ b/drivers/net/ethernet/mellanox/mlx4/qp.c @@ -882,6 +882,8 @@ int mlx4_qp_to_ready(struct mlx4_dev *dev, struct mlx4_mtt *mtt, for (i = 0; i < ARRAY_SIZE(states) - 1; i++) { context->flags &= cpu_to_be32(~(0xf << 28)); context->flags |= cpu_to_be32(states[i + 1] << 28); + if (states[i + 1] != MLX4_QP_STATE_RTR) + context->params2 &= ~MLX4_QP_BIT_FPP; err = mlx4_qp_modify(dev, mtt, states[i], states[i + 1], context, 0, 0, qp); if (err) { diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index 79feeb6b0d87..c5f3dfca226b 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2944,6 +2944,9 @@ static int verify_qp_parameters(struct mlx4_dev *dev, qp_type = (be32_to_cpu(qp_ctx->flags) >> 16) & 0xff; optpar = be32_to_cpu(*(__be32 *) inbox->buf); + if (slave != mlx4_master_func_num(dev)) + qp_ctx->params2 &= ~MLX4_QP_BIT_FPP; + switch (qp_type) { case MLX4_QP_ST_RC: case MLX4_QP_ST_XRC: diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index d9afd99dde39..977b0b164431 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -70,6 +70,7 @@ enum { MLX4_FLAG_SLAVE = 1 << 3, MLX4_FLAG_SRIOV = 1 << 4, MLX4_FLAG_OLD_REG_MAC = 1 << 6, + MLX4_FLAG_BONDED = 1 << 7 }; enum { diff --git a/include/linux/mlx4/driver.h b/include/linux/mlx4/driver.h index 022055c8fb26..9553a73d2049 100644 --- a/include/linux/mlx4/driver.h +++ b/include/linux/mlx4/driver.h @@ -49,6 +49,10 @@ enum mlx4_dev_event { MLX4_DEV_EVENT_SLAVE_SHUTDOWN, }; +enum { + MLX4_INTFF_BONDING = 1 << 0 +}; + struct mlx4_interface { void * (*add) (struct mlx4_dev *dev); void (*remove)(struct mlx4_dev *dev, void *context); @@ -57,11 +61,26 @@ struct mlx4_interface { void * (*get_dev)(struct mlx4_dev *dev, void *context, u8 port); struct list_head list; enum mlx4_protocol protocol; + int flags; }; int mlx4_register_interface(struct mlx4_interface *intf); void mlx4_unregister_interface(struct mlx4_interface *intf); +int mlx4_bond(struct mlx4_dev *dev); +int 
mlx4_unbond(struct mlx4_dev *dev); +static inline int mlx4_is_bonded(struct mlx4_dev *dev) +{ + return !!(dev->flags & MLX4_FLAG_BONDED); +} + +struct mlx4_port_map { + u8 port1; + u8 port2; +}; + +int mlx4_port_map_set(struct mlx4_dev *dev, struct mlx4_port_map *v2p); + void *mlx4_get_protocol_dev(struct mlx4_dev *dev, enum mlx4_protocol proto, int port); static inline u64 mlx4_mac_to_u64(u8 *addr) -- cgit v1.2.3-70-g09d2 From f2dba9c6ff0d9a515b4c3f1b037cd65c8b2a868c Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 4 Feb 2015 07:33:23 +1100 Subject: rhashtable: Introduce rhashtable_walk_* Some existing rhashtable users get too intimate with it by walking the buckets directly. This prevents us from easily changing the internals of rhashtable. This patch adds the helpers rhashtable_walk_init/exit/start/next/stop which will replace these custom walkers. They are meant to be usable for both procfs seq_file walks as well as walking by a netlink dump. The iterator structure should fit inside a netlink dump cb structure, with at least one element to spare. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 35 ++++++++++ lib/rhashtable.c | 163 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 198 insertions(+) (limited to 'include') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index e0337844358e..58851275fed9 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -18,6 +18,7 @@ #ifndef _LINUX_RHASHTABLE_H #define _LINUX_RHASHTABLE_H +#include #include #include #include @@ -111,6 +112,7 @@ struct rhashtable_params { * @p: Configuration parameters * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping + * @walkers: List of active walkers * @being_destroyed: True if table is set up for destruction */ struct rhashtable { @@ -121,9 +123,36 @@ struct rhashtable { struct rhashtable_params p; struct work_struct run_work; struct mutex mutex; + struct list_head walkers; bool being_destroyed; }; +/** + * struct rhashtable_walker - Hash table walker + * @list: List entry on list of walkers + * @resize: Resize event occured + */ +struct rhashtable_walker { + struct list_head list; + bool resize; +}; + +/** + * struct rhashtable_iter - Hash table iterator, fits into netlink cb + * @ht: Table to iterate through + * @p: Current pointer + * @walker: Associated rhashtable walker + * @slot: Current slot + * @skip: Number of entries to skip in slot + */ +struct rhashtable_iter { + struct rhashtable *ht; + struct rhash_head *p; + struct rhashtable_walker *walker; + unsigned int slot; + unsigned int skip; +}; + static inline unsigned long rht_marker(const struct rhashtable *ht, u32 hash) { return NULLS_MARKER(ht->p.nulls_base + hash); @@ -179,6 +208,12 @@ bool rhashtable_lookup_compare_insert(struct rhashtable *ht, bool (*compare)(void *, void *), void *arg); +int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter); +void rhashtable_walk_exit(struct rhashtable_iter *iter); +int rhashtable_walk_start(struct rhashtable_iter *iter) __acquires(RCU); +void *rhashtable_walk_next(struct rhashtable_iter *iter); +void rhashtable_walk_stop(struct rhashtable_iter *iter) __releases(RCU); + void rhashtable_destroy(struct rhashtable *ht); #define rht_dereference(p, ht) \ diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 904b419b72f5..057919164e23 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -484,6 +484,7 @@ static void 
rht_deferred_worker(struct work_struct *work) { struct rhashtable *ht; struct bucket_table *tbl; + struct rhashtable_walker *walker; ht = container_of(work, struct rhashtable, run_work); mutex_lock(&ht->mutex); @@ -492,6 +493,9 @@ static void rht_deferred_worker(struct work_struct *work) tbl = rht_dereference(ht->tbl, ht); + list_for_each_entry(walker, &ht->walkers, list) + walker->resize = true; + if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) rhashtable_expand(ht); else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size)) @@ -822,6 +826,164 @@ exit: } EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert); +/** + * rhashtable_walk_init - Initialise an iterator + * @ht: Table to walk over + * @iter: Hash table Iterator + * + * This function prepares a hash table walk. + * + * Note that if you restart a walk after rhashtable_walk_stop you + * may see the same object twice. Also, you may miss objects if + * there are removals in between rhashtable_walk_stop and the next + * call to rhashtable_walk_start. + * + * For a completely stable walk you should construct your own data + * structure outside the hash table. + * + * This function may sleep so you must not call it from interrupt + * context or with spin locks held. + * + * You must call rhashtable_walk_exit if this function returns + * successfully. + */ +int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter) +{ + iter->ht = ht; + iter->p = NULL; + iter->slot = 0; + iter->skip = 0; + + iter->walker = kmalloc(sizeof(*iter->walker), GFP_KERNEL); + if (!iter->walker) + return -ENOMEM; + + mutex_lock(&ht->mutex); + list_add(&iter->walker->list, &ht->walkers); + mutex_unlock(&ht->mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(rhashtable_walk_init); + +/** + * rhashtable_walk_exit - Free an iterator + * @iter: Hash table Iterator + * + * This function frees resources allocated by rhashtable_walk_init. + */ +void rhashtable_walk_exit(struct rhashtable_iter *iter) +{ + mutex_lock(&iter->ht->mutex); + list_del(&iter->walker->list); + mutex_unlock(&iter->ht->mutex); + kfree(iter->walker); +} +EXPORT_SYMBOL_GPL(rhashtable_walk_exit); + +/** + * rhashtable_walk_start - Start a hash table walk + * @iter: Hash table iterator + * + * Start a hash table walk. Note that we take the RCU lock in all + * cases including when we return an error. So you must always call + * rhashtable_walk_stop to clean up. + * + * Returns zero if successful. + * + * Returns -EAGAIN if resize event occured. Note that the iterator + * will rewind back to the beginning and you may use it immediately + * by calling rhashtable_walk_next. + */ +int rhashtable_walk_start(struct rhashtable_iter *iter) +{ + rcu_read_lock(); + + if (iter->walker->resize) { + iter->slot = 0; + iter->skip = 0; + iter->walker->resize = false; + return -EAGAIN; + } + + return 0; +} +EXPORT_SYMBOL_GPL(rhashtable_walk_start); + +/** + * rhashtable_walk_next - Return the next object and advance the iterator + * @iter: Hash table iterator + * + * Note that you must call rhashtable_walk_stop when you are finished + * with the walk. + * + * Returns the next object or NULL when the end of the table is reached. + * + * Returns -EAGAIN if resize event occured. Note that the iterator + * will rewind back to the beginning and you may continue to use it. 
+ */ +void *rhashtable_walk_next(struct rhashtable_iter *iter) +{ + const struct bucket_table *tbl; + struct rhashtable *ht = iter->ht; + struct rhash_head *p = iter->p; + void *obj = NULL; + + tbl = rht_dereference_rcu(ht->tbl, ht); + + if (p) { + p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); + goto next; + } + + for (; iter->slot < tbl->size; iter->slot++) { + int skip = iter->skip; + + rht_for_each_rcu(p, tbl, iter->slot) { + if (!skip) + break; + skip--; + } + +next: + if (!rht_is_a_nulls(p)) { + iter->skip++; + iter->p = p; + obj = rht_obj(ht, p); + goto out; + } + + iter->skip = 0; + } + + iter->p = NULL; + +out: + if (iter->walker->resize) { + iter->p = NULL; + iter->slot = 0; + iter->skip = 0; + iter->walker->resize = false; + return ERR_PTR(-EAGAIN); + } + + return obj; +} +EXPORT_SYMBOL_GPL(rhashtable_walk_next); + +/** + * rhashtable_walk_stop - Finish a hash table walk + * @iter: Hash table iterator + * + * Finish a hash table walk. + */ +void rhashtable_walk_stop(struct rhashtable_iter *iter) +{ + rcu_read_unlock(); + iter->p = NULL; +} +EXPORT_SYMBOL_GPL(rhashtable_walk_stop); + static size_t rounded_hashtable_size(struct rhashtable_params *params) { return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), @@ -894,6 +1056,7 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) memset(ht, 0, sizeof(*ht)); mutex_init(&ht->mutex); memcpy(&ht->p, params, sizeof(*params)); + INIT_LIST_HEAD(&ht->walkers); if (params->locks_mul) ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); -- cgit v1.2.3-70-g09d2 From 9878196578286c5ed494778ada01da094377a686 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 3 Feb 2015 18:31:53 -0800 Subject: tcp: do not pace pure ack packets When we added pacing to TCP, we decided to let sch_fq take care of actual pacing. All TCP had to do was to compute sk->pacing_rate using simple formula: sk->pacing_rate = 2 * cwnd * mss / rtt It works well for senders (bulk flows), but not very well for receivers or even RPC : cwnd on the receiver can be less than 10, rtt can be around 100ms, so we can end up pacing ACK packets, slowing down the sender. Really, only the sender should pace, according to its own logic. Instead of adding a new bit in skb, or call yet another flow dissection, we tweak skb->truesize to a small value (2), and we instruct sch_fq to use new helper and not pace pure ack. Note this also helps TCP small queue, as ack packets present in qdisc/NIC do not prevent sending a data packet (RPC workload) This helps to reduce tx completion overhead, ack packets can use regular sock_wfree() instead of tcp_wfree() which is a bit more expensive. This has no impact in the case packets are sent to loopback interface, as we do not coalesce ack packets (were we would detect skb->truesize lie) In case netem (with a delay) is used, skb_orphan_partial() also sets skb->truesize to 1. This patch is a combination of two patches we used for about one year at Google. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/net/tcp.h | 15 +++++++++++++++ net/ipv4/tcp_output.c | 10 +++++++++- net/sched/sch_fq.c | 10 ++++++++-- 3 files changed, 32 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index b8fdc6bab3f3..637ee490ec81 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1713,4 +1713,19 @@ static inline struct ip_options_rcu *tcp_v4_save_options(struct sk_buff *skb) return dopt; } +/* locally generated TCP pure ACKs have skb->truesize == 2 + * (check tcp_send_ack() in net/ipv4/tcp_output.c ) + * This is much faster than dissecting the packet to find out. + * (Think of GRE encapsulations, IPv4, IPv6, ...) + */ +static inline bool skb_is_tcp_pure_ack(const struct sk_buff *skb) +{ + return skb->truesize == 2; +} + +static inline void skb_set_tcp_pure_ack(struct sk_buff *skb) +{ + skb->truesize = 2; +} + #endif /* _TCP_H */ diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 20ab06b228ac..1b326ed46f7b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -948,7 +948,7 @@ static int tcp_transmit_skb(struct sock *sk, struct sk_buff *skb, int clone_it, skb_orphan(skb); skb->sk = sk; - skb->destructor = tcp_wfree; + skb->destructor = skb_is_tcp_pure_ack(skb) ? sock_wfree : tcp_wfree; skb_set_hash_from_sk(skb, sk); atomic_add(skb->truesize, &sk->sk_wmem_alloc); @@ -3265,6 +3265,14 @@ void tcp_send_ack(struct sock *sk) skb_reserve(buff, MAX_TCP_HEADER); tcp_init_nondata_skb(buff, tcp_acceptable_seq(sk), TCPHDR_ACK); + /* We do not want pure acks influencing TCP Small Queues or fq/pacing + * too much. + * SKB_TRUESIZE(max(1 .. 66, MAX_TCP_HEADER)) is unfortunately ~784 + * We also avoid tcp_wfree() overhead (cache line miss accessing + * tp->tsq_flags) by using regular sock_wfree() + */ + skb_set_tcp_pure_ack(buff); + /* Send it off, this clears delayed acks for us. */ skb_mstamp_get(&buff->skb_mstamp); tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 2a50f5c62070..69a3dbf55c60 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -52,6 +52,7 @@ #include #include #include +#include /* * Per flow structure, dynamically allocated @@ -445,7 +446,9 @@ begin: goto begin; } - if (unlikely(f->head && now < f->time_next_packet)) { + skb = f->head; + if (unlikely(skb && now < f->time_next_packet && + !skb_is_tcp_pure_ack(skb))) { head->first = f->next; fq_flow_set_throttled(q, f); goto begin; @@ -464,12 +467,15 @@ begin: goto begin; } prefetch(&skb->end); - f->time_next_packet = now; f->credit -= qdisc_pkt_len(skb); if (f->credit > 0 || !q->rate_enable) goto out; + /* Do not pace locally generated ack packets */ + if (skb_is_tcp_pure_ack(skb)) + goto out; + rate = q->flow_max_rate; if (skb->sk) rate = min(skb->sk->sk_pacing_rate, rate); -- cgit v1.2.3-70-g09d2 From 06eb395fa9856b5a87cf7d80baee2a0ed3cdb9d7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Feb 2015 21:30:40 -0800 Subject: pkt_sched: fq: better control of DDOS traffic FQ has a fast path for skb attached to a socket, as it does not have to compute a flow hash. But for other packets, FQ being non stochastic means that hosts exposed to random Internet traffic can allocate million of flows structure (104 bytes each) pretty easily. Not only host can OOM, but lookup in RB trees can take too much cpu and memory resources. This patch adds a new attribute, orphan_mask, that is adding possibility of having a stochastic hash for orphaned skb. 
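In other words (illustrative sketch only; the helper name is invented, the logic is what this patch adds to fq_classify()), an orphaned skb is folded into at most orphan_mask + 1 pseudo-flows instead of one flow per distinct hash value:

	#include <linux/skbuff.h>

	/* Simplified from the fq_classify() change below. The low bit is
	 * forced to 1 so the fake key can never collide with a real,
	 * word-aligned struct sock pointer.
	 */
	static struct sock *fq_orphan_key(struct sk_buff *skb, u32 orphan_mask)
	{
		unsigned long hash = skb_get_hash(skb) & orphan_mask;

		return (struct sock *)((hash << 1) | 1UL);
	}

A mask of 1023, for example, caps this at 1024 flow structures (about 104 KB) for arbitrary unsolicited traffic.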
Its default value is 1024 slots, to mimic SFQ behavior. Note: This does not apply to locally generated TCP traffic, and no locally generated traffic will share a flow structure with another perfect or stochastic flow. This patch also handles the specific case of SYNACK messages: They are attached to the listener socket, and therefore all map to a single hash bucket. If listener have set SO_MAX_PACING_RATE, hoping to have new accepted socket inherit this rate, SYNACK might be paced and even dropped. This is very similar to an internal patch Google have used more than one year. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/uapi/linux/pkt_sched.h | 2 ++ net/sched/sch_fq.c | 19 +++++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h index d62316baae94..534b84710745 100644 --- a/include/uapi/linux/pkt_sched.h +++ b/include/uapi/linux/pkt_sched.h @@ -774,6 +774,8 @@ enum { TCA_FQ_FLOW_REFILL_DELAY, /* flow credit refill delay in usec */ + TCA_FQ_ORPHAN_MASK, /* mask applied to orphaned skb hashes */ + __TCA_FQ_MAX }; diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c index 69a3dbf55c60..a00c43043001 100644 --- a/net/sched/sch_fq.c +++ b/net/sched/sch_fq.c @@ -93,6 +93,7 @@ struct fq_sched_data { u32 flow_refill_delay; u32 flow_max_rate; /* optional max rate per flow */ u32 flow_plimit; /* max packets per flow */ + u32 orphan_mask; /* mask for orphaned skb */ struct rb_root *fq_root; u8 rate_enable; u8 fq_trees_log; @@ -223,11 +224,20 @@ static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q) if (unlikely((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL)) return &q->internal; - if (unlikely(!sk)) { + /* SYNACK messages are attached to a listener socket. 
+ * 1) They are not part of a 'flow' yet + * 2) We do not want to rate limit them (eg SYNFLOOD attack), + * especially if the listener set SO_MAX_PACING_RATE + * 3) We pretend they are orphaned + */ + if (!sk || sk->sk_state == TCP_LISTEN) { + unsigned long hash = skb_get_hash(skb) & q->orphan_mask; + /* By forcing low order bit to 1, we make sure to not * collide with a local flow (socket pointers are word aligned) */ - sk = (struct sock *)(skb_get_hash(skb) | 1L); + sk = (struct sock *)((hash << 1) | 1UL); + skb_orphan(skb); } root = &q->fq_root[hash_32((u32)(long)sk, q->fq_trees_log)]; @@ -704,6 +714,9 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt) q->flow_refill_delay = usecs_to_jiffies(usecs_delay); } + if (tb[TCA_FQ_ORPHAN_MASK]) + q->orphan_mask = nla_get_u32(tb[TCA_FQ_ORPHAN_MASK]); + if (!err) { sch_tree_unlock(sch); err = fq_resize(sch, fq_log); @@ -749,6 +762,7 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt) q->delayed = RB_ROOT; q->fq_root = NULL; q->fq_trees_log = ilog2(1024); + q->orphan_mask = 1024 - 1; qdisc_watchdog_init(&q->watchdog, sch); if (opt) @@ -778,6 +792,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb) nla_put_u32(skb, TCA_FQ_FLOW_MAX_RATE, q->flow_max_rate) || nla_put_u32(skb, TCA_FQ_FLOW_REFILL_DELAY, jiffies_to_usecs(q->flow_refill_delay)) || + nla_put_u32(skb, TCA_FQ_ORPHAN_MASK, q->orphan_mask) || nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log)) goto nla_put_failure; -- cgit v1.2.3-70-g09d2 From c58da4c659803ac12eca5275c8a7064222adb4c7 Mon Sep 17 00:00:00 2001 From: Erik Kline Date: Wed, 4 Feb 2015 20:01:23 +0900 Subject: net: ipv6: allow explicitly choosing optimistic addresses RFC 4429 ("Optimistic DAD") states that optimistic addresses should be treated as deprecated addresses. From section 2.1: Unless noted otherwise, components of the IPv6 protocol stack should treat addresses in the Optimistic state equivalently to those in the Deprecated state, indicating that the address is available for use but should not be used if another suitable address is available. Optimistic addresses are indeed avoided when other addresses are available (i.e. at source address selection time), but they have not heretofore been available for things like explicit bind() and sendmsg() with struct in6_pktinfo, etc. This change makes optimistic addresses treated more like deprecated addresses than tentative ones. Signed-off-by: Erik Kline Acked-by: Lorenzo Colitti Acked-by: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- include/net/addrconf.h | 3 +++ net/ipv6/addrconf.c | 19 +++++++++++++++++-- net/ipv6/ndisc.c | 4 +++- 3 files changed, 23 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index d13573bb879e..80456f72d70a 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -62,6 +62,9 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg); int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); +int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, int strict, + u32 banned_flags); #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f7c8bbeb27b7..62900aee4c58 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1518,16 +1518,31 @@ static int ipv6_count_addresses(struct inet6_dev *idev) int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict) +{ + return ipv6_chk_addr_and_flags(net, addr, dev, strict, IFA_F_TENTATIVE); +} +EXPORT_SYMBOL(ipv6_chk_addr); + +int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, + const struct net_device *dev, int strict, + u32 banned_flags) { struct inet6_ifaddr *ifp; unsigned int hash = inet6_addr_hash(addr); + u32 ifp_flags; rcu_read_lock_bh(); hlist_for_each_entry_rcu(ifp, &inet6_addr_lst[hash], addr_lst) { if (!net_eq(dev_net(ifp->idev->dev), net)) continue; + /* Decouple optimistic from tentative for evaluation here. + * Ban optimistic addresses explicitly, when required. + */ + ifp_flags = (ifp->flags&IFA_F_OPTIMISTIC) + ? (ifp->flags&~IFA_F_TENTATIVE) + : ifp->flags; if (ipv6_addr_equal(&ifp->addr, addr) && - !(ifp->flags&IFA_F_TENTATIVE) && + !(ifp_flags&banned_flags) && (dev == NULL || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { rcu_read_unlock_bh(); @@ -1538,7 +1553,7 @@ int ipv6_chk_addr(struct net *net, const struct in6_addr *addr, rcu_read_unlock_bh(); return 0; } -EXPORT_SYMBOL(ipv6_chk_addr); +EXPORT_SYMBOL(ipv6_chk_addr_and_flags); static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, struct net_device *dev) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 682866777d53..113fc6cd5a0c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -655,7 +655,9 @@ static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb) struct in6_addr *target = (struct in6_addr *)&neigh->primary_key; int probes = atomic_read(&neigh->probes); - if (skb && ipv6_chk_addr(dev_net(dev), &ipv6_hdr(skb)->saddr, dev, 1)) + if (skb && ipv6_chk_addr_and_flags(dev_net(dev), &ipv6_hdr(skb)->saddr, + dev, 1, + IFA_F_TENTATIVE|IFA_F_OPTIMISTIC)) saddr = &ipv6_hdr(skb)->saddr; probes -= NEIGH_VAR(neigh->parms, UCAST_PROBES); if (probes < 0) { -- cgit v1.2.3-70-g09d2 From 83d2b9ba1abca241df44a502b6da950a25856b5b Mon Sep 17 00:00:00 2001 From: Jarno Rajahalme Date: Thu, 5 Feb 2015 13:40:49 -0800 Subject: net: openvswitch: Support masked set actions. OVS userspace already probes the openvswitch kernel module for OVS_ACTION_ATTR_SET_MASKED support. This patch adds the kernel module implementation of masked set actions. The existing set action sets many fields at once. 
When only a subset of the IP header fields, for example, should be modified, all the IP fields need to be exact matched so that the other field values can be copied to the set action. A masked set action allows modification of an arbitrary subset of the supported header bits without requiring the rest to be matched. Masked set action is now supported for all writeable key types, except for the tunnel key. The set tunnel action is an exception as any input tunnel info is cleared before action processing starts, so there is no tunnel info to mask. The kernel module converts all (non-tunnel) set actions to masked set actions. This makes action processing more uniform, and results in less branching and duplicating the action processing code. When returning actions to userspace, the fully masked set actions are converted back to normal set actions. We use a kernel internal action code to be able to tell the userspace provided and converted masked set actions apart. Signed-off-by: Jarno Rajahalme Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/uapi/linux/openvswitch.h | 22 ++- net/openvswitch/actions.c | 373 ++++++++++++++++++++++++--------------- net/openvswitch/flow_netlink.c | 161 +++++++++++++---- 3 files changed, 383 insertions(+), 173 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/openvswitch.h b/include/uapi/linux/openvswitch.h index 7a8785a99243..bbd49a0c46c7 100644 --- a/include/uapi/linux/openvswitch.h +++ b/include/uapi/linux/openvswitch.h @@ -599,6 +599,12 @@ struct ovs_action_hash { * @OVS_ACTION_ATTR_SET: Replaces the contents of an existing header. The * single nested %OVS_KEY_ATTR_* attribute specifies a header to modify and its * value. + * @OVS_ACTION_ATTR_SET_MASKED: Replaces the contents of an existing header. A + * nested %OVS_KEY_ATTR_* attribute specifies a header to modify, its value, + * and a mask. For every bit set in the mask, the corresponding bit value + * is copied from the value to the packet header field, rest of the bits are + * left unchanged. The non-masked value bits must be passed in as zeroes. + * Masking is not supported for the %OVS_KEY_ATTR_TUNNEL attribute. * @OVS_ACTION_ATTR_PUSH_VLAN: Push a new outermost 802.1Q header onto the * packet. * @OVS_ACTION_ATTR_POP_VLAN: Pop the outermost 802.1Q header off the packet. @@ -617,6 +623,9 @@ struct ovs_action_hash { * Only a single header can be set with a single %OVS_ACTION_ATTR_SET. Not all * fields within a header are modifiable, e.g. the IPv4 protocol and fragment * type may not be changed. + * + * @OVS_ACTION_ATTR_SET_TO_MASKED: Kernel internal masked set action translated + * from the @OVS_ACTION_ATTR_SET. */ enum ovs_action_attr { @@ -631,8 +640,19 @@ enum ovs_action_attr { OVS_ACTION_ATTR_HASH, /* struct ovs_action_hash. */ OVS_ACTION_ATTR_PUSH_MPLS, /* struct ovs_action_push_mpls. */ OVS_ACTION_ATTR_POP_MPLS, /* __be16 ethertype. */ + OVS_ACTION_ATTR_SET_MASKED, /* One nested OVS_KEY_ATTR_* including + * data immediately followed by a mask. + * The data must be zero for the unmasked + * bits. */ + + __OVS_ACTION_ATTR_MAX, /* Nothing past this will be accepted + * from userspace. */ - __OVS_ACTION_ATTR_MAX +#ifdef __KERNEL__ + OVS_ACTION_ATTR_SET_TO_MASKED, /* Kernel module internal masked + * set action converted from + * OVS_ACTION_ATTR_SET. 
*/ +#endif }; #define OVS_ACTION_ATTR_MAX (__OVS_ACTION_ATTR_MAX - 1) diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index b4cffe686126..b491c1c296fe 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -185,10 +185,15 @@ static int pop_mpls(struct sk_buff *skb, struct sw_flow_key *key, return 0; } -static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key, - const __be32 *mpls_lse) +/* 'KEY' must not have any bits set outside of the 'MASK' */ +#define MASKED(OLD, KEY, MASK) ((KEY) | ((OLD) & ~(MASK))) +#define SET_MASKED(OLD, KEY, MASK) ((OLD) = MASKED(OLD, KEY, MASK)) + +static int set_mpls(struct sk_buff *skb, struct sw_flow_key *flow_key, + const __be32 *mpls_lse, const __be32 *mask) { __be32 *stack; + __be32 lse; int err; err = skb_ensure_writable(skb, skb->mac_len + MPLS_HLEN); @@ -196,14 +201,16 @@ static int set_mpls(struct sk_buff *skb, struct sw_flow_key *key, return err; stack = (__be32 *)skb_mpls_header(skb); + lse = MASKED(*stack, *mpls_lse, *mask); if (skb->ip_summed == CHECKSUM_COMPLETE) { - __be32 diff[] = { ~(*stack), *mpls_lse }; + __be32 diff[] = { ~(*stack), lse }; + skb->csum = ~csum_partial((char *)diff, sizeof(diff), ~skb->csum); } - *stack = *mpls_lse; - key->mpls.top_lse = *mpls_lse; + *stack = lse; + flow_key->mpls.top_lse = lse; return 0; } @@ -230,23 +237,39 @@ static int push_vlan(struct sk_buff *skb, struct sw_flow_key *key, ntohs(vlan->vlan_tci) & ~VLAN_TAG_PRESENT); } -static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ethernet *eth_key) +/* 'src' is already properly masked. */ +static void ether_addr_copy_masked(u8 *dst_, const u8 *src_, const u8 *mask_) +{ + u16 *dst = (u16 *)dst_; + const u16 *src = (const u16 *)src_; + const u16 *mask = (const u16 *)mask_; + + SET_MASKED(dst[0], src[0], mask[0]); + SET_MASKED(dst[1], src[1], mask[1]); + SET_MASKED(dst[2], src[2], mask[2]); +} + +static int set_eth_addr(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_ethernet *key, + const struct ovs_key_ethernet *mask) { int err; + err = skb_ensure_writable(skb, ETH_HLEN); if (unlikely(err)) return err; skb_postpull_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); - ether_addr_copy(eth_hdr(skb)->h_source, eth_key->eth_src); - ether_addr_copy(eth_hdr(skb)->h_dest, eth_key->eth_dst); + ether_addr_copy_masked(eth_hdr(skb)->h_source, key->eth_src, + mask->eth_src); + ether_addr_copy_masked(eth_hdr(skb)->h_dest, key->eth_dst, + mask->eth_dst); ovs_skb_postpush_rcsum(skb, eth_hdr(skb), ETH_ALEN * 2); - ether_addr_copy(key->eth.src, eth_key->eth_src); - ether_addr_copy(key->eth.dst, eth_key->eth_dst); + ether_addr_copy(flow_key->eth.src, eth_hdr(skb)->h_source); + ether_addr_copy(flow_key->eth.dst, eth_hdr(skb)->h_dest); return 0; } @@ -304,6 +327,15 @@ static void update_ipv6_checksum(struct sk_buff *skb, u8 l4_proto, } } +static void mask_ipv6_addr(const __be32 old[4], const __be32 addr[4], + const __be32 mask[4], __be32 masked[4]) +{ + masked[0] = MASKED(old[0], addr[0], mask[0]); + masked[1] = MASKED(old[1], addr[1], mask[1]); + masked[2] = MASKED(old[2], addr[2], mask[2]); + masked[3] = MASKED(old[3], addr[3], mask[3]); +} + static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, __be32 addr[4], const __be32 new_addr[4], bool recalculate_csum) @@ -315,29 +347,29 @@ static void set_ipv6_addr(struct sk_buff *skb, u8 l4_proto, memcpy(addr, new_addr, sizeof(__be32[4])); } -static void set_ipv6_tc(struct ipv6hdr *nh, u8 tc) +static void set_ipv6_fl(struct ipv6hdr *nh, 
u32 fl, u32 mask) { - nh->priority = tc >> 4; - nh->flow_lbl[0] = (nh->flow_lbl[0] & 0x0F) | ((tc & 0x0F) << 4); + /* Bits 21-24 are always unmasked, so this retains their values. */ + SET_MASKED(nh->flow_lbl[0], (u8)(fl >> 16), (u8)(mask >> 16)); + SET_MASKED(nh->flow_lbl[1], (u8)(fl >> 8), (u8)(mask >> 8)); + SET_MASKED(nh->flow_lbl[2], (u8)fl, (u8)mask); } -static void set_ipv6_fl(struct ipv6hdr *nh, u32 fl) +static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl, + u8 mask) { - nh->flow_lbl[0] = (nh->flow_lbl[0] & 0xF0) | (fl & 0x000F0000) >> 16; - nh->flow_lbl[1] = (fl & 0x0000FF00) >> 8; - nh->flow_lbl[2] = fl & 0x000000FF; -} + new_ttl = MASKED(nh->ttl, new_ttl, mask); -static void set_ip_ttl(struct sk_buff *skb, struct iphdr *nh, u8 new_ttl) -{ csum_replace2(&nh->check, htons(nh->ttl << 8), htons(new_ttl << 8)); nh->ttl = new_ttl; } -static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ipv4 *ipv4_key) +static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_ipv4 *key, + const struct ovs_key_ipv4 *mask) { struct iphdr *nh; + __be32 new_addr; int err; err = skb_ensure_writable(skb, skb_network_offset(skb) + @@ -347,36 +379,49 @@ static int set_ipv4(struct sk_buff *skb, struct sw_flow_key *key, nh = ip_hdr(skb); - if (ipv4_key->ipv4_src != nh->saddr) { - set_ip_addr(skb, nh, &nh->saddr, ipv4_key->ipv4_src); - key->ipv4.addr.src = ipv4_key->ipv4_src; - } + /* Setting an IP addresses is typically only a side effect of + * matching on them in the current userspace implementation, so it + * makes sense to check if the value actually changed. + */ + if (mask->ipv4_src) { + new_addr = MASKED(nh->saddr, key->ipv4_src, mask->ipv4_src); - if (ipv4_key->ipv4_dst != nh->daddr) { - set_ip_addr(skb, nh, &nh->daddr, ipv4_key->ipv4_dst); - key->ipv4.addr.dst = ipv4_key->ipv4_dst; + if (unlikely(new_addr != nh->saddr)) { + set_ip_addr(skb, nh, &nh->saddr, new_addr); + flow_key->ipv4.addr.src = new_addr; + } } + if (mask->ipv4_dst) { + new_addr = MASKED(nh->daddr, key->ipv4_dst, mask->ipv4_dst); - if (ipv4_key->ipv4_tos != nh->tos) { - ipv4_change_dsfield(nh, 0, ipv4_key->ipv4_tos); - key->ip.tos = nh->tos; + if (unlikely(new_addr != nh->daddr)) { + set_ip_addr(skb, nh, &nh->daddr, new_addr); + flow_key->ipv4.addr.dst = new_addr; + } } - - if (ipv4_key->ipv4_ttl != nh->ttl) { - set_ip_ttl(skb, nh, ipv4_key->ipv4_ttl); - key->ip.ttl = ipv4_key->ipv4_ttl; + if (mask->ipv4_tos) { + ipv4_change_dsfield(nh, ~mask->ipv4_tos, key->ipv4_tos); + flow_key->ip.tos = nh->tos; + } + if (mask->ipv4_ttl) { + set_ip_ttl(skb, nh, key->ipv4_ttl, mask->ipv4_ttl); + flow_key->ip.ttl = nh->ttl; } return 0; } -static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_ipv6 *ipv6_key) +static bool is_ipv6_mask_nonzero(const __be32 addr[4]) +{ + return !!(addr[0] | addr[1] | addr[2] | addr[3]); +} + +static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_ipv6 *key, + const struct ovs_key_ipv6 *mask) { struct ipv6hdr *nh; int err; - __be32 *saddr; - __be32 *daddr; err = skb_ensure_writable(skb, skb_network_offset(skb) + sizeof(struct ipv6hdr)); @@ -384,71 +429,77 @@ static int set_ipv6(struct sk_buff *skb, struct sw_flow_key *key, return err; nh = ipv6_hdr(skb); - saddr = (__be32 *)&nh->saddr; - daddr = (__be32 *)&nh->daddr; - - if (memcmp(ipv6_key->ipv6_src, saddr, sizeof(ipv6_key->ipv6_src))) { - set_ipv6_addr(skb, ipv6_key->ipv6_proto, saddr, - ipv6_key->ipv6_src, 
true); - memcpy(&key->ipv6.addr.src, ipv6_key->ipv6_src, - sizeof(ipv6_key->ipv6_src)); - } - if (memcmp(ipv6_key->ipv6_dst, daddr, sizeof(ipv6_key->ipv6_dst))) { + /* Setting an IP addresses is typically only a side effect of + * matching on them in the current userspace implementation, so it + * makes sense to check if the value actually changed. + */ + if (is_ipv6_mask_nonzero(mask->ipv6_src)) { + __be32 *saddr = (__be32 *)&nh->saddr; + __be32 masked[4]; + + mask_ipv6_addr(saddr, key->ipv6_src, mask->ipv6_src, masked); + + if (unlikely(memcmp(saddr, masked, sizeof(masked)))) { + set_ipv6_addr(skb, key->ipv6_proto, saddr, masked, + true); + memcpy(&flow_key->ipv6.addr.src, masked, + sizeof(flow_key->ipv6.addr.src)); + } + } + if (is_ipv6_mask_nonzero(mask->ipv6_dst)) { unsigned int offset = 0; int flags = IP6_FH_F_SKIP_RH; bool recalc_csum = true; - - if (ipv6_ext_hdr(nh->nexthdr)) - recalc_csum = ipv6_find_hdr(skb, &offset, - NEXTHDR_ROUTING, NULL, - &flags) != NEXTHDR_ROUTING; - - set_ipv6_addr(skb, ipv6_key->ipv6_proto, daddr, - ipv6_key->ipv6_dst, recalc_csum); - memcpy(&key->ipv6.addr.dst, ipv6_key->ipv6_dst, - sizeof(ipv6_key->ipv6_dst)); + __be32 *daddr = (__be32 *)&nh->daddr; + __be32 masked[4]; + + mask_ipv6_addr(daddr, key->ipv6_dst, mask->ipv6_dst, masked); + + if (unlikely(memcmp(daddr, masked, sizeof(masked)))) { + if (ipv6_ext_hdr(nh->nexthdr)) + recalc_csum = (ipv6_find_hdr(skb, &offset, + NEXTHDR_ROUTING, + NULL, &flags) + != NEXTHDR_ROUTING); + + set_ipv6_addr(skb, key->ipv6_proto, daddr, masked, + recalc_csum); + memcpy(&flow_key->ipv6.addr.dst, masked, + sizeof(flow_key->ipv6.addr.dst)); + } + } + if (mask->ipv6_tclass) { + ipv6_change_dsfield(nh, ~mask->ipv6_tclass, key->ipv6_tclass); + flow_key->ip.tos = ipv6_get_dsfield(nh); + } + if (mask->ipv6_label) { + set_ipv6_fl(nh, ntohl(key->ipv6_label), + ntohl(mask->ipv6_label)); + flow_key->ipv6.label = + *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); + } + if (mask->ipv6_hlimit) { + SET_MASKED(nh->hop_limit, key->ipv6_hlimit, mask->ipv6_hlimit); + flow_key->ip.ttl = nh->hop_limit; } - - set_ipv6_tc(nh, ipv6_key->ipv6_tclass); - key->ip.tos = ipv6_get_dsfield(nh); - - set_ipv6_fl(nh, ntohl(ipv6_key->ipv6_label)); - key->ipv6.label = *(__be32 *)nh & htonl(IPV6_FLOWINFO_FLOWLABEL); - - nh->hop_limit = ipv6_key->ipv6_hlimit; - key->ip.ttl = ipv6_key->ipv6_hlimit; return 0; } /* Must follow skb_ensure_writable() since that can move the skb data. 
*/ static void set_tp_port(struct sk_buff *skb, __be16 *port, - __be16 new_port, __sum16 *check) + __be16 new_port, __sum16 *check) { inet_proto_csum_replace2(check, skb, *port, new_port, 0); *port = new_port; - skb_clear_hash(skb); -} - -static void set_udp_port(struct sk_buff *skb, __be16 *port, __be16 new_port) -{ - struct udphdr *uh = udp_hdr(skb); - - if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) { - set_tp_port(skb, port, new_port, &uh->check); - - if (!uh->check) - uh->check = CSUM_MANGLED_0; - } else { - *port = new_port; - skb_clear_hash(skb); - } } -static int set_udp(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_udp *udp_port_key) +static int set_udp(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_udp *key, + const struct ovs_key_udp *mask) { struct udphdr *uh; + __be16 src, dst; int err; err = skb_ensure_writable(skb, skb_transport_offset(skb) + @@ -457,23 +508,40 @@ static int set_udp(struct sk_buff *skb, struct sw_flow_key *key, return err; uh = udp_hdr(skb); - if (udp_port_key->udp_src != uh->source) { - set_udp_port(skb, &uh->source, udp_port_key->udp_src); - key->tp.src = udp_port_key->udp_src; - } + /* Either of the masks is non-zero, so do not bother checking them. */ + src = MASKED(uh->source, key->udp_src, mask->udp_src); + dst = MASKED(uh->dest, key->udp_dst, mask->udp_dst); - if (udp_port_key->udp_dst != uh->dest) { - set_udp_port(skb, &uh->dest, udp_port_key->udp_dst); - key->tp.dst = udp_port_key->udp_dst; + if (uh->check && skb->ip_summed != CHECKSUM_PARTIAL) { + if (likely(src != uh->source)) { + set_tp_port(skb, &uh->source, src, &uh->check); + flow_key->tp.src = src; + } + if (likely(dst != uh->dest)) { + set_tp_port(skb, &uh->dest, dst, &uh->check); + flow_key->tp.dst = dst; + } + + if (unlikely(!uh->check)) + uh->check = CSUM_MANGLED_0; + } else { + uh->source = src; + uh->dest = dst; + flow_key->tp.src = src; + flow_key->tp.dst = dst; } + skb_clear_hash(skb); + return 0; } -static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_tcp *tcp_port_key) +static int set_tcp(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_tcp *key, + const struct ovs_key_tcp *mask) { struct tcphdr *th; + __be16 src, dst; int err; err = skb_ensure_writable(skb, skb_transport_offset(skb) + @@ -482,50 +550,49 @@ static int set_tcp(struct sk_buff *skb, struct sw_flow_key *key, return err; th = tcp_hdr(skb); - if (tcp_port_key->tcp_src != th->source) { - set_tp_port(skb, &th->source, tcp_port_key->tcp_src, &th->check); - key->tp.src = tcp_port_key->tcp_src; + src = MASKED(th->source, key->tcp_src, mask->tcp_src); + if (likely(src != th->source)) { + set_tp_port(skb, &th->source, src, &th->check); + flow_key->tp.src = src; } - - if (tcp_port_key->tcp_dst != th->dest) { - set_tp_port(skb, &th->dest, tcp_port_key->tcp_dst, &th->check); - key->tp.dst = tcp_port_key->tcp_dst; + dst = MASKED(th->dest, key->tcp_dst, mask->tcp_dst); + if (likely(dst != th->dest)) { + set_tp_port(skb, &th->dest, dst, &th->check); + flow_key->tp.dst = dst; } + skb_clear_hash(skb); return 0; } -static int set_sctp(struct sk_buff *skb, struct sw_flow_key *key, - const struct ovs_key_sctp *sctp_port_key) +static int set_sctp(struct sk_buff *skb, struct sw_flow_key *flow_key, + const struct ovs_key_sctp *key, + const struct ovs_key_sctp *mask) { + unsigned int sctphoff = skb_transport_offset(skb); struct sctphdr *sh; + __le32 old_correct_csum, new_csum, old_csum; int err; - unsigned int sctphoff = 
skb_transport_offset(skb); err = skb_ensure_writable(skb, sctphoff + sizeof(struct sctphdr)); if (unlikely(err)) return err; sh = sctp_hdr(skb); - if (sctp_port_key->sctp_src != sh->source || - sctp_port_key->sctp_dst != sh->dest) { - __le32 old_correct_csum, new_csum, old_csum; + old_csum = sh->checksum; + old_correct_csum = sctp_compute_cksum(skb, sctphoff); - old_csum = sh->checksum; - old_correct_csum = sctp_compute_cksum(skb, sctphoff); + sh->source = MASKED(sh->source, key->sctp_src, mask->sctp_src); + sh->dest = MASKED(sh->dest, key->sctp_dst, mask->sctp_dst); - sh->source = sctp_port_key->sctp_src; - sh->dest = sctp_port_key->sctp_dst; + new_csum = sctp_compute_cksum(skb, sctphoff); - new_csum = sctp_compute_cksum(skb, sctphoff); + /* Carry any checksum errors through. */ + sh->checksum = old_csum ^ old_correct_csum ^ new_csum; - /* Carry any checksum errors through. */ - sh->checksum = old_csum ^ old_correct_csum ^ new_csum; - - skb_clear_hash(skb); - key->tp.src = sctp_port_key->sctp_src; - key->tp.dst = sctp_port_key->sctp_dst; - } + skb_clear_hash(skb); + flow_key->tp.src = sh->source; + flow_key->tp.dst = sh->dest; return 0; } @@ -653,52 +720,77 @@ static void execute_hash(struct sk_buff *skb, struct sw_flow_key *key, key->ovs_flow_hash = hash; } -static int execute_set_action(struct sk_buff *skb, struct sw_flow_key *key, - const struct nlattr *nested_attr) +static int execute_set_action(struct sk_buff *skb, + struct sw_flow_key *flow_key, + const struct nlattr *a) +{ + /* Only tunnel set execution is supported without a mask. */ + if (nla_type(a) == OVS_KEY_ATTR_TUNNEL_INFO) { + OVS_CB(skb)->egress_tun_info = nla_data(a); + return 0; + } + + return -EINVAL; +} + +/* Mask is at the midpoint of the data. */ +#define get_mask(a, type) ((const type)nla_data(a) + 1) + +static int execute_masked_set_action(struct sk_buff *skb, + struct sw_flow_key *flow_key, + const struct nlattr *a) { int err = 0; - switch (nla_type(nested_attr)) { + switch (nla_type(a)) { case OVS_KEY_ATTR_PRIORITY: - skb->priority = nla_get_u32(nested_attr); - key->phy.priority = skb->priority; + SET_MASKED(skb->priority, nla_get_u32(a), *get_mask(a, u32 *)); + flow_key->phy.priority = skb->priority; break; case OVS_KEY_ATTR_SKB_MARK: - skb->mark = nla_get_u32(nested_attr); - key->phy.skb_mark = skb->mark; + SET_MASKED(skb->mark, nla_get_u32(a), *get_mask(a, u32 *)); + flow_key->phy.skb_mark = skb->mark; break; case OVS_KEY_ATTR_TUNNEL_INFO: - OVS_CB(skb)->egress_tun_info = nla_data(nested_attr); + /* Masked data not supported for tunnel. 
*/ + err = -EINVAL; break; case OVS_KEY_ATTR_ETHERNET: - err = set_eth_addr(skb, key, nla_data(nested_attr)); + err = set_eth_addr(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_ethernet *)); break; case OVS_KEY_ATTR_IPV4: - err = set_ipv4(skb, key, nla_data(nested_attr)); + err = set_ipv4(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_ipv4 *)); break; case OVS_KEY_ATTR_IPV6: - err = set_ipv6(skb, key, nla_data(nested_attr)); + err = set_ipv6(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_ipv6 *)); break; case OVS_KEY_ATTR_TCP: - err = set_tcp(skb, key, nla_data(nested_attr)); + err = set_tcp(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_tcp *)); break; case OVS_KEY_ATTR_UDP: - err = set_udp(skb, key, nla_data(nested_attr)); + err = set_udp(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_udp *)); break; case OVS_KEY_ATTR_SCTP: - err = set_sctp(skb, key, nla_data(nested_attr)); + err = set_sctp(skb, flow_key, nla_data(a), + get_mask(a, struct ovs_key_sctp *)); break; case OVS_KEY_ATTR_MPLS: - err = set_mpls(skb, key, nla_data(nested_attr)); + err = set_mpls(skb, flow_key, nla_data(a), get_mask(a, + __be32 *)); break; } @@ -818,6 +910,11 @@ static int do_execute_actions(struct datapath *dp, struct sk_buff *skb, err = execute_set_action(skb, key, nla_data(a)); break; + case OVS_ACTION_ATTR_SET_MASKED: + case OVS_ACTION_ATTR_SET_TO_MASKED: + err = execute_masked_set_action(skb, key, nla_data(a)); + break; + case OVS_ACTION_ATTR_SAMPLE: err = sample(dp, skb, key, a); break; diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index 8b9a612b39d1..993281e6278d 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -1695,16 +1695,6 @@ static int validate_and_copy_sample(const struct nlattr *attr, return 0; } -static int validate_tp_port(const struct sw_flow_key *flow_key, - __be16 eth_type) -{ - if ((eth_type == htons(ETH_P_IP) || eth_type == htons(ETH_P_IPV6)) && - (flow_key->tp.src || flow_key->tp.dst)) - return 0; - - return -EINVAL; -} - void ovs_match_init(struct sw_flow_match *match, struct sw_flow_key *key, struct sw_flow_mask *mask) @@ -1805,23 +1795,45 @@ static int validate_and_copy_set_tun(const struct nlattr *attr, return err; } +/* Return false if there are any non-masked bits set. + * Mask follows data immediately, before any netlink padding. 
+ */ +static bool validate_masked(u8 *data, int len) +{ + u8 *mask = data + len; + + while (len--) + if (*data++ & ~*mask++) + return false; + + return true; +} + static int validate_set(const struct nlattr *a, const struct sw_flow_key *flow_key, struct sw_flow_actions **sfa, - bool *set_tun, __be16 eth_type, bool log) + bool *skip_copy, __be16 eth_type, bool masked, bool log) { const struct nlattr *ovs_key = nla_data(a); int key_type = nla_type(ovs_key); + size_t key_len; /* There can be only one key in a action */ if (nla_total_size(nla_len(ovs_key)) != nla_len(a)) return -EINVAL; + key_len = nla_len(ovs_key); + if (masked) + key_len /= 2; + if (key_type > OVS_KEY_ATTR_MAX || - (ovs_key_lens[key_type].len != nla_len(ovs_key) && + (ovs_key_lens[key_type].len != key_len && ovs_key_lens[key_type].len != OVS_ATTR_NESTED)) return -EINVAL; + if (masked && !validate_masked(nla_data(ovs_key), key_len)) + return -EINVAL; + switch (key_type) { const struct ovs_key_ipv4 *ipv4_key; const struct ovs_key_ipv6 *ipv6_key; @@ -1836,7 +1848,10 @@ static int validate_set(const struct nlattr *a, if (eth_p_mpls(eth_type)) return -EINVAL; - *set_tun = true; + if (masked) + return -EINVAL; /* Masked tunnel set not supported. */ + + *skip_copy = true; err = validate_and_copy_set_tun(a, sfa, log); if (err) return err; @@ -1846,48 +1861,66 @@ static int validate_set(const struct nlattr *a, if (eth_type != htons(ETH_P_IP)) return -EINVAL; - if (!flow_key->ip.proto) - return -EINVAL; - ipv4_key = nla_data(ovs_key); - if (ipv4_key->ipv4_proto != flow_key->ip.proto) - return -EINVAL; - if (ipv4_key->ipv4_frag != flow_key->ip.frag) - return -EINVAL; + if (masked) { + const struct ovs_key_ipv4 *mask = ipv4_key + 1; + /* Non-writeable fields. */ + if (mask->ipv4_proto || mask->ipv4_frag) + return -EINVAL; + } else { + if (ipv4_key->ipv4_proto != flow_key->ip.proto) + return -EINVAL; + + if (ipv4_key->ipv4_frag != flow_key->ip.frag) + return -EINVAL; + } break; case OVS_KEY_ATTR_IPV6: if (eth_type != htons(ETH_P_IPV6)) return -EINVAL; - if (!flow_key->ip.proto) - return -EINVAL; - ipv6_key = nla_data(ovs_key); - if (ipv6_key->ipv6_proto != flow_key->ip.proto) - return -EINVAL; - if (ipv6_key->ipv6_frag != flow_key->ip.frag) - return -EINVAL; + if (masked) { + const struct ovs_key_ipv6 *mask = ipv6_key + 1; + + /* Non-writeable fields. */ + if (mask->ipv6_proto || mask->ipv6_frag) + return -EINVAL; + + /* Invalid bits in the flow label mask? 
*/ + if (ntohl(mask->ipv6_label) & 0xFFF00000) + return -EINVAL; + } else { + if (ipv6_key->ipv6_proto != flow_key->ip.proto) + return -EINVAL; + if (ipv6_key->ipv6_frag != flow_key->ip.frag) + return -EINVAL; + } if (ntohl(ipv6_key->ipv6_label) & 0xFFF00000) return -EINVAL; break; case OVS_KEY_ATTR_TCP: - if (flow_key->ip.proto != IPPROTO_TCP) + if ((eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6)) || + flow_key->ip.proto != IPPROTO_TCP) return -EINVAL; - return validate_tp_port(flow_key, eth_type); + break; case OVS_KEY_ATTR_UDP: - if (flow_key->ip.proto != IPPROTO_UDP) + if ((eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6)) || + flow_key->ip.proto != IPPROTO_UDP) return -EINVAL; - return validate_tp_port(flow_key, eth_type); + break; case OVS_KEY_ATTR_MPLS: if (!eth_p_mpls(eth_type)) @@ -1895,15 +1928,45 @@ static int validate_set(const struct nlattr *a, break; case OVS_KEY_ATTR_SCTP: - if (flow_key->ip.proto != IPPROTO_SCTP) + if ((eth_type != htons(ETH_P_IP) && + eth_type != htons(ETH_P_IPV6)) || + flow_key->ip.proto != IPPROTO_SCTP) return -EINVAL; - return validate_tp_port(flow_key, eth_type); + break; default: return -EINVAL; } + /* Convert non-masked non-tunnel set actions to masked set actions. */ + if (!masked && key_type != OVS_KEY_ATTR_TUNNEL) { + int start, len = key_len * 2; + struct nlattr *at; + + *skip_copy = true; + + start = add_nested_action_start(sfa, + OVS_ACTION_ATTR_SET_TO_MASKED, + log); + if (start < 0) + return start; + + at = __add_action(sfa, key_type, NULL, len, log); + if (IS_ERR(at)) + return PTR_ERR(at); + + memcpy(nla_data(at), nla_data(ovs_key), key_len); /* Key. */ + memset(nla_data(at) + key_len, 0xff, key_len); /* Mask. */ + /* Clear non-writeable bits from otherwise writeable fields. */ + if (key_type == OVS_KEY_ATTR_IPV6) { + struct ovs_key_ipv6 *mask = nla_data(at) + key_len; + + mask->ipv6_label &= htonl(0x000FFFFF); + } + add_nested_action_end(*sfa, start); + } + return 0; } @@ -1965,6 +2028,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, [OVS_ACTION_ATTR_PUSH_VLAN] = sizeof(struct ovs_action_push_vlan), [OVS_ACTION_ATTR_POP_VLAN] = 0, [OVS_ACTION_ATTR_SET] = (u32)-1, + [OVS_ACTION_ATTR_SET_MASKED] = (u32)-1, [OVS_ACTION_ATTR_SAMPLE] = (u32)-1, [OVS_ACTION_ATTR_HASH] = sizeof(struct ovs_action_hash) }; @@ -2060,7 +2124,14 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, case OVS_ACTION_ATTR_SET: err = validate_set(a, key, sfa, - &skip_copy, eth_type, log); + &skip_copy, eth_type, false, log); + if (err) + return err; + break; + + case OVS_ACTION_ATTR_SET_MASKED: + err = validate_set(a, key, sfa, + &skip_copy, eth_type, true, log); if (err) return err; break; @@ -2090,6 +2161,7 @@ static int __ovs_nla_copy_actions(const struct nlattr *attr, return 0; } +/* 'key' must be the masked key. */ int ovs_nla_copy_actions(const struct nlattr *attr, const struct sw_flow_key *key, struct sw_flow_actions **sfa, bool log) @@ -2177,6 +2249,21 @@ static int set_action_to_attr(const struct nlattr *a, struct sk_buff *skb) return 0; } +static int masked_set_action_to_set_action_attr(const struct nlattr *a, + struct sk_buff *skb) +{ + const struct nlattr *ovs_key = nla_data(a); + size_t key_len = nla_len(ovs_key) / 2; + + /* Revert the conversion we did from a non-masked set action to + * masked set action. 
+ */ + if (nla_put(skb, OVS_ACTION_ATTR_SET, nla_len(a) - key_len, ovs_key)) + return -EMSGSIZE; + + return 0; +} + int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) { const struct nlattr *a; @@ -2192,6 +2279,12 @@ int ovs_nla_put_actions(const struct nlattr *attr, int len, struct sk_buff *skb) return err; break; + case OVS_ACTION_ATTR_SET_TO_MASKED: + err = masked_set_action_to_set_action_attr(a, skb); + if (err) + return err; + break; + case OVS_ACTION_ATTR_SAMPLE: err = sample_action_to_attr(a, skb); if (err) -- cgit v1.2.3-70-g09d2 From 032ee4236954eb214651cb9bfc1b38ffa8fd7a01 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 6 Feb 2015 16:04:38 -0500 Subject: tcp: helpers to mitigate ACK loops by rate-limiting out-of-window dupacks Helpers for mitigating ACK loops by rate-limiting dupacks sent in response to incoming out-of-window packets. This patch includes: - rate-limiting logic - sysctl to control how often we allow dupacks to out-of-window packets - SNMP counter for cases where we rate-limited our dupack sending The rate-limiting logic in this patch decides to not send dupacks in response to out-of-window segments if (a) they are SYNs or pure ACKs and (b) the remote endpoint is sending them faster than the configured rate limit. We rate-limit our responses rather than blocking them entirely or resetting the connection, because legitimate connections can rely on dupacks in response to some out-of-window segments. For example, zero window probes are typically sent with a sequence number that is below the current window, and ZWPs thus expect to thus elicit a dupack in response. We allow dupacks in response to TCP segments with data, because these may be spurious retransmissions for which the remote endpoint wants to receive DSACKs. This is safe because segments with data can't realistically be part of ACK loops, which by their nature consist of each side sending pure/data-less ACKs to each other. The dupack interval is controlled by a new sysctl knob, tcp_invalid_ratelimit, given in milliseconds, in case an administrator needs to dial this upward in the face of a high-rate DoS attack. The name and units are chosen to be analogous to the existing analogous knob for ICMP, icmp_ratelimit. The default value for tcp_invalid_ratelimit is 500ms, which allows at most one such dupack per 500ms. This is chosen to be 2x faster than the 1-second minimum RTO interval allowed by RFC 6298 (section 2, rule 2.4). We allow the extra 2x factor because network delay variations can cause packets sent at 1 second intervals to be compressed and arrive much closer. Reported-by: Avery Fay Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 22 ++++++++++++++++++++++ include/net/tcp.h | 32 ++++++++++++++++++++++++++++++++ include/uapi/linux/snmp.h | 6 ++++++ net/ipv4/proc.c | 6 ++++++ net/ipv4/sysctl_net_ipv4.c | 7 +++++++ net/ipv4/tcp_input.c | 1 + 6 files changed, 74 insertions(+) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index a5e4c813f17f..1b8c964b0d17 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -290,6 +290,28 @@ tcp_frto - INTEGER By default it's enabled with a non-zero value. 0 disables F-RTO. 
+tcp_invalid_ratelimit - INTEGER + Limit the maximal rate for sending duplicate acknowledgments + in response to incoming TCP packets that are for an existing + connection but that are invalid due to any of these reasons: + + (a) out-of-window sequence number, + (b) out-of-window acknowledgment number, or + (c) PAWS (Protection Against Wrapped Sequence numbers) check failure + + This can help mitigate simple "ack loop" DoS attacks, wherein + a buggy or malicious middlebox or man-in-the-middle can + rewrite TCP header fields in manner that causes each endpoint + to think that the other is sending invalid TCP segments, thus + causing each side to send an unterminating stream of duplicate + acknowledgments for invalid segments. + + Using 0 disables rate-limiting of dupacks in response to + invalid segments; otherwise this value specifies the minimal + space between sending such dupacks, in milliseconds. + + Default: 500 (milliseconds). + tcp_keepalive_time - INTEGER How often TCP sends out keepalive messages when keepalive is enabled. Default: 2hours. diff --git a/include/net/tcp.h b/include/net/tcp.h index 28e9bd3abceb..b81f45c67b2e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -274,6 +274,7 @@ extern int sysctl_tcp_challenge_ack_limit; extern unsigned int sysctl_tcp_notsent_lowat; extern int sysctl_tcp_min_tso_segs; extern int sysctl_tcp_autocorking; +extern int sysctl_tcp_invalid_ratelimit; extern atomic_long_t tcp_memory_allocated; extern struct percpu_counter tcp_sockets_allocated; @@ -1236,6 +1237,37 @@ static inline bool tcp_paws_reject(const struct tcp_options_received *rx_opt, return true; } +/* Return true if we're currently rate-limiting out-of-window ACKs and + * thus shouldn't send a dupack right now. We rate-limit dupacks in + * response to out-of-window SYNs or ACKs to mitigate ACK loops or DoS + * attacks that send repeated SYNs or ACKs for the same connection. To + * do this, we do not send a duplicate SYNACK or ACK if the remote + * endpoint is sending out-of-window SYNs or pure ACKs at a high rate. + */ +static inline bool tcp_oow_rate_limited(struct net *net, + const struct sk_buff *skb, + int mib_idx, u32 *last_oow_ack_time) +{ + /* Data packets without SYNs are not likely part of an ACK loop. */ + if ((TCP_SKB_CB(skb)->seq != TCP_SKB_CB(skb)->end_seq) && + !tcp_hdr(skb)->syn) + goto not_rate_limited; + + if (*last_oow_ack_time) { + s32 elapsed = (s32)(tcp_time_stamp - *last_oow_ack_time); + + if (0 <= elapsed && elapsed < sysctl_tcp_invalid_ratelimit) { + NET_INC_STATS_BH(net, mib_idx); + return true; /* rate-limited: don't send yet! */ + } + } + + *last_oow_ack_time = tcp_time_stamp; + +not_rate_limited: + return false; /* not rate-limited: go ahead, send dupack now! 
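+ * Example with HZ=1000 and the default limit of HZ/2 (500 ms): out-of-window pure ACKs arriving at t=0, 100 and 600 ms elicit dupacks at 0 and 600 ms, while the one at 100 ms only bumps the SNMP counter and is suppressed.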
*/ +} + static inline void tcp_mib_init(struct net *net) { /* See RFC 2012 */ diff --git a/include/uapi/linux/snmp.h b/include/uapi/linux/snmp.h index b22224100011..6a6fb747c78d 100644 --- a/include/uapi/linux/snmp.h +++ b/include/uapi/linux/snmp.h @@ -270,6 +270,12 @@ enum LINUX_MIB_TCPHYSTARTTRAINCWND, /* TCPHystartTrainCwnd */ LINUX_MIB_TCPHYSTARTDELAYDETECT, /* TCPHystartDelayDetect */ LINUX_MIB_TCPHYSTARTDELAYCWND, /* TCPHystartDelayCwnd */ + LINUX_MIB_TCPACKSKIPPEDSYNRECV, /* TCPACKSkippedSynRecv */ + LINUX_MIB_TCPACKSKIPPEDPAWS, /* TCPACKSkippedPAWS */ + LINUX_MIB_TCPACKSKIPPEDSEQ, /* TCPACKSkippedSeq */ + LINUX_MIB_TCPACKSKIPPEDFINWAIT2, /* TCPACKSkippedFinWait2 */ + LINUX_MIB_TCPACKSKIPPEDTIMEWAIT, /* TCPACKSkippedTimeWait */ + LINUX_MIB_TCPACKSKIPPEDCHALLENGE, /* TCPACKSkippedChallenge */ __LINUX_MIB_MAX }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 8f9cd200ce20..d8953ef0770c 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -292,6 +292,12 @@ static const struct snmp_mib snmp4_net_list[] = { SNMP_MIB_ITEM("TCPHystartTrainCwnd", LINUX_MIB_TCPHYSTARTTRAINCWND), SNMP_MIB_ITEM("TCPHystartDelayDetect", LINUX_MIB_TCPHYSTARTDELAYDETECT), SNMP_MIB_ITEM("TCPHystartDelayCwnd", LINUX_MIB_TCPHYSTARTDELAYCWND), + SNMP_MIB_ITEM("TCPACKSkippedSynRecv", LINUX_MIB_TCPACKSKIPPEDSYNRECV), + SNMP_MIB_ITEM("TCPACKSkippedPAWS", LINUX_MIB_TCPACKSKIPPEDPAWS), + SNMP_MIB_ITEM("TCPACKSkippedSeq", LINUX_MIB_TCPACKSKIPPEDSEQ), + SNMP_MIB_ITEM("TCPACKSkippedFinWait2", LINUX_MIB_TCPACKSKIPPEDFINWAIT2), + SNMP_MIB_ITEM("TCPACKSkippedTimeWait", LINUX_MIB_TCPACKSKIPPEDTIMEWAIT), + SNMP_MIB_ITEM("TCPACKSkippedChallenge", LINUX_MIB_TCPACKSKIPPEDCHALLENGE), SNMP_MIB_SENTINEL }; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index e0ee384a448f..82601a68cf90 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -728,6 +728,13 @@ static struct ctl_table ipv4_table[] = { .extra1 = &zero, .extra2 = &one, }, + { + .procname = "tcp_invalid_ratelimit", + .data = &sysctl_tcp_invalid_ratelimit, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_ms_jiffies, + }, { .procname = "icmp_msgs_per_sec", .data = &sysctl_icmp_msgs_per_sec, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index d3dfff78fa19..9401aa43b814 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -100,6 +100,7 @@ int sysctl_tcp_thin_dupack __read_mostly; int sysctl_tcp_moderate_rcvbuf __read_mostly = 1; int sysctl_tcp_early_retrans __read_mostly = 3; +int sysctl_tcp_invalid_ratelimit __read_mostly = HZ/2; #define FLAG_DATA 0x01 /* Incoming frame contained data. */ #define FLAG_WIN_UPDATE 0x02 /* Incoming ACK was a window update. */ -- cgit v1.2.3-70-g09d2 From a9b2c06dbef48ed31cff1764c5ce824829106f4f Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 6 Feb 2015 16:04:39 -0500 Subject: tcp: mitigate ACK loops for connections as tcp_request_sock In the SYN_RECV state, where the TCP connection is represented by tcp_request_sock, we now rate-limit SYNACKs in response to a client's retransmitted SYNs: we do not send a SYNACK in response to client SYN if it has been less than sysctl_tcp_invalid_ratelimit (default 500ms) since we last sent a SYNACK in response to a client's retransmitted SYN. This allows the vast majority of legitimate client connections to proceed unimpeded, even for the most aggressive platforms, iOS and MacOS, which actually retransmit SYNs 1-second intervals for several times in a row. 
They use SYN RTO timeouts following the progression: 1,1,1,1,1,2,4,8,16,32. Reported-by: Avery Fay Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/net/tcp.h | 1 + net/ipv4/tcp_minisocks.c | 6 +++++- 3 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 67309ece0772..bcc828d3b9b9 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -115,6 +115,7 @@ struct tcp_request_sock { u32 rcv_isn; u32 snt_isn; u32 snt_synack; /* synack sent time */ + u32 last_oow_ack_time; /* last SYNACK */ u32 rcv_nxt; /* the ack # by SYNACK. For * FastOpen it's the seq# * after data-in-SYN. diff --git a/include/net/tcp.h b/include/net/tcp.h index b81f45c67b2e..da4196fb78db 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1145,6 +1145,7 @@ static inline void tcp_openreq_init(struct request_sock *req, tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; tcp_rsk(req)->snt_synack = tcp_time_stamp; + tcp_rsk(req)->last_oow_ack_time = 0; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; ireq->tstamp_ok = rx_opt->tstamp_ok; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index bc9216dc9de1..131aa4950d1c 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -605,7 +605,11 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * Reset timer after retransmitting SYNACK, similar to * the idea of fast retransmit in recovery. */ - if (!inet_rtx_syn_ack(sk, req)) + if (!tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDSYNRECV, + &tcp_rsk(req)->last_oow_ack_time) && + + !inet_rtx_syn_ack(sk, req)) req->expires = min(TCP_TIMEOUT_INIT << req->num_timeout, TCP_RTO_MAX) + jiffies; return NULL; -- cgit v1.2.3-70-g09d2 From f2b2c582e82429270d5818fbabe653f4359d7024 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 6 Feb 2015 16:04:40 -0500 Subject: tcp: mitigate ACK loops for connections as tcp_sock Ensure that in state ESTABLISHED, where the connection is represented by a tcp_sock, we rate limit dupacks in response to incoming packets (a) with TCP timestamps that fail PAWS checks, or (b) with sequence numbers or ACK numbers that are out of the acceptable window. We do not send a dupack in response to out-of-window packets if it has been less than sysctl_tcp_invalid_ratelimit (default 500ms) since we last sent a dupack in response to an out-of-window packet. There is already a similar (although global) rate-limiting mechanism for "challenge ACKs". When deciding whether to send a challence ACK, we first consult the new per-connection rate limit, and then the global rate limit. Reported-by: Avery Fay Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 1 + net/ipv4/tcp_input.c | 29 ++++++++++++++++++++++------- net/ipv4/tcp_minisocks.c | 1 + 3 files changed, 24 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index bcc828d3b9b9..66d85a80a1ec 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -153,6 +153,7 @@ struct tcp_sock { u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ u32 lsndtime; /* timestamp of last sent data packet (for restart window) */ + u32 last_oow_ack_time; /* timestamp of last out-of-window ACK */ u32 tsoffset; /* timestamp offset */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 9401aa43b814..8fdd27b17306 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3322,13 +3322,22 @@ static int tcp_ack_update_window(struct sock *sk, const struct sk_buff *skb, u32 } /* RFC 5961 7 [ACK Throttling] */ -static void tcp_send_challenge_ack(struct sock *sk) +static void tcp_send_challenge_ack(struct sock *sk, const struct sk_buff *skb) { /* unprotected vars, we dont care of overwrites */ static u32 challenge_timestamp; static unsigned int challenge_count; - u32 now = jiffies / HZ; + struct tcp_sock *tp = tcp_sk(sk); + u32 now; + + /* First check our per-socket dupack rate limit. */ + if (tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDCHALLENGE, + &tp->last_oow_ack_time)) + return; + /* Then check the check host-wide RFC 5961 rate limit. */ + now = jiffies / HZ; if (now != challenge_timestamp) { challenge_timestamp = now; challenge_count = 0; @@ -3424,7 +3433,7 @@ static int tcp_ack(struct sock *sk, const struct sk_buff *skb, int flag) if (before(ack, prior_snd_una)) { /* RFC 5961 5.2 [Blind Data Injection Attack].[Mitigation] */ if (before(ack, prior_snd_una - tp->max_window)) { - tcp_send_challenge_ack(sk); + tcp_send_challenge_ack(sk, skb); return -1; } goto old_ack; @@ -4993,7 +5002,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, tcp_paws_discard(sk, skb)) { if (!th->rst) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED); - tcp_send_dupack(sk, skb); + if (!tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDPAWS, + &tp->last_oow_ack_time)) + tcp_send_dupack(sk, skb); goto discard; } /* Reset is accepted even if it did not pass PAWS. 
*/ @@ -5010,7 +5022,10 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (!th->rst) { if (th->syn) goto syn_challenge; - tcp_send_dupack(sk, skb); + if (!tcp_oow_rate_limited(sock_net(sk), skb, + LINUX_MIB_TCPACKSKIPPEDSEQ, + &tp->last_oow_ack_time)) + tcp_send_dupack(sk, skb); } goto discard; } @@ -5026,7 +5041,7 @@ static bool tcp_validate_incoming(struct sock *sk, struct sk_buff *skb, if (TCP_SKB_CB(skb)->seq == tp->rcv_nxt) tcp_reset(sk); else - tcp_send_challenge_ack(sk); + tcp_send_challenge_ack(sk, skb); goto discard; } @@ -5040,7 +5055,7 @@ syn_challenge: if (syn_inerr) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS); NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPSYNCHALLENGE); - tcp_send_challenge_ack(sk); + tcp_send_challenge_ack(sk, skb); goto discard; } diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 131aa4950d1c..98a840561ec8 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -467,6 +467,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, tcp_enable_early_retrans(newtp); newtp->tlp_high_seq = 0; newtp->lsndtime = treq->snt_synack; + newtp->last_oow_ack_time = 0; newtp->total_retrans = req->num_retrans; /* So many TCP implementations out there (incorrectly) count the -- cgit v1.2.3-70-g09d2 From 4fb17a6091674f469e8ac85dc770fbf9a9ba7cc8 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Fri, 6 Feb 2015 16:04:41 -0500 Subject: tcp: mitigate ACK loops for connections as tcp_timewait_sock Ensure that in state FIN_WAIT2 or TIME_WAIT, where the connection is represented by a tcp_timewait_sock, we rate limit dupacks in response to incoming packets (a) with TCP timestamps that fail PAWS checks, or (b) with sequence numbers that are out of the acceptable window. We do not send a dupack in response to out-of-window packets if it has been less than sysctl_tcp_invalid_ratelimit (default 500ms) since we last sent a dupack in response to an out-of-window packet. Reported-by: Avery Fay Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ++++ net/ipv4/tcp_minisocks.c | 29 ++++++++++++++++++++++++----- 2 files changed, 28 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 66d85a80a1ec..1a7adb411647 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -342,6 +342,10 @@ struct tcp_timewait_sock { u32 tw_rcv_wnd; u32 tw_ts_offset; u32 tw_ts_recent; + + /* The time we sent the last out-of-window ACK: */ + u32 tw_last_oow_ack_time; + long tw_ts_recent_stamp; #ifdef CONFIG_TCP_MD5SIG struct tcp_md5sig_key *tw_md5_key; diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 98a840561ec8..dd11ac7798c6 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -58,6 +58,25 @@ static bool tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) return seq == e_win && seq == end_seq; } +static enum tcp_tw_status +tcp_timewait_check_oow_rate_limit(struct inet_timewait_sock *tw, + const struct sk_buff *skb, int mib_idx) +{ + struct tcp_timewait_sock *tcptw = tcp_twsk((struct sock *)tw); + + if (!tcp_oow_rate_limited(twsk_net(tw), skb, mib_idx, + &tcptw->tw_last_oow_ack_time)) { + /* Send ACK. Note, we do not put the bucket, + * it will be released by caller. + */ + return TCP_TW_ACK; + } + + /* We are rate-limiting, so just release the tw sock and drop skb. 
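+ * Returning TCP_TW_SUCCESS makes the caller drop the segment silently instead of replying with a duplicate ACK.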
*/ + inet_twsk_put(tw); + return TCP_TW_SUCCESS; +} + /* * * Main purpose of TIME-WAIT state is to close connection gracefully, * when one of ends sits in LAST-ACK or CLOSING retransmitting FIN @@ -116,7 +135,8 @@ tcp_timewait_state_process(struct inet_timewait_sock *tw, struct sk_buff *skb, !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, tcptw->tw_rcv_nxt, tcptw->tw_rcv_nxt + tcptw->tw_rcv_wnd)) - return TCP_TW_ACK; + return tcp_timewait_check_oow_rate_limit( + tw, skb, LINUX_MIB_TCPACKSKIPPEDFINWAIT2); if (th->rst) goto kill; @@ -250,10 +270,8 @@ kill: inet_twsk_schedule(tw, &tcp_death_row, TCP_TIMEWAIT_LEN, TCP_TIMEWAIT_LEN); - /* Send ACK. Note, we do not put the bucket, - * it will be released by caller. - */ - return TCP_TW_ACK; + return tcp_timewait_check_oow_rate_limit( + tw, skb, LINUX_MIB_TCPACKSKIPPEDTIMEWAIT); } inet_twsk_put(tw); return TCP_TW_SUCCESS; @@ -289,6 +307,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo) tcptw->tw_ts_recent = tp->rx_opt.ts_recent; tcptw->tw_ts_recent_stamp = tp->rx_opt.ts_recent_stamp; tcptw->tw_ts_offset = tp->tsoffset; + tcptw->tw_last_oow_ack_time = 0; #if IS_ENABLED(CONFIG_IPV6) if (tw->tw_family == PF_INET6) { -- cgit v1.2.3-70-g09d2 From 096a4cfa5807aa89c78ce12309c0b1c10cf88184 Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 6 Feb 2015 18:54:19 +0100 Subject: net: fix a typo in skb_checksum_validate_zero_check Remove trailing underscore. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 111e665455c3..1bb36edb66b9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3072,7 +3072,7 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto) #define skb_checksum_validate_zero_check(skb, proto, check, \ compute_pseudo) \ - __skb_checksum_validate_(skb, proto, true, true, check, compute_pseudo) + __skb_checksum_validate(skb, proto, true, true, check, compute_pseudo) #define skb_checksum_simple_validate(skb) \ __skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo) -- cgit v1.2.3-70-g09d2 From 567e4b79731c352a17d73c483959f795d3593e03 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 6 Feb 2015 12:59:01 -0800 Subject: net: rfs: add hash collision detection Receive Flow Steering is a nice solution but suffers from hash collisions when a mix of connected and unconnected traffic is received on the host, when flow hash table is populated. Also, clearing flow in inet_release() makes RFS not very good for short lived flows, as many packets can follow close(). (FIN , ACK packets, ...) This patch extends the information stored into global hash table to not only include cpu number, but upper part of the hash value. I use a 32bit value, and dynamically split it in two parts. For host with less than 64 possible cpus, this gives 6 bits for the cpu number, and 26 (32-6) bits for the upper part of the hash. Since hash bucket selection use low order bits of the hash, we have a full hash match, if /proc/sys/net/core/rps_sock_flow_entries is big enough. If the hash found in flow table does not match, we fallback to RPS (if it is enabled for the rxqueue). This means that a packet for an non connected flow can avoid the IPI through a unrelated/victim CPU. This also means we no longer have to clear the table at socket close time, and this helps short lived flows performance. 
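As a rough illustration of the new table layout (a stand-alone user-space sketch, not part of the patch; make_ident() and ident_matches() are made-up helper names), the per-entry encoding and the full-hash match check reduce to:

	#include <stdint.h>
	#include <stdio.h>

	static uint32_t rps_cpu_mask;	/* roundup_pow_of_two(nr_cpu_ids) - 1 */

	/* Store the upper hash bits together with the cpu number. */
	static uint32_t make_ident(uint32_t hash, uint32_t cpu)
	{
		return (hash & ~rps_cpu_mask) | (cpu & rps_cpu_mask);
	}

	/* A stored entry only counts as a match if the upper hash bits agree. */
	static int ident_matches(uint32_t ident, uint32_t hash)
	{
		return ((ident ^ hash) & ~rps_cpu_mask) == 0;
	}

	int main(void)
	{
		uint32_t ident;

		rps_cpu_mask = 64 - 1;	/* 64 possible cpus -> mask 0x3f */
		ident = make_ident(0xabcd1234u, 3);

		printf("ident=0x%08x cpu=%u match=%d\n",
		       ident, ident & rps_cpu_mask,
		       ident_matches(ident, 0xabcd1234u));
		return 0;
	}

With 64 possible cpus this prints ident=0xabcd1203 cpu=3 match=1: a later packet hashing to 0xabcd1234 is steered to cpu 3 only while the stored upper hash bits still match; otherwise the code falls back to plain RPS.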
Signed-off-by: Eric Dumazet Acked-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/tun.c | 5 +---- include/linux/netdevice.h | 34 ++++++++++++++++---------------- include/net/sock.h | 24 +---------------------- net/core/dev.c | 48 ++++++++++++++++++++++++++-------------------- net/core/sysctl_net_core.c | 2 +- net/ipv4/af_inet.c | 2 -- 6 files changed, 47 insertions(+), 68 deletions(-) (limited to 'include') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index ad7d3d5f3ee5..857dca47bf80 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -256,7 +256,6 @@ static void tun_flow_delete(struct tun_struct *tun, struct tun_flow_entry *e) { tun_debug(KERN_INFO, tun, "delete flow: hash %u index %u\n", e->rxhash, e->queue_index); - sock_rps_reset_flow_hash(e->rps_rxhash); hlist_del_rcu(&e->hash_link); kfree_rcu(e, rcu); --tun->flow_count; @@ -373,10 +372,8 @@ unlock: */ static inline void tun_flow_save_rps_rxhash(struct tun_flow_entry *e, u32 hash) { - if (unlikely(e->rps_rxhash != hash)) { - sock_rps_reset_flow_hash(e->rps_rxhash); + if (unlikely(e->rps_rxhash != hash)) e->rps_rxhash = hash; - } } /* We try to identify a flow through its rxhash first. The reason that diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ce784d5018e0..ab3b7cef4638 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -644,39 +644,39 @@ struct rps_dev_flow_table { /* * The rps_sock_flow_table contains mappings of flows to the last CPU * on which they were processed by the application (set in recvmsg). + * Each entry is a 32bit value. Upper part is the high order bits + * of flow hash, lower part is cpu number. + * rps_cpu_mask is used to partition the space, depending on number of + * possible cpus : rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1 + * For example, if 64 cpus are possible, rps_cpu_mask = 0x3f, + * meaning we use 32-6=26 bits for the hash. 
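+ * E.g. with 64 possible cpus, a flow hashed to 0xabcd1234 and last processed on cpu 3 is stored as (0xabcd1234 & ~0x3f) | 3 = 0xabcd1203.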
*/ struct rps_sock_flow_table { - unsigned int mask; - u16 ents[0]; + u32 mask; + u32 ents[0]; }; -#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (sizeof(struct rps_sock_flow_table) + \ - ((_num) * sizeof(u16))) +#define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) #define RPS_NO_CPU 0xffff +extern u32 rps_cpu_mask; +extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; + static inline void rps_record_sock_flow(struct rps_sock_flow_table *table, u32 hash) { if (table && hash) { - unsigned int cpu, index = hash & table->mask; + unsigned int index = hash & table->mask; + u32 val = hash & ~rps_cpu_mask; /* We only give a hint, preemption can change cpu under us */ - cpu = raw_smp_processor_id(); + val |= raw_smp_processor_id(); - if (table->ents[index] != cpu) - table->ents[index] = cpu; + if (table->ents[index] != val) + table->ents[index] = val; } } -static inline void rps_reset_sock_flow(struct rps_sock_flow_table *table, - u32 hash) -{ - if (table && hash) - table->ents[hash & table->mask] = RPS_NO_CPU; -} - -extern struct rps_sock_flow_table __rcu *rps_sock_flow_table; - #ifdef CONFIG_RFS_ACCEL bool rps_may_expire_flow(struct net_device *dev, u16 rxq_index, u32 flow_id, u16 filter_id); diff --git a/include/net/sock.h b/include/net/sock.h index d28b8fededd6..e13824570b0f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -857,18 +857,6 @@ static inline void sock_rps_record_flow_hash(__u32 hash) #endif } -static inline void sock_rps_reset_flow_hash(__u32 hash) -{ -#ifdef CONFIG_RPS - struct rps_sock_flow_table *sock_flow_table; - - rcu_read_lock(); - sock_flow_table = rcu_dereference(rps_sock_flow_table); - rps_reset_sock_flow(sock_flow_table, hash); - rcu_read_unlock(); -#endif -} - static inline void sock_rps_record_flow(const struct sock *sk) { #ifdef CONFIG_RPS @@ -876,28 +864,18 @@ static inline void sock_rps_record_flow(const struct sock *sk) #endif } -static inline void sock_rps_reset_flow(const struct sock *sk) -{ -#ifdef CONFIG_RPS - sock_rps_reset_flow_hash(sk->sk_rxhash); -#endif -} - static inline void sock_rps_save_rxhash(struct sock *sk, const struct sk_buff *skb) { #ifdef CONFIG_RPS - if (unlikely(sk->sk_rxhash != skb->hash)) { - sock_rps_reset_flow(sk); + if (unlikely(sk->sk_rxhash != skb->hash)) sk->sk_rxhash = skb->hash; - } #endif } static inline void sock_rps_reset_rxhash(struct sock *sk) { #ifdef CONFIG_RPS - sock_rps_reset_flow(sk); sk->sk_rxhash = 0; #endif } diff --git a/net/core/dev.c b/net/core/dev.c index a3a96ffc67f4..8be38675e1a8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3030,6 +3030,8 @@ static inline void ____napi_schedule(struct softnet_data *sd, /* One global table that all flow-based protocols share. 
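+ * It is filled from the application side via rps_record_sock_flow() (called on recvmsg) and consulted by get_rps_cpu() on the receive path.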
*/ struct rps_sock_flow_table __rcu *rps_sock_flow_table __read_mostly; EXPORT_SYMBOL(rps_sock_flow_table); +u32 rps_cpu_mask __read_mostly; +EXPORT_SYMBOL(rps_cpu_mask); struct static_key rps_needed __read_mostly; @@ -3086,16 +3088,17 @@ set_rps_cpu(struct net_device *dev, struct sk_buff *skb, static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, struct rps_dev_flow **rflowp) { - struct netdev_rx_queue *rxqueue; - struct rps_map *map; + const struct rps_sock_flow_table *sock_flow_table; + struct netdev_rx_queue *rxqueue = dev->_rx; struct rps_dev_flow_table *flow_table; - struct rps_sock_flow_table *sock_flow_table; + struct rps_map *map; int cpu = -1; - u16 tcpu; + u32 tcpu; u32 hash; if (skb_rx_queue_recorded(skb)) { u16 index = skb_get_rx_queue(skb); + if (unlikely(index >= dev->real_num_rx_queues)) { WARN_ONCE(dev->real_num_rx_queues > 1, "%s received packet on queue %u, but number " @@ -3103,39 +3106,40 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, dev->name, index, dev->real_num_rx_queues); goto done; } - rxqueue = dev->_rx + index; - } else - rxqueue = dev->_rx; + rxqueue += index; + } + /* Avoid computing hash if RFS/RPS is not active for this rxqueue */ + + flow_table = rcu_dereference(rxqueue->rps_flow_table); map = rcu_dereference(rxqueue->rps_map); - if (map) { - if (map->len == 1 && - !rcu_access_pointer(rxqueue->rps_flow_table)) { - tcpu = map->cpus[0]; - if (cpu_online(tcpu)) - cpu = tcpu; - goto done; - } - } else if (!rcu_access_pointer(rxqueue->rps_flow_table)) { + if (!flow_table && !map) goto done; - } skb_reset_network_header(skb); hash = skb_get_hash(skb); if (!hash) goto done; - flow_table = rcu_dereference(rxqueue->rps_flow_table); sock_flow_table = rcu_dereference(rps_sock_flow_table); if (flow_table && sock_flow_table) { - u16 next_cpu; struct rps_dev_flow *rflow; + u32 next_cpu; + u32 ident; + + /* First check into global flow table if there is a match */ + ident = sock_flow_table->ents[hash & sock_flow_table->mask]; + if ((ident ^ hash) & ~rps_cpu_mask) + goto try_rps; + next_cpu = ident & rps_cpu_mask; + + /* OK, now we know there is a match, + * we can look at the local (per receive queue) flow table + */ rflow = &flow_table->flows[hash & flow_table->mask]; tcpu = rflow->cpu; - next_cpu = sock_flow_table->ents[hash & sock_flow_table->mask]; - /* * If the desired CPU (where last recvmsg was done) is * different from current CPU (one in the rx-queue flow @@ -3162,6 +3166,8 @@ static int get_rps_cpu(struct net_device *dev, struct sk_buff *skb, } } +try_rps: + if (map) { tcpu = map->cpus[reciprocal_scale(hash, map->len)]; if (cpu_online(tcpu)) { diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index fde21d19e61b..7a31be5e361f 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -65,7 +65,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, mutex_unlock(&sock_flow_mutex); return -ENOMEM; } - + rps_cpu_mask = roundup_pow_of_two(nr_cpu_ids) - 1; sock_table->mask = size - 1; } else sock_table = orig_sock_table; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index a44773c8346c..d2e49baaff63 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -395,8 +395,6 @@ int inet_release(struct socket *sock) if (sk) { long timeout; - sock_rps_reset_flow(sk); - /* Applications forget to leave groups before exiting */ ip_mc_drop_socket(sk); -- cgit v1.2.3-70-g09d2 From 93c1af6ca94c1e763efba76a127b5c135e3d23a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 8 Feb 
2015 20:39:13 -0800 Subject: net:rfs: adjust table size checking Make sure root user does not try something stupid. Also make sure mask field in struct rps_sock_flow_table does not share a cache line with the potentially often dirtied flow table. Signed-off-by: Eric Dumazet Fixes: 567e4b79731c ("net: rfs: add hash collision detection") Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- net/core/sysctl_net_core.c | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ab3b7cef4638..d115256ed5a2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -653,7 +653,8 @@ struct rps_dev_flow_table { */ struct rps_sock_flow_table { u32 mask; - u32 ents[0]; + + u32 ents[0] ____cacheline_aligned_in_smp; }; #define RPS_SOCK_FLOW_TABLE_SIZE(_num) (offsetof(struct rps_sock_flow_table, ents[_num])) diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 7a31be5e361f..eaa51ddf2368 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -52,7 +52,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, if (write) { if (size) { - if (size > 1<<30) { + if (size > 1<<29) { /* Enforce limit to prevent overflow */ mutex_unlock(&sock_flow_mutex); return -EINVAL; -- cgit v1.2.3-70-g09d2 From d0796d1ef63deb38147729664691ba3090930b26 Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:04 +0100 Subject: tipc: convert legacy nl bearer dump to nl compat Introduce a framework for dumping netlink data from the new netlink API and formatting it to the old legacy API format. This is done by looping the dump data and calling a format handler for each entity, in this case a bearer. We dump until either all data is dumped or we reach the limited buffer size of the legacy API. Remember, the legacy API doesn't scale. In this commit we convert TIPC_CMD_GET_BEARER_NAMES to use the compat layer. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. 
Miller --- include/uapi/linux/tipc_config.h | 5 + net/tipc/bearer.c | 29 ----- net/tipc/bearer.h | 1 - net/tipc/config.c | 3 - net/tipc/netlink_compat.c | 274 ++++++++++++++++++++++++++++++++++++++- 5 files changed, 278 insertions(+), 34 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index 876d0a14863c..e1f4f05f4c5c 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -272,6 +272,11 @@ static inline int TLV_CHECK(const void *tlv, __u16 space, __u16 exp_type) (ntohs(((struct tlv_desc *)tlv)->tlv_type) == exp_type); } +static inline int TLV_GET_LEN(struct tlv_desc *tlv) +{ + return ntohs(tlv->tlv_len); +} + static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) { struct tlv_desc *tlv_ptr; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 35d400e8c2e5..7a9e29641e61 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -205,35 +205,6 @@ struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name) return NULL; } -/** - * tipc_bearer_get_names - record names of bearers in buffer - */ -struct sk_buff *tipc_bearer_get_names(struct net *net) -{ - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct sk_buff *buf; - struct tipc_bearer *b; - int i, j; - - buf = tipc_cfg_reply_alloc(MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME)); - if (!buf) - return NULL; - - for (i = 0; media_info_array[i] != NULL; i++) { - for (j = 0; j < MAX_BEARERS; j++) { - b = rtnl_dereference(tn->bearer_list[j]); - if (!b) - continue; - if (b->media == media_info_array[i]) { - tipc_cfg_append_tlv(buf, TIPC_TLV_BEARER_NAME, - b->name, - strlen(b->name) + 1); - } - } - } - return buf; -} - void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest) { struct tipc_net *tn = net_generic(net, tipc_net_id); diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index c035e3e24764..956858276d93 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -205,7 +205,6 @@ void tipc_disable_l2_media(struct tipc_bearer *b); int tipc_l2_send_msg(struct net *net, struct sk_buff *buf, struct tipc_bearer *b, struct tipc_media_addr *dest); -struct sk_buff *tipc_bearer_get_names(struct net *net); void tipc_bearer_add_dest(struct net *net, u32 bearer_id, u32 dest); void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest); struct tipc_bearer *tipc_bearer_find(struct net *net, const char *name); diff --git a/net/tipc/config.c b/net/tipc/config.c index 6873360cda53..52e84b0ac48a 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -252,9 +252,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, rep_tlv_buf = tipc_nametbl_get(net, req_tlv_area, req_tlv_space); break; - case TIPC_CMD_GET_BEARER_NAMES: - rep_tlv_buf = tipc_bearer_get_names(net); - break; case TIPC_CMD_GET_MEDIA_NAMES: rep_tlv_buf = tipc_media_get_names(); break; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index f752854c8b10..bd75ea290e3a 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -33,9 +33,265 @@ #include "core.h" #include "config.h" +#include "bearer.h" #include #include +/* The legacy API had an artificial message length limit called + * ULTRA_STRING_MAX_LEN. 
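+ * Replies built by the compat layer are therefore capped at TLV_SPACE(ULTRA_STRING_MAX_LEN) bytes (TIPC_SKB_MAX below) to mirror that limit.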
+ */ +#define ULTRA_STRING_MAX_LEN 32768 + +#define TIPC_SKB_MAX TLV_SPACE(ULTRA_STRING_MAX_LEN) + +#define REPLY_TRUNCATED "\n" + +struct tipc_nl_compat_msg { + u16 cmd; + int rep_size; + struct sk_buff *rep; + struct tlv_desc *req; + struct sock *dst_sk; +}; + +struct tipc_nl_compat_cmd_dump { + int (*dumpit)(struct sk_buff *, struct netlink_callback *); + int (*format)(struct tipc_nl_compat_msg *msg, struct nlattr **attrs); +}; + +static int tipc_skb_tailroom(struct sk_buff *skb) +{ + int tailroom; + int limit; + + tailroom = skb_tailroom(skb); + limit = TIPC_SKB_MAX - skb->len; + + if (tailroom < limit) + return tailroom; + + return limit; +} + +static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) +{ + struct tlv_desc *tlv = (struct tlv_desc *)skb_tail_pointer(skb); + + if (tipc_skb_tailroom(skb) < TLV_SPACE(len)) + return -EMSGSIZE; + + skb_put(skb, TLV_SPACE(len)); + tlv->tlv_type = htons(type); + tlv->tlv_len = htons(TLV_LENGTH(len)); + if (len && data) + memcpy(TLV_DATA(tlv), data, len); + + return 0; +} + +static struct sk_buff *tipc_tlv_alloc(int size) +{ + int hdr_len; + struct sk_buff *buf; + + size = TLV_SPACE(size); + hdr_len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); + + buf = alloc_skb(hdr_len + size, GFP_KERNEL); + if (!buf) + return NULL; + + skb_reserve(buf, hdr_len); + + return buf; +} + +static struct sk_buff *tipc_get_err_tlv(char *str) +{ + int str_len = strlen(str) + 1; + struct sk_buff *buf; + + buf = tipc_tlv_alloc(TLV_SPACE(str_len)); + if (buf) + tipc_add_tlv(buf, TIPC_TLV_ERROR_STRING, str, str_len); + + return buf; +} + +static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, + struct tipc_nl_compat_msg *msg, + struct sk_buff *arg) +{ + int len = 0; + int err; + struct sk_buff *buf; + struct nlmsghdr *nlmsg; + struct netlink_callback cb; + + memset(&cb, 0, sizeof(cb)); + cb.nlh = (struct nlmsghdr *)arg->data; + cb.skb = arg; + + buf = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!buf) + return -ENOMEM; + + buf->sk = msg->dst_sk; + + do { + int rem; + + len = (*cmd->dumpit)(buf, &cb); + + nlmsg_for_each_msg(nlmsg, nlmsg_hdr(buf), len, rem) { + struct nlattr **attrs; + + err = tipc_nlmsg_parse(nlmsg, &attrs); + if (err) + goto err_out; + + err = (*cmd->format)(msg, attrs); + if (err) + goto err_out; + + if (tipc_skb_tailroom(msg->rep) <= 1) { + err = -EMSGSIZE; + goto err_out; + } + } + + skb_reset_tail_pointer(buf); + buf->len = 0; + + } while (len); + + err = 0; + +err_out: + kfree_skb(buf); + + if (err == -EMSGSIZE) { + /* The legacy API only considered messages filling + * "ULTRA_STRING_MAX_LEN" to be truncated. 
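+ * So the truncation marker is appended only when the reply has (almost) filled TIPC_SKB_MAX; either way the -EMSGSIZE result is reported back as a successful, possibly partial, reply.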
+ */ + if ((TIPC_SKB_MAX - msg->rep->len) <= 1) { + char *tail = skb_tail_pointer(msg->rep); + + if (*tail != '\0') + sprintf(tail - sizeof(REPLY_TRUNCATED) - 1, + REPLY_TRUNCATED); + } + + return 0; + } + + return err; +} + +static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + struct sk_buff *arg; + + msg->rep = tipc_tlv_alloc(msg->rep_size); + if (!msg->rep) + return -ENOMEM; + + arg = nlmsg_new(0, GFP_KERNEL); + if (!arg) { + kfree_skb(msg->rep); + return -ENOMEM; + } + + err = __tipc_nl_compat_dumpit(cmd, msg, arg); + if (err) + kfree_skb(msg->rep); + + kfree_skb(arg); + + return err; +} + +static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + struct nlattr *bearer[TIPC_NLA_BEARER_MAX + 1]; + + nla_parse_nested(bearer, TIPC_NLA_BEARER_MAX, attrs[TIPC_NLA_BEARER], + NULL); + + return tipc_add_tlv(msg->rep, TIPC_TLV_BEARER_NAME, + nla_data(bearer[TIPC_NLA_BEARER_NAME]), + nla_len(bearer[TIPC_NLA_BEARER_NAME])); +} + +static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) +{ + struct tipc_nl_compat_cmd_dump dump; + + memset(&dump, 0, sizeof(dump)); + + switch (msg->cmd) { + case TIPC_CMD_GET_BEARER_NAMES: + msg->rep_size = MAX_BEARERS * TLV_SPACE(TIPC_MAX_BEARER_NAME); + dump.dumpit = tipc_nl_bearer_dump; + dump.format = tipc_nl_compat_bearer_dump; + return tipc_nl_compat_dumpit(&dump, msg); + } + + return -EOPNOTSUPP; +} + +static int tipc_nl_compat_recv(struct sk_buff *skb, struct genl_info *info) +{ + int err; + int len; + struct tipc_nl_compat_msg msg; + struct nlmsghdr *req_nlh; + struct nlmsghdr *rep_nlh; + struct tipc_genlmsghdr *req_userhdr = info->userhdr; + struct net *net = genl_info_net(info); + + memset(&msg, 0, sizeof(msg)); + + req_nlh = (struct nlmsghdr *)skb->data; + msg.req = nlmsg_data(req_nlh) + GENL_HDRLEN + TIPC_GENL_HDRLEN; + msg.cmd = req_userhdr->cmd; + msg.dst_sk = info->dst_sk; + + if ((msg.cmd & 0xC000) && (!netlink_net_capable(skb, CAP_NET_ADMIN))) { + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_NET_ADMIN); + err = -EACCES; + goto send; + } + + len = nlmsg_attrlen(req_nlh, GENL_HDRLEN + TIPC_GENL_HDRLEN); + if (TLV_GET_LEN(msg.req) && !TLV_OK(msg.req, len)) { + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); + err = -EOPNOTSUPP; + goto send; + } + + err = tipc_nl_compat_handle(&msg); + if (err == -EOPNOTSUPP) + msg.rep = tipc_get_err_tlv(TIPC_CFG_NOT_SUPPORTED); + else if (err == -EINVAL) + msg.rep = tipc_get_err_tlv(TIPC_CFG_TLV_ERROR); +send: + if (!msg.rep) + return err; + + len = nlmsg_total_size(GENL_HDRLEN + TIPC_GENL_HDRLEN); + skb_push(msg.rep, len); + rep_nlh = nlmsg_hdr(msg.rep); + memcpy(rep_nlh, info->nlhdr, len); + rep_nlh->nlmsg_len = msg.rep->len; + genlmsg_unicast(net, msg.rep, NETLINK_CB(skb).portid); + + return err; +} + static int handle_cmd(struct sk_buff *skb, struct genl_info *info) { struct net *net = genl_info_net(info); @@ -69,6 +325,22 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) return 0; } +/* Temporary function to keep functionality throughout the patchset + * without having to mess with the global variables and other trickery + * of the old API. 
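+ * Commands that have already been converted are routed to tipc_nl_compat_recv(); everything else still takes the old handle_cmd() path.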
+ */ +static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) +{ + struct tipc_genlmsghdr *req = info->userhdr; + + switch (req->cmd) { + case TIPC_CMD_GET_BEARER_NAMES: + return tipc_nl_compat_recv(skb, info); + } + + return handle_cmd(skb, info); +} + static struct genl_family tipc_genl_compat_family = { .id = GENL_ID_GENERATE, .name = TIPC_GENL_NAME, @@ -81,7 +353,7 @@ static struct genl_family tipc_genl_compat_family = { static struct genl_ops tipc_genl_compat_ops[] = { { .cmd = TIPC_GENL_CMD, - .doit = handle_cmd, + .doit = tipc_nl_compat_tmp_wrap, }, }; -- cgit v1.2.3-70-g09d2 From 9ab154658a7ff2c5076607e02f18581c6859fc2a Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:05 +0100 Subject: tipc: convert legacy nl bearer enable/disable to nl compat Introduce a framework for transcoding legacy nl action into actions (.doit) calls from the new nl API. This is done by converting the incoming TLV data into netlink data with nested netlink attributes. Unfortunately due to the randomness of the legacy API we can't do this generically so each legacy netlink command requires a specific transcoding recipe. In this case for bearer enable and bearer disable. Convert TIPC_CMD_ENABLE_BEARER and TIPC_CMD_DISABLE_BEARER into doit compat calls. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- include/uapi/linux/tipc_config.h | 5 ++ net/tipc/bearer.c | 26 ++----- net/tipc/bearer.h | 3 - net/tipc/config.c | 33 --------- net/tipc/netlink_compat.c | 144 +++++++++++++++++++++++++++++++++++++++ 5 files changed, 154 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index e1f4f05f4c5c..f9226566c1b8 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -277,6 +277,11 @@ static inline int TLV_GET_LEN(struct tlv_desc *tlv) return ntohs(tlv->tlv_len); } +static inline int TLV_CHECK_TYPE(struct tlv_desc *tlv, __u16 type) +{ + return (ntohs(tlv->tlv_type) == type); +} + static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) { struct tlv_desc *tlv_ptr; diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 7a9e29641e61..de1c800ef806 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -236,8 +236,8 @@ void tipc_bearer_remove_dest(struct net *net, u32 bearer_id, u32 dest) /** * tipc_enable_bearer - enable bearer with the given name */ -int tipc_enable_bearer(struct net *net, const char *name, u32 disc_domain, - u32 priority) +static int tipc_enable_bearer(struct net *net, const char *name, + u32 disc_domain, u32 priority) { struct tipc_net *tn = net_generic(net, tipc_net_id); struct tipc_bearer *b_ptr; @@ -393,22 +393,6 @@ static void bearer_disable(struct net *net, struct tipc_bearer *b_ptr, kfree_rcu(b_ptr, rcu); } -int tipc_disable_bearer(struct net *net, const char *name) -{ - struct tipc_bearer *b_ptr; - int res; - - b_ptr = tipc_bearer_find(net, name); - if (b_ptr == NULL) { - pr_warn("Attempt to disable unknown bearer <%s>\n", name); - res = -EINVAL; - } else { - bearer_disable(net, b_ptr, false); - res = 0; - } - return res; -} - int tipc_enable_l2_media(struct net *net, struct tipc_bearer *b) { struct net_device *dev; @@ -756,7 +740,7 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) char *name; struct tipc_bearer *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; - struct net *net = genl_info_net(info); + 
struct net *net = sock_net(skb->sk); if (!info->attrs[TIPC_NLA_BEARER]) return -EINVAL; @@ -787,11 +771,11 @@ int tipc_nl_bearer_disable(struct sk_buff *skb, struct genl_info *info) int tipc_nl_bearer_enable(struct sk_buff *skb, struct genl_info *info) { - struct net *net = genl_info_net(info); - struct tipc_net *tn = net_generic(net, tipc_net_id); int err; char *bearer; struct nlattr *attrs[TIPC_NLA_BEARER_MAX + 1]; + struct net *net = sock_net(skb->sk); + struct tipc_net *tn = net_generic(net, tipc_net_id); u32 domain; u32 prio; diff --git a/net/tipc/bearer.h b/net/tipc/bearer.h index 956858276d93..06f25d144871 100644 --- a/net/tipc/bearer.h +++ b/net/tipc/bearer.h @@ -173,9 +173,6 @@ struct tipc_bearer_names { */ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b_ptr); -int tipc_enable_bearer(struct net *net, const char *bearer_name, - u32 disc_domain, u32 priority); -int tipc_disable_bearer(struct net *net, const char *name); /* * Routines made available to TIPC by supported media types diff --git a/net/tipc/config.c b/net/tipc/config.c index 52e84b0ac48a..f8cd5e1b545f 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -134,33 +134,6 @@ static struct sk_buff *tipc_show_stats(void) return buf; } -static struct sk_buff *cfg_enable_bearer(struct net *net) -{ - struct tipc_bearer_config *args; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_CONFIG)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - args = (struct tipc_bearer_config *)TLV_DATA(req_tlv_area); - if (tipc_enable_bearer(net, args->name, - ntohl(args->disc_domain), - ntohl(args->priority))) - return tipc_cfg_reply_error_string("unable to enable bearer"); - - return tipc_cfg_reply_none(); -} - -static struct sk_buff *cfg_disable_bearer(struct net *net) -{ - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_BEARER_NAME)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - if (tipc_disable_bearer(net, (char *)TLV_DATA(req_tlv_area))) - return tipc_cfg_reply_error_string("unable to disable bearer"); - - return tipc_cfg_reply_none(); -} - static struct sk_buff *cfg_set_own_addr(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); @@ -267,12 +240,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, rep_tlv_buf = tipc_link_cmd_config(net, req_tlv_area, req_tlv_space, cmd); break; - case TIPC_CMD_ENABLE_BEARER: - rep_tlv_buf = cfg_enable_bearer(net); - break; - case TIPC_CMD_DISABLE_BEARER: - rep_tlv_buf = cfg_disable_bearer(net); - break; case TIPC_CMD_SET_NODE_ADDR: rep_tlv_buf = cfg_set_own_addr(net); break; diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index bd75ea290e3a..12b0f4424797 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -49,6 +49,7 @@ struct tipc_nl_compat_msg { u16 cmd; int rep_size; + int req_type; struct sk_buff *rep; struct tlv_desc *req; struct sock *dst_sk; @@ -59,6 +60,11 @@ struct tipc_nl_compat_cmd_dump { int (*format)(struct tipc_nl_compat_msg *msg, struct nlattr **attrs); }; +struct tipc_nl_compat_cmd_doit { + int (*doit)(struct sk_buff *skb, struct genl_info *info); + int (*transcode)(struct sk_buff *skb, struct tipc_nl_compat_msg *msg); +}; + static int tipc_skb_tailroom(struct sk_buff *skb) { int tailroom; @@ -213,6 +219,78 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, return err; } +static int __tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + struct sk_buff 
*doit_buf; + struct sk_buff *trans_buf; + struct nlattr **attrbuf; + struct genl_info info; + + trans_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!trans_buf) + return -ENOMEM; + + err = (*cmd->transcode)(trans_buf, msg); + if (err) + goto trans_out; + + attrbuf = kmalloc((tipc_genl_family.maxattr + 1) * + sizeof(struct nlattr *), GFP_KERNEL); + if (!attrbuf) { + err = -ENOMEM; + goto trans_out; + } + + err = nla_parse(attrbuf, tipc_genl_family.maxattr, + (const struct nlattr *)trans_buf->data, + trans_buf->len, NULL); + if (err) + goto parse_out; + + doit_buf = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + if (!doit_buf) { + err = -ENOMEM; + goto parse_out; + } + + doit_buf->sk = msg->dst_sk; + + memset(&info, 0, sizeof(info)); + info.attrs = attrbuf; + + err = (*cmd->doit)(doit_buf, &info); + + kfree_skb(doit_buf); +parse_out: + kfree(attrbuf); +trans_out: + kfree_skb(trans_buf); + + return err; +} + +static int tipc_nl_compat_doit(struct tipc_nl_compat_cmd_doit *cmd, + struct tipc_nl_compat_msg *msg) +{ + int err; + + if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type)) + return -EINVAL; + + err = __tipc_nl_compat_doit(cmd, msg); + if (err) + return err; + + /* The legacy API considered an empty message a success message */ + msg->rep = tipc_tlv_alloc(0); + if (!msg->rep) + return -ENOMEM; + + return 0; +} + static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, struct nlattr **attrs) { @@ -226,11 +304,65 @@ static int tipc_nl_compat_bearer_dump(struct tipc_nl_compat_msg *msg, nla_len(bearer[TIPC_NLA_BEARER_NAME])); } +static int tipc_nl_compat_bearer_enable(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + struct nlattr *prop; + struct nlattr *bearer; + struct tipc_bearer_config *b; + + b = (struct tipc_bearer_config *)TLV_DATA(msg->req); + + bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name)) + return -EMSGSIZE; + + if (nla_put_u32(skb, TIPC_NLA_BEARER_DOMAIN, ntohl(b->disc_domain))) + return -EMSGSIZE; + + if (ntohl(b->priority) <= TIPC_MAX_LINK_PRI) { + prop = nla_nest_start(skb, TIPC_NLA_BEARER_PROP); + if (!prop) + return -EMSGSIZE; + if (nla_put_u32(skb, TIPC_NLA_PROP_PRIO, ntohl(b->priority))) + return -EMSGSIZE; + nla_nest_end(skb, prop); + } + nla_nest_end(skb, bearer); + + return 0; +} + +static int tipc_nl_compat_bearer_disable(struct sk_buff *skb, + struct tipc_nl_compat_msg *msg) +{ + char *name; + struct nlattr *bearer; + + name = (char *)TLV_DATA(msg->req); + + bearer = nla_nest_start(skb, TIPC_NLA_BEARER); + if (!bearer) + return -EMSGSIZE; + + if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name)) + return -EMSGSIZE; + + nla_nest_end(skb, bearer); + + return 0; +} + static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) { struct tipc_nl_compat_cmd_dump dump; + struct tipc_nl_compat_cmd_doit doit; memset(&dump, 0, sizeof(dump)); + memset(&doit, 0, sizeof(doit)); switch (msg->cmd) { case TIPC_CMD_GET_BEARER_NAMES: @@ -238,6 +370,16 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) dump.dumpit = tipc_nl_bearer_dump; dump.format = tipc_nl_compat_bearer_dump; return tipc_nl_compat_dumpit(&dump, msg); + case TIPC_CMD_ENABLE_BEARER: + msg->req_type = TIPC_TLV_BEARER_CONFIG; + doit.doit = tipc_nl_bearer_enable; + doit.transcode = tipc_nl_compat_bearer_enable; + return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_DISABLE_BEARER: + msg->req_type = TIPC_TLV_BEARER_NAME; + doit.doit = tipc_nl_bearer_disable; + doit.transcode = 
tipc_nl_compat_bearer_disable; + return tipc_nl_compat_doit(&doit, msg); } return -EOPNOTSUPP; @@ -335,6 +477,8 @@ static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) switch (req->cmd) { case TIPC_CMD_GET_BEARER_NAMES: + case TIPC_CMD_ENABLE_BEARER: + case TIPC_CMD_DISABLE_BEARER: return tipc_nl_compat_recv(skb, info); } -- cgit v1.2.3-70-g09d2 From f2b3b2d4ccbf9666f5f42a21347cd1aaa532b2fa Mon Sep 17 00:00:00 2001 From: Richard Alpe Date: Mon, 9 Feb 2015 09:50:06 +0100 Subject: tipc: convert legacy nl link stat to nl compat Add functionality for safely appending string data to a TLV without keeping write count in the caller. Convert TIPC_CMD_SHOW_LINK_STATS to compat dumpit. Signed-off-by: Richard Alpe Reviewed-by: Erik Hugne Reviewed-by: Ying Xue Reviewed-by: Jon Maloy Signed-off-by: David S. Miller --- include/uapi/linux/tipc_config.h | 10 ++ net/tipc/bcast.c | 43 -------- net/tipc/bcast.h | 1 - net/tipc/config.c | 4 - net/tipc/link.c | 141 --------------------------- net/tipc/link.h | 3 - net/tipc/netlink_compat.c | 205 +++++++++++++++++++++++++++++++++++++++ 7 files changed, 215 insertions(+), 192 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/tipc_config.h b/include/uapi/linux/tipc_config.h index f9226566c1b8..087b0ef82c07 100644 --- a/include/uapi/linux/tipc_config.h +++ b/include/uapi/linux/tipc_config.h @@ -277,11 +277,21 @@ static inline int TLV_GET_LEN(struct tlv_desc *tlv) return ntohs(tlv->tlv_len); } +static inline void TLV_SET_LEN(struct tlv_desc *tlv, __u16 len) +{ + tlv->tlv_len = htons(len); +} + static inline int TLV_CHECK_TYPE(struct tlv_desc *tlv, __u16 type) { return (ntohs(tlv->tlv_type) == type); } +static inline void TLV_SET_TYPE(struct tlv_desc *tlv, __u16 type) +{ + tlv->tlv_type = htons(type); +} + static inline int TLV_SET(void *tlv, __u16 type, void *data, __u16 len) { struct tlv_desc *tlv_ptr; diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index e96fd6a6d5c2..3e41704832de 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -860,49 +860,6 @@ msg_full: return -EMSGSIZE; } -int tipc_bclink_stats(struct net *net, char *buf, const u32 buf_size) -{ - int ret; - struct tipc_stats *s; - struct tipc_net *tn = net_generic(net, tipc_net_id); - struct tipc_link *bcl = tn->bcl; - - if (!bcl) - return 0; - - tipc_bclink_lock(net); - - s = &bcl->stats; - - ret = tipc_snprintf(buf, buf_size, "Link <%s>\n" - " Window:%u packets\n", - bcl->name, bcl->queue_limit[0]); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX packets:%u fragments:%u/%u bundles:%u/%u\n", - s->recv_info, s->recv_fragments, - s->recv_fragmented, s->recv_bundles, - s->recv_bundled); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX packets:%u fragments:%u/%u bundles:%u/%u\n", - s->sent_info, s->sent_fragments, - s->sent_fragmented, s->sent_bundles, - s->sent_bundled); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX naks:%u defs:%u dups:%u\n", - s->recv_nacks, s->deferred_recv, s->duplicates); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX naks:%u acks:%u dups:%u\n", - s->sent_nacks, s->sent_acks, s->retransmitted); - ret += tipc_snprintf(buf + ret, buf_size - ret, - " Congestion link:%u Send queue max:%u avg:%u\n", - s->link_congs, s->max_queue_sz, - s->queue_sz_counts ? 
- (s->accu_queue_sz / s->queue_sz_counts) : 0); - - tipc_bclink_unlock(net); - return ret; -} - int tipc_bclink_reset_stats(struct net *net) { struct tipc_net *tn = net_generic(net, tipc_net_id); diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index a910c0b9f249..43f397fbac55 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -127,7 +127,6 @@ u32 tipc_bclink_get_last_sent(struct net *net); u32 tipc_bclink_acks_missing(struct tipc_node *n_ptr); void tipc_bclink_update_link_state(struct tipc_node *node, u32 last_sent); -int tipc_bclink_stats(struct net *net, char *stats_buf, const u32 buf_size); int tipc_bclink_reset_stats(struct net *net); int tipc_bclink_set_queue_limits(struct net *net, u32 limit); void tipc_bcbearer_sort(struct net *net, struct tipc_node_map *nm_ptr, diff --git a/net/tipc/config.c b/net/tipc/config.c index f8cd5e1b545f..67b76ee847f3 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -213,10 +213,6 @@ struct sk_buff *tipc_cfg_do_cmd(struct net *net, u32 orig_node, u16 cmd, rep_tlv_buf = tipc_node_get_links(net, req_tlv_area, req_tlv_space); break; - case TIPC_CMD_SHOW_LINK_STATS: - rep_tlv_buf = tipc_link_cmd_show_stats(net, req_tlv_area, - req_tlv_space); - break; case TIPC_CMD_RESET_LINK_STATS: rep_tlv_buf = tipc_link_cmd_reset_stats(net, req_tlv_area, req_tlv_space); diff --git a/net/tipc/link.c b/net/tipc/link.c index 466f28fcf215..2622fb99344a 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -2146,147 +2146,6 @@ struct sk_buff *tipc_link_cmd_reset_stats(struct net *net, return tipc_cfg_reply_none(); } -/** - * percent - convert count to a percentage of total (rounding up or down) - */ -static u32 percent(u32 count, u32 total) -{ - return (count * 100 + (total / 2)) / total; -} - -/** - * tipc_link_stats - print link statistics - * @net: the applicable net namespace - * @name: link name - * @buf: print buffer area - * @buf_size: size of print buffer area - * - * Returns length of print buffer data string (or 0 if error) - */ -static int tipc_link_stats(struct net *net, const char *name, char *buf, - const u32 buf_size) -{ - struct tipc_link *l; - struct tipc_stats *s; - struct tipc_node *node; - char *status; - u32 profile_total = 0; - unsigned int bearer_id; - int ret; - - if (!strcmp(name, tipc_bclink_name)) - return tipc_bclink_stats(net, buf, buf_size); - - node = tipc_link_find_owner(net, name, &bearer_id); - if (!node) - return 0; - - tipc_node_lock(node); - - l = node->links[bearer_id]; - if (!l) { - tipc_node_unlock(node); - return 0; - } - - s = &l->stats; - - if (tipc_link_is_active(l)) - status = "ACTIVE"; - else if (tipc_link_is_up(l)) - status = "STANDBY"; - else - status = "DEFUNCT"; - - ret = tipc_snprintf(buf, buf_size, "Link <%s>\n" - " %s MTU:%u Priority:%u Tolerance:%u ms" - " Window:%u packets\n", - l->name, status, l->max_pkt, l->priority, - l->tolerance, l->queue_limit[0]); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX packets:%u fragments:%u/%u bundles:%u/%u\n", - l->next_in_no - s->recv_info, s->recv_fragments, - s->recv_fragmented, s->recv_bundles, - s->recv_bundled); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX packets:%u fragments:%u/%u bundles:%u/%u\n", - l->next_out_no - s->sent_info, s->sent_fragments, - s->sent_fragmented, s->sent_bundles, - s->sent_bundled); - - profile_total = s->msg_length_counts; - if (!profile_total) - profile_total = 1; - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX profile sample:%u packets average:%u octets\n" - " 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% " 
- "-16384:%u%% -32768:%u%% -66000:%u%%\n", - s->msg_length_counts, - s->msg_lengths_total / profile_total, - percent(s->msg_length_profile[0], profile_total), - percent(s->msg_length_profile[1], profile_total), - percent(s->msg_length_profile[2], profile_total), - percent(s->msg_length_profile[3], profile_total), - percent(s->msg_length_profile[4], profile_total), - percent(s->msg_length_profile[5], profile_total), - percent(s->msg_length_profile[6], profile_total)); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " RX states:%u probes:%u naks:%u defs:%u" - " dups:%u\n", s->recv_states, s->recv_probes, - s->recv_nacks, s->deferred_recv, s->duplicates); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " TX states:%u probes:%u naks:%u acks:%u" - " dups:%u\n", s->sent_states, s->sent_probes, - s->sent_nacks, s->sent_acks, s->retransmitted); - - ret += tipc_snprintf(buf + ret, buf_size - ret, - " Congestion link:%u Send queue" - " max:%u avg:%u\n", s->link_congs, - s->max_queue_sz, s->queue_sz_counts ? - (s->accu_queue_sz / s->queue_sz_counts) : 0); - - tipc_node_unlock(node); - return ret; -} - -struct sk_buff *tipc_link_cmd_show_stats(struct net *net, - const void *req_tlv_area, - int req_tlv_space) -{ - struct sk_buff *buf; - struct tlv_desc *rep_tlv; - int str_len; - int pb_len; - char *pb; - - if (!TLV_CHECK(req_tlv_area, req_tlv_space, TIPC_TLV_LINK_NAME)) - return tipc_cfg_reply_error_string(TIPC_CFG_TLV_ERROR); - - buf = tipc_cfg_reply_alloc(TLV_SPACE(ULTRA_STRING_MAX_LEN)); - if (!buf) - return NULL; - - rep_tlv = (struct tlv_desc *)buf->data; - pb = TLV_DATA(rep_tlv); - pb_len = ULTRA_STRING_MAX_LEN; - str_len = tipc_link_stats(net, (char *)TLV_DATA(req_tlv_area), - pb, pb_len); - if (!str_len) { - kfree_skb(buf); - return tipc_cfg_reply_error_string("link not found"); - } - str_len += 1; /* for "\0" */ - skb_put(buf, TLV_SPACE(str_len)); - TLV_SET(rep_tlv, TIPC_TLV_ULTRA_STRING, NULL, str_len); - - return buf; -} - static void link_print(struct tipc_link *l_ptr, const char *str) { struct tipc_net *tn = net_generic(l_ptr->owner->net, tipc_net_id); diff --git a/net/tipc/link.h b/net/tipc/link.h index 34d3f55c4cea..8c8340cf991f 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -217,9 +217,6 @@ int tipc_link_is_active(struct tipc_link *l_ptr); void tipc_link_purge_queues(struct tipc_link *l_ptr); struct sk_buff *tipc_link_cmd_config(struct net *net, const void *req_tlv_area, int req_tlv_space, u16 cmd); -struct sk_buff *tipc_link_cmd_show_stats(struct net *net, - const void *req_tlv_area, - int req_tlv_space); struct sk_buff *tipc_link_cmd_reset_stats(struct net *net, const void *req_tlv_area, int req_tlv_space); diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 12b0f4424797..899bd94da467 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -34,6 +34,7 @@ #include "core.h" #include "config.h" #include "bearer.h" +#include "link.h" #include #include @@ -48,6 +49,7 @@ struct tipc_nl_compat_msg { u16 cmd; + int rep_type; int rep_size; int req_type; struct sk_buff *rep; @@ -95,6 +97,40 @@ static int tipc_add_tlv(struct sk_buff *skb, u16 type, void *data, u16 len) return 0; } +static void tipc_tlv_init(struct sk_buff *skb, u16 type) +{ + struct tlv_desc *tlv = (struct tlv_desc *)skb->data; + + TLV_SET_LEN(tlv, 0); + TLV_SET_TYPE(tlv, type); + skb_put(skb, sizeof(struct tlv_desc)); +} + +static int tipc_tlv_sprintf(struct sk_buff *skb, const char *fmt, ...) 
+{ + int n; + u16 len; + u32 rem; + char *buf; + struct tlv_desc *tlv; + va_list args; + + rem = tipc_skb_tailroom(skb); + + tlv = (struct tlv_desc *)skb->data; + len = TLV_GET_LEN(tlv); + buf = TLV_DATA(tlv) + len; + + va_start(args, fmt); + n = vscnprintf(buf, rem, fmt, args); + va_end(args); + + TLV_SET_LEN(tlv, n + len); + skb_put(skb, n); + + return n; +} + static struct sk_buff *tipc_tlv_alloc(int size) { int hdr_len; @@ -200,10 +236,16 @@ static int tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, int err; struct sk_buff *arg; + if (msg->req_type && !TLV_CHECK_TYPE(msg->req, msg->req_type)) + return -EINVAL; + msg->rep = tipc_tlv_alloc(msg->rep_size); if (!msg->rep) return -ENOMEM; + if (msg->rep_type) + tipc_tlv_init(msg->rep, msg->rep_type); + arg = nlmsg_new(0, GFP_KERNEL); if (!arg) { kfree_skb(msg->rep); @@ -356,6 +398,161 @@ static int tipc_nl_compat_bearer_disable(struct sk_buff *skb, return 0; } +static inline u32 perc(u32 count, u32 total) +{ + return (count * 100 + (total / 2)) / total; +} + +static void __fill_bc_link_stat(struct tipc_nl_compat_msg *msg, + struct nlattr *prop[], struct nlattr *stats[]) +{ + tipc_tlv_sprintf(msg->rep, " Window:%u packets\n", + nla_get_u32(prop[TIPC_NLA_PROP_WIN])); + + tipc_tlv_sprintf(msg->rep, + " RX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, " RX naks:%u defs:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]), + nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES])); + + tipc_tlv_sprintf(msg->rep, " TX naks:%u acks:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED])); + + tipc_tlv_sprintf(msg->rep, + " Congestion link:%u Send queue max:%u avg:%u", + nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]), + nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]), + nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE])); +} + +static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg, + struct nlattr **attrs) +{ + char *name; + struct nlattr *link[TIPC_NLA_LINK_MAX + 1]; + struct nlattr *prop[TIPC_NLA_PROP_MAX + 1]; + struct nlattr *stats[TIPC_NLA_STATS_MAX + 1]; + + nla_parse_nested(link, TIPC_NLA_LINK_MAX, attrs[TIPC_NLA_LINK], NULL); + + nla_parse_nested(prop, TIPC_NLA_PROP_MAX, link[TIPC_NLA_LINK_PROP], + NULL); + + nla_parse_nested(stats, TIPC_NLA_STATS_MAX, link[TIPC_NLA_LINK_STATS], + NULL); + + name = (char *)TLV_DATA(msg->req); + if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0) + return 0; + + tipc_tlv_sprintf(msg->rep, "\nLink <%s>\n", + nla_data(link[TIPC_NLA_LINK_NAME])); + + if (link[TIPC_NLA_LINK_BROADCAST]) { + __fill_bc_link_stat(msg, prop, stats); + return 0; + } + + if (link[TIPC_NLA_LINK_ACTIVE]) + tipc_tlv_sprintf(msg->rep, " ACTIVE"); + else if (link[TIPC_NLA_LINK_UP]) + tipc_tlv_sprintf(msg->rep, " STANDBY"); + else + tipc_tlv_sprintf(msg->rep, " 
DEFUNCT"); + + tipc_tlv_sprintf(msg->rep, " MTU:%u Priority:%u", + nla_get_u32(link[TIPC_NLA_LINK_MTU]), + nla_get_u32(prop[TIPC_NLA_PROP_PRIO])); + + tipc_tlv_sprintf(msg->rep, " Tolerance:%u ms Window:%u packets\n", + nla_get_u32(prop[TIPC_NLA_PROP_TOL]), + nla_get_u32(prop[TIPC_NLA_PROP_WIN])); + + tipc_tlv_sprintf(msg->rep, + " RX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(link[TIPC_NLA_LINK_RX]) - + nla_get_u32(stats[TIPC_NLA_STATS_RX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX packets:%u fragments:%u/%u bundles:%u/%u\n", + nla_get_u32(link[TIPC_NLA_LINK_TX]) - + nla_get_u32(stats[TIPC_NLA_STATS_TX_INFO]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_FRAGMENTED]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_BUNDLED])); + + tipc_tlv_sprintf(msg->rep, + " TX profile sample:%u packets average:%u octets\n", + nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_CNT]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_TOT]) / + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])); + + tipc_tlv_sprintf(msg->rep, + " 0-64:%u%% -256:%u%% -1024:%u%% -4096:%u%% ", + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P0]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P1]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P2]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P3]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]))); + + tipc_tlv_sprintf(msg->rep, "-16384:%u%% -32768:%u%% -66000:%u%%\n", + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P4]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P5]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT])), + perc(nla_get_u32(stats[TIPC_NLA_STATS_MSG_LEN_P6]), + nla_get_u32(stats[TIPC_NLA_STATS_MSG_PROF_TOT]))); + + tipc_tlv_sprintf(msg->rep, + " RX states:%u probes:%u naks:%u defs:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_RX_STATES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_PROBES]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RX_DEFERRED]), + nla_get_u32(stats[TIPC_NLA_STATS_DUPLICATES])); + + tipc_tlv_sprintf(msg->rep, + " TX states:%u probes:%u naks:%u acks:%u dups:%u\n", + nla_get_u32(stats[TIPC_NLA_STATS_TX_STATES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_PROBES]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_NACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_TX_ACKS]), + nla_get_u32(stats[TIPC_NLA_STATS_RETRANSMITTED])); + + tipc_tlv_sprintf(msg->rep, + " Congestion link:%u Send queue max:%u avg:%u", + nla_get_u32(stats[TIPC_NLA_STATS_LINK_CONGS]), + nla_get_u32(stats[TIPC_NLA_STATS_MAX_QUEUE]), + nla_get_u32(stats[TIPC_NLA_STATS_AVG_QUEUE])); + + return 0; +} + static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) { struct tipc_nl_compat_cmd_dump dump; @@ -380,6 +577,13 @@ static int tipc_nl_compat_handle(struct tipc_nl_compat_msg *msg) doit.doit = tipc_nl_bearer_disable; doit.transcode = tipc_nl_compat_bearer_disable; return tipc_nl_compat_doit(&doit, msg); + case TIPC_CMD_SHOW_LINK_STATS: + msg->req_type = TIPC_TLV_LINK_NAME; + msg->rep_size = ULTRA_STRING_MAX_LEN; + msg->rep_type = TIPC_TLV_ULTRA_STRING; + 
dump.dumpit = tipc_nl_link_dump; + dump.format = tipc_nl_compat_link_stat_dump; + return tipc_nl_compat_dumpit(&dump, msg); } return -EOPNOTSUPP; @@ -479,6 +683,7 @@ static int tipc_nl_compat_tmp_wrap(struct sk_buff *skb, struct genl_info *info) case TIPC_CMD_GET_BEARER_NAMES: case TIPC_CMD_ENABLE_BEARER: case TIPC_CMD_DISABLE_BEARER: + case TIPC_CMD_SHOW_LINK_STATS: return tipc_nl_compat_recv(skb, info); } -- cgit v1.2.3-70-g09d2 From 92e584fe443995bbb84069a4d13ea8ebedb5c5c8 Mon Sep 17 00:00:00 2001 From: Moni Shoua Date: Sun, 8 Feb 2015 11:49:32 +0200 Subject: net/bonding: Fix potential bad memory access during bonding events When queuing work to send the NETDEV_BONDING_INFO netdev event, it's possible that when the work is executed, the pointer to the slave becomes invalid. This can happen if between queuing the event and the execution of the work, the net-device was un-ensvaled and re-enslaved. Fix that by queuing a work with the data of the slave instead of the slave structure. Fixes: 69e6113343cf ('net/bonding: Notify state change on slaves') Reported-by: Nikolay Aleksandrov Signed-off-by: Moni Shoua Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/net/bonding/bond_main.c | 28 +++++++++++----------------- include/net/bonding.h | 2 +- 2 files changed, 12 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index 679ef00d6b16..b979c265fc51 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -1196,18 +1196,11 @@ static void bond_fill_ifslave(struct slave *slave, struct ifslave *info) info->link_failure_count = slave->link_failure_count; } -static void bond_netdev_notify(struct slave *slave, struct net_device *dev) +static void bond_netdev_notify(struct net_device *dev, + struct netdev_bonding_info *info) { - struct bonding *bond = slave->bond; - struct netdev_bonding_info bonding_info; - rtnl_lock(); - /* make sure that slave is still valid */ - if (dev->priv_flags & IFF_BONDING) { - bond_fill_ifslave(slave, &bonding_info.slave); - bond_fill_ifbond(bond, &bonding_info.master); - netdev_bonding_info_change(slave->dev, &bonding_info); - } + netdev_bonding_info_change(dev, info); rtnl_unlock(); } @@ -1216,25 +1209,26 @@ static void bond_netdev_notify_work(struct work_struct *_work) struct netdev_notify_work *w = container_of(_work, struct netdev_notify_work, work.work); - bond_netdev_notify(w->slave, w->dev); + bond_netdev_notify(w->dev, &w->bonding_info); dev_put(w->dev); + kfree(w); } void bond_queue_slave_event(struct slave *slave) { + struct bonding *bond = slave->bond; struct netdev_notify_work *nnw = kzalloc(sizeof(*nnw), GFP_ATOMIC); if (!nnw) return; - INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work); - nnw->slave = slave; + dev_hold(slave->dev); nnw->dev = slave->dev; + bond_fill_ifslave(slave, &nnw->bonding_info.slave); + bond_fill_ifbond(bond, &nnw->bonding_info.master); + INIT_DELAYED_WORK(&nnw->work, bond_netdev_notify_work); - if (queue_delayed_work(slave->bond->wq, &nnw->work, 0)) - dev_hold(slave->dev); - else - kfree(nnw); + queue_delayed_work(slave->bond->wq, &nnw->work, 0); } /* enslave device to bond device */ diff --git a/include/net/bonding.h b/include/net/bonding.h index 4e17095ad46a..fda6feeb6c1f 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -152,8 +152,8 @@ struct bond_parm_tbl { struct netdev_notify_work { struct delayed_work work; - struct slave *slave; struct net_device *dev; + struct netdev_bonding_info 
bonding_info; }; struct slave { -- cgit v1.2.3-70-g09d2 From 35f05dabf95ac3ebc4c15bafd6833f7a3046e66f Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Sun, 8 Feb 2015 11:49:34 +0200 Subject: IB/mlx4: Reset flow support for IB kernel ULPs The driver exposes interfaces that directly relate to HW state. Upon fatal error, consumers of these interfaces (ULPs) that rely on completion of all their posted work-request could hang, thereby introducing dependencies in shutdown order. To prevent this from happening, we manage the relevant resources (CQs, QPs) that are used by the device. Upon a fatal error, we now generate simulated completions for outstanding WQEs that were not completed at the time the HW was reset. It includes invoking the completion event handler for all involved CQs so that the ULPs will poll those CQs. When polled we return simulated CQEs with IB_WC_WR_FLUSH_ERR return code enabling ULPs to clean up their resources and not wait forever for completions upon receiving remove_one. The above change requires an extra check in the data path to make sure that when device is in error state, the simulated CQEs will be returned and no further WQEs will be posted. Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- drivers/infiniband/hw/mlx4/cq.c | 57 ++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx4/main.c | 64 ++++++++++++++++++++++++++++++++++++ drivers/infiniband/hw/mlx4/mlx4_ib.h | 9 +++++ drivers/infiniband/hw/mlx4/qp.c | 59 +++++++++++++++++++++++++++++---- drivers/infiniband/hw/mlx4/srq.c | 8 +++++ include/linux/mlx4/device.h | 2 ++ 6 files changed, 193 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/infiniband/hw/mlx4/cq.c b/drivers/infiniband/hw/mlx4/cq.c index a3b70f6c4035..543ecdd8667b 100644 --- a/drivers/infiniband/hw/mlx4/cq.c +++ b/drivers/infiniband/hw/mlx4/cq.c @@ -188,6 +188,8 @@ struct ib_cq *mlx4_ib_create_cq(struct ib_device *ibdev, int entries, int vector spin_lock_init(&cq->lock); cq->resize_buf = NULL; cq->resize_umem = NULL; + INIT_LIST_HEAD(&cq->send_qp_list); + INIT_LIST_HEAD(&cq->recv_qp_list); if (context) { struct mlx4_ib_create_cq ucmd; @@ -594,6 +596,55 @@ static int use_tunnel_data(struct mlx4_ib_qp *qp, struct mlx4_ib_cq *cq, struct return 0; } +static void mlx4_ib_qp_sw_comp(struct mlx4_ib_qp *qp, int num_entries, + struct ib_wc *wc, int *npolled, int is_send) +{ + struct mlx4_ib_wq *wq; + unsigned cur; + int i; + + wq = is_send ? 
&qp->sq : &qp->rq; + cur = wq->head - wq->tail; + + if (cur == 0) + return; + + for (i = 0; i < cur && *npolled < num_entries; i++) { + wc->wr_id = wq->wrid[wq->tail & (wq->wqe_cnt - 1)]; + wc->status = IB_WC_WR_FLUSH_ERR; + wc->vendor_err = MLX4_CQE_SYNDROME_WR_FLUSH_ERR; + wq->tail++; + (*npolled)++; + wc->qp = &qp->ibqp; + wc++; + } +} + +static void mlx4_ib_poll_sw_comp(struct mlx4_ib_cq *cq, int num_entries, + struct ib_wc *wc, int *npolled) +{ + struct mlx4_ib_qp *qp; + + *npolled = 0; + /* Find uncompleted WQEs belonging to that cq and retrun + * simulated FLUSH_ERR completions + */ + list_for_each_entry(qp, &cq->send_qp_list, cq_send_list) { + mlx4_ib_qp_sw_comp(qp, num_entries, wc, npolled, 1); + if (*npolled >= num_entries) + goto out; + } + + list_for_each_entry(qp, &cq->recv_qp_list, cq_recv_list) { + mlx4_ib_qp_sw_comp(qp, num_entries, wc + *npolled, npolled, 0); + if (*npolled >= num_entries) + goto out; + } + +out: + return; +} + static int mlx4_ib_poll_one(struct mlx4_ib_cq *cq, struct mlx4_ib_qp **cur_qp, struct ib_wc *wc) @@ -836,8 +887,13 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) unsigned long flags; int npolled; int err = 0; + struct mlx4_ib_dev *mdev = to_mdev(cq->ibcq.device); spin_lock_irqsave(&cq->lock, flags); + if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + mlx4_ib_poll_sw_comp(cq, num_entries, wc, &npolled); + goto out; + } for (npolled = 0; npolled < num_entries; ++npolled) { err = mlx4_ib_poll_one(cq, &cur_qp, wc + npolled); @@ -847,6 +903,7 @@ int mlx4_ib_poll_cq(struct ib_cq *ibcq, int num_entries, struct ib_wc *wc) mlx4_cq_set_ci(&cq->mcq); +out: spin_unlock_irqrestore(&cq->lock, flags); if (err == 0 || err == -EAGAIN) diff --git a/drivers/infiniband/hw/mlx4/main.c b/drivers/infiniband/hw/mlx4/main.c index 3140da518a07..eb8e215f1613 100644 --- a/drivers/infiniband/hw/mlx4/main.c +++ b/drivers/infiniband/hw/mlx4/main.c @@ -2308,6 +2308,8 @@ static void *mlx4_ib_add(struct mlx4_dev *dev) spin_lock_init(&ibdev->sm_lock); mutex_init(&ibdev->cap_mask_mutex); + INIT_LIST_HEAD(&ibdev->qp_list); + spin_lock_init(&ibdev->reset_flow_resource_lock); if (ibdev->steering_support == MLX4_STEERING_MODE_DEVICE_MANAGED && ib_num_ports) { @@ -2622,6 +2624,67 @@ out: return; } +static void mlx4_ib_handle_catas_error(struct mlx4_ib_dev *ibdev) +{ + struct mlx4_ib_qp *mqp; + unsigned long flags_qp; + unsigned long flags_cq; + struct mlx4_ib_cq *send_mcq, *recv_mcq; + struct list_head cq_notify_list; + struct mlx4_cq *mcq; + unsigned long flags; + + pr_warn("mlx4_ib_handle_catas_error was started\n"); + INIT_LIST_HEAD(&cq_notify_list); + + /* Go over qp list reside on that ibdev, sync with create/destroy qp.*/ + spin_lock_irqsave(&ibdev->reset_flow_resource_lock, flags); + + list_for_each_entry(mqp, &ibdev->qp_list, qps_list) { + spin_lock_irqsave(&mqp->sq.lock, flags_qp); + if (mqp->sq.tail != mqp->sq.head) { + send_mcq = to_mcq(mqp->ibqp.send_cq); + spin_lock_irqsave(&send_mcq->lock, flags_cq); + if (send_mcq->mcq.comp && + mqp->ibqp.send_cq->comp_handler) { + if (!send_mcq->mcq.reset_notify_added) { + send_mcq->mcq.reset_notify_added = 1; + list_add_tail(&send_mcq->mcq.reset_notify, + &cq_notify_list); + } + } + spin_unlock_irqrestore(&send_mcq->lock, flags_cq); + } + spin_unlock_irqrestore(&mqp->sq.lock, flags_qp); + /* Now, handle the QP's receive queue */ + spin_lock_irqsave(&mqp->rq.lock, flags_qp); + /* no handling is needed for SRQ */ + if (!mqp->ibqp.srq) { + if (mqp->rq.tail != mqp->rq.head) { + recv_mcq = 
to_mcq(mqp->ibqp.recv_cq); + spin_lock_irqsave(&recv_mcq->lock, flags_cq); + if (recv_mcq->mcq.comp && + mqp->ibqp.recv_cq->comp_handler) { + if (!recv_mcq->mcq.reset_notify_added) { + recv_mcq->mcq.reset_notify_added = 1; + list_add_tail(&recv_mcq->mcq.reset_notify, + &cq_notify_list); + } + } + spin_unlock_irqrestore(&recv_mcq->lock, + flags_cq); + } + } + spin_unlock_irqrestore(&mqp->rq.lock, flags_qp); + } + + list_for_each_entry(mcq, &cq_notify_list, reset_notify) { + mcq->comp(mcq); + } + spin_unlock_irqrestore(&ibdev->reset_flow_resource_lock, flags); + pr_warn("mlx4_ib_handle_catas_error ended\n"); +} + static void handle_bonded_port_state_event(struct work_struct *work) { struct ib_event_work *ew = @@ -2701,6 +2764,7 @@ static void mlx4_ib_event(struct mlx4_dev *dev, void *ibdev_ptr, case MLX4_DEV_EVENT_CATASTROPHIC_ERROR: ibdev->ib_active = false; ibev.event = IB_EVENT_DEVICE_FATAL; + mlx4_ib_handle_catas_error(ibdev); break; case MLX4_DEV_EVENT_PORT_MGMT_CHANGE: diff --git a/drivers/infiniband/hw/mlx4/mlx4_ib.h b/drivers/infiniband/hw/mlx4/mlx4_ib.h index 721540c9163d..f829fd935b79 100644 --- a/drivers/infiniband/hw/mlx4/mlx4_ib.h +++ b/drivers/infiniband/hw/mlx4/mlx4_ib.h @@ -110,6 +110,9 @@ struct mlx4_ib_cq { struct mutex resize_mutex; struct ib_umem *umem; struct ib_umem *resize_umem; + /* List of qps that it serves.*/ + struct list_head send_qp_list; + struct list_head recv_qp_list; }; struct mlx4_ib_mr { @@ -300,6 +303,9 @@ struct mlx4_ib_qp { struct mlx4_roce_smac_vlan_info pri; struct mlx4_roce_smac_vlan_info alt; u64 reg_id; + struct list_head qps_list; + struct list_head cq_recv_list; + struct list_head cq_send_list; }; struct mlx4_ib_srq { @@ -535,6 +541,9 @@ struct mlx4_ib_dev { /* lock when destroying qp1_proxy and getting netdev events */ struct mutex qp1_proxy_lock[MLX4_MAX_PORTS]; u8 bond_next_port; + /* protect resources needed as part of reset flow */ + spinlock_t reset_flow_resource_lock; + struct list_head qp_list; }; struct ib_event_work { diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 792f9dc86ada..dfc6ca128a7e 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -46,6 +46,11 @@ #include "mlx4_ib.h" #include "user.h" +static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, + struct mlx4_ib_cq *recv_cq); +static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, + struct mlx4_ib_cq *recv_cq); + enum { MLX4_IB_ACK_REQ_FREQ = 8, }; @@ -618,6 +623,8 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, struct mlx4_ib_sqp *sqp; struct mlx4_ib_qp *qp; enum mlx4_ib_qp_type qp_type = (enum mlx4_ib_qp_type) init_attr->qp_type; + struct mlx4_ib_cq *mcq; + unsigned long flags; /* When tunneling special qps, we use a plain UD qp */ if (sqpn) { @@ -828,6 +835,24 @@ static int create_qp_common(struct mlx4_ib_dev *dev, struct ib_pd *pd, qp->mqp.event = mlx4_ib_qp_event; if (!*caller_qp) *caller_qp = qp; + + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); + mlx4_ib_lock_cqs(to_mcq(init_attr->send_cq), + to_mcq(init_attr->recv_cq)); + /* Maintain device to QPs access, needed for further handling + * via reset flow + */ + list_add_tail(&qp->qps_list, &dev->qp_list); + /* Maintain CQ to QPs access, needed for further handling + * via reset flow + */ + mcq = to_mcq(init_attr->send_cq); + list_add_tail(&qp->cq_send_list, &mcq->send_qp_list); + mcq = to_mcq(init_attr->recv_cq); + list_add_tail(&qp->cq_recv_list, &mcq->recv_qp_list); + 
mlx4_ib_unlock_cqs(to_mcq(init_attr->send_cq), + to_mcq(init_attr->recv_cq)); + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); return 0; err_qpn: @@ -886,13 +911,13 @@ static void mlx4_ib_lock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *recv __acquires(&send_cq->lock) __acquires(&recv_cq->lock) { if (send_cq == recv_cq) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); __acquire(&recv_cq->lock); } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { - spin_lock_irq(&send_cq->lock); + spin_lock(&send_cq->lock); spin_lock_nested(&recv_cq->lock, SINGLE_DEPTH_NESTING); } else { - spin_lock_irq(&recv_cq->lock); + spin_lock(&recv_cq->lock); spin_lock_nested(&send_cq->lock, SINGLE_DEPTH_NESTING); } } @@ -902,13 +927,13 @@ static void mlx4_ib_unlock_cqs(struct mlx4_ib_cq *send_cq, struct mlx4_ib_cq *re { if (send_cq == recv_cq) { __release(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } else if (send_cq->mcq.cqn < recv_cq->mcq.cqn) { spin_unlock(&recv_cq->lock); - spin_unlock_irq(&send_cq->lock); + spin_unlock(&send_cq->lock); } else { spin_unlock(&send_cq->lock); - spin_unlock_irq(&recv_cq->lock); + spin_unlock(&recv_cq->lock); } } @@ -953,6 +978,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, int is_user) { struct mlx4_ib_cq *send_cq, *recv_cq; + unsigned long flags; if (qp->state != IB_QPS_RESET) { if (mlx4_qp_modify(dev->dev, NULL, to_mlx4_state(qp->state), @@ -984,8 +1010,13 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, get_cqs(qp, &send_cq, &recv_cq); + spin_lock_irqsave(&dev->reset_flow_resource_lock, flags); mlx4_ib_lock_cqs(send_cq, recv_cq); + /* del from lists under both locks above to protect reset flow paths */ + list_del(&qp->qps_list); + list_del(&qp->cq_send_list); + list_del(&qp->cq_recv_list); if (!is_user) { __mlx4_ib_cq_clean(recv_cq, qp->mqp.qpn, qp->ibqp.srq ? 
to_msrq(qp->ibqp.srq): NULL); @@ -996,6 +1027,7 @@ static void destroy_qp_common(struct mlx4_ib_dev *dev, struct mlx4_ib_qp *qp, mlx4_qp_remove(dev->dev, &qp->mqp); mlx4_ib_unlock_cqs(send_cq, recv_cq); + spin_unlock_irqrestore(&dev->reset_flow_resource_lock, flags); mlx4_qp_free(dev->dev, &qp->mqp); @@ -2618,8 +2650,15 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, __be32 uninitialized_var(lso_hdr_sz); __be32 blh; int i; + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); spin_lock_irqsave(&qp->sq.lock, flags); + if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + err = -EIO; + *bad_wr = wr; + nreq = 0; + goto out; + } ind = qp->sq_next_wqe; @@ -2917,10 +2956,18 @@ int mlx4_ib_post_recv(struct ib_qp *ibqp, struct ib_recv_wr *wr, int ind; int max_gs; int i; + struct mlx4_ib_dev *mdev = to_mdev(ibqp->device); max_gs = qp->rq.max_gs; spin_lock_irqsave(&qp->rq.lock, flags); + if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + err = -EIO; + *bad_wr = wr; + nreq = 0; + goto out; + } + ind = qp->rq.head & (qp->rq.wqe_cnt - 1); for (nreq = 0; wr; ++nreq, wr = wr->next) { diff --git a/drivers/infiniband/hw/mlx4/srq.c b/drivers/infiniband/hw/mlx4/srq.c index 62d9285300af..dce5dfe3a70e 100644 --- a/drivers/infiniband/hw/mlx4/srq.c +++ b/drivers/infiniband/hw/mlx4/srq.c @@ -316,8 +316,15 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, int err = 0; int nreq; int i; + struct mlx4_ib_dev *mdev = to_mdev(ibsrq->device); spin_lock_irqsave(&srq->lock, flags); + if (mdev->dev->persist->state & MLX4_DEVICE_STATE_INTERNAL_ERROR) { + err = -EIO; + *bad_wr = wr; + nreq = 0; + goto out; + } for (nreq = 0; wr; ++nreq, wr = wr->next) { if (unlikely(wr->num_sge > srq->msrq.max_gs)) { @@ -362,6 +369,7 @@ int mlx4_ib_post_srq_recv(struct ib_srq *ibsrq, struct ib_recv_wr *wr, *srq->db.db = cpu_to_be32(srq->wqe_ctr); } +out: spin_unlock_irqrestore(&srq->lock, flags); diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index c116cb02475c..e4ebff7e9d02 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -689,6 +689,8 @@ struct mlx4_cq { void (*comp)(struct mlx4_cq *); void *priv; } tasklet_ctx; + int reset_notify_added; + struct list_head reset_notify; }; struct mlx4_qp { -- cgit v1.2.3-70-g09d2 From a4505152044c31f7b8e108a87ab2009901d96d0e Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Mon, 9 Feb 2015 12:07:29 +0530 Subject: ethtool: rename reserved1 memeber in ethtool_drvinfo for expansion ROM version Renamed the reserved1 member of struct ethtool_drvinfo to erom_version to get expansion/option ROM version of the adapter if present. Signed-off-by: Hariprasad Shenai Signed-off-by: David S. Miller --- include/uapi/linux/ethtool.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 5f66d9c2889d..2e49fc880d29 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -139,6 +139,7 @@ static inline __u32 ethtool_cmd_speed(const struct ethtool_cmd *ep) #define ETHTOOL_FWVERS_LEN 32 #define ETHTOOL_BUSINFO_LEN 32 +#define ETHTOOL_EROMVERS_LEN 32 /** * struct ethtool_drvinfo - general driver and device information @@ -148,6 +149,7 @@ static inline __u32 ethtool_cmd_speed(const struct ethtool_cmd *ep) * not be an empty string. 
* @version: Driver version string; may be an empty string * @fw_version: Firmware version string; may be an empty string + * @erom_version: Expansion ROM version string; may be an empty string * @bus_info: Device bus address. This should match the dev_name() * string for the underlying bus device, if there is one. May be * an empty string. @@ -176,7 +178,7 @@ struct ethtool_drvinfo { char version[32]; char fw_version[ETHTOOL_FWVERS_LEN]; char bus_info[ETHTOOL_BUSINFO_LEN]; - char reserved1[32]; + char erom_version[ETHTOOL_EROMVERS_LEN]; char reserved2[12]; __u32 n_priv_flags; __u32 n_stats; -- cgit v1.2.3-70-g09d2 From 8381eacf5c3b35cf7755f4bc521c4d56d24c1cd9 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 9 Feb 2015 09:38:21 -0500 Subject: ipv6: Make __ipv6_select_ident static Make __ipv6_select_ident() static as it isn't used outside the file. Fixes: 0508c07f5e0c9 (ipv6: Select fragment id during UFO segmentation if not set.) Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 -- net/ipv6/output_core.c | 3 ++- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 6e416f6d3e3c..fde3b593c3f2 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -671,8 +671,6 @@ static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_add return __ipv6_addr_diff(a1, a2, sizeof(struct in6_addr)); } -u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, - struct in6_addr *src); void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt); void ipv6_proxy_select_ident(struct sk_buff *skb); diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index a86cf60f0ca6..74581f706c4d 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -9,7 +9,8 @@ #include #include -u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, struct in6_addr *src) +static u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, + struct in6_addr *src) { u32 hash, id; -- cgit v1.2.3-70-g09d2 From b0f9ca53cbb103e9240a29a974e0b6085e58f9f7 Mon Sep 17 00:00:00 2001 From: Fan Du Date: Tue, 10 Feb 2015 09:53:16 +0800 Subject: ipv4: Namespecify TCP PMTU mechanism Packetization Layer Path MTU Discovery works separately alongside Path MTU Discovery at the IP level, and different net namespaces have different requirements for which one to choose, e.g., a virtualized container instance would require TCP PMTU to probe a usable effective MTU for an underlying tunnel, while the host would employ classical ICMP-based PMTU discovery. Hence make the TCP PMTU mechanism per net namespace to decouple the two functionalities. Furthermore, the probe base MSS should also be configured separately for each namespace. Signed-off-by: Fan Du Signed-off-by: David S. 
Miller --- include/net/netns/ipv4.h | 2 ++ include/net/tcp.h | 2 -- net/ipv4/sysctl_net_ipv4.c | 28 ++++++++++++++-------------- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_output.c | 8 +++----- net/ipv4/tcp_timer.c | 7 +++++-- 6 files changed, 25 insertions(+), 23 deletions(-) (limited to 'include') diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index e0bdcb147326..dbe225478adb 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -82,6 +82,8 @@ struct netns_ipv4 { int sysctl_fwmark_reflect; int sysctl_tcp_fwmark_accept; + int sysctl_tcp_mtu_probing; + int sysctl_tcp_base_mss; struct ping_group_range ping_group_range; diff --git a/include/net/tcp.h b/include/net/tcp.h index da4196fb78db..8d6b983d5099 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -262,8 +262,6 @@ extern int sysctl_tcp_low_latency; extern int sysctl_tcp_nometrics_save; extern int sysctl_tcp_moderate_rcvbuf; extern int sysctl_tcp_tso_win_divisor; -extern int sysctl_tcp_mtu_probing; -extern int sysctl_tcp_base_mss; extern int sysctl_tcp_workaround_signed_windows; extern int sysctl_tcp_slow_start_after_idle; extern int sysctl_tcp_thin_linear_timeouts; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 82601a68cf90..d151539da8e6 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -603,20 +603,6 @@ static struct ctl_table ipv4_table[] = { .maxlen = TCP_CA_NAME_MAX, .proc_handler = proc_tcp_congestion_control, }, - { - .procname = "tcp_mtu_probing", - .data = &sysctl_tcp_mtu_probing, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, - { - .procname = "tcp_base_mss", - .data = &sysctl_tcp_base_mss, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec, - }, { .procname = "tcp_workaround_signed_windows", .data = &sysctl_tcp_workaround_signed_windows, @@ -883,6 +869,20 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec, }, + { + .procname = "tcp_mtu_probing", + .data = &init_net.ipv4.sysctl_tcp_mtu_probing, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "tcp_base_mss", + .data = &init_net.ipv4.sysctl_tcp_base_mss, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, { } }; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 67bc95fb5d9e..5a2dfed4783b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2459,6 +2459,7 @@ static int __net_init tcp_sk_init(struct net *net) *per_cpu_ptr(net->ipv4.tcp_sk, cpu) = sk; } net->ipv4.sysctl_tcp_ecn = 2; + net->ipv4.sysctl_tcp_base_mss = TCP_BASE_MSS; return 0; fail: diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 4fcc9a768849..a2a796c5536b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -59,9 +59,6 @@ int sysctl_tcp_limit_output_bytes __read_mostly = 131072; */ int sysctl_tcp_tso_win_divisor __read_mostly = 3; -int sysctl_tcp_mtu_probing __read_mostly = 0; -int sysctl_tcp_base_mss __read_mostly = TCP_BASE_MSS; - /* By default, RFC2861 behavior. 
*/ int sysctl_tcp_slow_start_after_idle __read_mostly = 1; @@ -1350,11 +1347,12 @@ void tcp_mtup_init(struct sock *sk) { struct tcp_sock *tp = tcp_sk(sk); struct inet_connection_sock *icsk = inet_csk(sk); + struct net *net = sock_net(sk); - icsk->icsk_mtup.enabled = sysctl_tcp_mtu_probing > 1; + icsk->icsk_mtup.enabled = net->ipv4.sysctl_tcp_mtu_probing > 1; icsk->icsk_mtup.search_high = tp->rx_opt.mss_clamp + sizeof(struct tcphdr) + icsk->icsk_af_ops->net_header_len; - icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, sysctl_tcp_base_mss); + icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, net->ipv4.sysctl_tcp_base_mss); icsk->icsk_mtup.probe_size = 0; } EXPORT_SYMBOL(tcp_mtup_init); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 1829c7fbc77e..0732b787904e 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -101,17 +101,20 @@ static int tcp_orphan_retries(struct sock *sk, int alive) static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) { + struct net *net = sock_net(sk); + /* Black hole detection */ - if (sysctl_tcp_mtu_probing) { + if (net->ipv4.sysctl_tcp_mtu_probing) { if (!icsk->icsk_mtup.enabled) { icsk->icsk_mtup.enabled = 1; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); } else { + struct net *net = sock_net(sk); struct tcp_sock *tp = tcp_sk(sk); int mss; mss = tcp_mtu_to_mss(sk, icsk->icsk_mtup.search_low) >> 1; - mss = min(sysctl_tcp_base_mss, mss); + mss = min(net->ipv4.sysctl_tcp_base_mss, mss); mss = max(mss, 68 - tp->tcp_header_len); icsk->icsk_mtup.search_low = tcp_mss_to_mtu(sk, mss); tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); -- cgit v1.2.3-70-g09d2
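The two knobs keep their existing names (net.ipv4.tcp_mtu_probing and net.ipv4.tcp_base_mss) but may now hold different values in every network namespace. A minimal userspace sketch, not part of the patch, of what that enables: it assumes a container namespace bound at /var/run/netns/container0 (the iproute2 convention); the namespace name and the values written are illustrative only.

#define _GNU_SOURCE
#include <fcntl.h>
#include <sched.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

/* Write a value into a sysctl file; 0 on success, -1 on error. */
static int write_sysctl(const char *path, const char *val)
{
	int fd = open(path, O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, val, strlen(val)) != (ssize_t)strlen(val)) {
		close(fd);
		return -1;
	}
	return close(fd);
}

int main(void)
{
	/* Hypothetical namespace, created e.g. with "ip netns add container0". */
	int nsfd = open("/var/run/netns/container0", O_RDONLY);

	if (nsfd < 0 || setns(nsfd, CLONE_NEWNET) < 0) {
		perror("enter netns");
		return 1;
	}
	close(nsfd);

	/* The net sysctl subtree resolves against the caller's network
	 * namespace, so these writes affect only the container: always-on
	 * packetization-layer PMTU probing with a 1024-byte probe base MSS,
	 * while the host namespace keeps its own settings. */
	if (write_sysctl("/proc/sys/net/ipv4/tcp_mtu_probing", "2") < 0 ||
	    write_sysctl("/proc/sys/net/ipv4/tcp_base_mss", "1024") < 0) {
		perror("sysctl");
		return 1;
	}
	return 0;
}

Note that in the patch only tcp_base_mss gets an explicit default in tcp_sk_init() (TCP_BASE_MSS); tcp_mtu_probing starts out as 0, i.e. disabled, in each newly created namespace.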