From 47376ceba54600cec4dd9e7c4fe8b98e4269633a Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 16 Dec 2009 23:25:50 +0100 Subject: reiserfs: Fix reiserfs lock <-> inode mutex dependency inversion The reiserfs lock -> inode mutex dependency gets inverted when we relax the lock while walking to the tree. To fix this, use a specialized version of reiserfs_mutex_lock_safe that takes care of mutex subclasses. Then we can grab the inode mutex with I_MUTEX_XATTR subclass without any reiserfs lock dependency. This fixes the following report: [ INFO: possible circular locking dependency detected ] 2.6.32-06793-gf405425-dirty #2 ------------------------------------------------------- mv/18566 is trying to acquire lock: (&REISERFS_SB(s)->lock){+.+.+.}, at: [] reiserfs_write_lock+0x28= /0x40 but task is already holding lock: (&sb->s_type->i_mutex_key#5/3){+.+.+.}, at: [] reiserfs_for_each_xattr+0x10c/0x380 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #1 (&sb->s_type->i_mutex_key#5/3){+.+.+.}: [] validate_chain+0xa23/0xf70 [] __lock_acquire+0x4e5/0xa70 [] lock_acquire+0x7a/0xa0 [] mutex_lock_nested+0x5f/0x2b0 [] reiserfs_for_each_xattr+0x84/0x380 [] reiserfs_delete_xattrs+0x15/0x50 [] reiserfs_delete_inode+0x8f/0x140 [] generic_delete_inode+0x9c/0x150 [] generic_drop_inode+0x3d/0x60 [] iput+0x47/0x50 [] do_unlinkat+0xdb/0x160 [] sys_unlink+0x10/0x20 [] sysenter_do_call+0x12/0x36 -> #0 (&REISERFS_SB(s)->lock){+.+.+.}: [] validate_chain+0xf68/0xf70 [] __lock_acquire+0x4e5/0xa70 [] lock_acquire+0x7a/0xa0 [] mutex_lock_nested+0x5f/0x2b0 [] reiserfs_write_lock+0x28/0x40 [] search_by_key+0x1f7b/0x21b0 [] search_by_entry_key+0x1f/0x3b0 [] reiserfs_find_entry+0x77/0x400 [] reiserfs_lookup+0x85/0x130 [] __lookup_hash+0xb4/0x110 [] lookup_one_len+0xb3/0x100 [] reiserfs_for_each_xattr+0x120/0x380 [] reiserfs_delete_xattrs+0x15/0x50 [] reiserfs_delete_inode+0x8f/0x140 [] generic_delete_inode+0x9c/0x150 [] generic_drop_inode+0x3d/0x60 [] iput+0x47/0x50 [] dentry_iput+0x6f/0xf0 [] d_kill+0x24/0x50 [] dput+0x5b/0x120 [] sys_renameat+0x1b9/0x230 [] sys_rename+0x28/0x30 [] sysenter_do_call+0x12/0x36 other info that might help us debug this: 2 locks held by mv/18566: #0: (&sb->s_type->i_mutex_key#5/1){+.+.+.}, at: [] lock_rename+0xcc/0xd0 #1: (&sb->s_type->i_mutex_key#5/3){+.+.+.}, at: [] reiserfs_for_each_xattr+0x10c/0x380 stack backtrace: Pid: 18566, comm: mv Tainted: G C 2.6.32-06793-gf405425-dirty #2 Call Trace: [] ? printk+0x18/0x1e [] print_circular_bug+0xc0/0xd0 [] validate_chain+0xf68/0xf70 [] ? trace_hardirqs_off+0xb/0x10 [] __lock_acquire+0x4e5/0xa70 [] lock_acquire+0x7a/0xa0 [] ? reiserfs_write_lock+0x28/0x40 [] mutex_lock_nested+0x5f/0x2b0 [] ? reiserfs_write_lock+0x28/0x40 [] ? reiserfs_write_lock+0x28/0x40 [] ? schedule+0x27a/0x440 [] reiserfs_write_lock+0x28/0x40 [] search_by_key+0x1f7b/0x21b0 [] ? __lock_acquire+0x506/0xa70 [] ? lock_release_non_nested+0x1e7/0x340 [] ? reiserfs_write_lock+0x28/0x40 [] ? trace_hardirqs_on_caller+0x124/0x170 [] ? trace_hardirqs_on+0xb/0x10 [] ? T.316+0x15/0x1a0 [] ? sched_clock_cpu+0x9d/0x100 [] search_by_entry_key+0x1f/0x3b0 [] ? __mutex_unlock_slowpath+0x9a/0x120 [] ? trace_hardirqs_on_caller+0x124/0x170 [] reiserfs_find_entry+0x77/0x400 [] reiserfs_lookup+0x85/0x130 [] ? sched_clock_cpu+0x9d/0x100 [] __lookup_hash+0xb4/0x110 [] lookup_one_len+0xb3/0x100 [] reiserfs_for_each_xattr+0x120/0x380 [] ? delete_one_xattr+0x0/0x1c0 [] ? math_error+0x22/0x150 [] ? 
reiserfs_write_lock+0x28/0x40 [] reiserfs_delete_xattrs+0x15/0x50 [] ? reiserfs_write_lock+0x28/0x40 [] reiserfs_delete_inode+0x8f/0x140 [] ? generic_delete_inode+0x5f/0x150 [] ? reiserfs_delete_inode+0x0/0x140 [] generic_delete_inode+0x9c/0x150 [] generic_drop_inode+0x3d/0x60 [] iput+0x47/0x50 [] dentry_iput+0x6f/0xf0 [] d_kill+0x24/0x50 [] dput+0x5b/0x120 [] sys_renameat+0x1b9/0x230 [] ? sched_clock_cpu+0x9d/0x100 [] ? trace_hardirqs_off+0xb/0x10 [] ? cpu_clock+0x4e/0x60 [] ? do_page_fault+0x155/0x370 [] ? up_read+0x16/0x30 [] ? do_page_fault+0x155/0x370 [] sys_rename+0x28/0x30 [] sysenter_do_call+0x12/0x36 Reported-by: Alexander Beregalov Signed-off-by: Frederic Weisbecker Cc: Chris Mason Cc: Ingo Molnar Cc: Thomas Gleixner --- include/linux/reiserfs_fs.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index a05b4a20768d..4351b49e2b1e 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -97,6 +97,15 @@ static inline void reiserfs_mutex_lock_safe(struct mutex *m, reiserfs_write_lock(s); } +static inline void +reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, + struct super_block *s) +{ + reiserfs_write_unlock(s); + mutex_lock_nested(m, subclass); + reiserfs_write_lock(s); +} + /* * When we schedule, we usually want to also release the write lock, * according to the previous bkl based locking scheme of reiserfs. -- cgit v1.2.3-70-g09d2 From 2d1c861871d767153538a77c498752b36d4bb4b8 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 9 Dec 2009 17:52:13 +1100 Subject: PCI/cardbus: Add a fixup hook and fix powerpc The cardbus code creates PCI devices without ever going through the necessary fixup bits and pieces that normal PCI devices go through. There's in fact a commented out call to pcibios_fixup_bus() in there, it's commented because ... it doesn't work. I could make pcibios_fixup_bus() do the right thing on powerpc easily but I felt it cleaner instead to provide a specific hook pci_fixup_cardbus for which a weak empty implementation is provided by the PCI core. This fixes cardbus on powerbooks and probably all other PowerPC platforms which was broken completely for ever on some platforms and since 2.6.31 on others such as PowerBooks when we made the DMA ops mandatory (since those are setup by the fixups). 
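
For illustration only, a sketch of how another architecture could override the new hook; the function body below is hypothetical, while the prototype, the weak default in drivers/pci/pci.c, the call site in cb_alloc(), and the dev->is_added test are taken from this patch:

#include <linux/pci.h>

/*
 * Hypothetical arch override of the weak pci_fixup_cardbus() stub.
 * cb_alloc() calls this right after pci_scan_slot(), so only the
 * freshly added cardbus functions behind the bridge need attention.
 */
void pci_fixup_cardbus(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		if (dev->is_added)	/* already fully discovered */
			continue;
		/* arch-specific per-device setup, e.g. DMA ops or IRQ routing */
	}
}
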
Acked-by: Dominik Brodowski Signed-off-by: Benjamin Herrenschmidt Signed-off-by: Jesse Barnes --- arch/powerpc/kernel/pci-common.c | 13 +++++++++++++ drivers/pci/pci.c | 5 +++++ drivers/pcmcia/cardbus.c | 2 +- include/linux/pci.h | 3 +++ 4 files changed, 22 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index e8dfdbd9327a..cadbed679fbb 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -1107,6 +1107,12 @@ void __devinit pcibios_setup_bus_devices(struct pci_bus *bus) list_for_each_entry(dev, &bus->devices, bus_list) { struct dev_archdata *sd = &dev->dev.archdata; + /* Cardbus can call us to add new devices to a bus, so ignore + * those who are already fully discovered + */ + if (dev->is_added) + continue; + /* Setup OF node pointer in archdata */ sd->of_node = pci_device_to_OF_node(dev); @@ -1147,6 +1153,13 @@ void __devinit pcibios_fixup_bus(struct pci_bus *bus) } EXPORT_SYMBOL(pcibios_fixup_bus); +void __devinit pci_fixup_cardbus(struct pci_bus *bus) +{ + /* Now fixup devices on that bus */ + pcibios_setup_bus_devices(bus); +} + + static int skip_isa_ioresource_align(struct pci_dev *dev) { if ((ppc_pci_flags & PPC_PCI_CAN_SKIP_ISA_ALIGN) && diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index d50522bf16b1..864e703cf737 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -2798,6 +2798,11 @@ int __attribute__ ((weak)) pci_ext_cfg_avail(struct pci_dev *dev) return 1; } +void __weak pci_fixup_cardbus(struct pci_bus *bus) +{ +} +EXPORT_SYMBOL(pci_fixup_cardbus); + static int __init pci_setup(char *str) { while (str) { diff --git a/drivers/pcmcia/cardbus.c b/drivers/pcmcia/cardbus.c index cdf50f3bc2df..d99f846451a3 100644 --- a/drivers/pcmcia/cardbus.c +++ b/drivers/pcmcia/cardbus.c @@ -222,7 +222,7 @@ int __ref cb_alloc(struct pcmcia_socket *s) unsigned int max, pass; s->functions = pci_scan_slot(bus, PCI_DEVFN(0, 0)); -/* pcibios_fixup_bus(bus); */ + pci_fixup_cardbus(bus); max = bus->secondary; for (pass = 0; pass < 2; pass++) diff --git a/include/linux/pci.h b/include/linux/pci.h index bf1e67080849..5da0690d9cee 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -566,6 +566,9 @@ void pcibios_align_resource(void *, struct resource *, resource_size_t, resource_size_t); void pcibios_update_irq(struct pci_dev *, int irq); +/* Weak but can be overriden by arch */ +void pci_fixup_cardbus(struct pci_bus *); + /* Generic PCI functions used internally */ extern struct pci_bus *pci_find_bus(int domain, int busnr); -- cgit v1.2.3-70-g09d2 From 4f924ba5b5aaf1477daafeae779495d39549186d Mon Sep 17 00:00:00 2001 From: Mattia Dongili Date: Thu, 17 Dec 2009 00:08:33 +0900 Subject: sony-laptop: add AVMode key mapping Some models are equipped with an "AVMode" function key that sends sony-laptop: Unknown event: 0x100 0xa1 sony-laptop: Unknown event: 0x100 0x21 for press and release respectively. Cc: "Matthew W. S. 
Bell" Cc: Dmitry Torokhov Signed-off-by: Mattia Dongili Signed-off-by: Len Brown --- drivers/platform/x86/sony-laptop.c | 4 ++++ include/linux/sonypi.h | 1 + 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/drivers/platform/x86/sony-laptop.c b/drivers/platform/x86/sony-laptop.c index a2a742c8ff7e..9710f70040ba 100644 --- a/drivers/platform/x86/sony-laptop.c +++ b/drivers/platform/x86/sony-laptop.c @@ -232,6 +232,7 @@ static int sony_laptop_input_index[] = { 56, /* 69 SONYPI_EVENT_VOLUME_INC_PRESSED */ 57, /* 70 SONYPI_EVENT_VOLUME_DEC_PRESSED */ -1, /* 71 SONYPI_EVENT_BRIGHTNESS_PRESSED */ + 58, /* 72 SONYPI_EVENT_MEDIA_PRESSED */ }; static int sony_laptop_input_keycode_map[] = { @@ -293,6 +294,7 @@ static int sony_laptop_input_keycode_map[] = { KEY_F15, /* 55 SONYPI_EVENT_SETTINGKEY_PRESSED */ KEY_VOLUMEUP, /* 56 SONYPI_EVENT_VOLUME_INC_PRESSED */ KEY_VOLUMEDOWN, /* 57 SONYPI_EVENT_VOLUME_DEC_PRESSED */ + KEY_MEDIA, /* 58 SONYPI_EVENT_MEDIA_PRESSED */ }; /* release buttons after a short delay if pressed */ @@ -890,6 +892,8 @@ static struct sony_nc_event sony_100_events[] = { { 0x0C, SONYPI_EVENT_FNKEY_RELEASED }, { 0x9f, SONYPI_EVENT_CD_EJECT_PRESSED }, { 0x1f, SONYPI_EVENT_ANYBUTTON_RELEASED }, + { 0xa1, SONYPI_EVENT_MEDIA_PRESSED }, + { 0x21, SONYPI_EVENT_ANYBUTTON_RELEASED }, { 0, 0 }, }; diff --git a/include/linux/sonypi.h b/include/linux/sonypi.h index 34c4475ac4a2..4f95c1aac2fd 100644 --- a/include/linux/sonypi.h +++ b/include/linux/sonypi.h @@ -111,6 +111,7 @@ #define SONYPI_EVENT_VOLUME_INC_PRESSED 69 #define SONYPI_EVENT_VOLUME_DEC_PRESSED 70 #define SONYPI_EVENT_BRIGHTNESS_PRESSED 71 +#define SONYPI_EVENT_MEDIA_PRESSED 72 /* get/set brightness */ #define SONYPI_IOCGBRT _IOR('v', 0, __u8) -- cgit v1.2.3-70-g09d2 From 8c0414cd524e9f1c483ffb3ff1c2d860f5c567c8 Mon Sep 17 00:00:00 2001 From: Sunil Mushran Date: Thu, 3 Dec 2009 12:46:51 -0800 Subject: fiemap: Add new extent flag FIEMAP_EXTENT_SHARED Some filesystems may allow multiple files to point to a particular extent. This patch adds flag FIEMAP_EXTENT_SHARED to denote extents that are shared with other inodes. Signed-off-by: Sunil Mushran Acked-by: Mark Fasheh Signed-off-by: Joel Becker --- include/linux/fiemap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/fiemap.h b/include/linux/fiemap.h index 934e22d65801..d830747f5c0b 100644 --- a/include/linux/fiemap.h +++ b/include/linux/fiemap.h @@ -62,5 +62,7 @@ struct fiemap { #define FIEMAP_EXTENT_MERGED 0x00001000 /* File does not natively * support extents. Result * merged for efficiency. */ +#define FIEMAP_EXTENT_SHARED 0x00002000 /* Space shared with other + * files. */ #endif /* _LINUX_FIEMAP_H */ -- cgit v1.2.3-70-g09d2 From 78f1699659963fff97975df44db6d5dbe7218e55 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:19:09 -0700 Subject: ACPI: processor: call _PDC early We discovered that at least one machine (HP Envy), methods in the DSDT attempt to call external methods defined in a dynamically loaded SSDT. Unfortunately, the DSDT methods we are trying to call are part of the EC initialization, which happens very early, and the the dynamic SSDT is only loaded when a processor _PDC method runs much later. This results in namespace lookup errors for the (as of yet) undefined methods. Since Windows doesn't have any issues with this machine, we take it as a hint that they must be evaluating _PDC much earlier than we are. 
Thus, the proper thing for Linux to do should be to match the Windows implementation more closely. Provide a mechanism to call _PDC before we enable the EC. Doing so loads the dynamic tables, and allows the EC to be enabled correctly. The ACPI processor driver will still evaluate _PDC in its .add() method to cover the hotplug case. Resolves: http://bugzilla.kernel.org/show_bug.cgi?id=14824 Cc: ming.m.lin@intel.com Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- drivers/acpi/Makefile | 1 + drivers/acpi/bus.c | 2 + drivers/acpi/internal.h | 1 + drivers/acpi/processor_core.c | 69 --------------------------- drivers/acpi/processor_pdc.c | 108 ++++++++++++++++++++++++++++++++++++++++++ include/acpi/processor.h | 3 ++ 6 files changed, 115 insertions(+), 69 deletions(-) create mode 100644 drivers/acpi/processor_pdc.c (limited to 'include') diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index c7b10b4298e9..66cc3f36a954 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -32,6 +32,7 @@ acpi-$(CONFIG_ACPI_SLEEP) += proc.o # acpi-y += bus.o glue.o acpi-y += scan.o +acpi-y += processor_pdc.o acpi-y += ec.o acpi-$(CONFIG_ACPI_DOCK) += dock.o acpi-y += pci_root.o pci_link.o pci_irq.o pci_bind.o diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 65f7e335f122..0bdf24a6fd01 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -888,6 +888,8 @@ static int __init acpi_bus_init(void) goto error1; } + acpi_early_processor_set_pdc(); + /* * Maybe EC region is required at bus_scan/acpi_get_devices. So it * is necessary to enable it as early as possible. diff --git a/drivers/acpi/internal.h b/drivers/acpi/internal.h index 074cf8682d52..cb28e0502acc 100644 --- a/drivers/acpi/internal.h +++ b/drivers/acpi/internal.h @@ -43,6 +43,7 @@ int acpi_power_transition(struct acpi_device *device, int state); extern int acpi_power_nocheck; int acpi_wakeup_device_init(void); +void acpi_early_processor_set_pdc(void); /* -------------------------------------------------------------------------- Embedded Controller diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 41731236f9a1..a19a4ff962ea 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -124,29 +124,6 @@ static const struct file_operations acpi_processor_info_fops = { DEFINE_PER_CPU(struct acpi_processor *, processors); struct acpi_processor_errata errata __read_mostly; -static int set_no_mwait(const struct dmi_system_id *id) -{ - printk(KERN_NOTICE PREFIX "%s detected - " - "disabling mwait for CPU C-states\n", id->ident); - idle_nomwait = 1; - return 0; -} - -static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = { - { - set_no_mwait, "IFL91 board", { - DMI_MATCH(DMI_BIOS_VENDOR, "COMPAL"), - DMI_MATCH(DMI_SYS_VENDOR, "ZEPTO"), - DMI_MATCH(DMI_PRODUCT_VERSION, "3215W"), - DMI_MATCH(DMI_BOARD_NAME, "IFL91") }, NULL}, - { - set_no_mwait, "Extensa 5220", { - DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), - DMI_MATCH(DMI_SYS_VENDOR, "Acer"), - DMI_MATCH(DMI_PRODUCT_VERSION, "0100"), - DMI_MATCH(DMI_BOARD_NAME, "Columbia") }, NULL}, - {}, -}; /* -------------------------------------------------------------------------- Errata Handling @@ -276,45 +253,6 @@ static int acpi_processor_errata(struct acpi_processor *pr) return result; } -/* -------------------------------------------------------------------------- - Common ACPI processor functions - -------------------------------------------------------------------------- */ - -/* - * _PDC is required 
for a BIOS-OS handshake for most of the newer - * ACPI processor features. - */ -static int acpi_processor_set_pdc(struct acpi_processor *pr) -{ - struct acpi_object_list *pdc_in = pr->pdc; - acpi_status status = AE_OK; - - - if (!pdc_in) - return status; - if (idle_nomwait) { - /* - * If mwait is disabled for CPU C-states, the C2C3_FFH access - * mode will be disabled in the parameter of _PDC object. - * Of course C1_FFH access mode will also be disabled. - */ - union acpi_object *obj; - u32 *buffer = NULL; - - obj = pdc_in->pointer; - buffer = (u32 *)(obj->buffer.pointer); - buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH); - - } - status = acpi_evaluate_object(pr->handle, "_PDC", pdc_in, NULL); - - if (ACPI_FAILURE(status)) - ACPI_DEBUG_PRINT((ACPI_DB_INFO, - "Could not evaluate _PDC, using legacy perf. control...\n")); - - return status; -} - /* -------------------------------------------------------------------------- FS Interface (/proc) -------------------------------------------------------------------------- */ @@ -825,9 +763,7 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device) } /* _PDC call should be done before doing anything else (if reqd.). */ - arch_acpi_processor_init_pdc(pr); acpi_processor_set_pdc(pr); - arch_acpi_processor_cleanup_pdc(pr); #ifdef CONFIG_CPU_FREQ acpi_processor_ppc_has_changed(pr, 0); @@ -1145,11 +1081,6 @@ static int __init acpi_processor_init(void) if (!acpi_processor_dir) return -ENOMEM; #endif - /* - * Check whether the system is DMI table. If yes, OSPM - * should not use mwait for CPU-states. - */ - dmi_check_system(processor_idle_dmi_table); result = cpuidle_register_driver(&acpi_idle_driver); if (result < 0) goto out_proc; diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c new file mode 100644 index 000000000000..b416c32dda04 --- /dev/null +++ b/drivers/acpi/processor_pdc.c @@ -0,0 +1,108 @@ +#include + +#include +#include + +#include "internal.h" + +#define PREFIX "ACPI: " +#define _COMPONENT ACPI_PROCESSOR_COMPONENT +ACPI_MODULE_NAME("processor_pdc"); + +static int set_no_mwait(const struct dmi_system_id *id) +{ + printk(KERN_NOTICE PREFIX "%s detected - " + "disabling mwait for CPU C-states\n", id->ident); + idle_nomwait = 1; + return 0; +} + +static struct dmi_system_id __cpuinitdata processor_idle_dmi_table[] = { + { + set_no_mwait, "IFL91 board", { + DMI_MATCH(DMI_BIOS_VENDOR, "COMPAL"), + DMI_MATCH(DMI_SYS_VENDOR, "ZEPTO"), + DMI_MATCH(DMI_PRODUCT_VERSION, "3215W"), + DMI_MATCH(DMI_BOARD_NAME, "IFL91") }, NULL}, + { + set_no_mwait, "Extensa 5220", { + DMI_MATCH(DMI_BIOS_VENDOR, "Phoenix Technologies LTD"), + DMI_MATCH(DMI_SYS_VENDOR, "Acer"), + DMI_MATCH(DMI_PRODUCT_VERSION, "0100"), + DMI_MATCH(DMI_BOARD_NAME, "Columbia") }, NULL}, + {}, +}; + +/* + * _PDC is required for a BIOS-OS handshake for most of the newer + * ACPI processor features. + */ +static int acpi_processor_eval_pdc(struct acpi_processor *pr) +{ + struct acpi_object_list *pdc_in = pr->pdc; + acpi_status status = AE_OK; + + if (!pdc_in) + return status; + if (idle_nomwait) { + /* + * If mwait is disabled for CPU C-states, the C2C3_FFH access + * mode will be disabled in the parameter of _PDC object. + * Of course C1_FFH access mode will also be disabled. 
+ */ + union acpi_object *obj; + u32 *buffer = NULL; + + obj = pdc_in->pointer; + buffer = (u32 *)(obj->buffer.pointer); + buffer[2] &= ~(ACPI_PDC_C_C2C3_FFH | ACPI_PDC_C_C1_FFH); + + } + status = acpi_evaluate_object(pr->handle, "_PDC", pdc_in, NULL); + + if (ACPI_FAILURE(status)) + ACPI_DEBUG_PRINT((ACPI_DB_INFO, + "Could not evaluate _PDC, using legacy perf. control.\n")); + + return status; +} + +void acpi_processor_set_pdc(struct acpi_processor *pr) +{ + arch_acpi_processor_init_pdc(pr); + acpi_processor_eval_pdc(pr); + arch_acpi_processor_cleanup_pdc(pr); +} +EXPORT_SYMBOL_GPL(acpi_processor_set_pdc); + +static acpi_status +early_init_pdc(acpi_handle handle, u32 lvl, void *context, void **rv) +{ + struct acpi_processor pr; + + pr.handle = handle; + + /* x86 implementation looks at pr.id to determine some + * CPU capabilites. We can just hard code to 0 since we're + * assuming the CPUs in the system are homogenous and all + * have the same capabilities. + */ + pr.id = 0; + + acpi_processor_set_pdc(&pr); + + return AE_OK; +} + +void acpi_early_processor_set_pdc(void) +{ + /* + * Check whether the system is DMI table. If yes, OSPM + * should not use mwait for CPU-states. + */ + dmi_check_system(processor_idle_dmi_table); + + acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, + ACPI_UINT32_MAX, + early_init_pdc, NULL, NULL, NULL); +} diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 29245c6b5c0e..a1b748a7a766 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -325,6 +325,9 @@ static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) #endif /* CONFIG_CPU_FREQ */ +/* in processor_pdc.c */ +void acpi_processor_set_pdc(struct acpi_processor *pr); + /* in processor_throttling.c */ int acpi_processor_tstate_has_changed(struct acpi_processor *pr); int acpi_processor_get_throttling_info(struct acpi_processor *pr); -- cgit v1.2.3-70-g09d2 From 6c5807d7bc7d051fce00863ffb98d36325501eb2 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:19:29 -0700 Subject: ACPI: processor: finish unifying arch_acpi_processor_init_pdc() The only thing arch-specific about calling _PDC is what bits get set in the input obj_list buffer. There's no need for several levels of indirection to twiddle those bits. Additionally, since we're just messing around with a buffer, we can simplify the interface; no need to pass around the entire struct acpi_processor * just to get at the buffer. Cc: Tony Luck Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. 
Peter Anvin" Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- arch/ia64/include/asm/acpi.h | 4 ++++ arch/ia64/kernel/acpi-processor.c | 18 ------------------ arch/x86/include/asm/acpi.h | 19 +++++++++++++++++++ arch/x86/kernel/acpi/processor.c | 34 ---------------------------------- drivers/acpi/processor_pdc.c | 6 +++--- include/acpi/processor.h | 1 - 6 files changed, 26 insertions(+), 56 deletions(-) (limited to 'include') diff --git a/arch/ia64/include/asm/acpi.h b/arch/ia64/include/asm/acpi.h index 3b7888294648..7ae58892ba8d 100644 --- a/arch/ia64/include/asm/acpi.h +++ b/arch/ia64/include/asm/acpi.h @@ -133,6 +133,10 @@ extern int __initdata nid_to_pxm_map[MAX_NUMNODES]; #endif static inline bool arch_has_acpi_pdc(void) { return true; } +static inline void arch_acpi_set_pdc_bits(u32 *buf) +{ + buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; +} #define acpi_unlazy_tlb(x) diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c index ebe23f58bd6e..7ba5accebf66 100644 --- a/arch/ia64/kernel/acpi-processor.c +++ b/arch/ia64/kernel/acpi-processor.c @@ -14,24 +14,6 @@ #include #include -static void init_intel_pdc(struct acpi_processor *pr) -{ - u32 *buf = (u32 *)pr->pdc->pointer->buffer.pointer; - - buf[2] |= ACPI_PDC_EST_CAPABILITY_SMP; - - return; -} - -/* Initialize _PDC data based on the CPU vendor */ -void arch_acpi_processor_init_pdc(struct acpi_processor *pr) -{ - init_intel_pdc(pr); - return; -} - -EXPORT_SYMBOL(arch_acpi_processor_init_pdc); - void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) { if (pr->pdc) { diff --git a/arch/x86/include/asm/acpi.h b/arch/x86/include/asm/acpi.h index d787e6e92bd1..56f462cf22d2 100644 --- a/arch/x86/include/asm/acpi.h +++ b/arch/x86/include/asm/acpi.h @@ -149,6 +149,25 @@ static inline bool arch_has_acpi_pdc(void) c->x86_vendor == X86_VENDOR_CENTAUR); } +static inline void arch_acpi_set_pdc_bits(u32 *buf) +{ + struct cpuinfo_x86 *c = &cpu_data(0); + + buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; + + if (cpu_has(c, X86_FEATURE_EST)) + buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; + + if (cpu_has(c, X86_FEATURE_ACPI)) + buf[2] |= ACPI_PDC_T_FFH; + + /* + * If mwait/monitor is unsupported, C2/C3_FFH will be disabled + */ + if (!cpu_has(c, X86_FEATURE_MWAIT)) + buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); +} + #else /* !CONFIG_ACPI */ #define acpi_lapic 0 diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c index d722ca8cb4c7..0f57307f8224 100644 --- a/arch/x86/kernel/acpi/processor.c +++ b/arch/x86/kernel/acpi/processor.c @@ -12,40 +12,6 @@ #include #include -static void init_intel_pdc(struct acpi_processor *pr, struct cpuinfo_x86 *c) -{ - u32 *buf = (u32 *)pr->pdc->pointer->buffer.pointer; - - buf[2] |= ACPI_PDC_C_CAPABILITY_SMP; - - if (cpu_has(c, X86_FEATURE_EST)) - buf[2] |= ACPI_PDC_EST_CAPABILITY_SWSMP; - - if (cpu_has(c, X86_FEATURE_ACPI)) - buf[2] |= ACPI_PDC_T_FFH; - - /* - * If mwait/monitor is unsupported, C2/C3_FFH will be disabled - */ - if (!cpu_has(c, X86_FEATURE_MWAIT)) - buf[2] &= ~(ACPI_PDC_C_C2C3_FFH); - - return; -} - - -/* Initialize _PDC data based on the CPU vendor */ -void arch_acpi_processor_init_pdc(struct acpi_processor *pr) -{ - struct cpuinfo_x86 *c = &cpu_data(pr->id); - - init_intel_pdc(pr, c); - - return; -} - -EXPORT_SYMBOL(arch_acpi_processor_init_pdc); - void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) { if (pr->pdc) { diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index ccda7c95d073..48df08ebcec4 100644 --- 
a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -40,6 +40,9 @@ static void acpi_set_pdc_bits(u32 *buf) /* Enable coordination with firmware's _TSD info */ buf[2] = ACPI_PDC_SMP_T_SWCOORD; + + /* Twiddle arch-specific bits needed for _PDC */ + arch_acpi_set_pdc_bits(buf); } static void acpi_processor_init_pdc(struct acpi_processor *pr) @@ -81,9 +84,6 @@ static void acpi_processor_init_pdc(struct acpi_processor *pr) obj_list->pointer = obj; pr->pdc = obj_list; - /* Now let the arch do the bit-twiddling to buf[] */ - arch_acpi_processor_init_pdc(pr); - return; } diff --git a/include/acpi/processor.h b/include/acpi/processor.h index a1b748a7a766..50edd734aec6 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -257,7 +257,6 @@ int acpi_processor_notify_smm(struct module *calling_module); DECLARE_PER_CPU(struct acpi_processor *, processors); extern struct acpi_processor_errata errata; -void arch_acpi_processor_init_pdc(struct acpi_processor *pr); void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr); #ifdef ARCH_HAS_POWER_INIT -- cgit v1.2.3-70-g09d2 From 47817254b8637b56730aec26eed2c337d3938bb5 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:19:34 -0700 Subject: ACPI: processor: unify arch_acpi_processor_cleanup_pdc The x86 and ia64 implementations of the function in $subject are exactly the same. Also, since the arch-specific implementations of setting _PDC have been completely hollowed out, remove the empty shells. Cc: Tony Luck Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- arch/ia64/kernel/Makefile | 4 ---- arch/ia64/kernel/acpi-processor.c | 27 --------------------------- arch/x86/kernel/acpi/Makefile | 2 +- arch/x86/kernel/acpi/processor.c | 25 ------------------------- drivers/acpi/processor_pdc.c | 21 ++++++++++++++++++++- include/acpi/processor.h | 2 -- 6 files changed, 21 insertions(+), 60 deletions(-) delete mode 100644 arch/ia64/kernel/acpi-processor.c delete mode 100644 arch/x86/kernel/acpi/processor.c (limited to 'include') diff --git a/arch/ia64/kernel/Makefile b/arch/ia64/kernel/Makefile index 2a75e937ae8d..e1236349c99f 100644 --- a/arch/ia64/kernel/Makefile +++ b/arch/ia64/kernel/Makefile @@ -18,10 +18,6 @@ obj-$(CONFIG_IA64_GENERIC) += acpi-ext.o obj-$(CONFIG_IA64_HP_ZX1) += acpi-ext.o obj-$(CONFIG_IA64_HP_ZX1_SWIOTLB) += acpi-ext.o -ifneq ($(CONFIG_ACPI_PROCESSOR),) -obj-y += acpi-processor.o -endif - obj-$(CONFIG_IA64_PALINFO) += palinfo.o obj-$(CONFIG_IOSAPIC) += iosapic.o obj-$(CONFIG_MODULES) += module.o diff --git a/arch/ia64/kernel/acpi-processor.c b/arch/ia64/kernel/acpi-processor.c deleted file mode 100644 index 7ba5accebf66..000000000000 --- a/arch/ia64/kernel/acpi-processor.c +++ /dev/null @@ -1,27 +0,0 @@ -/* - * arch/ia64/kernel/acpi-processor.c - * - * Copyright (C) 2005 Intel Corporation - * Venkatesh Pallipadi - * - Added _PDC for platforms with Intel CPUs - */ - -#include -#include -#include -#include - -#include -#include - -void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) -{ - if (pr->pdc) { - kfree(pr->pdc->pointer->buffer.pointer); - kfree(pr->pdc->pointer); - kfree(pr->pdc); - pr->pdc = NULL; - } -} - -EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc); diff --git a/arch/x86/kernel/acpi/Makefile b/arch/x86/kernel/acpi/Makefile index fd5ca97a2ad5..6f35260bb3ef 100644 --- a/arch/x86/kernel/acpi/Makefile +++ b/arch/x86/kernel/acpi/Makefile @@ -4,7 +4,7 @@ obj-$(CONFIG_ACPI) += boot.o obj-$(CONFIG_ACPI_SLEEP) += 
sleep.o wakeup_rm.o wakeup_$(BITS).o ifneq ($(CONFIG_ACPI_PROCESSOR),) -obj-y += cstate.o processor.o +obj-y += cstate.o endif $(obj)/wakeup_rm.o: $(obj)/realmode/wakeup.bin diff --git a/arch/x86/kernel/acpi/processor.c b/arch/x86/kernel/acpi/processor.c deleted file mode 100644 index 0f57307f8224..000000000000 --- a/arch/x86/kernel/acpi/processor.c +++ /dev/null @@ -1,25 +0,0 @@ -/* - * Copyright (C) 2005 Intel Corporation - * Venkatesh Pallipadi - * - Added _PDC for platforms with Intel CPUs - */ - -#include -#include -#include -#include - -#include -#include - -void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr) -{ - if (pr->pdc) { - kfree(pr->pdc->pointer->buffer.pointer); - kfree(pr->pdc->pointer); - kfree(pr->pdc); - pr->pdc = NULL; - } -} - -EXPORT_SYMBOL(arch_acpi_processor_cleanup_pdc); diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index 48df08ebcec4..e786e2ce1882 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -1,3 +1,12 @@ +/* + * Copyright (C) 2005 Intel Corporation + * Copyright (C) 2009 Hewlett-Packard Development Company, L.P. + * + * Alex Chiang + * - Unified x86/ia64 implementations + * Venkatesh Pallipadi + * - Added _PDC for platforms with Intel CPUs + */ #include #include @@ -121,6 +130,16 @@ static int acpi_processor_eval_pdc(struct acpi_processor *pr) return status; } +static void acpi_processor_cleanup_pdc(struct acpi_processor *pr) +{ + if (pr->pdc) { + kfree(pr->pdc->pointer->buffer.pointer); + kfree(pr->pdc->pointer); + kfree(pr->pdc); + pr->pdc = NULL; + } +} + void acpi_processor_set_pdc(struct acpi_processor *pr) { if (arch_has_acpi_pdc() == false) @@ -128,7 +147,7 @@ void acpi_processor_set_pdc(struct acpi_processor *pr) acpi_processor_init_pdc(pr); acpi_processor_eval_pdc(pr); - arch_acpi_processor_cleanup_pdc(pr); + acpi_processor_cleanup_pdc(pr); } EXPORT_SYMBOL_GPL(acpi_processor_set_pdc); diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 50edd734aec6..0873cd57510c 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -257,8 +257,6 @@ int acpi_processor_notify_smm(struct module *calling_module); DECLARE_PER_CPU(struct acpi_processor *, processors); extern struct acpi_processor_errata errata; -void arch_acpi_processor_cleanup_pdc(struct acpi_processor *pr); - #ifdef ARCH_HAS_POWER_INIT void acpi_processor_power_init_bm_check(struct acpi_processor_flags *flags, unsigned int cpu); -- cgit v1.2.3-70-g09d2 From 43bab25ced218385f7e6a076c2459ea008cfd2e1 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:23:16 -0700 Subject: ACPI: processor: change acpi_processor_set_pdc() interface When calling _PDC, we really only need the handle to the processor to call the method; we don't look at any other parts of the struct acpi_processor * given to us. In the early path, when we walk the namespace, we are given the handle directly, so just pass it through to acpi_processor_set_pdc() without stuffing it into a wasteful struct acpi_processor allocated on the stack each time This saves 2834 bytes of stack. Update the interface accordingly. 
Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- drivers/acpi/processor_core.c | 2 +- drivers/acpi/processor_pdc.c | 18 +++--------------- include/acpi/processor.h | 2 +- 3 files changed, 5 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index a19a4ff962ea..9863c98c81ba 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -763,7 +763,7 @@ static int __cpuinit acpi_processor_add(struct acpi_device *device) } /* _PDC call should be done before doing anything else (if reqd.). */ - acpi_processor_set_pdc(pr); + acpi_processor_set_pdc(pr->handle); #ifdef CONFIG_CPU_FREQ acpi_processor_ppc_has_changed(pr, 0); diff --git a/drivers/acpi/processor_pdc.c b/drivers/acpi/processor_pdc.c index deeba22c932c..30e4dc0cdf30 100644 --- a/drivers/acpi/processor_pdc.c +++ b/drivers/acpi/processor_pdc.c @@ -125,7 +125,7 @@ acpi_processor_eval_pdc(acpi_handle handle, struct acpi_object_list *pdc_in) return status; } -void acpi_processor_set_pdc(struct acpi_processor *pr) +void acpi_processor_set_pdc(acpi_handle handle) { struct acpi_object_list *obj_list; @@ -136,7 +136,7 @@ void acpi_processor_set_pdc(struct acpi_processor *pr) if (!obj_list) return; - acpi_processor_eval_pdc(pr->handle, obj_list); + acpi_processor_eval_pdc(handle, obj_list); kfree(obj_list->pointer->buffer.pointer); kfree(obj_list->pointer); @@ -147,19 +147,7 @@ EXPORT_SYMBOL_GPL(acpi_processor_set_pdc); static acpi_status early_init_pdc(acpi_handle handle, u32 lvl, void *context, void **rv) { - struct acpi_processor pr; - - pr.handle = handle; - - /* x86 implementation looks at pr.id to determine some - * CPU capabilites. We can just hard code to 0 since we're - * assuming the CPUs in the system are homogenous and all - * have the same capabilities. - */ - pr.id = 0; - - acpi_processor_set_pdc(&pr); - + acpi_processor_set_pdc(handle); return AE_OK; } diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 0873cd57510c..7cc433d69932 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -323,7 +323,7 @@ static inline int acpi_processor_get_bios_limit(int cpu, unsigned int *limit) #endif /* CONFIG_CPU_FREQ */ /* in processor_pdc.c */ -void acpi_processor_set_pdc(struct acpi_processor *pr); +void acpi_processor_set_pdc(acpi_handle handle); /* in processor_throttling.c */ int acpi_processor_tstate_has_changed(struct acpi_processor *pr); -- cgit v1.2.3-70-g09d2 From e59897fe443b5b0a71e135ef4020d1937c9f8901 Mon Sep 17 00:00:00 2001 From: Alex Chiang Date: Sun, 20 Dec 2009 12:23:21 -0700 Subject: ACPI: processor: remove _PDC object list from struct acpi_processor When we call _PDC, we get a handle to the processor, allocate the object list buffer as needed, and free it immediately after calling _PDC. There's no need to drag around this object list with us everywhere else, so let's just get rid of it. 
Signed-off-by: Alex Chiang Signed-off-by: Len Brown --- include/acpi/processor.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 7cc433d69932..0ea5ef4eb6a9 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -224,8 +224,6 @@ struct acpi_processor { struct acpi_processor_throttling throttling; struct acpi_processor_limit limit; struct thermal_cooling_device *cdev; - /* the _PDC objects for this processor, if any */ - struct acpi_object_list *pdc; }; struct acpi_processor_errata { -- cgit v1.2.3-70-g09d2 From cc3e1bea5d87635c519da657303690f5538bb4eb Mon Sep 17 00:00:00 2001 From: Theodore Ts'o Date: Wed, 23 Dec 2009 06:52:08 -0500 Subject: ext4, jbd2: Add barriers for file systems with exernal journals This is a bit complicated because we are trying to optimize when we send barriers to the fs data disk. We could just throw in an extra barrier to the data disk whenever we send a barrier to the journal disk, but that's not always strictly necessary. We only need to send a barrier during a commit when there are data blocks which are must be written out due to an inode written in ordered mode, or if fsync() depends on the commit to force data blocks to disk. Finally, before we drop transactions from the beginning of the journal during a checkpoint operation, we need to guarantee that any blocks that were flushed out to the data disk are firmly on the rust platter before we drop the transaction from the journal. Thanks to Oleg Drokin for pointing out this flaw in ext3/ext4. Signed-off-by: "Theodore Ts'o" --- fs/ext4/fsync.c | 16 ++++++++++++++-- fs/jbd2/checkpoint.c | 15 +++++++++++++++ fs/jbd2/commit.c | 19 +++++++++++-------- include/linux/jbd2.h | 1 + 4 files changed, 41 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/fs/ext4/fsync.c b/fs/ext4/fsync.c index 0b22497d92e1..98bd140aad01 100644 --- a/fs/ext4/fsync.c +++ b/fs/ext4/fsync.c @@ -88,9 +88,21 @@ int ext4_sync_file(struct file *file, struct dentry *dentry, int datasync) return ext4_force_commit(inode->i_sb); commit_tid = datasync ? ei->i_datasync_tid : ei->i_sync_tid; - if (jbd2_log_start_commit(journal, commit_tid)) + if (jbd2_log_start_commit(journal, commit_tid)) { + /* + * When the journal is on a different device than the + * fs data disk, we need to issue the barrier in + * writeback mode. (In ordered mode, the jbd2 layer + * will take care of issuing the barrier. In + * data=journal, all of the data blocks are written to + * the journal device.) 
+ */ + if (ext4_should_writeback_data(inode) && + (journal->j_fs_dev != journal->j_dev) && + (journal->j_flags & JBD2_BARRIER)) + blkdev_issue_flush(inode->i_sb->s_bdev, NULL); jbd2_log_wait_commit(journal, commit_tid); - else if (journal->j_flags & JBD2_BARRIER) + } else if (journal->j_flags & JBD2_BARRIER) blkdev_issue_flush(inode->i_sb->s_bdev, NULL); return ret; } diff --git a/fs/jbd2/checkpoint.c b/fs/jbd2/checkpoint.c index ca0f5eb62b20..886849370950 100644 --- a/fs/jbd2/checkpoint.c +++ b/fs/jbd2/checkpoint.c @@ -22,6 +22,7 @@ #include #include #include +#include #include /* @@ -515,6 +516,20 @@ int jbd2_cleanup_journal_tail(journal_t *journal) journal->j_tail_sequence = first_tid; journal->j_tail = blocknr; spin_unlock(&journal->j_state_lock); + + /* + * If there is an external journal, we need to make sure that + * any data blocks that were recently written out --- perhaps + * by jbd2_log_do_checkpoint() --- are flushed out before we + * drop the transactions from the external journal. It's + * unlikely this will be necessary, especially with a + * appropriately sized journal, but we need this to guarantee + * correctness. Fortunately jbd2_cleanup_journal_tail() + * doesn't get called all that often. + */ + if ((journal->j_fs_dev != journal->j_dev) && + (journal->j_flags & JBD2_BARRIER)) + blkdev_issue_flush(journal->j_fs_dev, NULL); if (!(journal->j_flags & JBD2_ABORT)) jbd2_journal_update_superblock(journal, 1); return 0; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 6a10238d2c63..1bc74b6f26d2 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -259,6 +259,7 @@ static int journal_submit_data_buffers(journal_t *journal, ret = err; spin_lock(&journal->j_list_lock); J_ASSERT(jinode->i_transaction == commit_transaction); + commit_transaction->t_flushed_data_blocks = 1; jinode->i_flags &= ~JI_COMMIT_RUNNING; wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING); } @@ -708,8 +709,17 @@ start_journal_io: } } - /* Done it all: now write the commit record asynchronously. */ + /* + * If the journal is not located on the file system device, + * then we must flush the file system device before we issue + * the commit record + */ + if (commit_transaction->t_flushed_data_blocks && + (journal->j_fs_dev != journal->j_dev) && + (journal->j_flags & JBD2_BARRIER)) + blkdev_issue_flush(journal->j_fs_dev, NULL); + /* Done it all: now write the commit record asynchronously. */ if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT)) { err = journal_submit_commit_record(journal, commit_transaction, @@ -720,13 +730,6 @@ start_journal_io: blkdev_issue_flush(journal->j_dev, NULL); } - /* - * This is the right place to wait for data buffers both for ASYNC - * and !ASYNC commit. If commit is ASYNC, we need to wait only after - * the commit block went to disk (which happens above). If commit is - * SYNC, we need to wait for data buffers before we start writing - * commit block, which happens below in such setting. - */ err = journal_finish_inode_data_buffers(journal, commit_transaction); if (err) { printk(KERN_WARNING diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f1011f7f3d41..638ce4554c76 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -653,6 +653,7 @@ struct transaction_s * waiting for it to finish. 
*/ unsigned int t_synchronous_commit:1; + unsigned int t_flushed_data_blocks:1; /* * For use by the filesystem to store fs-specific data -- cgit v1.2.3-70-g09d2 From 17bd55d037a02b04d9119511cfd1a4b985d20f63 Mon Sep 17 00:00:00 2001 From: Eric Sandeen Date: Wed, 23 Dec 2009 07:57:07 -0500 Subject: fs-writeback: Add helper function to start writeback if idle ext4, at least, would like to start pushing on writeback if it starts to get close to ENOSPC when reserving worst-case blocks for delalloc writes. Writing out delalloc data will convert those worst-case predictions into usually smaller actual usage, freeing up space before we hit ENOSPC based on this speculation. Thanks to Jens for the suggestion for the helper function, & the naming help. I've made the helper return status on whether writeback was started even though I don't plan to use it in the ext4 patch; it seems like it would be potentially useful to test this in some cases. Signed-off-by: Eric Sandeen Acked-by: Jan Kara --- fs/fs-writeback.c | 17 +++++++++++++++++ include/linux/writeback.h | 1 + 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 49bc1b8e8f19..f6c2155e0026 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1186,6 +1186,23 @@ void writeback_inodes_sb(struct super_block *sb) } EXPORT_SYMBOL(writeback_inodes_sb); +/** + * writeback_inodes_sb_if_idle - start writeback if none underway + * @sb: the superblock + * + * Invoke writeback_inodes_sb if no writeback is currently underway. + * Returns 1 if writeback was started, 0 if not. + */ +int writeback_inodes_sb_if_idle(struct super_block *sb) +{ + if (!writeback_in_progress(sb->s_bdi)) { + writeback_inodes_sb(sb); + return 1; + } else + return 0; +} +EXPORT_SYMBOL(writeback_inodes_sb_if_idle); + /** * sync_inodes_sb - sync sb inode pages * @sb: the superblock diff --git a/include/linux/writeback.h b/include/linux/writeback.h index c18c008f4bbf..76e8903cd204 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -70,6 +70,7 @@ struct writeback_control { struct bdi_writeback; int inode_wait(void *); void writeback_inodes_sb(struct super_block *); +int writeback_inodes_sb_if_idle(struct super_block *); void sync_inodes_sb(struct super_block *); void writeback_inodes_wbc(struct writeback_control *wbc); long wb_do_writeback(struct bdi_writeback *wb, int force_wait); -- cgit v1.2.3-70-g09d2 From 28f6aeea3f12d37bd258b2c0d5ba891bff4ec479 Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Fri, 25 Dec 2009 17:30:22 -0800 Subject: net: restore ip source validation when using policy routing and the skb mark: there are cases where a back path validation requires us to use a different routing table for src ip validation than the one used for mapping ingress dst ip. One such a case is transparent proxying where we pretend to be the destination system and therefore the local table is used for incoming packets but possibly a main table would be used on outbound. Make the default behavior to allow the above and if users need to turn on the symmetry via sysctl src_valid_mark Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. 
Miller --- include/linux/inetdevice.h | 1 + include/linux/sysctl.h | 1 + net/ipv4/devinet.c | 1 + net/ipv4/fib_frontend.c | 2 ++ 4 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 699e85c01a4d..b2304929434e 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -81,6 +81,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_FORWARD(in_dev) IN_DEV_CONF_GET((in_dev), FORWARDING) #define IN_DEV_MFORWARD(in_dev) IN_DEV_ANDCONF((in_dev), MC_FORWARDING) #define IN_DEV_RPFILTER(in_dev) IN_DEV_MAXCONF((in_dev), RP_FILTER) +#define IN_DEV_SRC_VMARK(in_dev) IN_DEV_ORCONF((in_dev), SRC_VMARK) #define IN_DEV_SOURCE_ROUTE(in_dev) IN_DEV_ANDCONF((in_dev), \ ACCEPT_SOURCE_ROUTE) #define IN_DEV_ACCEPT_LOCAL(in_dev) IN_DEV_ORCONF((in_dev), ACCEPT_LOCAL) diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 877ba039e6a4..bd27fbc9db62 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -482,6 +482,7 @@ enum NET_IPV4_CONF_ARP_ACCEPT=21, NET_IPV4_CONF_ARP_NOTIFY=22, NET_IPV4_CONF_ACCEPT_LOCAL=23, + NET_IPV4_CONF_SRC_VMARK=24, __NET_IPV4_CONF_MAX }; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5cdbc102a418..040c4f05b653 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1397,6 +1397,7 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE, "accept_source_route"), DEVINET_SYSCTL_RW_ENTRY(ACCEPT_LOCAL, "accept_local"), + DEVINET_SYSCTL_RW_ENTRY(SRC_VMARK, "src_valid_mark"), DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"), DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"), DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"), diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 3323168ee52d..82dbf711d6d0 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -252,6 +252,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, no_addr = in_dev->ifa_list == NULL; rpf = IN_DEV_RPFILTER(in_dev); accept_local = IN_DEV_ACCEPT_LOCAL(in_dev); + if (mark && !IN_DEV_SRC_VMARK(in_dev)) + fl.mark = 0; } rcu_read_unlock(); -- cgit v1.2.3-70-g09d2 From 81744ee44ab2845c16ffd7d6f762f7b4a49a4750 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 29 Dec 2009 08:35:35 +0100 Subject: block: Fix incorrect alignment offset reporting and update documentation queue_sector_alignment_offset returned the wrong value which caused partitions to report an incorrect alignment_offset. Since offset alignment calculation is needed several places it has been split into a separate helper function. The topology stacking function has been updated accordingly. Furthermore, comments have been added to clarify how the stacking function works. Signed-off-by: Martin K. 
Petersen Tested-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-settings.c | 44 +++++++++++++++++++++++++++++++++----------- include/linux/blkdev.h | 11 +++++++++-- 2 files changed, 42 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index e14fcbcedbfa..d52d4adc440b 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -505,20 +505,30 @@ static unsigned int lcm(unsigned int a, unsigned int b) /** * blk_stack_limits - adjust queue_limits for stacked devices - * @t: the stacking driver limits (top) - * @b: the underlying queue limits (bottom) + * @t: the stacking driver limits (top device) + * @b: the underlying queue limits (bottom, component device) * @offset: offset to beginning of data within component device * * Description: - * Merges two queue_limit structs. Returns 0 if alignment didn't - * change. Returns -1 if adding the bottom device caused - * misalignment. + * This function is used by stacking drivers like MD and DM to ensure + * that all component devices have compatible block sizes and + * alignments. The stacking driver must provide a queue_limits + * struct (top) and then iteratively call the stacking function for + * all component (bottom) devices. The stacking function will + * attempt to combine the values and ensure proper alignment. + * + * Returns 0 if the top and bottom queue_limits are compatible. The + * top device's block sizes and alignment offsets may be adjusted to + * ensure alignment with the bottom device. If no compatible sizes + * and alignments exist, -1 is returned and the resulting top + * queue_limits will have the misaligned flag set to indicate that + * the alignment_offset is undefined. */ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset) { sector_t alignment; - unsigned int top, bottom, granularity; + unsigned int top, bottom; t->max_sectors = min_not_zero(t->max_sectors, b->max_sectors); t->max_hw_sectors = min_not_zero(t->max_hw_sectors, b->max_hw_sectors); @@ -536,15 +546,18 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->max_segment_size = min_not_zero(t->max_segment_size, b->max_segment_size); - granularity = max(b->physical_block_size, b->io_min); - alignment = b->alignment_offset - (offset & (granularity - 1)); + alignment = queue_limit_alignment_offset(b, offset); + /* Bottom device has different alignment. Check that it is + * compatible with the current top alignment. + */ if (t->alignment_offset != alignment) { top = max(t->physical_block_size, t->io_min) + t->alignment_offset; - bottom = granularity + alignment; + bottom = max(b->physical_block_size, b->io_min) + alignment; + /* Verify that top and bottom intervals line up */ if (max(top, bottom) & (min(top, bottom) - 1)) t->misaligned = 1; } @@ -561,32 +574,39 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->no_cluster |= b->no_cluster; t->discard_zeroes_data &= b->discard_zeroes_data; + /* Physical block size a multiple of the logical block size? */ if (t->physical_block_size & (t->logical_block_size - 1)) { t->physical_block_size = t->logical_block_size; t->misaligned = 1; } + /* Minimum I/O a multiple of the physical block size? */ if (t->io_min & (t->physical_block_size - 1)) { t->io_min = t->physical_block_size; t->misaligned = 1; } + /* Optimal I/O a multiple of the physical block size? 
*/ if (t->io_opt & (t->physical_block_size - 1)) { t->io_opt = 0; t->misaligned = 1; } + /* Find lowest common alignment_offset */ t->alignment_offset = lcm(t->alignment_offset, alignment) & (max(t->physical_block_size, t->io_min) - 1); + /* Verify that new alignment_offset is on a logical block boundary */ if (t->alignment_offset & (t->logical_block_size - 1)) t->misaligned = 1; /* Discard alignment and granularity */ if (b->discard_granularity) { + unsigned int granularity = b->discard_granularity; + offset &= granularity - 1; - alignment = b->discard_alignment - - (offset & (b->discard_granularity - 1)); + alignment = (granularity + b->discard_alignment - offset) + & (granularity - 1); if (t->discard_granularity != 0 && t->discard_alignment != alignment) { @@ -598,6 +618,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, t->discard_misaligned = 1; } + t->max_discard_sectors = min_not_zero(t->max_discard_sectors, + b->max_discard_sectors); t->discard_granularity = max(t->discard_granularity, b->discard_granularity); t->discard_alignment = lcm(t->discard_alignment, alignment) & diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 784a919aa0d0..59b832be3044 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1116,11 +1116,18 @@ static inline int queue_alignment_offset(struct request_queue *q) return q->limits.alignment_offset; } +static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t offset) +{ + unsigned int granularity = max(lim->physical_block_size, lim->io_min); + + offset &= granularity - 1; + return (granularity + lim->alignment_offset - offset) & (granularity - 1); +} + static inline int queue_sector_alignment_offset(struct request_queue *q, sector_t sector) { - return ((sector << 9) - q->limits.alignment_offset) - & (q->limits.io_min - 1); + return queue_limit_alignment_offset(&q->limits, sector << 9); } static inline int bdev_alignment_offset(struct block_device *bdev) -- cgit v1.2.3-70-g09d2 From db5d247ae811f49185a71e703b65acad845e4b18 Mon Sep 17 00:00:00 2001 From: Clemens Ladisch Date: Thu, 24 Dec 2009 12:05:58 +0100 Subject: firewire: fix use of multiple AV/C devices, allow multiple FCP listeners Control of more than one AV/C device at once --- e.g. camcorders, tape decks, audio devices, TV tuners --- failed or worked only unreliably, depending on driver implementation. This affected kernelspace and userspace drivers alike and was caused by firewire-core's inability to accept multiple registrations of FCP listeners. The fix allows multiple address handlers to be registered for the FCP command and response registers. When a request for these registers is received, all handlers are invoked, and the Firewire response is generated by the core and not by any handler. The cdev API does not change, i.e., userspace is still expected to send a response for FCP requests; this response is silently ignored. 
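
For illustration only, a sketch of what an in-kernel FCP listener can look like under the new rules; the handler name and payload handling are hypothetical, while the callback signature, the CSR_FCP_* constants, the fw_address_region/fw_address_handler types and the fw_core_add_address_handler() call are the ones used in this patch (it is assumed the CSR constants are visible to the driver, as they are to firedtv):

#include <linux/firewire.h>

/* Hypothetical AV/C listener on the FCP response register. */
static void example_handle_fcp(struct fw_card *card, struct fw_request *request,
			       int tcode, int destination, int source,
			       int generation, int speed,
			       unsigned long long offset,
			       void *payload, size_t length, void *callback_data)
{
	/*
	 * For requests to the FCP region the core now passes request == NULL
	 * and generates the response itself, so the handler must not call
	 * fw_send_response(); it only consumes the AV/C frame.
	 */
	if (length < 3)
		return;
	/* ... match source/generation to a known unit, parse payload ... */
}

static const struct fw_address_region example_fcp_region = {
	.start = CSR_REGISTER_BASE | CSR_FCP_RESPONSE,
	.end   = CSR_REGISTER_BASE | CSR_FCP_END,
};

static struct fw_address_handler example_fcp_handler = {
	.length           = CSR_FCP_END - CSR_FCP_RESPONSE,
	.address_callback = example_handle_fcp,
};

/*
 * With this patch, registration succeeds even if another FCP listener
 * (e.g. firedtv or a userspace client) is already present:
 *
 *	err = fw_core_add_address_handler(&example_fcp_handler,
 *					  &example_fcp_region);
 */
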
Signed-off-by: Clemens Ladisch Signed-off-by: Stefan Richter (changelog, rebased, whitespace) --- drivers/firewire/core-cdev.c | 26 ++++--- drivers/firewire/core-transaction.c | 118 ++++++++++++++++++++++++++------ drivers/media/dvb/firewire/firedtv-fw.c | 12 +--- include/linux/firewire-cdev.h | 3 + include/linux/firewire.h | 4 +- 5 files changed, 119 insertions(+), 44 deletions(-) (limited to 'include') diff --git a/drivers/firewire/core-cdev.c b/drivers/firewire/core-cdev.c index 231e6ee5ba43..2cb22d160f6e 100644 --- a/drivers/firewire/core-cdev.c +++ b/drivers/firewire/core-cdev.c @@ -601,8 +601,9 @@ static void release_request(struct client *client, struct inbound_transaction_resource *r = container_of(resource, struct inbound_transaction_resource, resource); - fw_send_response(client->device->card, r->request, - RCODE_CONFLICT_ERROR); + if (r->request) + fw_send_response(client->device->card, r->request, + RCODE_CONFLICT_ERROR); kfree(r); } @@ -645,7 +646,8 @@ static void handle_request(struct fw_card *card, struct fw_request *request, failed: kfree(r); kfree(e); - fw_send_response(card, request, RCODE_CONFLICT_ERROR); + if (request) + fw_send_response(card, request, RCODE_CONFLICT_ERROR); } static void release_address_handler(struct client *client, @@ -715,15 +717,17 @@ static int ioctl_send_response(struct client *client, void *buffer) r = container_of(resource, struct inbound_transaction_resource, resource); - if (request->length < r->length) - r->length = request->length; - - if (copy_from_user(r->data, u64_to_uptr(request->data), r->length)) { - ret = -EFAULT; - goto out; + if (r->request) { + if (request->length < r->length) + r->length = request->length; + if (copy_from_user(r->data, u64_to_uptr(request->data), + r->length)) { + ret = -EFAULT; + goto out; + } + fw_send_response(client->device->card, r->request, + request->rcode); } - - fw_send_response(client->device->card, r->request, request->rcode); out: kfree(r); diff --git a/drivers/firewire/core-transaction.c b/drivers/firewire/core-transaction.c index 842739df23e2..495849eb13cc 100644 --- a/drivers/firewire/core-transaction.c +++ b/drivers/firewire/core-transaction.c @@ -432,14 +432,20 @@ static struct fw_address_handler *lookup_overlapping_address_handler( return NULL; } +static bool is_enclosing_handler(struct fw_address_handler *handler, + unsigned long long offset, size_t length) +{ + return handler->offset <= offset && + offset + length <= handler->offset + handler->length; +} + static struct fw_address_handler *lookup_enclosing_address_handler( struct list_head *list, unsigned long long offset, size_t length) { struct fw_address_handler *handler; list_for_each_entry(handler, list, link) { - if (handler->offset <= offset && - offset + length <= handler->offset + handler->length) + if (is_enclosing_handler(handler, offset, length)) return handler; } @@ -465,6 +471,12 @@ const struct fw_address_region fw_unit_space_region = { .start = 0xfffff0000900ULL, .end = 0x1000000000000ULL, }; #endif /* 0 */ +static bool is_in_fcp_region(u64 offset, size_t length) +{ + return offset >= (CSR_REGISTER_BASE | CSR_FCP_COMMAND) && + offset + length <= (CSR_REGISTER_BASE | CSR_FCP_END); +} + /** * fw_core_add_address_handler - register for incoming requests * @handler: callback @@ -477,8 +489,11 @@ const struct fw_address_region fw_unit_space_region = * give the details of the particular request. * * Return value: 0 on success, non-zero otherwise. 
+ * * The start offset of the handler's address region is determined by * fw_core_add_address_handler() and is returned in handler->offset. + * + * Address allocations are exclusive, except for the FCP registers. */ int fw_core_add_address_handler(struct fw_address_handler *handler, const struct fw_address_region *region) @@ -498,10 +513,12 @@ int fw_core_add_address_handler(struct fw_address_handler *handler, handler->offset = region->start; while (handler->offset + handler->length <= region->end) { - other = - lookup_overlapping_address_handler(&address_handler_list, - handler->offset, - handler->length); + if (is_in_fcp_region(handler->offset, handler->length)) + other = NULL; + else + other = lookup_overlapping_address_handler + (&address_handler_list, + handler->offset, handler->length); if (other != NULL) { handler->offset += other->length; } else { @@ -668,6 +685,9 @@ static struct fw_request *allocate_request(struct fw_packet *p) void fw_send_response(struct fw_card *card, struct fw_request *request, int rcode) { + if (WARN_ONCE(!request, "invalid for FCP address handlers")) + return; + /* unified transaction or broadcast transaction: don't respond */ if (request->ack != ACK_PENDING || HEADER_DESTINATION_IS_BROADCAST(request->request_header[0])) { @@ -686,26 +706,15 @@ void fw_send_response(struct fw_card *card, } EXPORT_SYMBOL(fw_send_response); -void fw_core_handle_request(struct fw_card *card, struct fw_packet *p) +static void handle_exclusive_region_request(struct fw_card *card, + struct fw_packet *p, + struct fw_request *request, + unsigned long long offset) { struct fw_address_handler *handler; - struct fw_request *request; - unsigned long long offset; unsigned long flags; int tcode, destination, source; - if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE) - return; - - request = allocate_request(p); - if (request == NULL) { - /* FIXME: send statically allocated busy packet. 
*/ - return; - } - - offset = - ((unsigned long long) - HEADER_GET_OFFSET_HIGH(p->header[1]) << 32) | p->header[2]; tcode = HEADER_GET_TCODE(p->header[0]); destination = HEADER_GET_DESTINATION(p->header[0]); source = HEADER_GET_SOURCE(p->header[1]); @@ -732,6 +741,73 @@ void fw_core_handle_request(struct fw_card *card, struct fw_packet *p) request->data, request->length, handler->callback_data); } + +static void handle_fcp_region_request(struct fw_card *card, + struct fw_packet *p, + struct fw_request *request, + unsigned long long offset) +{ + struct fw_address_handler *handler; + unsigned long flags; + int tcode, destination, source; + + if ((offset != (CSR_REGISTER_BASE | CSR_FCP_COMMAND) && + offset != (CSR_REGISTER_BASE | CSR_FCP_RESPONSE)) || + request->length > 0x200) { + fw_send_response(card, request, RCODE_ADDRESS_ERROR); + + return; + } + + tcode = HEADER_GET_TCODE(p->header[0]); + destination = HEADER_GET_DESTINATION(p->header[0]); + source = HEADER_GET_SOURCE(p->header[1]); + + if (tcode != TCODE_WRITE_QUADLET_REQUEST && + tcode != TCODE_WRITE_BLOCK_REQUEST) { + fw_send_response(card, request, RCODE_TYPE_ERROR); + + return; + } + + spin_lock_irqsave(&address_handler_lock, flags); + list_for_each_entry(handler, &address_handler_list, link) { + if (is_enclosing_handler(handler, offset, request->length)) + handler->address_callback(card, NULL, tcode, + destination, source, + p->generation, p->speed, + offset, request->data, + request->length, + handler->callback_data); + } + spin_unlock_irqrestore(&address_handler_lock, flags); + + fw_send_response(card, request, RCODE_COMPLETE); +} + +void fw_core_handle_request(struct fw_card *card, struct fw_packet *p) +{ + struct fw_request *request; + unsigned long long offset; + + if (p->ack != ACK_PENDING && p->ack != ACK_COMPLETE) + return; + + request = allocate_request(p); + if (request == NULL) { + /* FIXME: send statically allocated busy packet. 
*/ + return; + } + + offset = ((u64)HEADER_GET_OFFSET_HIGH(p->header[1]) << 32) | + p->header[2]; + + if (!is_in_fcp_region(offset, request->length)) + handle_exclusive_region_request(card, p, request, offset); + else + handle_fcp_region_request(card, p, request, offset); + +} EXPORT_SYMBOL(fw_core_handle_request); void fw_core_handle_response(struct fw_card *card, struct fw_packet *p) diff --git a/drivers/media/dvb/firewire/firedtv-fw.c b/drivers/media/dvb/firewire/firedtv-fw.c index fe44789ab037..6223bf01efe9 100644 --- a/drivers/media/dvb/firewire/firedtv-fw.c +++ b/drivers/media/dvb/firewire/firedtv-fw.c @@ -202,14 +202,8 @@ static void handle_fcp(struct fw_card *card, struct fw_request *request, unsigned long flags; int su; - if ((tcode != TCODE_WRITE_QUADLET_REQUEST && - tcode != TCODE_WRITE_BLOCK_REQUEST) || - offset != CSR_REGISTER_BASE + CSR_FCP_RESPONSE || - length == 0 || - (((u8 *)payload)[0] & 0xf0) != 0) { - fw_send_response(card, request, RCODE_TYPE_ERROR); + if (length < 2 || (((u8 *)payload)[0] & 0xf0) != 0) return; - } su = ((u8 *)payload)[1] & 0x7; @@ -230,10 +224,8 @@ static void handle_fcp(struct fw_card *card, struct fw_request *request, } spin_unlock_irqrestore(&node_list_lock, flags); - if (fdtv) { + if (fdtv) avc_recv(fdtv, payload, length); - fw_send_response(card, request, RCODE_COMPLETE); - } } static struct fw_address_handler fcp_handler = { diff --git a/include/linux/firewire-cdev.h b/include/linux/firewire-cdev.h index c6b3ca3af6df..1f716d9f714b 100644 --- a/include/linux/firewire-cdev.h +++ b/include/linux/firewire-cdev.h @@ -340,6 +340,9 @@ struct fw_cdev_send_response { * The @closure field is passed back to userspace in the response event. * The @handle field is an out parameter, returning a handle to the allocated * range to be used for later deallocation of the range. + * + * The address range is allocated on all local nodes. The address allocation + * is exclusive except for the FCP command and response registers. */ struct fw_cdev_allocate { __u64 offset; diff --git a/include/linux/firewire.h b/include/linux/firewire.h index 9416a461b696..a0e67150a729 100644 --- a/include/linux/firewire.h +++ b/include/linux/firewire.h @@ -248,8 +248,8 @@ typedef void (*fw_transaction_callback_t)(struct fw_card *card, int rcode, void *data, size_t length, void *callback_data); /* - * Important note: The callback must guarantee that either fw_send_response() - * or kfree() is called on the @request. + * Important note: Except for the FCP registers, the callback must guarantee + * that either fw_send_response() or kfree() is called on the @request. */ typedef void (*fw_address_callback_t)(struct fw_card *card, struct fw_request *request, -- cgit v1.2.3-70-g09d2 From 9bd3f98821a83041e77ee25158b80b535d02d7b4 Mon Sep 17 00:00:00 2001 From: Gui Jianfeng Date: Wed, 30 Dec 2009 08:41:07 +0100 Subject: block: blk_rq_err_sectors cleanup blk_rq_err_sectors() seems useless, get rid of it. 
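Should a sector-granularity helper ever be wanted again, any caller can trivially open-code it from the byte-based primitive; a minimal sketch, assuming the usual 512-byte sector shift used throughout blkdev.h:

	unsigned int err_sectors = blk_rq_err_bytes(rq) >> 9;
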
Signed-off-by: Gui Jianfeng Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 59b832be3044..9b98173a8184 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -845,7 +845,6 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) * blk_rq_err_bytes() : bytes left till the next error boundary * blk_rq_sectors() : sectors left in the entire request * blk_rq_cur_sectors() : sectors left in the current segment - * blk_rq_err_sectors() : sectors left till the next error boundary */ static inline sector_t blk_rq_pos(const struct request *rq) { @@ -874,11 +873,6 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) return blk_rq_cur_bytes(rq) >> 9; } -static inline unsigned int blk_rq_err_sectors(const struct request *rq) -{ - return blk_rq_err_bytes(rq) >> 9; -} - /* * Request issue related functions. */ -- cgit v1.2.3-70-g09d2 From e96dc9674cb597de4fee757ed005c8465072d13f Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:26 +0800 Subject: tracing/syscalls: Fix typo in SYSCALL_DEFINE0 The struct syscall_metadata variable name in SYSCALL_DEFINE0 should be __syscall_meta__##sname instead of __syscall_meta_##sname to match the name that is in SYSCALL_DEFINE1/2/3/4/5/6. This error causes event_enter_##sname->data to point to the wrong location, which causes syscalls which are defined by SYSCALL_DEFINE0() not to be traced. Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D2E.1010807@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/syscalls.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 65793e90d6f6..207466a49f3d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -195,7 +195,7 @@ struct perf_event_attr; static const struct syscall_metadata __used \ __attribute__((__aligned__(4))) \ __attribute__((section("__syscalls_metadata"))) \ - __syscall_meta_##sname = { \ + __syscall_meta__##sname = { \ .name = "sys_"#sname, \ .nb_args = 0, \ .enter_event = &event_enter__##sname, \ -- cgit v1.2.3-70-g09d2 From fb7ae981cb9fe8665b9da97e8734745e030c151d Mon Sep 17 00:00:00 2001 From: Lai Jiangshan Date: Tue, 15 Dec 2009 15:39:38 +0800 Subject: tracing: Fix sign fields in ftrace_define_fields_##call() Add is_signed_type() call to trace_define_field() in ftrace macros. The code previously just passed in 0 (false), disregarding whether or not the field was actually a signed type. 
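For reference, the signedness of a type can be decided entirely at compile time with a cast trick; a minimal sketch of such a macro (the exact in-tree definition may differ slightly) is:

	#define is_signed_type(type)	(((type)(-1)) < (type)1)

With this, is_signed_type(int) evaluates to 1 and is_signed_type(__u32) to 0, so trace_define_field() is now told the real sign of array and dynamic-array fields instead of a hard-coded 0.
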
Signed-off-by: Lai Jiangshan LKML-Reference: <4B273D3A.6020007@cn.fujitsu.com> Signed-off-by: Steven Rostedt --- include/trace/ftrace.h | 7 ++++--- kernel/trace/trace_export.c | 7 ++++--- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h index 73523151a731..c6fe03e902ca 100644 --- a/include/trace/ftrace.h +++ b/include/trace/ftrace.h @@ -414,7 +414,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0, FILTER_OTHER); \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; @@ -422,8 +423,8 @@ ftrace_raw_output_##call(struct trace_iterator *iter, int flags) \ #define __dynamic_array(type, item, len) \ ret = trace_define_field(event_call, "__data_loc " #type "[]", #item, \ offsetof(typeof(field), __data_loc_##item), \ - sizeof(field.__data_loc_##item), 0, \ - FILTER_OTHER); + sizeof(field.__data_loc_##item), \ + is_signed_type(type), FILTER_OTHER); #undef __string #define __string(item, src) __dynamic_array(char, item, -1) diff --git a/kernel/trace/trace_export.c b/kernel/trace/trace_export.c index 458e5bfe26d0..d4fa5dc1ee4e 100644 --- a/kernel/trace/trace_export.c +++ b/kernel/trace/trace_export.c @@ -158,7 +158,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ BUILD_BUG_ON(len > MAX_FILTER_STR_VAL); \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), item), \ - sizeof(field.item), 0, FILTER_OTHER); \ + sizeof(field.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; @@ -168,8 +169,8 @@ ftrace_format_##name(struct ftrace_event_call *unused, \ ret = trace_define_field(event_call, #type "[" #len "]", #item, \ offsetof(typeof(field), \ container.item), \ - sizeof(field.container.item), 0, \ - FILTER_OTHER); \ + sizeof(field.container.item), \ + is_signed_type(type), FILTER_OTHER); \ if (ret) \ return ret; -- cgit v1.2.3-70-g09d2 From 75c85a0bc13367aabb36e8208d4e373b022b43b3 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Wed, 30 Dec 2009 13:21:06 -0600 Subject: libsrp: fix compile failure commit 45465487897a1c6d508b14b904dc5777f7ec7e04 ("kfifo: move struct kfifo in place") caused a compile failure in ibmvscsitgt.c because it changed a pointer to kfifo in the libsrp.h structure to a direct inclusion without including . The fix is simple, just add the include, but how did this happen? This change, introduced at -rc2, hardly looks like a bug fix, and it clearly didn't go through linux-next, which would have picked up this compile failure (it only occurs on ppc because of the ibm virtual scsi target). [ Apparently all of -mm wasn't in linux-next.. ] Signed-off-by: James Bottomley Signed-off-by: Linus Torvalds --- include/scsi/libsrp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/scsi/libsrp.h b/include/scsi/libsrp.h index 07e3adde21d9..f4105c91af53 100644 --- a/include/scsi/libsrp.h +++ b/include/scsi/libsrp.h @@ -2,6 +2,7 @@ #define __LIBSRP_H__ #include +#include #include #include #include -- cgit v1.2.3-70-g09d2 From ed656d8deccc5669afa33387568e7ec6f14e3e94 Mon Sep 17 00:00:00 2001 From: Rolf Eike Beer Date: Sat, 26 Dec 2009 17:58:11 +0100 Subject: kfifo: Fix typo in comment It's DECLARE_KFIFO, not DECLARED_KFIFO. 
Signed-off-by: Rolf Eike Beer Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 3d44e9c65a8e..7c6b32a1421c 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -81,7 +81,7 @@ union { \ } /** - * INIT_KFIFO - Initialize a kfifo declared by DECLARED_KFIFO + * INIT_KFIFO - Initialize a kfifo declared by DECLARE_KFIFO * @name: name of the declared kfifo datatype */ #define INIT_KFIFO(name) \ -- cgit v1.2.3-70-g09d2 From d7f0eea9e431e1b8b0742a74db1a9490730b2a25 Mon Sep 17 00:00:00 2001 From: Zhang Rui Date: Wed, 30 Dec 2009 15:36:42 +0800 Subject: ACPI: introduce kernel parameter acpi_sleep=sci_force_enable Introduce kernel parameter acpi_sleep=sci_force_enable some laptop requires SCI_EN being set directly on resume, or else they hung somewhere in the resume code path. We already have a blacklist for these laptops but we still need this option, especially when debugging some suspend/resume problems, in case there are systems that need this workaround and are not yet in the blacklist. Signed-off-by: Zhang Rui Acked-by: Rafael J. Wysocki Signed-off-by: Len Brown --- Documentation/kernel-parameters.txt | 5 ++++- arch/x86/kernel/acpi/sleep.c | 2 ++ drivers/acpi/sleep.c | 29 +++++++++++++++++------------ include/linux/acpi.h | 1 + 4 files changed, 24 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 5ba4d9dff113..736d45602886 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -240,7 +240,7 @@ and is between 256 and 4096 characters. It is defined in the file acpi_sleep= [HW,ACPI] Sleep options Format: { s3_bios, s3_mode, s3_beep, s4_nohwsig, - old_ordering, s4_nonvs } + old_ordering, s4_nonvs, sci_force_enable } See Documentation/power/video.txt for information on s3_bios and s3_mode. s3_beep is for debugging; it makes the PC's speaker beep @@ -253,6 +253,9 @@ and is between 256 and 4096 characters. It is defined in the file of _PTS is used by default). s4_nonvs prevents the kernel from saving/restoring the ACPI NVS memory during hibernation. + sci_force_enable causes the kernel to set SCI_EN directly + on resume from S1/S3 (which is against the ACPI spec, + but some broken systems don't work without it). acpi_use_timer_override [HW,ACPI] Use timer override. For some broken Nvidia NF5 boards diff --git a/arch/x86/kernel/acpi/sleep.c b/arch/x86/kernel/acpi/sleep.c index 82e508677b91..f9961034e557 100644 --- a/arch/x86/kernel/acpi/sleep.c +++ b/arch/x86/kernel/acpi/sleep.c @@ -162,6 +162,8 @@ static int __init acpi_sleep_setup(char *str) #endif if (strncmp(str, "old_ordering", 12) == 0) acpi_old_suspend_ordering(); + if (strncmp(str, "sci_force_enable", 16) == 0) + acpi_set_sci_en_on_resume(); str = strchr(str, ','); if (str != NULL) str += strspn(str, ", \t"); diff --git a/drivers/acpi/sleep.c b/drivers/acpi/sleep.c index 5f2c379ab7bf..79d33d908b5a 100644 --- a/drivers/acpi/sleep.c +++ b/drivers/acpi/sleep.c @@ -80,6 +80,23 @@ static int acpi_sleep_prepare(u32 acpi_state) #ifdef CONFIG_ACPI_SLEEP static u32 acpi_target_sleep_state = ACPI_STATE_S0; +/* + * According to the ACPI specification the BIOS should make sure that ACPI is + * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. 
Still, + * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI + * on such systems during resume. Unfortunately that doesn't help in + * particularly pathological cases in which SCI_EN has to be set directly on + * resume, although the specification states very clearly that this flag is + * owned by the hardware. The set_sci_en_on_resume variable will be set in such + * cases. + */ +static bool set_sci_en_on_resume; + +void __init acpi_set_sci_en_on_resume(void) +{ + set_sci_en_on_resume = true; +} + /* * ACPI 1.0 wants us to execute _PTS before suspending devices, so we allow the * user to request that behavior by using the 'acpi_old_suspend_ordering' @@ -170,18 +187,6 @@ static void acpi_pm_end(void) #endif /* CONFIG_ACPI_SLEEP */ #ifdef CONFIG_SUSPEND -/* - * According to the ACPI specification the BIOS should make sure that ACPI is - * enabled and SCI_EN bit is set on wake-up from S1 - S3 sleep states. Still, - * some BIOSes don't do that and therefore we use acpi_enable() to enable ACPI - * on such systems during resume. Unfortunately that doesn't help in - * particularly pathological cases in which SCI_EN has to be set directly on - * resume, although the specification states very clearly that this flag is - * owned by the hardware. The set_sci_en_on_resume variable will be set in such - * cases. - */ -static bool set_sci_en_on_resume; - extern void do_suspend_lowlevel(void); static u32 acpi_suspend_states[] = { diff --git a/include/linux/acpi.h b/include/linux/acpi.h index ce945d4845fc..36924255c0d5 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -251,6 +251,7 @@ int acpi_check_mem_region(resource_size_t start, resource_size_t n, void __init acpi_no_s4_hw_signature(void); void __init acpi_old_suspend_ordering(void); void __init acpi_s4_no_nvs(void); +void __init acpi_set_sci_en_on_resume(void); #endif /* CONFIG_PM_SLEEP */ struct acpi_osc_context { -- cgit v1.2.3-70-g09d2 From 2f5cb43406d0b29b96248f5328a14a6f6abf8ae6 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Wed, 30 Dec 2009 08:23:30 +0000 Subject: phylib: Properly reinitialize PHYs after hibernation Since hibernation assumes power loss, we should fully reinitialize PHYs (including platform fixups), as if PHYs were just attached. This patch factors phy_init_hw() out of phy_attach_direct(), then converts mdio_bus to dev_pm_ops and adds an appropriate restore() callback. Signed-off-by: Anton Vorontsov Signed-off-by: David S. Miller --- drivers/net/phy/mdio_bus.c | 50 +++++++++++++++++++++++++++++++++++++------- drivers/net/phy/phy_device.c | 30 +++++++++++++------------- include/linux/phy.h | 1 + 3 files changed, 59 insertions(+), 22 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c index 49252d390903..e17b70291bbc 100644 --- a/drivers/net/phy/mdio_bus.c +++ b/drivers/net/phy/mdio_bus.c @@ -264,6 +264,8 @@ static int mdio_bus_match(struct device *dev, struct device_driver *drv) (phydev->phy_id & phydrv->phy_id_mask)); } +#ifdef CONFIG_PM + static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) { struct device_driver *drv = phydev->dev.driver; @@ -295,10 +297,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) return true; } -/* Suspend and resume. 
Copied from platform_suspend and - * platform_resume - */ -static int mdio_bus_suspend(struct device * dev, pm_message_t state) +static int mdio_bus_suspend(struct device *dev) { struct phy_driver *phydrv = to_phy_driver(dev->driver); struct phy_device *phydev = to_phy_device(dev); @@ -318,7 +317,7 @@ static int mdio_bus_suspend(struct device * dev, pm_message_t state) return phydrv->suspend(phydev); } -static int mdio_bus_resume(struct device * dev) +static int mdio_bus_resume(struct device *dev) { struct phy_driver *phydrv = to_phy_driver(dev->driver); struct phy_device *phydev = to_phy_device(dev); @@ -338,11 +337,48 @@ no_resume: return 0; } +static int mdio_bus_restore(struct device *dev) +{ + struct phy_device *phydev = to_phy_device(dev); + struct net_device *netdev = phydev->attached_dev; + int ret; + + if (!netdev) + return 0; + + ret = phy_init_hw(phydev); + if (ret < 0) + return ret; + + /* The PHY needs to renegotiate. */ + phydev->link = 0; + phydev->state = PHY_UP; + + phy_start_machine(phydev, NULL); + + return 0; +} + +static struct dev_pm_ops mdio_bus_pm_ops = { + .suspend = mdio_bus_suspend, + .resume = mdio_bus_resume, + .freeze = mdio_bus_suspend, + .thaw = mdio_bus_resume, + .restore = mdio_bus_restore, +}; + +#define MDIO_BUS_PM_OPS (&mdio_bus_pm_ops) + +#else + +#define MDIO_BUS_PM_OPS NULL + +#endif /* CONFIG_PM */ + struct bus_type mdio_bus_type = { .name = "mdio_bus", .match = mdio_bus_match, - .suspend = mdio_bus_suspend, - .resume = mdio_bus_resume, + .pm = MDIO_BUS_PM_OPS, }; EXPORT_SYMBOL(mdio_bus_type); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index b10fedd82143..8212b2b93422 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -378,6 +378,20 @@ void phy_disconnect(struct phy_device *phydev) } EXPORT_SYMBOL(phy_disconnect); +int phy_init_hw(struct phy_device *phydev) +{ + int ret; + + if (!phydev->drv || !phydev->drv->config_init) + return 0; + + ret = phy_scan_fixups(phydev); + if (ret < 0) + return ret; + + return phydev->drv->config_init(phydev); +} + /** * phy_attach_direct - attach a network device to a given PHY device pointer * @dev: network device to attach @@ -425,21 +439,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, /* Do initial configuration here, now that * we have certain key parameters * (dev_flags and interface) */ - if (phydev->drv->config_init) { - int err; - - err = phy_scan_fixups(phydev); - - if (err < 0) - return err; - - err = phydev->drv->config_init(phydev); - - if (err < 0) - return err; - } - - return 0; + return phy_init_hw(phydev); } EXPORT_SYMBOL(phy_attach_direct); diff --git a/include/linux/phy.h b/include/linux/phy.h index b1368b8f6572..7968defd2fa7 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -447,6 +447,7 @@ struct phy_device* get_phy_device(struct mii_bus *bus, int addr); int phy_device_register(struct phy_device *phy); int phy_clear_interrupt(struct phy_device *phydev); int phy_config_interrupt(struct phy_device *phydev, u32 interrupts); +int phy_init_hw(struct phy_device *phydev); int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface); struct phy_device * phy_attach(struct net_device *dev, -- cgit v1.2.3-70-g09d2 From 0719d3434747889b314a1e8add776418c4148bcf Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Dec 2009 00:39:22 +0100 Subject: reiserfs: Fix reiserfs lock <-> i_xattr_sem dependency inversion i_xattr_sem depends on the reiserfs lock. 
But after we grab i_xattr_sem, we may relax/relock the reiserfs lock while waiting on a freezed filesystem, creating a dependency inversion between the two locks. In order to avoid the i_xattr_sem -> reiserfs lock dependency, let's create a reiserfs_down_read_safe() that acts like reiserfs_mutex_lock_safe(): relax the reiserfs lock while grabbing another lock to avoid undesired dependencies induced by the heivyweight reiserfs lock. This fixes the following warning: [ 990.005931] ======================================================= [ 990.012373] [ INFO: possible circular locking dependency detected ] [ 990.013233] 2.6.33-rc1 #1 [ 990.013233] ------------------------------------------------------- [ 990.013233] dbench/1891 is trying to acquire lock: [ 990.013233] (&REISERFS_SB(s)->lock){+.+.+.}, at: [] reiserfs_write_lock+0x35/0x50 [ 990.013233] [ 990.013233] but task is already holding lock: [ 990.013233] (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}, at: [] reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [ 990.013233] which lock already depends on the new lock. [ 990.013233] [ 990.013233] [ 990.013233] the existing dependency chain (in reverse order) is: [ 990.013233] [ 990.013233] -> #1 (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}: [ 990.013233] [] __lock_acquire+0xf9c/0x1560 [ 990.013233] [] lock_acquire+0x8f/0xb0 [ 990.013233] [] down_write+0x44/0x80 [ 990.013233] [] reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [] reiserfs_xattr_set+0xb0/0x150 [ 990.013233] [] user_set+0x8a/0x90 [ 990.013233] [] reiserfs_setxattr+0xaa/0xb0 [ 990.013233] [] __vfs_setxattr_noperm+0x36/0xa0 [ 990.013233] [] vfs_setxattr+0xbc/0xc0 [ 990.013233] [] setxattr+0xc0/0x150 [ 990.013233] [] sys_fsetxattr+0x8d/0xa0 [ 990.013233] [] system_call_fastpath+0x16/0x1b [ 990.013233] [ 990.013233] -> #0 (&REISERFS_SB(s)->lock){+.+.+.}: [ 990.013233] [] __lock_acquire+0x12d0/0x1560 [ 990.013233] [] lock_acquire+0x8f/0xb0 [ 990.013233] [] __mutex_lock_common+0x47/0x3b0 [ 990.013233] [] mutex_lock_nested+0x3e/0x50 [ 990.013233] [] reiserfs_write_lock+0x35/0x50 [ 990.013233] [] reiserfs_prepare_write+0x45/0x180 [ 990.013233] [] reiserfs_xattr_set_handle+0x2a6/0x470 [ 990.013233] [] reiserfs_xattr_set+0xb0/0x150 [ 990.013233] [] user_set+0x8a/0x90 [ 990.013233] [] reiserfs_setxattr+0xaa/0xb0 [ 990.013233] [] __vfs_setxattr_noperm+0x36/0xa0 [ 990.013233] [] vfs_setxattr+0xbc/0xc0 [ 990.013233] [] setxattr+0xc0/0x150 [ 990.013233] [] sys_fsetxattr+0x8d/0xa0 [ 990.013233] [] system_call_fastpath+0x16/0x1b [ 990.013233] [ 990.013233] other info that might help us debug this: [ 990.013233] [ 990.013233] 2 locks held by dbench/1891: [ 990.013233] #0: (&sb->s_type->i_mutex_key#12){+.+.+.}, at: [] vfs_setxattr+0x78/0xc0 [ 990.013233] #1: (&REISERFS_I(inode)->i_xattr_sem){+.+.+.}, at: [] reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [ 990.013233] stack backtrace: [ 990.013233] Pid: 1891, comm: dbench Not tainted 2.6.33-rc1 #1 [ 990.013233] Call Trace: [ 990.013233] [] print_circular_bug+0xe9/0xf0 [ 990.013233] [] __lock_acquire+0x12d0/0x1560 [ 990.013233] [] ? reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [] lock_acquire+0x8f/0xb0 [ 990.013233] [] ? reiserfs_write_lock+0x35/0x50 [ 990.013233] [] ? reiserfs_xattr_set_handle+0x8a/0x470 [ 990.013233] [] __mutex_lock_common+0x47/0x3b0 [ 990.013233] [] ? reiserfs_write_lock+0x35/0x50 [ 990.013233] [] ? reiserfs_write_lock+0x35/0x50 [ 990.013233] [] ? mark_held_locks+0x72/0xa0 [ 990.013233] [] ? __mutex_unlock_slowpath+0xbd/0x140 [ 990.013233] [] ? 
trace_hardirqs_on_caller+0x14d/0x1a0 [ 990.013233] [] mutex_lock_nested+0x3e/0x50 [ 990.013233] [] reiserfs_write_lock+0x35/0x50 [ 990.013233] [] reiserfs_prepare_write+0x45/0x180 [ 990.013233] [] reiserfs_xattr_set_handle+0x2a6/0x470 [ 990.013233] [] reiserfs_xattr_set+0xb0/0x150 [ 990.013233] [] ? __mutex_lock_common+0x284/0x3b0 [ 990.013233] [] user_set+0x8a/0x90 [ 990.013233] [] reiserfs_setxattr+0xaa/0xb0 [ 990.013233] [] __vfs_setxattr_noperm+0x36/0xa0 [ 990.013233] [] vfs_setxattr+0xbc/0xc0 [ 990.013233] [] setxattr+0xc0/0x150 [ 990.013233] [] ? sched_clock_cpu+0xb8/0x100 [ 990.013233] [] ? trace_hardirqs_off+0xd/0x10 [ 990.013233] [] ? cpu_clock+0x43/0x50 [ 990.013233] [] ? fget+0xb0/0x110 [ 990.013233] [] ? fget+0x0/0x110 [ 990.013233] [] ? sysret_check+0x27/0x62 [ 990.013233] [] sys_fsetxattr+0x8d/0xa0 [ 990.013233] [] system_call_fastpath+0x16/0x1b Reported-and-tested-by: Christian Kujau Signed-off-by: Frederic Weisbecker Cc: Alexander Beregalov Cc: Chris Mason Cc: Ingo Molnar --- fs/reiserfs/xattr.c | 2 +- include/linux/reiserfs_fs.h | 8 ++++++++ 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index 8891cd88a3f4..a0e2e7acdc75 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -484,7 +484,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th, if (IS_ERR(dentry)) return PTR_ERR(dentry); - down_write(&REISERFS_I(inode)->i_xattr_sem); + reiserfs_down_read_safe(&REISERFS_I(inode)->i_xattr_sem, inode->i_sb); xahash = xattr_hash(buffer, buffer_size); while (buffer_pos < buffer_size || buffer_pos == 0) { diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 4351b49e2b1e..35d3f459b0ac 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -106,6 +106,14 @@ reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, reiserfs_write_lock(s); } +static inline void +reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) +{ + reiserfs_write_unlock(s); + down_read(sem); + reiserfs_write_lock(s); +} + /* * When we schedule, we usually want to also release the write lock, * according to the previous bkl based locking scheme of reiserfs. -- cgit v1.2.3-70-g09d2 From c4a62ca362258d98f42efb282cfbf9b61caffdbe Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 30 Dec 2009 03:20:19 +0100 Subject: reiserfs: Warn on lock relax if taken recursively When we relax the reiserfs lock to avoid creating unwanted dependencies against others locks while grabbing these, we want to ensure it has not been taken recursively, otherwise the lock won't be really relaxed. Only its depth will be decreased. The unwanted dependency would then actually happen. To prevent from that, add a reiserfs_lock_check_recursive() call in the places that need it. 
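Conceptually the hazard looks like this when the per-superblock lock is taken recursively (illustrative sketch only, not code from the tree):

	reiserfs_write_lock(sb);		/* lock acquired */
	reiserfs_write_lock(sb);		/* taken again: only the depth grows */
	...
	reiserfs_mutex_lock_safe(m, sb);	/* the internal reiserfs_write_unlock()
						 * merely decreases the depth, so the
						 * reiserfs lock stays held while we
						 * sleep on m and the unwanted
						 * dependency is created anyway */

reiserfs_lock_check_recursive() makes this situation visible by warning (under CONFIG_REISERFS_CHECK) whenever one of the *_safe helpers is entered with the lock held more than once.
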
Signed-off-by: Frederic Weisbecker Cc: Alexander Beregalov Cc: Chris Mason Cc: Ingo Molnar --- fs/reiserfs/lock.c | 9 +++++++++ include/linux/reiserfs_fs.h | 9 +++++++++ 2 files changed, 18 insertions(+) (limited to 'include') diff --git a/fs/reiserfs/lock.c b/fs/reiserfs/lock.c index ee2cfc0fd8a7..b87aa2c1afc1 100644 --- a/fs/reiserfs/lock.c +++ b/fs/reiserfs/lock.c @@ -86,3 +86,12 @@ void reiserfs_check_lock_depth(struct super_block *sb, char *caller) reiserfs_panic(sb, "%s called without kernel lock held %d", caller); } + +#ifdef CONFIG_REISERFS_CHECK +void reiserfs_lock_check_recursive(struct super_block *sb) +{ + struct reiserfs_sb_info *sb_i = REISERFS_SB(sb); + + WARN_ONCE((sb_i->lock_depth > 0), "Unwanted recursive reiserfs lock!\n"); +} +#endif diff --git a/include/linux/reiserfs_fs.h b/include/linux/reiserfs_fs.h index 35d3f459b0ac..793bf8351ab8 100644 --- a/include/linux/reiserfs_fs.h +++ b/include/linux/reiserfs_fs.h @@ -62,6 +62,12 @@ void reiserfs_write_unlock(struct super_block *s); int reiserfs_write_lock_once(struct super_block *s); void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); +#ifdef CONFIG_REISERFS_CHECK +void reiserfs_lock_check_recursive(struct super_block *s); +#else +static inline void reiserfs_lock_check_recursive(struct super_block *s) { } +#endif + /* * Several mutexes depend on the write lock. * However sometimes we want to relax the write lock while we hold @@ -92,6 +98,7 @@ void reiserfs_write_unlock_once(struct super_block *s, int lock_depth); static inline void reiserfs_mutex_lock_safe(struct mutex *m, struct super_block *s) { + reiserfs_lock_check_recursive(s); reiserfs_write_unlock(s); mutex_lock(m); reiserfs_write_lock(s); @@ -101,6 +108,7 @@ static inline void reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, struct super_block *s) { + reiserfs_lock_check_recursive(s); reiserfs_write_unlock(s); mutex_lock_nested(m, subclass); reiserfs_write_lock(s); @@ -109,6 +117,7 @@ reiserfs_mutex_lock_nested_safe(struct mutex *m, unsigned int subclass, static inline void reiserfs_down_read_safe(struct rw_semaphore *sem, struct super_block *s) { + reiserfs_lock_check_recursive(s); reiserfs_write_unlock(s); down_read(sem); reiserfs_write_lock(s); -- cgit v1.2.3-70-g09d2 From 96d07d211739fd2450ac54e81d00fa40fcd4b1bd Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Fri, 20 Nov 2009 14:16:33 +0100 Subject: resource: move kernel function inside __KERNEL__ It is an internal function. Move it inside __KERNEL__ ifdef, along with task_struct declaration. Then we get: --- /usr/include/linux/resource.h 2009-09-14 15:09:29.000000000 +0200 +++ usr/include/linux/resource.h 2010-01-04 11:30:54.000000000 +0100 @@ -3,8 +3,6 @@ #include -struct task_struct; - /* * Resource control/accounting header file for linux */ @@ -70,6 +68,5 @@ */ #include -int getrusage(struct task_struct *p, int who, struct rusage *ru); #endif *********** include/linux/Kbuild is untouched, since unifdef is run even on headers-y nowadays. 
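For anyone giving another exported header the same treatment, the pattern is to leave the userspace-visible ABI outside the guard and move kernel-only declarations inside it; a hypothetical example (all names invented for illustration):

	/* foo.h -- shipped to userspace via headers-y */
	#include <linux/types.h>

	struct foo_req {			/* part of the user ABI, stays visible */
		__u32	flags;
	};

	#ifdef __KERNEL__
	struct task_struct;			/* kernel-only forward declaration */
	int foo_handle(struct task_struct *p, struct foo_req *req);
	#endif /* __KERNEL__ */
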
Signed-off-by: Jiri Slaby --- include/linux/resource.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/resource.h b/include/linux/resource.h index 40fc7e626082..f1e914eefeab 100644 --- a/include/linux/resource.h +++ b/include/linux/resource.h @@ -3,8 +3,6 @@ #include -struct task_struct; - /* * Resource control/accounting header file for linux */ @@ -70,6 +68,12 @@ struct rlimit { */ #include +#ifdef __KERNEL__ + +struct task_struct; + int getrusage(struct task_struct *p, int who, struct rusage __user *ru); +#endif /* __KERNEL__ */ + #endif -- cgit v1.2.3-70-g09d2 From 3e10e716abf3c71bdb5d86b8f507f9e72236c9cd Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 19 Nov 2009 17:16:37 +0100 Subject: resource: add helpers for fetching rlimits We want to be sure that compiler fetches the limit variable only once, so add helpers for fetching current and maximal resource limits which do that. Add them to sched.h (instead of resource.h) due to circular dependency sched.h->resource.h->task_struct Alternative would be to create a separate res_access.h or similar. Signed-off-by: Jiri Slaby Cc: James Morris Cc: Heiko Carstens Cc: Andrew Morton Cc: Ingo Molnar --- include/linux/sched.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index f2f842db03ce..8d4991be9d53 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2601,6 +2601,28 @@ static inline void mm_init_owner(struct mm_struct *mm, struct task_struct *p) } #endif /* CONFIG_MM_OWNER */ +static inline unsigned long task_rlimit(const struct task_struct *tsk, + unsigned int limit) +{ + return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_cur); +} + +static inline unsigned long task_rlimit_max(const struct task_struct *tsk, + unsigned int limit) +{ + return ACCESS_ONCE(tsk->signal->rlim[limit].rlim_max); +} + +static inline unsigned long rlimit(unsigned int limit) +{ + return task_rlimit(current, limit); +} + +static inline unsigned long rlimit_max(unsigned int limit) +{ + return task_rlimit_max(current, limit); +} + #endif /* __KERNEL__ */ #endif -- cgit v1.2.3-70-g09d2 From 1ae861e652b5457e7fa98ccbc55abea1e207916e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 31 Dec 2009 12:15:54 +0100 Subject: PCI/PM: Use per-device D3 delays It turns out that some PCI devices require extra delays when changing power state from D3 to D0 (and the other way around). Although this is against the PCI specification, we can handle it quite easily by allowing drivers to define arbitrary D3 delays for devices known to require extra time for switching power states. Introduce additional field d3_delay in struct pci_dev and use it to store the value of the device's D0->D3 delay, in miliseconds. Make the PCI PM core code use the per-device d3_delay unless pci_pm_d3_delay is greater (in which case the latter is used). [This also allows the driver to specify d3_delay shorter than the 10 ms required by the PCI standard if the device is known to be able to handle that.] Make the sky2 driver set d3_delay to 150 for devices handled by it. Fixes http://bugzilla.kernel.org/show_bug.cgi?id=14730 which is a listed regression from 2.6.30. Signed-off-by: Rafael J. 
Wysocki Signed-off-by: Jesse Barnes --- drivers/net/sky2.c | 1 + drivers/pci/pci.c | 19 +++++++++++++++---- include/linux/pci.h | 1 + 3 files changed, 17 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/drivers/net/sky2.c b/drivers/net/sky2.c index 1c01b96c9611..2d28d58200d0 100644 --- a/drivers/net/sky2.c +++ b/drivers/net/sky2.c @@ -4684,6 +4684,7 @@ static int __devinit sky2_probe(struct pci_dev *pdev, INIT_WORK(&hw->restart_work, sky2_restart); pci_set_drvdata(pdev, hw); + pdev->d3_delay = 150; return 0; diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c index 0906599ebfde..315fea47e784 100644 --- a/drivers/pci/pci.c +++ b/drivers/pci/pci.c @@ -29,7 +29,17 @@ const char *pci_power_names[] = { }; EXPORT_SYMBOL_GPL(pci_power_names); -unsigned int pci_pm_d3_delay = PCI_PM_D3_WAIT; +unsigned int pci_pm_d3_delay; + +static void pci_dev_d3_sleep(struct pci_dev *dev) +{ + unsigned int delay = dev->d3_delay; + + if (delay < pci_pm_d3_delay) + delay = pci_pm_d3_delay; + + msleep(delay); +} #ifdef CONFIG_PCI_DOMAINS int pci_domains_supported = 1; @@ -522,7 +532,7 @@ static int pci_raw_set_power_state(struct pci_dev *dev, pci_power_t state) /* Mandatory power management transition delays */ /* see PCI PM 1.1 5.6.1 table 18 */ if (state == PCI_D3hot || dev->current_state == PCI_D3hot) - msleep(pci_pm_d3_delay); + pci_dev_d3_sleep(dev); else if (state == PCI_D2 || dev->current_state == PCI_D2) udelay(PCI_PM_D2_DELAY); @@ -1409,6 +1419,7 @@ void pci_pm_init(struct pci_dev *dev) } dev->pm_cap = pm; + dev->d3_delay = PCI_PM_D3_WAIT; dev->d1_support = false; dev->d2_support = false; @@ -2247,12 +2258,12 @@ static int pci_pm_reset(struct pci_dev *dev, int probe) csr &= ~PCI_PM_CTRL_STATE_MASK; csr |= PCI_D3hot; pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr); - msleep(pci_pm_d3_delay); + pci_dev_d3_sleep(dev); csr &= ~PCI_PM_CTRL_STATE_MASK; csr |= PCI_D0; pci_write_config_word(dev, dev->pm_cap + PCI_PM_CTRL, csr); - msleep(pci_pm_d3_delay); + pci_dev_d3_sleep(dev); return 0; } diff --git a/include/linux/pci.h b/include/linux/pci.h index 5da0690d9cee..174e5392e51e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -243,6 +243,7 @@ struct pci_dev { unsigned int d2_support:1; /* Low power state D2 is supported */ unsigned int no_d1d2:1; /* Only allow D0 and D3 */ unsigned int wakeup_prepared:1; + unsigned int d3_delay; /* D3->D0 transition time in ms */ #ifdef CONFIG_PCIEASPM struct pcie_link_state *link_state; /* ASPM link state. */ -- cgit v1.2.3-70-g09d2 From 59b015133cd0034f5904a76969d73476380aac46 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 5 Jan 2010 17:56:02 -0800 Subject: Input: serio - fix potential deadlock when unbinding drivers sysfs_remove_group() waits for sysfs attributes to be removed, therefore we do not need to worry about driver-specific attributes being accessed after driver has been detached from the device. In fact, attempts to take serio->drv_mutex in attribute methods may lead to the following deadlock: sysfs_read_file() fill_read_buffer() sysfs_get_active_two() psmouse_attr_show_helper() serio_pin_driver() serio_disconnect_driver() mutex_lock(&serio->drv_mutex); <--------> mutex_lock(&serio_drv_mutex); psmouse_disconnect() sysfs_remove_group(... psmouse_attr_group); .... sysfs_deactivate(); wait_for_completion(); Fix this by removing calls to serio_[un]pin_driver() and functions themselves and using driver-private mutexes to serialize access to attribute's set() methods that may change device state. Signed-off-by: Eric W. 
Biederman Signed-off-by: Dmitry Torokhov --- drivers/input/keyboard/atkbd.c | 59 ++++++++++++++++---------------------- drivers/input/mouse/psmouse-base.c | 32 ++------------------- include/linux/serio.h | 19 ------------ 3 files changed, 28 insertions(+), 82 deletions(-) (limited to 'include') diff --git a/drivers/input/keyboard/atkbd.c b/drivers/input/keyboard/atkbd.c index 1f5e2ce327d6..1cf32a7814d0 100644 --- a/drivers/input/keyboard/atkbd.c +++ b/drivers/input/keyboard/atkbd.c @@ -225,8 +225,10 @@ struct atkbd { struct delayed_work event_work; unsigned long event_jiffies; - struct mutex event_mutex; unsigned long event_mask; + + /* Serializes reconnect(), attr->set() and event work */ + struct mutex mutex; }; /* @@ -577,7 +579,7 @@ static void atkbd_event_work(struct work_struct *work) { struct atkbd *atkbd = container_of(work, struct atkbd, event_work.work); - mutex_lock(&atkbd->event_mutex); + mutex_lock(&atkbd->mutex); if (!atkbd->enabled) { /* @@ -596,7 +598,7 @@ static void atkbd_event_work(struct work_struct *work) atkbd_set_repeat_rate(atkbd); } - mutex_unlock(&atkbd->event_mutex); + mutex_unlock(&atkbd->mutex); } /* @@ -612,7 +614,7 @@ static void atkbd_schedule_event_work(struct atkbd *atkbd, int event_bit) atkbd->event_jiffies = jiffies; set_bit(event_bit, &atkbd->event_mask); - wmb(); + mb(); schedule_delayed_work(&atkbd->event_work, delay); } @@ -849,12 +851,13 @@ static void atkbd_disconnect(struct serio *serio) { struct atkbd *atkbd = serio_get_drvdata(serio); + sysfs_remove_group(&serio->dev.kobj, &atkbd_attribute_group); + atkbd_disable(atkbd); /* make sure we don't have a command in flight */ cancel_delayed_work_sync(&atkbd->event_work); - sysfs_remove_group(&serio->dev.kobj, &atkbd_attribute_group); input_unregister_device(atkbd->dev); serio_close(serio); serio_set_drvdata(serio, NULL); @@ -1087,7 +1090,7 @@ static int atkbd_connect(struct serio *serio, struct serio_driver *drv) atkbd->dev = dev; ps2_init(&atkbd->ps2dev, serio); INIT_DELAYED_WORK(&atkbd->event_work, atkbd_event_work); - mutex_init(&atkbd->event_mutex); + mutex_init(&atkbd->mutex); switch (serio->id.type) { @@ -1160,19 +1163,23 @@ static int atkbd_reconnect(struct serio *serio) { struct atkbd *atkbd = serio_get_drvdata(serio); struct serio_driver *drv = serio->drv; + int retval = -1; if (!atkbd || !drv) { printk(KERN_DEBUG "atkbd: reconnect request, but serio is disconnected, ignoring...\n"); return -1; } + mutex_lock(&atkbd->mutex); + atkbd_disable(atkbd); if (atkbd->write) { if (atkbd_probe(atkbd)) - return -1; + goto out; + if (atkbd->set != atkbd_select_set(atkbd, atkbd->set, atkbd->extra)) - return -1; + goto out; atkbd_activate(atkbd); @@ -1190,8 +1197,11 @@ static int atkbd_reconnect(struct serio *serio) } atkbd_enable(atkbd); + retval = 0; - return 0; + out: + mutex_unlock(&atkbd->mutex); + return retval; } static struct serio_device_id atkbd_serio_ids[] = { @@ -1235,47 +1245,28 @@ static ssize_t atkbd_attr_show_helper(struct device *dev, char *buf, ssize_t (*handler)(struct atkbd *, char *)) { struct serio *serio = to_serio_port(dev); - int retval; - - retval = serio_pin_driver(serio); - if (retval) - return retval; - - if (serio->drv != &atkbd_drv) { - retval = -ENODEV; - goto out; - } - - retval = handler((struct atkbd *)serio_get_drvdata(serio), buf); + struct atkbd *atkbd = serio_get_drvdata(serio); -out: - serio_unpin_driver(serio); - return retval; + return handler(atkbd, buf); } static ssize_t atkbd_attr_set_helper(struct device *dev, const char *buf, size_t count, ssize_t 
(*handler)(struct atkbd *, const char *, size_t)) { struct serio *serio = to_serio_port(dev); - struct atkbd *atkbd; + struct atkbd *atkbd = serio_get_drvdata(serio); int retval; - retval = serio_pin_driver(serio); + retval = mutex_lock_interruptible(&atkbd->mutex); if (retval) return retval; - if (serio->drv != &atkbd_drv) { - retval = -ENODEV; - goto out; - } - - atkbd = serio_get_drvdata(serio); atkbd_disable(atkbd); retval = handler(atkbd, buf, count); atkbd_enable(atkbd); -out: - serio_unpin_driver(serio); + mutex_unlock(&atkbd->mutex); + return retval; } diff --git a/drivers/input/mouse/psmouse-base.c b/drivers/input/mouse/psmouse-base.c index 401ac6b6edd4..d59e18b24ede 100644 --- a/drivers/input/mouse/psmouse-base.c +++ b/drivers/input/mouse/psmouse-base.c @@ -1450,24 +1450,10 @@ ssize_t psmouse_attr_show_helper(struct device *dev, struct device_attribute *de struct serio *serio = to_serio_port(dev); struct psmouse_attribute *attr = to_psmouse_attr(devattr); struct psmouse *psmouse; - int retval; - - retval = serio_pin_driver(serio); - if (retval) - return retval; - - if (serio->drv != &psmouse_drv) { - retval = -ENODEV; - goto out; - } psmouse = serio_get_drvdata(serio); - retval = attr->show(psmouse, attr->data, buf); - -out: - serio_unpin_driver(serio); - return retval; + return attr->show(psmouse, attr->data, buf); } ssize_t psmouse_attr_set_helper(struct device *dev, struct device_attribute *devattr, @@ -1478,18 +1464,9 @@ ssize_t psmouse_attr_set_helper(struct device *dev, struct device_attribute *dev struct psmouse *psmouse, *parent = NULL; int retval; - retval = serio_pin_driver(serio); - if (retval) - return retval; - - if (serio->drv != &psmouse_drv) { - retval = -ENODEV; - goto out_unpin; - } - retval = mutex_lock_interruptible(&psmouse_mutex); if (retval) - goto out_unpin; + goto out; psmouse = serio_get_drvdata(serio); @@ -1519,8 +1496,7 @@ ssize_t psmouse_attr_set_helper(struct device *dev, struct device_attribute *dev out_unlock: mutex_unlock(&psmouse_mutex); - out_unpin: - serio_unpin_driver(serio); + out: return retval; } @@ -1582,9 +1558,7 @@ static ssize_t psmouse_attr_set_protocol(struct psmouse *psmouse, void *data, co } mutex_unlock(&psmouse_mutex); - serio_unpin_driver(serio); serio_unregister_child_port(serio); - serio_pin_driver_uninterruptible(serio); mutex_lock(&psmouse_mutex); if (serio->drv != &psmouse_drv) { diff --git a/include/linux/serio.h b/include/linux/serio.h index e2f3044d4a4a..813d26c247ec 100644 --- a/include/linux/serio.h +++ b/include/linux/serio.h @@ -136,25 +136,6 @@ static inline void serio_continue_rx(struct serio *serio) spin_unlock_irq(&serio->lock); } -/* - * Use the following functions to pin serio's driver in process context - */ -static inline int serio_pin_driver(struct serio *serio) -{ - return mutex_lock_interruptible(&serio->drv_mutex); -} - -static inline void serio_pin_driver_uninterruptible(struct serio *serio) -{ - mutex_lock(&serio->drv_mutex); -} - -static inline void serio_unpin_driver(struct serio *serio) -{ - mutex_unlock(&serio->drv_mutex); -} - - #endif /* -- cgit v1.2.3-70-g09d2 From 76446cac68568fc7f5168a27deaf803ed22a4360 Mon Sep 17 00:00:00 2001 From: Jesse Barnes Date: Thu, 17 Dec 2009 22:05:42 -0500 Subject: drm/i915: execbuf2 support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a new execbuf ioctl, execbuf2, for use by clients that want to control fence register allocation more finely. 
The buffer passed in to the new ioctl includes a new relocation type to indicate whether a given object needs a fence register assigned for the command buffer in question. Compatibility with the existing execbuf ioctl is implemented in terms of the new code, preserving the assumption that fence registers are required for pre-965 rendering commands. Signed-off-by: Jesse Barnes [ickle: Remove pre-emptive clear_fence_reg()] Signed-off-by: Chris Wilson Signed-off-by: Kristian Høgsberg [anholt: Removed dmesg spam] Signed-off-by: Eric Anholt --- drivers/gpu/drm/i915/i915_dma.c | 7 +- drivers/gpu/drm/i915/i915_drv.h | 5 + drivers/gpu/drm/i915/i915_gem.c | 239 +++++++++++++++++++++++++-------- drivers/gpu/drm/i915/i915_gem_tiling.c | 46 +++---- include/drm/i915_drm.h | 54 ++++++++ 5 files changed, 273 insertions(+), 78 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index e3e5d5094232..d67be655c532 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -813,9 +813,13 @@ static int i915_getparam(struct drm_device *dev, void *data, case I915_PARAM_HAS_PAGEFLIPPING: value = 1; break; + case I915_PARAM_HAS_EXECBUF2: + /* depends on GEM */ + value = dev_priv->has_gem; + break; default: DRM_DEBUG_DRIVER("Unknown parameter %d\n", - param->param); + param->param); return -EINVAL; } @@ -1646,6 +1650,7 @@ struct drm_ioctl_desc i915_ioctls[] = { DRM_IOCTL_DEF(DRM_I915_HWS_ADDR, i915_set_status_page, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_I915_GEM_INIT, i915_gem_init_ioctl, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH), + DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER2, i915_gem_execbuffer2, DRM_AUTH), DRM_IOCTL_DEF(DRM_I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), DRM_IOCTL_DEF(DRM_I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH), diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index 9a05f1a01025..7eb4ad51034d 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -815,6 +815,8 @@ int i915_gem_sw_finish_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_execbuffer(struct drm_device *dev, void *data, struct drm_file *file_priv); +int i915_gem_execbuffer2(struct drm_device *dev, void *data, + struct drm_file *file_priv); int i915_gem_pin_ioctl(struct drm_device *dev, void *data, struct drm_file *file_priv); int i915_gem_unpin_ioctl(struct drm_device *dev, void *data, @@ -881,6 +883,9 @@ void i915_gem_shrinker_exit(void); void i915_gem_detect_bit_6_swizzle(struct drm_device *dev); void i915_gem_object_do_bit_17_swizzle(struct drm_gem_object *obj); void i915_gem_object_save_bit_17_swizzle(struct drm_gem_object *obj); +bool i915_tiling_ok(struct drm_device *dev, int stride, int size, + int tiling_mode); +bool i915_obj_fenceable(struct drm_device *dev, struct drm_gem_object *obj); /* i915_gem_debug.c */ void i915_gem_dump_object(struct drm_gem_object *obj, int len, diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 9e81a0ddafac..0330c3aa8032 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -3199,7 +3199,7 @@ i915_gem_object_set_cpu_read_domain_range(struct drm_gem_object *obj, static int i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, struct drm_file *file_priv, - struct 
drm_i915_gem_exec_object *entry, + struct drm_i915_gem_exec_object2 *entry, struct drm_i915_gem_relocation_entry *relocs) { struct drm_device *dev = obj->dev; @@ -3207,12 +3207,35 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, struct drm_i915_gem_object *obj_priv = obj->driver_private; int i, ret; void __iomem *reloc_page; + bool need_fence; + + need_fence = entry->flags & EXEC_OBJECT_NEEDS_FENCE && + obj_priv->tiling_mode != I915_TILING_NONE; + + /* Check fence reg constraints and rebind if necessary */ + if (need_fence && !i915_obj_fenceable(dev, obj)) + i915_gem_object_unbind(obj); /* Choose the GTT offset for our buffer and put it there. */ ret = i915_gem_object_pin(obj, (uint32_t) entry->alignment); if (ret) return ret; + /* + * Pre-965 chips need a fence register set up in order to + * properly handle blits to/from tiled surfaces. + */ + if (need_fence) { + ret = i915_gem_object_get_fence_reg(obj); + if (ret != 0) { + if (ret != -EBUSY && ret != -ERESTARTSYS) + DRM_ERROR("Failure to install fence: %d\n", + ret); + i915_gem_object_unpin(obj); + return ret; + } + } + entry->offset = obj_priv->gtt_offset; /* Apply the relocations, using the GTT aperture to avoid cache @@ -3374,7 +3397,7 @@ i915_gem_object_pin_and_relocate(struct drm_gem_object *obj, */ static int i915_dispatch_gem_execbuffer(struct drm_device *dev, - struct drm_i915_gem_execbuffer *exec, + struct drm_i915_gem_execbuffer2 *exec, struct drm_clip_rect *cliprects, uint64_t exec_offset) { @@ -3464,7 +3487,7 @@ i915_gem_ring_throttle(struct drm_device *dev, struct drm_file *file_priv) } static int -i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, +i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object2 *exec_list, uint32_t buffer_count, struct drm_i915_gem_relocation_entry **relocs) { @@ -3479,8 +3502,10 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, } *relocs = drm_calloc_large(reloc_count, sizeof(**relocs)); - if (*relocs == NULL) + if (*relocs == NULL) { + DRM_ERROR("failed to alloc relocs, count %d\n", reloc_count); return -ENOMEM; + } for (i = 0; i < buffer_count; i++) { struct drm_i915_gem_relocation_entry __user *user_relocs; @@ -3504,7 +3529,7 @@ i915_gem_get_relocs_from_user(struct drm_i915_gem_exec_object *exec_list, } static int -i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object *exec_list, +i915_gem_put_relocs_to_user(struct drm_i915_gem_exec_object2 *exec_list, uint32_t buffer_count, struct drm_i915_gem_relocation_entry *relocs) { @@ -3537,7 +3562,7 @@ err: } static int -i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer *exec, +i915_gem_check_execbuffer (struct drm_i915_gem_execbuffer2 *exec, uint64_t exec_offset) { uint32_t exec_start, exec_len; @@ -3590,18 +3615,18 @@ i915_gem_wait_for_pending_flip(struct drm_device *dev, } int -i915_gem_execbuffer(struct drm_device *dev, void *data, - struct drm_file *file_priv) +i915_gem_do_execbuffer(struct drm_device *dev, void *data, + struct drm_file *file_priv, + struct drm_i915_gem_execbuffer2 *args, + struct drm_i915_gem_exec_object2 *exec_list) { drm_i915_private_t *dev_priv = dev->dev_private; - struct drm_i915_gem_execbuffer *args = data; - struct drm_i915_gem_exec_object *exec_list = NULL; struct drm_gem_object **object_list = NULL; struct drm_gem_object *batch_obj; struct drm_i915_gem_object *obj_priv; struct drm_clip_rect *cliprects = NULL; struct drm_i915_gem_relocation_entry *relocs; - int ret, ret2, i, pinned = 0; + int ret = 0, ret2, i, pinned = 0; uint64_t 
exec_offset; uint32_t seqno, flush_domains, reloc_index; int pin_tries, flips; @@ -3615,25 +3640,13 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); return -EINVAL; } - /* Copy in the exec list from userland */ - exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count); object_list = drm_malloc_ab(sizeof(*object_list), args->buffer_count); - if (exec_list == NULL || object_list == NULL) { - DRM_ERROR("Failed to allocate exec or object list " - "for %d buffers\n", + if (object_list == NULL) { + DRM_ERROR("Failed to allocate object list for %d buffers\n", args->buffer_count); ret = -ENOMEM; goto pre_mutex_err; } - ret = copy_from_user(exec_list, - (struct drm_i915_relocation_entry __user *) - (uintptr_t) args->buffers_ptr, - sizeof(*exec_list) * args->buffer_count); - if (ret != 0) { - DRM_ERROR("copy %d exec entries failed %d\n", - args->buffer_count, ret); - goto pre_mutex_err; - } if (args->num_cliprects != 0) { cliprects = kcalloc(args->num_cliprects, sizeof(*cliprects), @@ -3885,20 +3898,6 @@ err: mutex_unlock(&dev->struct_mutex); - if (!ret) { - /* Copy the new buffer offsets back to the user's exec list. */ - ret = copy_to_user((struct drm_i915_relocation_entry __user *) - (uintptr_t) args->buffers_ptr, - exec_list, - sizeof(*exec_list) * args->buffer_count); - if (ret) { - ret = -EFAULT; - DRM_ERROR("failed to copy %d exec entries " - "back to user (%d)\n", - args->buffer_count, ret); - } - } - /* Copy the updated relocations out regardless of current error * state. Failure to update the relocs would mean that the next * time userland calls execbuf, it would do so with presumed offset @@ -3915,12 +3914,158 @@ err: pre_mutex_err: drm_free_large(object_list); - drm_free_large(exec_list); kfree(cliprects); return ret; } +/* + * Legacy execbuffer just creates an exec2 list from the original exec object + * list array and passes it to the real function. 
+ */ +int +i915_gem_execbuffer(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_execbuffer *args = data; + struct drm_i915_gem_execbuffer2 exec2; + struct drm_i915_gem_exec_object *exec_list = NULL; + struct drm_i915_gem_exec_object2 *exec2_list = NULL; + int ret, i; + +#if WATCH_EXEC + DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", + (int) args->buffers_ptr, args->buffer_count, args->batch_len); +#endif + + if (args->buffer_count < 1) { + DRM_ERROR("execbuf with %d buffers\n", args->buffer_count); + return -EINVAL; + } + + /* Copy in the exec list from userland */ + exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count); + exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); + if (exec_list == NULL || exec2_list == NULL) { + DRM_ERROR("Failed to allocate exec list for %d buffers\n", + args->buffer_count); + drm_free_large(exec_list); + drm_free_large(exec2_list); + return -ENOMEM; + } + ret = copy_from_user(exec_list, + (struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + sizeof(*exec_list) * args->buffer_count); + if (ret != 0) { + DRM_ERROR("copy %d exec entries failed %d\n", + args->buffer_count, ret); + drm_free_large(exec_list); + drm_free_large(exec2_list); + return -EFAULT; + } + + for (i = 0; i < args->buffer_count; i++) { + exec2_list[i].handle = exec_list[i].handle; + exec2_list[i].relocation_count = exec_list[i].relocation_count; + exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr; + exec2_list[i].alignment = exec_list[i].alignment; + exec2_list[i].offset = exec_list[i].offset; + if (!IS_I965G(dev)) + exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE; + else + exec2_list[i].flags = 0; + } + + exec2.buffers_ptr = args->buffers_ptr; + exec2.buffer_count = args->buffer_count; + exec2.batch_start_offset = args->batch_start_offset; + exec2.batch_len = args->batch_len; + exec2.DR1 = args->DR1; + exec2.DR4 = args->DR4; + exec2.num_cliprects = args->num_cliprects; + exec2.cliprects_ptr = args->cliprects_ptr; + exec2.flags = 0; + + ret = i915_gem_do_execbuffer(dev, data, file_priv, &exec2, exec2_list); + if (!ret) { + /* Copy the new buffer offsets back to the user's exec list. */ + for (i = 0; i < args->buffer_count; i++) + exec_list[i].offset = exec2_list[i].offset; + /* ... 
and back out to userspace */ + ret = copy_to_user((struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + exec_list, + sizeof(*exec_list) * args->buffer_count); + if (ret) { + ret = -EFAULT; + DRM_ERROR("failed to copy %d exec entries " + "back to user (%d)\n", + args->buffer_count, ret); + } + } else { + DRM_ERROR("i915_gem_do_execbuffer returns %d\n", ret); + } + + drm_free_large(exec_list); + drm_free_large(exec2_list); + return ret; +} + +int +i915_gem_execbuffer2(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_execbuffer2 *args = data; + struct drm_i915_gem_exec_object2 *exec2_list = NULL; + int ret; + +#if WATCH_EXEC + DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", + (int) args->buffers_ptr, args->buffer_count, args->batch_len); +#endif + + if (args->buffer_count < 1) { + DRM_ERROR("execbuf2 with %d buffers\n", args->buffer_count); + return -EINVAL; + } + + exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count); + if (exec2_list == NULL) { + DRM_ERROR("Failed to allocate exec list for %d buffers\n", + args->buffer_count); + return -ENOMEM; + } + ret = copy_from_user(exec2_list, + (struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + sizeof(*exec2_list) * args->buffer_count); + if (ret != 0) { + DRM_ERROR("copy %d exec entries failed %d\n", + args->buffer_count, ret); + drm_free_large(exec2_list); + return -EFAULT; + } + + ret = i915_gem_do_execbuffer(dev, data, file_priv, args, exec2_list); + if (!ret) { + /* Copy the new buffer offsets back to the user's exec list. */ + ret = copy_to_user((struct drm_i915_relocation_entry __user *) + (uintptr_t) args->buffers_ptr, + exec2_list, + sizeof(*exec2_list) * args->buffer_count); + if (ret) { + ret = -EFAULT; + DRM_ERROR("failed to copy %d exec entries " + "back to user (%d)\n", + args->buffer_count, ret); + } + } + + drm_free_large(exec2_list); + return ret; +} + int i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) { @@ -3934,19 +4079,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment) if (ret) return ret; } - /* - * Pre-965 chips need a fence register set up in order to - * properly handle tiled surfaces. - */ - if (!IS_I965G(dev) && obj_priv->tiling_mode != I915_TILING_NONE) { - ret = i915_gem_object_get_fence_reg(obj); - if (ret != 0) { - if (ret != -EBUSY && ret != -ERESTARTSYS) - DRM_ERROR("Failure to install fence: %d\n", - ret); - return ret; - } - } + obj_priv->pin_count++; /* If the object is not active and not pending a flush, diff --git a/drivers/gpu/drm/i915/i915_gem_tiling.c b/drivers/gpu/drm/i915/i915_gem_tiling.c index 30d6af6c09bb..df278b2685bf 100644 --- a/drivers/gpu/drm/i915/i915_gem_tiling.c +++ b/drivers/gpu/drm/i915/i915_gem_tiling.c @@ -304,35 +304,39 @@ i915_gem_detect_bit_6_swizzle(struct drm_device *dev) /** - * Returns the size of the fence for a tiled object of the given size. + * Returns whether an object is currently fenceable. If not, it may need + * to be unbound and have its pitch adjusted. */ -static int -i915_get_fence_size(struct drm_device *dev, int size) +bool +i915_obj_fenceable(struct drm_device *dev, struct drm_gem_object *obj) { - int i; - int start; + struct drm_i915_gem_object *obj_priv = obj->driver_private; if (IS_I965G(dev)) { /* The 965 can have fences at any page boundary. 
*/ - return ALIGN(size, 4096); + if (obj->size & 4095) + return false; + return true; + } else if (IS_I9XX(dev)) { + if (obj_priv->gtt_offset & ~I915_FENCE_START_MASK) + return false; } else { - /* Align the size to a power of two greater than the smallest - * fence size. - */ - if (IS_I9XX(dev)) - start = 1024 * 1024; - else - start = 512 * 1024; + if (obj_priv->gtt_offset & ~I830_FENCE_START_MASK) + return false; + } - for (i = start; i < size; i <<= 1) - ; + /* Power of two sized... */ + if (obj->size & (obj->size - 1)) + return false; - return i; - } + /* Objects must be size aligned as well */ + if (obj_priv->gtt_offset & (obj->size - 1)) + return false; + return true; } /* Check pitch constriants for all chips & tiling formats */ -static bool +bool i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) { int tile_width; @@ -384,12 +388,6 @@ i915_tiling_ok(struct drm_device *dev, int stride, int size, int tiling_mode) if (stride & (stride - 1)) return false; - /* We don't 0handle the aperture area covered by the fence being bigger - * than the object size. - */ - if (i915_get_fence_size(dev, size) != size) - return false; - return true; } diff --git a/include/drm/i915_drm.h b/include/drm/i915_drm.h index ec3f5e80a5df..b64a8d7cdf6d 100644 --- a/include/drm/i915_drm.h +++ b/include/drm/i915_drm.h @@ -188,6 +188,7 @@ typedef struct _drm_i915_sarea { #define DRM_I915_GEM_MADVISE 0x26 #define DRM_I915_OVERLAY_PUT_IMAGE 0x27 #define DRM_I915_OVERLAY_ATTRS 0x28 +#define DRM_I915_GEM_EXECBUFFER2 0x29 #define DRM_IOCTL_I915_INIT DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t) #define DRM_IOCTL_I915_FLUSH DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -207,6 +208,7 @@ typedef struct _drm_i915_sarea { #define DRM_IOCTL_I915_VBLANK_SWAP DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, drm_i915_vblank_swap_t) #define DRM_IOCTL_I915_GEM_INIT DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) #define DRM_IOCTL_I915_GEM_EXECBUFFER DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) +#define DRM_IOCTL_I915_GEM_EXECBUFFER2 DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER2, struct drm_i915_gem_execbuffer2) #define DRM_IOCTL_I915_GEM_PIN DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) #define DRM_IOCTL_I915_GEM_UNPIN DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) #define DRM_IOCTL_I915_GEM_BUSY DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy) @@ -272,6 +274,7 @@ typedef struct drm_i915_irq_wait { #define I915_PARAM_NUM_FENCES_AVAIL 6 #define I915_PARAM_HAS_OVERLAY 7 #define I915_PARAM_HAS_PAGEFLIPPING 8 +#define I915_PARAM_HAS_EXECBUF2 9 typedef struct drm_i915_getparam { int param; @@ -567,6 +570,57 @@ struct drm_i915_gem_execbuffer { __u64 cliprects_ptr; }; +struct drm_i915_gem_exec_object2 { + /** + * User's handle for a buffer to be bound into the GTT for this + * operation. + */ + __u32 handle; + + /** Number of relocations to be performed on this buffer */ + __u32 relocation_count; + /** + * Pointer to array of struct drm_i915_gem_relocation_entry containing + * the relocations to be performed in this buffer. + */ + __u64 relocs_ptr; + + /** Required alignment in graphics aperture */ + __u64 alignment; + + /** + * Returned value of the updated offset of the object, for future + * presumed_offset writes. 
+ */ + __u64 offset; + +#define EXEC_OBJECT_NEEDS_FENCE (1<<0) + __u64 flags; + __u64 rsvd1; + __u64 rsvd2; +}; + +struct drm_i915_gem_execbuffer2 { + /** + * List of gem_exec_object2 structs + */ + __u64 buffers_ptr; + __u32 buffer_count; + + /** Offset in the batchbuffer to start execution from. */ + __u32 batch_start_offset; + /** Bytes used in batchbuffer from batch_start_offset */ + __u32 batch_len; + __u32 DR1; + __u32 DR4; + __u32 num_cliprects; + /** This is a struct drm_clip_rect *cliprects */ + __u64 cliprects_ptr; + __u64 flags; /* currently unused */ + __u64 rsvd1; + __u64 rsvd2; +}; + struct drm_i915_gem_pin { /** Handle of the buffer to be pinned. */ __u32 handle; -- cgit v1.2.3-70-g09d2 From 8558e3943df1c51c3377cb4e8a52ea484d6f357d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Wed, 6 Jan 2010 16:11:06 -0500 Subject: x86, ACPI: delete acpi_boot_table_init() return value cleanup only. setup_arch() doesn't care if ACPI initialization succeeded or failed, so delete acpi_boot_table_init()'s return value. Signed-off-by: Len Brown --- arch/x86/kernel/acpi/boot.c | 22 ++++++---------------- include/linux/acpi.h | 6 +++--- 2 files changed, 9 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index fb1035cd9a6a..036d28adf59d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1529,16 +1529,10 @@ static struct dmi_system_id __initdata acpi_dmi_table_late[] = { * if acpi_blacklisted() acpi_disabled = 1; * acpi_irq_model=... * ... - * - * return value: (currently ignored) - * 0: success - * !0: failure */ -int __init acpi_boot_table_init(void) +void __init acpi_boot_table_init(void) { - int error; - dmi_check_system(acpi_dmi_table); /* @@ -1546,15 +1540,14 @@ int __init acpi_boot_table_init(void) * One exception: acpi=ht continues far enough to enumerate LAPICs */ if (acpi_disabled && !acpi_ht) - return 1; + return; /* * Initialize the ACPI boot-time table parser.
*/ - error = acpi_table_init(); - if (error) { + if (acpi_table_init()) { disable_acpi(); - return error; + return; } acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf); @@ -1562,18 +1555,15 @@ int __init acpi_boot_table_init(void) /* * blacklist may disable ACPI entirely */ - error = acpi_blacklisted(); - if (error) { + if (acpi_blacklisted()) { if (acpi_force) { printk(KERN_WARNING PREFIX "acpi=force override\n"); } else { printk(KERN_WARNING PREFIX "Disabling ACPI support\n"); disable_acpi(); - return error; + return; } } - - return 0; } int __init early_acpi_boot_init(void) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 36924255c0d5..b926afe8c03e 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -80,7 +80,7 @@ char * __acpi_map_table (unsigned long phys_addr, unsigned long size); void __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); int acpi_boot_init (void); -int acpi_boot_table_init (void); +void acpi_boot_table_init (void); int acpi_mps_check (void); int acpi_numa_init (void); @@ -321,9 +321,9 @@ static inline int acpi_boot_init(void) return 0; } -static inline int acpi_boot_table_init(void) +static inline void acpi_boot_table_init(void) { - return 0; + return; } static inline int acpi_mps_check(void) -- cgit v1.2.3-70-g09d2 From cfe79c00a2f4f687eed8b7534d1d3d3d35540c29 Mon Sep 17 00:00:00 2001 From: Mike Frysinger Date: Wed, 6 Jan 2010 17:23:23 +0000 Subject: NOMMU: Avoiding duplicate icache flushes of shared maps When working with FDPIC, there are many shared mappings of read-only code regions between applications (the C library, applet packages like busybox, etc.), but the current do_mmap_pgoff() function will issue an icache flush whenever a VMA is added to an MM instead of only doing it when the map is initially created. The flush can instead be done when a region is first mmapped PROT_EXEC. Note that we may not rely on the first mapping of a region being executable - it's possible for it to be PROT_READ only, so we have to remember whether we've flushed the region or not, and then flush the entire region when a bit of it is made executable. However, this also affects the brk area. That will no longer be executable. We can mprotect() it to PROT_EXEC on MPU-mode kernels, but for NOMMU mode kernels, when it increases the brk allocation, making sys_brk() flush the extra from the icache should suffice. The brk area probably isn't used by NOMMU programs since the brk area can only use up the leavings from the stack allocation, where the stack allocation is larger than requested. Signed-off-by: David Howells Signed-off-by: Mike Frysinger Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 ++ mm/nommu.c | 11 ++++++++--- 2 files changed, 10 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 84a524afb3dc..84d020bed083 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -123,6 +123,8 @@ struct vm_region { struct file *vm_file; /* the backing file or NULL */ atomic_t vm_usage; /* region usage count */ + bool vm_icache_flushed : 1; /* true if the icache has been flushed for + * this region */ }; /* diff --git a/mm/nommu.c b/mm/nommu.c index 6f9248f89bde..a8d17521624a 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -432,6 +432,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) /* * Ok, looks good - let it rip. 
*/ + flush_icache_range(mm->brk, brk); + return mm->brk = brk; } @@ -1353,10 +1354,14 @@ unsigned long do_mmap_pgoff(struct file *file, share: add_vma_to_mm(current->mm, vma); - up_write(&nommu_region_sem); + /* we flush the region from the icache only when the first executable + * mapping of it is made */ + if (vma->vm_flags & VM_EXEC && !region->vm_icache_flushed) { + flush_icache_range(region->vm_start, region->vm_end); + region->vm_icache_flushed = true; + } - if (prot & PROT_EXEC) - flush_icache_range(result, result + len); + up_write(&nommu_region_sem); kleave(" = %lx", result); return result; -- cgit v1.2.3-70-g09d2 From e6be8d9d17bd44061116f601fe2609b3ace7aa69 Mon Sep 17 00:00:00 2001 From: Zhenyu Wang Date: Tue, 5 Jan 2010 11:25:05 +0800 Subject: drm: remove address mask param for drm_pci_alloc() drm_pci_alloc() takes an address mask argument used to set the PCI DMA mask on the device, which should instead be set up properly by the drm driver itself. Keeping it as a drm_pci_alloc() parameter invites confusion, and a mistake there can corrupt the correct DMA mask setting, as seen on Intel hardware where the wrong DMA mask was set for the hardware status page. So remove it from drm_pci_alloc(). Signed-off-by: Zhenyu Wang Signed-off-by: Dave Airlie --- drivers/gpu/drm/ati_pcigart.c | 10 ++++++++-- drivers/gpu/drm/drm_bufs.c | 4 ++-- drivers/gpu/drm/drm_pci.c | 8 +------- drivers/gpu/drm/i915/i915_dma.c | 2 +- drivers/gpu/drm/i915/i915_gem.c | 2 +- include/drm/drmP.h | 2 +- 6 files changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ati_pcigart.c b/drivers/gpu/drm/ati_pcigart.c index 628eae3e9b83..a1fce68e3bbe 100644 --- a/drivers/gpu/drm/ati_pcigart.c +++ b/drivers/gpu/drm/ati_pcigart.c @@ -39,8 +39,7 @@ static int drm_ati_alloc_pcigart_table(struct drm_device *dev, struct drm_ati_pcigart_info *gart_info) { gart_info->table_handle = drm_pci_alloc(dev, gart_info->table_size, - PAGE_SIZE, - gart_info->table_mask); + PAGE_SIZE); if (gart_info->table_handle == NULL) return -ENOMEM; @@ -112,6 +111,13 @@ int drm_ati_pcigart_init(struct drm_device *dev, struct drm_ati_pcigart_info *ga if (gart_info->gart_table_location == DRM_ATI_GART_MAIN) { DRM_DEBUG("PCI: no table in VRAM: using normal RAM\n"); + if (pci_set_dma_mask(dev->pdev, gart_info->table_mask)) { + DRM_ERROR("fail to set dma mask to 0x%Lx\n", + gart_info->table_mask); + ret = 1; + goto done; + } + ret = drm_ati_alloc_pcigart_table(dev, gart_info); if (ret) { DRM_ERROR("cannot allocate PCI GART page!\n"); diff --git a/drivers/gpu/drm/drm_bufs.c b/drivers/gpu/drm/drm_bufs.c index 3d09e304f6f4..8417cc4c43f1 100644 --- a/drivers/gpu/drm/drm_bufs.c +++ b/drivers/gpu/drm/drm_bufs.c @@ -326,7 +326,7 @@ static int drm_addmap_core(struct drm_device * dev, resource_size_t offset, * As we're limiting the address to 2^32-1 (or less), * casting it down to 32 bits is no problem, but we * need to point to a 64bit variable first. */ - dmah = drm_pci_alloc(dev, map->size, map->size, 0xffffffffUL); + dmah = drm_pci_alloc(dev, map->size, map->size); if (!dmah) { kfree(map); return -ENOMEM; @@ -885,7 +885,7 @@ int drm_addbufs_pci(struct drm_device * dev, struct drm_buf_desc * request) while (entry->buf_count < count) { - dmah = drm_pci_alloc(dev, PAGE_SIZE << page_order, 0x1000, 0xfffffffful); + dmah = drm_pci_alloc(dev, PAGE_SIZE << page_order, 0x1000); if (!dmah) { /* Set count correctly so we free the proper amount.
*/ diff --git a/drivers/gpu/drm/drm_pci.c b/drivers/gpu/drm/drm_pci.c index 577094fb1995..e68ebf92fa2a 100644 --- a/drivers/gpu/drm/drm_pci.c +++ b/drivers/gpu/drm/drm_pci.c @@ -47,8 +47,7 @@ /** * \brief Allocate a PCI consistent memory block, for DMA. */ -drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t align, - dma_addr_t maxaddr) +drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t align) { drm_dma_handle_t *dmah; #if 1 @@ -63,11 +62,6 @@ drm_dma_handle_t *drm_pci_alloc(struct drm_device * dev, size_t size, size_t ali if (align > size) return NULL; - if (pci_set_dma_mask(dev->pdev, maxaddr) != 0) { - DRM_ERROR("Setting pci dma mask failed\n"); - return NULL; - } - dmah = kmalloc(sizeof(drm_dma_handle_t), GFP_KERNEL); if (!dmah) return NULL; diff --git a/drivers/gpu/drm/i915/i915_dma.c b/drivers/gpu/drm/i915/i915_dma.c index 701bfeac7f57..02607ed61399 100644 --- a/drivers/gpu/drm/i915/i915_dma.c +++ b/drivers/gpu/drm/i915/i915_dma.c @@ -123,7 +123,7 @@ static int i915_init_phys_hws(struct drm_device *dev) drm_i915_private_t *dev_priv = dev->dev_private; /* Program Hardware Status Page */ dev_priv->status_page_dmah = - drm_pci_alloc(dev, PAGE_SIZE, PAGE_SIZE, 0xffffffff); + drm_pci_alloc(dev, PAGE_SIZE, PAGE_SIZE); if (!dev_priv->status_page_dmah) { DRM_ERROR("Can not allocate hardware status page\n"); diff --git a/drivers/gpu/drm/i915/i915_gem.c b/drivers/gpu/drm/i915/i915_gem.c index 917b8377ae28..c7f0cbec4e84 100644 --- a/drivers/gpu/drm/i915/i915_gem.c +++ b/drivers/gpu/drm/i915/i915_gem.c @@ -4708,7 +4708,7 @@ int i915_gem_init_phys_object(struct drm_device *dev, phys_obj->id = id; - phys_obj->handle = drm_pci_alloc(dev, size, 0, 0xffffffff); + phys_obj->handle = drm_pci_alloc(dev, size, 0); if (!phys_obj->handle) { ret = -ENOMEM; goto kfree_obj; diff --git a/include/drm/drmP.h b/include/drm/drmP.h index 71dafb69cfeb..ffac157fb5b2 100644 --- a/include/drm/drmP.h +++ b/include/drm/drmP.h @@ -1408,7 +1408,7 @@ extern int drm_ati_pcigart_cleanup(struct drm_device *dev, struct drm_ati_pcigart_info * gart_info); extern drm_dma_handle_t *drm_pci_alloc(struct drm_device *dev, size_t size, - size_t align, dma_addr_t maxaddr); + size_t align); extern void __drm_pci_free(struct drm_device *dev, drm_dma_handle_t * dmah); extern void drm_pci_free(struct drm_device *dev, drm_dma_handle_t * dmah); -- cgit v1.2.3-70-g09d2 From 7ad6848c7e81a603605fad3f3575841aab004eea Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 6 Jan 2010 20:37:01 -0800 Subject: ip: fix mc_loop checks for tunnels with multicast outer addresses When we have L3 tunnels with different inner/outer families (i.e. IPV4/IPV6) which use a multicast address as the outer tunnel destination address, multicast packets will be looped back to the sending socket even if IP*_MULTICAST_LOOP is set to disabled. The mc_loop flag is present in the family specific part of the socket (e.g. the IPv4 or IPv6 specific part). setsockopt sets the inner family mc_loop flag. When the packet is pushed through the L3 tunnel it will eventually be processed by the outer family which, if different, will check the flag in a different part of the socket than the one in which it was set. Signed-off-by: Octavian Purdila Signed-off-by: David S.
Miller --- include/net/ip.h | 16 ++++++++++++++++ net/ipv4/ip_output.c | 2 +- net/ipv6/ip6_output.c | 3 +-- 3 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index 85108cfbb1ae..d9a0e74d8923 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -326,6 +326,22 @@ static __inline__ void inet_reset_saddr(struct sock *sk) #endif +static inline int sk_mc_loop(struct sock *sk) +{ + if (!sk) + return 1; + switch (sk->sk_family) { + case AF_INET: + return inet_sk(sk)->mc_loop; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + return inet6_sk(sk)->mc_loop; +#endif + } + __WARN(); + return 1; +} + extern int ip_call_ra_chain(struct sk_buff *skb); /* diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e34013a78ef4..3451799e3dbf 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -254,7 +254,7 @@ int ip_mc_output(struct sk_buff *skb) */ if (rt->rt_flags&RTCF_MULTICAST) { - if ((!sk || inet_sk(sk)->mc_loop) + if (sk_mc_loop(sk) #ifdef CONFIG_IP_MROUTE /* Small optimization: do not loopback not local frames, which returned after forwarding; they will be dropped diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cd48801a8d6f..eb6d09728633 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -121,10 +121,9 @@ static int ip6_output2(struct sk_buff *skb) skb->dev = dev; if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { - struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL; struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) && + if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && ((mroute6_socket(dev_net(dev)) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, -- cgit v1.2.3-70-g09d2 From fc6a110754476362f9f4fa3199a637f2331c5993 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Mon, 4 Jan 2010 02:02:47 +0000 Subject: Phonet: zero-copy aligned GPRS RX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Newer Nokia cellular modems can use aligned payload for their GPRS pipe. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pep.h | 1 + net/phonet/pep-gprs.c | 4 ++-- net/phonet/pep.c | 3 +++ 3 files changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index 4c61cdce4e5f..c37162547a93 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -77,6 +77,7 @@ static inline struct pnpipehdr *pnp_hdr(struct sk_buff *skb) enum { PNS_PIPE_DATA = 0x20, + PNS_PIPE_ALIGNED_DATA, PNS_PEP_CONNECT_REQ = 0x40, PNS_PEP_CONNECT_RESP, diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c index d183509d3fa6..d01208968c83 100644 --- a/net/phonet/pep-gprs.c +++ b/net/phonet/pep-gprs.c @@ -96,11 +96,11 @@ static int gprs_recv(struct gprs_dev *gp, struct sk_buff *skb) goto drop; } - if (likely(skb_headroom(skb) & 3)) { + if (skb_headroom(skb) & 3) { struct sk_buff *rskb, *fs; int flen = 0; - /* Phonet Pipe data header is misaligned (3 bytes), + /* Phonet Pipe data header may be misaligned (3 bytes), * so wrap the IP packet as a single fragment of an head-less * socket buffer. The network stack will pull what it needs, * but at least, the whole IP payload is not memcpy'd. 
*/ diff --git a/net/phonet/pep.c b/net/phonet/pep.c index b6356f3832f6..e23e30907d34 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -354,6 +354,9 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) queue = &pn->ctrlreq_queue; goto queue; + case PNS_PIPE_ALIGNED_DATA: + __skb_pull(skb, 1); + /* fall through */ case PNS_PIPE_DATA: __skb_pull(skb, 3); /* Pipe data header */ if (!pn_flow_safe(pn->rx_fc)) { -- cgit v1.2.3-70-g09d2 From fea93ecef619b5779ca6984568517b1685079b05 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Mon, 4 Jan 2010 02:02:48 +0000 Subject: Phonet: zero-copy GPRS TX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Send aligned pipe payload if requested to do so. Then, the socket buffer needs not be fragmented anymore. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pep.h | 2 ++ net/phonet/pep.c | 16 ++++++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index c37162547a93..35672b1cf44a 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -44,6 +44,7 @@ struct pep_sock { u8 rx_fc; /* RX flow control */ u8 tx_fc; /* TX flow control */ u8 init_enable; /* auto-enable at creation */ + u8 aligned; }; static inline struct pep_sock *pep_sk(struct sock *sk) @@ -139,6 +140,7 @@ enum { PN_PIPE_SB_NEGOTIATED_FC, PN_PIPE_SB_REQUIRED_FC_TX, PN_PIPE_SB_PREFERRED_FC_RX, + PN_PIPE_SB_ALIGNED_DATA, }; /* Phonet pipe flow control models */ diff --git a/net/phonet/pep.c b/net/phonet/pep.c index e23e30907d34..72db27e3fc06 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -444,6 +444,7 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) struct sockaddr_pn dst; u16 peer_type; u8 pipe_handle, enabled, n_sb; + u8 aligned = 0; if (!pskb_pull(skb, sizeof(*hdr) + 4)) return -EINVAL; @@ -482,6 +483,9 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) return -EINVAL; peer_type = (peer_type & 0xff00) | data[0]; break; + case PN_PIPE_SB_ALIGNED_DATA: + aligned = data[0] != 0; + break; } n_sb--; } @@ -513,6 +517,7 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) newpn->rx_credits = 0; newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; newpn->init_enable = enabled; + newpn->aligned = aligned; BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue)); skb_queue_head(&newsk->sk_receive_queue, skb); @@ -832,11 +837,15 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) return -ENOBUFS; } - skb_push(skb, 3); + skb_push(skb, 3 + pn->aligned); skb_reset_transport_header(skb); ph = pnp_hdr(skb); ph->utid = 0; - ph->message_id = PNS_PIPE_DATA; + if (pn->aligned) { + ph->message_id = PNS_PIPE_ALIGNED_DATA; + ph->data[0] = 0; /* padding */ + } else + ph->message_id = PNS_PIPE_DATA; ph->pipe_handle = pn->pipe_handle; return pn_skb_send(sk, skb, &pipe_srv); @@ -930,6 +939,9 @@ int pep_write(struct sock *sk, struct sk_buff *skb) struct sk_buff *rskb, *fs; int flen = 0; + if (pep_sk(sk)->aligned) + return pipe_skb_send(sk, skb); + rskb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC); if (!rskb) { kfree_skb(skb); -- cgit v1.2.3-70-g09d2 From 65324144b50bc7022cc9b6ca8f4a536a957019e3 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 5 Jan 2010 05:50:47 +0000 Subject: net: RFC3069, private VLAN proxy arp support This is to be used together with switch technologies, like RFC3069, that where the individual ports are not 
allowed to communicate with each other, but they are allowed to talk to the upstream router. As described in RFC 3069, it is possible to allow these hosts to communicate through the upstream router by proxy_arp'ing. This patch basically allow proxy arp replies back to the same interface (from which the ARP request/solicitation was received). Tunable per device via proc "proxy_arp_pvlan": /proc/sys/net/ipv4/conf/*/proxy_arp_pvlan This switch technology is known by different vendor names: - In RFC 3069 it is called VLAN Aggregation. - Cisco and Allied Telesyn call it Private VLAN. - Hewlett-Packard call it Source-Port filtering or port-isolation. - Ericsson call it MAC-Forced Forwarding (RFC Draft). Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 19 +++++++++++++ include/linux/inetdevice.h | 1 + include/linux/sysctl.h | 1 + net/ipv4/arp.c | 52 +++++++++++++++++++++++++++++++--- net/ipv4/devinet.c | 1 + net/ipv4/route.c | 7 ++++- 6 files changed, 76 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index 006b39dec87d..c532884f4fec 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -692,6 +692,25 @@ proxy_arp - BOOLEAN conf/{all,interface}/proxy_arp is set to TRUE, it will be disabled otherwise +proxy_arp_pvlan - BOOLEAN + Private VLAN proxy arp. + Basically allow proxy arp replies back to the same interface + (from which the ARP request/solicitation was received). + + This is done to support (ethernet) switch features, like RFC + 3069, where the individual ports are NOT allowed to + communicate with each other, but they are allowed to talk to + the upstream router. As described in RFC 3069, it is possible + to allow these hosts to communicate through the upstream + router by proxy_arp'ing. Don't need to be used together with + proxy_arp. + + This technology is known by different names: + In RFC 3069 it is called VLAN Aggregation. + Cisco and Allied Telesyn call it Private VLAN. + Hewlett-Packard call it Source-Port filtering or port-isolation. + Ericsson call it MAC-Forced Forwarding (RFC Draft). + shared_media - BOOLEAN Send(router) or accept(host) RFC1620 shared media redirects. Overrides ip_secure_redirects. 
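A minimal userspace sketch (not part of this patch) of how an administrator could flip the new per-device knob; the interface name "eth0" and the direct procfs write are illustrative assumptions:

#include <stdio.h>

/* Sketch only: enable RFC 3069 private VLAN proxy ARP on one interface by
 * writing a non-zero value to its proxy_arp_pvlan procfs entry. */
int main(void)
{
	FILE *f = fopen("/proc/sys/net/ipv4/conf/eth0/proxy_arp_pvlan", "w");

	if (!f)
		return 1;
	fputs("1\n", f);	/* non-zero enables the feature */
	return fclose(f) ? 1 : 0;
}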
diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 699e85c01a4d..9a8c57467d3d 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -88,6 +88,7 @@ static inline void ipv4_devconf_setall(struct in_device *in_dev) #define IN_DEV_LOG_MARTIANS(in_dev) IN_DEV_ORCONF((in_dev), LOG_MARTIANS) #define IN_DEV_PROXY_ARP(in_dev) IN_DEV_ORCONF((in_dev), PROXY_ARP) +#define IN_DEV_PROXY_ARP_PVLAN(in_dev) IN_DEV_CONF_GET(in_dev, PROXY_ARP_PVLAN) #define IN_DEV_SHARED_MEDIA(in_dev) IN_DEV_ORCONF((in_dev), SHARED_MEDIA) #define IN_DEV_TX_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), SEND_REDIRECTS) #define IN_DEV_SEC_REDIRECTS(in_dev) IN_DEV_ORCONF((in_dev), \ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 877ba039e6a4..24ff7e3a0d59 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -482,6 +482,7 @@ enum NET_IPV4_CONF_ARP_ACCEPT=21, NET_IPV4_CONF_ARP_NOTIFY=22, NET_IPV4_CONF_ACCEPT_LOCAL=23, + NET_IPV4_CONF_PROXY_ARP_PVLAN=24, __NET_IPV4_CONF_MAX }; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c95cd93acf29..078709233bc4 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -70,6 +70,7 @@ * bonding can change the skb before * sending (e.g. insert 8021q tag). * Harald Welte : convert to make use of jenkins hash + * Jesper D. Brouer: Proxy ARP PVLAN RFC 3069 support. */ #include @@ -524,12 +525,15 @@ int arp_bind_neighbour(struct dst_entry *dst) /* * Check if we can use proxy ARP for this path */ - -static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) +static inline int arp_fwd_proxy(struct in_device *in_dev, + struct net_device *dev, struct rtable *rt) { struct in_device *out_dev; int imi, omi = -1; + if (rt->u.dst.dev == dev) + return 0; + if (!IN_DEV_PROXY_ARP(in_dev)) return 0; @@ -547,6 +551,43 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) return (omi != imi && omi != -1); } +/* + * Check for RFC3069 proxy arp private VLAN (allow to send back to same dev) + * + * RFC3069 supports proxy arp replies back to the same interface. This + * is done to support (ethernet) switch features, like RFC 3069, where + * the individual ports are not allowed to communicate with each + * other, BUT they are allowed to talk to the upstream router. As + * described in RFC 3069, it is possible to allow these hosts to + * communicate through the upstream router, by proxy_arp'ing. + * + * RFC 3069: "VLAN Aggregation for Efficient IP Address Allocation" + * + * This technology is known by different names: + * In RFC 3069 it is called VLAN Aggregation. + * Cisco and Allied Telesyn call it Private VLAN. + * Hewlett-Packard call it Source-Port filtering or port-isolation. + * Ericsson call it MAC-Forced Forwarding (RFC Draft). + * + */ +static inline int arp_fwd_pvlan(struct in_device *in_dev, + struct net_device *dev, struct rtable *rt, + __be32 sip, __be32 tip) +{ + /* Private VLAN is only concerned about the same ethernet segment */ + if (rt->u.dst.dev != dev) + return 0; + + /* Don't reply on self probes (often done by windowz boxes)*/ + if (sip == tip) + return 0; + + if (IN_DEV_PROXY_ARP_PVLAN(in_dev)) + return 1; + else + return 0; +} + /* * Interface to link layer: send routine and receive handler. 
*/ @@ -833,8 +874,11 @@ static int arp_process(struct sk_buff *skb) } goto out; } else if (IN_DEV_FORWARD(in_dev)) { - if (addr_type == RTN_UNICAST && rt->u.dst.dev != dev && - (arp_fwd_proxy(in_dev, rt) || pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) { + if (addr_type == RTN_UNICAST && + (arp_fwd_proxy(in_dev, dev, rt) || + arp_fwd_pvlan(in_dev, dev, rt, sip, tip) || + pneigh_lookup(&arp_tbl, net, &tip, dev, 0))) + { n = neigh_event_ns(&arp_tbl, sha, &sip, dev); if (n) neigh_release(n); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5cdbc102a418..0715f4cac391 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1407,6 +1407,7 @@ static struct devinet_sysctl_table { DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"), DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"), DEVINET_SYSCTL_RW_ENTRY(ARP_NOTIFY, "arp_notify"), + DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP_PVLAN, "proxy_arp_pvlan"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"), DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"), diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e446496f564f..1cc339441e7d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1988,8 +1988,13 @@ static int __mkroute_input(struct sk_buff *skb, if (skb->protocol != htons(ETH_P_IP)) { /* Not IP (i.e. ARP). Do not create route, if it is * invalid for proxy arp. DNAT routes are always valid. + * + * Proxy arp feature have been extended to allow, ARP + * replies back to the same interface, to support + * Private VLAN switch technologies. See arp.c. */ - if (out_dev == in_dev) { + if (out_dev == in_dev && + IN_DEV_PROXY_ARP_PVLAN(in_dev) == 0) { err = -EINVAL; goto cleanup; } -- cgit v1.2.3-70-g09d2 From 6144a85a0e018c19bc4b24f7eb6c1f3f7431813d Mon Sep 17 00:00:00 2001 From: Jason Wessel Date: Thu, 7 Jan 2010 11:58:36 -0600 Subject: maccess,probe_kernel: Allow arch specific override probe_kernel_(read|write) Some archs such as blackfin, would like to have an arch specific probe_kernel_read() and probe_kernel_write() implementation which can fall back to the generic implementation if no special operations are needed. CC: Thomas Gleixner CC: Ingo Molnar Signed-off-by: Jason Wessel Signed-off-by: Mike Frysinger --- include/linux/uaccess.h | 4 +++- mm/maccess.c | 11 +++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 6b58367d145e..d512d98dfb7d 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -94,6 +94,7 @@ static inline unsigned long __copy_from_user_nocache(void *to, * happens, handle that and return -EFAULT. */ extern long probe_kernel_read(void *dst, void *src, size_t size); +extern long __probe_kernel_read(void *dst, void *src, size_t size); /* * probe_kernel_write(): safely attempt to write to a location @@ -104,6 +105,7 @@ extern long probe_kernel_read(void *dst, void *src, size_t size); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -extern long probe_kernel_write(void *dst, void *src, size_t size); +extern long notrace probe_kernel_write(void *dst, void *src, size_t size); +extern long notrace __probe_kernel_write(void *dst, void *src, size_t size); #endif /* __LINUX_UACCESS_H__ */ diff --git a/mm/maccess.c b/mm/maccess.c index 9073695ff25f..4e348dbaecd7 100644 --- a/mm/maccess.c +++ b/mm/maccess.c @@ -14,7 +14,11 @@ * Safely read from address @src to the buffer at @dst. If a kernel fault * happens, handle that and return -EFAULT. 
*/ -long probe_kernel_read(void *dst, void *src, size_t size) + +long __weak probe_kernel_read(void *dst, void *src, size_t size) + __attribute__((alias("__probe_kernel_read"))); + +long __probe_kernel_read(void *dst, void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); @@ -39,7 +43,10 @@ EXPORT_SYMBOL_GPL(probe_kernel_read); * Safely write to address @dst from the buffer at @src. If a kernel fault * happens, handle that and return -EFAULT. */ -long notrace __weak probe_kernel_write(void *dst, void *src, size_t size) +long __weak probe_kernel_write(void *dst, void *src, size_t size) + __attribute__((alias("__probe_kernel_write"))); + +long __probe_kernel_write(void *dst, void *src, size_t size) { long ret; mm_segment_t old_fs = get_fs(); -- cgit v1.2.3-70-g09d2 From b11e1eca7ed9c0b5dab21a62c11acc711d9bdda0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 7 Jan 2010 11:58:37 -0600 Subject: kgdb: Fix kernel-doc format error in kgdb.h linux-next-20081022//include/linux/kgdb.h:308): duplicate section name 'Description' and fix typos in that file's kernel-doc comments. Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Jason Wessel --- include/linux/kgdb.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 6adcc297e354..19ec41a183f5 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -29,8 +29,7 @@ struct pt_regs; * * On some architectures it is required to skip a breakpoint * exception when it occurs after a breakpoint has been removed. - * This can be implemented in the architecture specific portion of - * for kgdb. + * This can be implemented in the architecture specific portion of kgdb. */ extern int kgdb_skipexception(int exception, struct pt_regs *regs); @@ -65,7 +64,7 @@ struct uart_port; /** * kgdb_breakpoint - compiled in breakpoint * - * This will be impelmented a static inline per architecture. This + * This will be implemented as a static inline per architecture. This * function is called by the kgdb core to execute an architecture * specific trap to cause kgdb to enter the exception processing. * @@ -190,7 +189,7 @@ kgdb_arch_handle_exception(int vector, int signo, int err_code, * @flags: Current IRQ state * * On SMP systems, we need to get the attention of the other CPUs - * and get them be in a known state. This should do what is needed + * and get them into a known state. This should do what is needed * to get the other CPUs to call kgdb_wait(). Note that on some arches, * the NMI approach is not used for rounding up all the CPUs. For example, * in case of MIPS, smp_call_function() is used to roundup CPUs. In -- cgit v1.2.3-70-g09d2 From 3c9732c06879d85f2fdf7ec69198c1d78da42a98 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Wed, 6 Jan 2010 23:07:13 +0000 Subject: stmmac: add the new Header file for stmmac platform data Signed-off-by: Giuseppe Cavallaro Signed-off-by: David S. 
Miller --- include/linux/stmmac.h | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 53 insertions(+) create mode 100644 include/linux/stmmac.h (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h new file mode 100644 index 000000000000..32bfd1a8a48d --- /dev/null +++ b/include/linux/stmmac.h @@ -0,0 +1,53 @@ +/******************************************************************************* + + Header file for stmmac platform data + + Copyright (C) 2009 STMicroelectronics Ltd + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Author: Giuseppe Cavallaro +*******************************************************************************/ + +#ifndef __STMMAC_PLATFORM_DATA +#define __STMMAC_PLATFORM_DATA + +/* platfrom data for platfrom device structure's platfrom_data field */ + +/* Private data for the STM on-board ethernet driver */ +struct plat_stmmacenet_data { + int bus_id; + int pbl; + int has_gmac; + void (*fix_mac_speed)(void *priv, unsigned int speed); + void (*bus_setup)(unsigned long ioaddr); +#ifdef CONFIG_STM_DRIVERS + struct stm_pad_config *pad_config; +#endif + void *bsp_priv; +}; + +struct plat_stmmacphy_data { + int bus_id; + int phy_addr; + unsigned int phy_mask; + int interface; + int (*phy_reset)(void *priv); + void *priv; +}; +#endif + -- cgit v1.2.3-70-g09d2 From 7970e677accb676f15e11468c60cb93ae477a513 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Thu, 7 Jan 2010 13:47:47 -0500 Subject: drm: Add eDP connector type Add a new connector type for eDP (embedded displayport) eDP is more or less the same as DP but there are some cases when you might want to handle it separately. 
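A small illustrative sketch (not from the patch) of the sort of special-casing the new type enables; the #define value mirrors the drm_mode.h addition below, while the helper name and the always-connected policy are assumptions:

#include <stdio.h>
#include <stdbool.h>

#define DRM_MODE_CONNECTOR_eDP 14	/* value added to drm_mode.h by this patch */

/* Hypothetical policy helper: treat an embedded panel as always connected,
 * while any other connector type would still need a live probe. */
static bool connector_assumed_connected(int connector_type)
{
	return connector_type == DRM_MODE_CONNECTOR_eDP;
}

int main(void)
{
	printf("eDP assumed connected: %d\n",
	       connector_assumed_connected(DRM_MODE_CONNECTOR_eDP));
	return 0;
}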
Signed-off-by: Alex Deucher Signed-off-by: Dave Airlie --- drivers/gpu/drm/drm_crtc.c | 1 + include/drm/drm_mode.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc.c b/drivers/gpu/drm/drm_crtc.c index 4a7bbdbedfc2..fa19c2b9820f 100644 --- a/drivers/gpu/drm/drm_crtc.c +++ b/drivers/gpu/drm/drm_crtc.c @@ -158,6 +158,7 @@ static struct drm_conn_prop_enum_list drm_connector_enum_list[] = { DRM_MODE_CONNECTOR_HDMIA, "HDMI Type A", 0 }, { DRM_MODE_CONNECTOR_HDMIB, "HDMI Type B", 0 }, { DRM_MODE_CONNECTOR_TV, "TV", 0 }, + { DRM_MODE_CONNECTOR_eDP, "Embedded DisplayPort", 0 }, }; static struct drm_prop_enum_list drm_encoder_enum_list[] = diff --git a/include/drm/drm_mode.h b/include/drm/drm_mode.h index 43009bc2e757..bc4fdf27bd2e 100644 --- a/include/drm/drm_mode.h +++ b/include/drm/drm_mode.h @@ -160,6 +160,7 @@ struct drm_mode_get_encoder { #define DRM_MODE_CONNECTOR_HDMIA 11 #define DRM_MODE_CONNECTOR_HDMIB 12 #define DRM_MODE_CONNECTOR_TV 13 +#define DRM_MODE_CONNECTOR_eDP 14 struct drm_mode_get_connector { -- cgit v1.2.3-70-g09d2 From dd3d145d49c5816b79acc6761ebbd842bc50b0ee Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 11 Jan 2010 03:21:48 -0500 Subject: block: Fix discard alignment calculation and printing Discard alignment reporting for partitions was incorrect. Update to match the algorithm used elsewhere. The alignment can be negative (misaligned). Fix format string accordingly. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- block/genhd.c | 2 +- include/linux/blkdev.h | 7 +++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/block/genhd.c b/block/genhd.c index b11a4ad7d571..d13ba76a169c 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -867,7 +867,7 @@ static ssize_t disk_discard_alignment_show(struct device *dev, { struct gendisk *disk = dev_to_disk(dev); - return sprintf(buf, "%u\n", queue_discard_alignment(disk->queue)); + return sprintf(buf, "%d\n", queue_discard_alignment(disk->queue)); } static DEVICE_ATTR(range, S_IRUGO, disk_range_show, NULL); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9b98173a8184..a41bcc8e140f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1148,8 +1148,11 @@ static inline int queue_discard_alignment(struct request_queue *q) static inline int queue_sector_discard_alignment(struct request_queue *q, sector_t sector) { - return ((sector << 9) - q->limits.discard_alignment) - & (q->limits.discard_granularity - 1); + struct queue_limits *lim = &q->limits; + unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); + + return (lim->discard_granularity + lim->discard_alignment - alignment) + & (lim->discard_granularity - 1); } static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) -- cgit v1.2.3-70-g09d2 From 17be8c245054b9c7786545af3ba3ca4e54cd4ad9 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Mon, 11 Jan 2010 03:21:49 -0500 Subject: block: bdev_stack_limits wrapper DM does not want to know about partition offsets. Add a partition-aware wrapper that DM can use when stacking block devices. Signed-off-by: Martin K. 
Petersen Acked-by: Mike Snitzer Reviewed-by: Alasdair G Kergon Signed-off-by: Jens Axboe --- block/blk-settings.c | 22 ++++++++++++++++++++++ include/linux/blkdev.h | 2 ++ 2 files changed, 24 insertions(+) (limited to 'include') diff --git a/block/blk-settings.c b/block/blk-settings.c index 127f82551855..5eeb9e0d256e 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -639,6 +639,28 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, } EXPORT_SYMBOL(blk_stack_limits); +/** + * bdev_stack_limits - adjust queue limits for stacked drivers + * @t: the stacking driver limits (top device) + * @bdev: the component block_device (bottom) + * @start: first data sector within component device + * + * Description: + * Merges queue limits for a top device and a block_device. Returns + * 0 if alignment didn't change. Returns -1 if adding the bottom + * device caused misalignment. + */ +int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, + sector_t start) +{ + struct request_queue *bq = bdev_get_queue(bdev); + + start += get_start_sect(bdev); + + return blk_stack_limits(t, &bq->limits, start << 9); +} +EXPORT_SYMBOL(bdev_stack_limits); + /** * disk_stack_limits - adjust queue limits for stacked drivers * @disk: MD/DM gendisk (top) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a41bcc8e140f..5c8018977efa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -938,6 +938,8 @@ extern void blk_queue_io_opt(struct request_queue *q, unsigned int opt); extern void blk_set_default_limits(struct queue_limits *lim); extern int blk_stack_limits(struct queue_limits *t, struct queue_limits *b, sector_t offset); +extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, + sector_t offset); extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); -- cgit v1.2.3-70-g09d2 From ce289321b7dc1eb108e3df0dec872b7429ef49f7 Mon Sep 17 00:00:00 2001 From: Kirill Afonshin Date: Fri, 8 Jan 2010 22:09:59 +0300 Subject: block: removed unused as_io_context It isn't used anymore, since AS was deleted. Signed-off-by: Jens Axboe --- block/blk-ioc.c | 5 ----- include/linux/iocontext.h | 27 --------------------------- 2 files changed, 32 deletions(-) (limited to 'include') diff --git a/block/blk-ioc.c b/block/blk-ioc.c index cbdabb0dd6d7..98e6bf61b0ac 100644 --- a/block/blk-ioc.c +++ b/block/blk-ioc.c @@ -39,8 +39,6 @@ int put_io_context(struct io_context *ioc) if (atomic_long_dec_and_test(&ioc->refcount)) { rcu_read_lock(); - if (ioc->aic && ioc->aic->dtor) - ioc->aic->dtor(ioc->aic); cfq_dtor(ioc); rcu_read_unlock(); @@ -76,8 +74,6 @@ void exit_io_context(struct task_struct *task) task_unlock(task); if (atomic_dec_and_test(&ioc->nr_tasks)) { - if (ioc->aic && ioc->aic->exit) - ioc->aic->exit(ioc->aic); cfq_exit(ioc); } @@ -97,7 +93,6 @@ struct io_context *alloc_io_context(gfp_t gfp_flags, int node) ret->ioprio = 0; ret->last_waited = jiffies; /* doesn't matter... 
*/ ret->nr_batch_requests = 0; /* because this is 0 */ - ret->aic = NULL; INIT_RADIX_TREE(&ret->radix_root, GFP_ATOMIC | __GFP_HIGH); INIT_HLIST_HEAD(&ret->cic_list); ret->ioc_data = NULL; diff --git a/include/linux/iocontext.h b/include/linux/iocontext.h index a63235996309..78ef023227d4 100644 --- a/include/linux/iocontext.h +++ b/include/linux/iocontext.h @@ -4,32 +4,6 @@ #include #include -/* - * This is the per-process anticipatory I/O scheduler state. - */ -struct as_io_context { - spinlock_t lock; - - void (*dtor)(struct as_io_context *aic); /* destructor */ - void (*exit)(struct as_io_context *aic); /* called on task exit */ - - unsigned long state; - atomic_t nr_queued; /* queued reads & sync writes */ - atomic_t nr_dispatched; /* number of requests gone to the drivers */ - - /* IO History tracking */ - /* Thinktime */ - unsigned long last_end_request; - unsigned long ttime_total; - unsigned long ttime_samples; - unsigned long ttime_mean; - /* Layout pattern */ - unsigned int seek_samples; - sector_t last_request_pos; - u64 seek_total; - sector_t seek_mean; -}; - struct cfq_queue; struct cfq_io_context { void *key; @@ -78,7 +52,6 @@ struct io_context { unsigned long last_waited; /* Time last woken after wait for request */ int nr_batch_requests; /* Number of requests left in the batch */ - struct as_io_context *aic; struct radix_tree_root radix_root; struct hlist_head cic_list; void *ioc_data; -- cgit v1.2.3-70-g09d2 From 7af92f8754b87bc78cbfd447d5f4096b25c46682 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Wed, 6 Jan 2010 15:45:55 -0800 Subject: genhd: overlapping variable definition This fixes the sparse warning: fs/ext4/super.c:2390:40: warning: symbol 'i' shadows an earlier one fs/ext4/super.c:2368:22: originally declared here Using 'i' in a macro is dubious practice. Signed-off-by: Stephen Hemminger Signed-off-by: Jens Axboe --- include/linux/genhd.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index c6c0c41af35f..9717081c75ad 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -256,9 +256,9 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, #define part_stat_read(part, field) \ ({ \ typeof((part)->dkstats->field) res = 0; \ - int i; \ - for_each_possible_cpu(i) \ - res += per_cpu_ptr((part)->dkstats, i)->field; \ + unsigned int _cpu; \ + for_each_possible_cpu(_cpu) \ + res += per_cpu_ptr((part)->dkstats, _cpu)->field; \ res; \ }) -- cgit v1.2.3-70-g09d2 From 4b529401c5089cf33f7165607cbc2fde43357bfb Mon Sep 17 00:00:00 2001 From: Andreas Fenkart Date: Fri, 8 Jan 2010 14:42:31 -0800 Subject: mm: make totalhigh_pages unsigned long Makes it consistent with the extern declaration, used when CONFIG_HIGHMEM is set Removes redundant casts in printout messages Signed-off-by: Andreas Fenkart Acked-by: Russell King Cc: Ralf Baechle Cc: David Howells Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. 
Peter Anvin" Cc: Chen Liqin Cc: Lennox Wu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/init.c | 2 +- arch/mips/mm/init.c | 2 +- arch/mips/sgi-ip27/ip27-memory.c | 2 +- arch/mn10300/mm/init.c | 3 +-- arch/score/mm/init.c | 2 +- arch/x86/mm/init_32.c | 3 +-- include/linux/highmem.h | 2 +- 7 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 52c40d155672..a04ffbbbe253 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -616,7 +616,7 @@ void __init mem_init(void) "%dK data, %dK init, %luK highmem)\n", nr_free_pages() << (PAGE_SHIFT-10), codesize >> 10, datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); if (PAGE_SIZE >= 16384 && num_physpages <= 128) { extern int sysctl_overcommit_memory; diff --git a/arch/mips/mm/init.c b/arch/mips/mm/init.c index 9e8d00389eef..1651942f7feb 100644 --- a/arch/mips/mm/init.c +++ b/arch/mips/mm/init.c @@ -424,7 +424,7 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ diff --git a/arch/mips/sgi-ip27/ip27-memory.c b/arch/mips/sgi-ip27/ip27-memory.c index f61c164d1e67..bc1297109cc5 100644 --- a/arch/mips/sgi-ip27/ip27-memory.c +++ b/arch/mips/sgi-ip27/ip27-memory.c @@ -505,5 +505,5 @@ void __init mem_init(void) (num_physpages - tmp) << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); } diff --git a/arch/mn10300/mm/init.c b/arch/mn10300/mm/init.c index ec1420562dc7..dd27a9a35152 100644 --- a/arch/mn10300/mm/init.c +++ b/arch/mn10300/mm/init.c @@ -118,8 +118,7 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT - 10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT - 10)) - ); + totalhigh_pages << (PAGE_SHIFT - 10)); } /* diff --git a/arch/score/mm/init.c b/arch/score/mm/init.c index 8c15b2c85d5a..dfaf458d6702 100644 --- a/arch/score/mm/init.c +++ b/arch/score/mm/init.c @@ -106,7 +106,7 @@ void __init mem_init(void) ram << (PAGE_SHIFT-10), codesize >> 10, reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))); + totalhigh_pages << (PAGE_SHIFT-10)); } #endif /* !CONFIG_NEED_MULTIPLE_NODES */ diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index c973f8e2a6cf..9a0c258a86be 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -892,8 +892,7 @@ void __init mem_init(void) reservedpages << (PAGE_SHIFT-10), datasize >> 10, initsize >> 10, - (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) - ); + totalhigh_pages << (PAGE_SHIFT-10)); printk(KERN_INFO "virtual kernel memory layout:\n" " fixmap : 0x%08lx - 0x%08lx (%4ld kB)\n" diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 211ff4497269..ab2cc20e21a5 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -46,7 +46,7 @@ void kmap_flush_unused(void); static inline unsigned int nr_free_highpages(void) { return 0; } -#define totalhigh_pages 0 +#define totalhigh_pages 0UL #ifndef ARCH_HAS_KMAP static inline void *kmap(struct page *page) -- cgit v1.2.3-70-g09d2 From e992cd9b72a18122bd5c958715623057f110793f Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Fri, 8 Jan 2010 14:42:35 -0800 Subject: 
kmemcheck: make bitfield annotations truly no-ops when disabled It turns out that even zero-sized struct members (int foo[0];) will affect the struct layout, causing us in particular to lose 4 bytes in struct sock. This patch fixes the regression in CONFIG_KMEMCHECK=n case. Reported-by: Eric Dumazet Signed-off-by: Vegard Nossum Acked-by: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kmemcheck.h | 110 ++++++++++++++++++++++++---------------------- 1 file changed, 58 insertions(+), 52 deletions(-) (limited to 'include') diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index e880d4cf9e22..08d7dc4ddf40 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h @@ -36,6 +36,56 @@ int kmemcheck_hide_addr(unsigned long address); bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size); +/* + * Bitfield annotations + * + * How to use: If you have a struct using bitfields, for example + * + * struct a { + * int x:8, y:8; + * }; + * + * then this should be rewritten as + * + * struct a { + * kmemcheck_bitfield_begin(flags); + * int x:8, y:8; + * kmemcheck_bitfield_end(flags); + * }; + * + * Now the "flags_begin" and "flags_end" members may be used to refer to the + * beginning and end, respectively, of the bitfield (and things like + * &x.flags_begin is allowed). As soon as the struct is allocated, the bit- + * fields should be annotated: + * + * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); + * kmemcheck_annotate_bitfield(a, flags); + */ +#define kmemcheck_bitfield_begin(name) \ + int name##_begin[0]; + +#define kmemcheck_bitfield_end(name) \ + int name##_end[0]; + +#define kmemcheck_annotate_bitfield(ptr, name) \ + do { \ + int _n; \ + \ + if (!ptr) \ + break; \ + \ + _n = (long) &((ptr)->name##_end) \ + - (long) &((ptr)->name##_begin); \ + MAYBE_BUILD_BUG_ON(_n < 0); \ + \ + kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ + } while (0) + +#define kmemcheck_annotate_variable(var) \ + do { \ + kmemcheck_mark_initialized(&(var), sizeof(var)); \ + } while (0) \ + #else #define kmemcheck_enabled 0 @@ -106,60 +156,16 @@ static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) return true; } -#endif /* CONFIG_KMEMCHECK */ - -/* - * Bitfield annotations - * - * How to use: If you have a struct using bitfields, for example - * - * struct a { - * int x:8, y:8; - * }; - * - * then this should be rewritten as - * - * struct a { - * kmemcheck_bitfield_begin(flags); - * int x:8, y:8; - * kmemcheck_bitfield_end(flags); - * }; - * - * Now the "flags_begin" and "flags_end" members may be used to refer to the - * beginning and end, respectively, of the bitfield (and things like - * &x.flags_begin is allowed). As soon as the struct is allocated, the bit- - * fields should be annotated: - * - * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); - * kmemcheck_annotate_bitfield(a, flags); - * - * Note: We provide the same definitions for both kmemcheck and non- - * kmemcheck kernels. This makes it harder to introduce accidental errors. It - * is also allowed to pass NULL pointers to kmemcheck_annotate_bitfield(). 
- */ -#define kmemcheck_bitfield_begin(name) \ - int name##_begin[0]; - -#define kmemcheck_bitfield_end(name) \ - int name##_end[0]; +#define kmemcheck_bitfield_begin(name) +#define kmemcheck_bitfield_end(name) +#define kmemcheck_annotate_bitfield(ptr, name) \ + do { \ + } while (0) -#define kmemcheck_annotate_bitfield(ptr, name) \ - do { \ - int _n; \ - \ - if (!ptr) \ - break; \ - \ - _n = (long) &((ptr)->name##_end) \ - - (long) &((ptr)->name##_begin); \ - MAYBE_BUILD_BUG_ON(_n < 0); \ - \ - kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ +#define kmemcheck_annotate_variable(var) \ + do { \ } while (0) -#define kmemcheck_annotate_variable(var) \ - do { \ - kmemcheck_mark_initialized(&(var), sizeof(var)); \ - } while (0) \ +#endif /* CONFIG_KMEMCHECK */ #endif /* LINUX_KMEMCHECK_H */ -- cgit v1.2.3-70-g09d2 From 7dd65feb6c603e13eba501c34c662259ab38e70e Mon Sep 17 00:00:00 2001 From: Albin Tonnerre Date: Fri, 8 Jan 2010 14:42:42 -0800 Subject: lib: add support for LZO-compressed kernels This patch series adds generic support for creating and extracting LZO-compressed kernel images, as well as support for using such images on the x86 and ARM architectures, and support for creating and using LZO-compressed initrd and initramfs images. Russell King said: : Testing on a Cortex A9 model: : - lzo decompressor is 65% of the time gzip takes to decompress a kernel : - lzo kernel is 9% larger than a gzip kernel : : which I'm happy to say confirms your figures when comparing the two. : : However, when comparing your new gzip code to the old gzip code: : - new is 99% of the size of the old code : - new takes 42% of the time to decompress than the old code : : What this means is that for a proper comparison, the results get even better: : - lzo is 7.5% larger than the old gzip'd kernel image : - lzo takes 28% of the time that the old gzip code took : : So the expense seems definitely worth the effort. The only reason I : can think of ever using gzip would be if you needed the additional : compression (eg, because you have limited flash to store the image.) : : I would argue that the default for ARM should therefore be LZO. This patch: The lzo compressor is worse than gzip at compression, but faster at extraction. Here are some figures for an ARM board I'm working on: Uncompressed size: 3.24Mo gzip 1.61Mo 0.72s lzo 1.75Mo 0.48s So for a compression ratio that is still relatively close to gzip, it's much faster to extract, at least in that case. This part contains: - Makefile routine to support lzo compression - Fixes to the existing lzo compressor so that it can be used in compressed kernels - wrapper around the existing lzo1x_decompress, as it only extracts one block at a time, while we need to extract a whole file here - config dialog for kernel compression [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: cleanup] Signed-off-by: Albin Tonnerre Tested-by: Wu Zhangjin Acked-by: "H. 
Peter Anvin" Cc: Ingo Molnar Cc: Thomas Gleixner Tested-by: Russell King Acked-by: Russell King Cc: Ralf Baechle Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/decompress/unlzo.h | 10 ++ init/Kconfig | 18 +++- lib/decompress_unlzo.c | 209 +++++++++++++++++++++++++++++++++++++++ lib/lzo/lzo1x_decompress.c | 9 +- scripts/Makefile.lib | 5 + 5 files changed, 244 insertions(+), 7 deletions(-) create mode 100644 include/linux/decompress/unlzo.h create mode 100644 lib/decompress_unlzo.c (limited to 'include') diff --git a/include/linux/decompress/unlzo.h b/include/linux/decompress/unlzo.h new file mode 100644 index 000000000000..987229752519 --- /dev/null +++ b/include/linux/decompress/unlzo.h @@ -0,0 +1,10 @@ +#ifndef DECOMPRESS_UNLZO_H +#define DECOMPRESS_UNLZO_H + +int unlzo(unsigned char *inbuf, int len, + int(*fill)(void*, unsigned int), + int(*flush)(void*, unsigned int), + unsigned char *output, + int *pos, + void(*error)(char *x)); +#endif diff --git a/init/Kconfig b/init/Kconfig index a23da9f01803..d95ca7cd5d45 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -115,10 +115,13 @@ config HAVE_KERNEL_BZIP2 config HAVE_KERNEL_LZMA bool +config HAVE_KERNEL_LZO + bool + choice prompt "Kernel compression mode" default KERNEL_GZIP - depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA + depends on HAVE_KERNEL_GZIP || HAVE_KERNEL_BZIP2 || HAVE_KERNEL_LZMA || HAVE_KERNEL_LZO help The linux kernel is a kind of self-extracting executable. Several compression algorithms are available, which differ @@ -141,9 +144,8 @@ config KERNEL_GZIP bool "Gzip" depends on HAVE_KERNEL_GZIP help - The old and tried gzip compression. Its compression ratio is - the poorest among the 3 choices; however its speed (both - compression and decompression) is the fastest. + The old and tried gzip compression. It provides a good balance + between compression ratio and decompression speed. config KERNEL_BZIP2 bool "Bzip2" @@ -164,6 +166,14 @@ config KERNEL_LZMA two. Compression is slowest. The kernel size is about 33% smaller with LZMA in comparison to gzip. +config KERNEL_LZO + bool "LZO" + depends on HAVE_KERNEL_LZO + help + Its compression ratio is the poorest among the 4. The kernel + size is about about 10% bigger than gzip; however its speed + (both compression and decompression) is the fastest. + endchoice config SWAP diff --git a/lib/decompress_unlzo.c b/lib/decompress_unlzo.c new file mode 100644 index 000000000000..db521f45626e --- /dev/null +++ b/lib/decompress_unlzo.c @@ -0,0 +1,209 @@ +/* + * LZO decompressor for the Linux kernel. Code borrowed from the lzo + * implementation by Markus Franz Xaver Johannes Oberhumer. + * + * Linux kernel adaptation: + * Copyright (C) 2009 + * Albin Tonnerre, Free Electrons + * + * Original code: + * Copyright (C) 1996-2005 Markus Franz Xaver Johannes Oberhumer + * All Rights Reserved. + * + * lzop and the LZO library are free software; you can redistribute them + * and/or modify them under the terms of the GNU General Public License as + * published by the Free Software Foundation; either version 2 of + * the License, or (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; see the file COPYING. 
+ * If not, write to the Free Software Foundation, Inc., + * 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. + * + * Markus F.X.J. Oberhumer + * + * http://www.oberhumer.com/opensource/lzop/ + */ + +#ifdef STATIC +#include "lzo/lzo1x_decompress.c" +#else +#include +#include +#endif + +#include +#include +#include + +#include +#include + +static const unsigned char lzop_magic[] = { + 0x89, 0x4c, 0x5a, 0x4f, 0x00, 0x0d, 0x0a, 0x1a, 0x0a }; + +#define LZO_BLOCK_SIZE (256*1024l) +#define HEADER_HAS_FILTER 0x00000800L + +STATIC inline int INIT parse_header(u8 *input, u8 *skip) +{ + int l; + u8 *parse = input; + u8 level = 0; + u16 version; + + /* read magic: 9 first bits */ + for (l = 0; l < 9; l++) { + if (*parse++ != lzop_magic[l]) + return 0; + } + /* get version (2bytes), skip library version (2), + * 'need to be extracted' version (2) and + * method (1) */ + version = get_unaligned_be16(parse); + parse += 7; + if (version >= 0x0940) + level = *parse++; + if (get_unaligned_be32(parse) & HEADER_HAS_FILTER) + parse += 8; /* flags + filter info */ + else + parse += 4; /* flags */ + + /* skip mode and mtime_low */ + parse += 8; + if (version >= 0x0940) + parse += 4; /* skip mtime_high */ + + l = *parse++; + /* don't care about the file name, and skip checksum */ + parse += l + 4; + + *skip = parse - input; + return 1; +} + +STATIC inline int INIT unlzo(u8 *input, int in_len, + int (*fill) (void *, unsigned int), + int (*flush) (void *, unsigned int), + u8 *output, int *posp, + void (*error_fn) (char *x)) +{ + u8 skip = 0, r = 0; + u32 src_len, dst_len; + size_t tmp; + u8 *in_buf, *in_buf_save, *out_buf; + int obytes_processed = 0; + + set_error_fn(error_fn); + + if (output) { + out_buf = output; + } else if (!flush) { + error("NULL output pointer and no flush function provided"); + goto exit; + } else { + out_buf = malloc(LZO_BLOCK_SIZE); + if (!out_buf) { + error("Could not allocate output buffer"); + goto exit; + } + } + + if (input && fill) { + error("Both input pointer and fill function provided, don't know what to do"); + goto exit_1; + } else if (input) { + in_buf = input; + } else if (!fill || !posp) { + error("NULL input pointer and missing position pointer or fill function"); + goto exit_1; + } else { + in_buf = malloc(lzo1x_worst_compress(LZO_BLOCK_SIZE)); + if (!in_buf) { + error("Could not allocate input buffer"); + goto exit_1; + } + } + in_buf_save = in_buf; + + if (posp) + *posp = 0; + + if (fill) + fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE)); + + if (!parse_header(input, &skip)) { + error("invalid header"); + goto exit_2; + } + in_buf += skip; + + if (posp) + *posp = skip; + + for (;;) { + /* read uncompressed block size */ + dst_len = get_unaligned_be32(in_buf); + in_buf += 4; + + /* exit if last block */ + if (dst_len == 0) { + if (posp) + *posp += 4; + break; + } + + if (dst_len > LZO_BLOCK_SIZE) { + error("dest len longer than block size"); + goto exit_2; + } + + /* read compressed block size, and skip block checksum info */ + src_len = get_unaligned_be32(in_buf); + in_buf += 8; + + if (src_len <= 0 || src_len > dst_len) { + error("file corrupted"); + goto exit_2; + } + + /* decompress */ + tmp = dst_len; + r = lzo1x_decompress_safe((u8 *) in_buf, src_len, + out_buf, &tmp); + + if (r != LZO_E_OK || dst_len != tmp) { + error("Compressed data violation"); + goto exit_2; + } + + obytes_processed += dst_len; + if (flush) + flush(out_buf, dst_len); + if (output) + out_buf += dst_len; + if (posp) + *posp += src_len + 12; + if (fill) { + in_buf = in_buf_save; + 
fill(in_buf, lzo1x_worst_compress(LZO_BLOCK_SIZE)); + } else + in_buf += src_len; + } + +exit_2: + if (!input) + free(in_buf); +exit_1: + if (!output) + free(out_buf); +exit: + return obytes_processed; +} + +#define decompress unlzo diff --git a/lib/lzo/lzo1x_decompress.c b/lib/lzo/lzo1x_decompress.c index 5dc6b29c1575..f2fd09850223 100644 --- a/lib/lzo/lzo1x_decompress.c +++ b/lib/lzo/lzo1x_decompress.c @@ -11,11 +11,13 @@ * Richard Purdie */ +#ifndef STATIC #include #include -#include -#include +#endif + #include +#include #include "lzodefs.h" #define HAVE_IP(x, ip_end, ip) ((size_t)(ip_end - ip) < (x)) @@ -244,9 +246,10 @@ lookbehind_overrun: *out_len = op - out; return LZO_E_LOOKBEHIND_OVERRUN; } - +#ifndef STATIC EXPORT_SYMBOL_GPL(lzo1x_decompress_safe); MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("LZO1X Decompressor"); +#endif diff --git a/scripts/Makefile.lib b/scripts/Makefile.lib index cd815ac2a50b..0fe48cd91ffa 100644 --- a/scripts/Makefile.lib +++ b/scripts/Makefile.lib @@ -235,3 +235,8 @@ quiet_cmd_lzma = LZMA $@ cmd_lzma = (cat $(filter-out FORCE,$^) | \ lzma -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ (rm -f $@ ; false) + +quiet_cmd_lzo = LZO $@ +cmd_lzo = (cat $(filter-out FORCE,$^) | \ + lzop -9 && $(call size_append, $(filter-out FORCE,$^))) > $@ || \ + (rm -f $@ ; false) -- cgit v1.2.3-70-g09d2 From 80884094e34456887ecdbd107d40e72c4a40f9c9 Mon Sep 17 00:00:00 2001 From: Michael Hennerich Date: Fri, 8 Jan 2010 14:43:08 -0800 Subject: gpio: adp5588-gpio: new driver for ADP5588 GPIO expanders Signed-off-by: Michael Hennerich Signed-off-by: Mike Frysinger Cc: Jean Delvare Cc: David Brownell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/gpio/Kconfig | 9 ++ drivers/gpio/Makefile | 1 + drivers/gpio/adp5588-gpio.c | 266 ++++++++++++++++++++++++++++++++++++++++++++ include/linux/i2c/adp5588.h | 12 ++ 4 files changed, 288 insertions(+) create mode 100644 drivers/gpio/adp5588-gpio.c (limited to 'include') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index a019b49ecc9b..1f1d88ae68d6 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -172,6 +172,15 @@ config GPIO_ADP5520 To compile this driver as a module, choose M here: the module will be called adp5520-gpio. +config GPIO_ADP5588 + tristate "ADP5588 I2C GPIO expander" + depends on I2C + help + This option enables support for 18 GPIOs found + on Analog Devices ADP5588 GPIO Expanders. + To compile this driver as a module, choose M here: the module will be + called adp5588-gpio. + comment "PCI GPIO expanders:" config GPIO_CS5535 diff --git a/drivers/gpio/Makefile b/drivers/gpio/Makefile index 52fe4cf734c7..48687238edb1 100644 --- a/drivers/gpio/Makefile +++ b/drivers/gpio/Makefile @@ -5,6 +5,7 @@ ccflags-$(CONFIG_DEBUG_GPIO) += -DDEBUG obj-$(CONFIG_GPIOLIB) += gpiolib.o obj-$(CONFIG_GPIO_ADP5520) += adp5520-gpio.o +obj-$(CONFIG_GPIO_ADP5588) += adp5588-gpio.o obj-$(CONFIG_GPIO_LANGWELL) += langwell_gpio.o obj-$(CONFIG_GPIO_MAX7301) += max7301.o obj-$(CONFIG_GPIO_MAX732X) += max732x.o diff --git a/drivers/gpio/adp5588-gpio.c b/drivers/gpio/adp5588-gpio.c new file mode 100644 index 000000000000..afc097a16b33 --- /dev/null +++ b/drivers/gpio/adp5588-gpio.c @@ -0,0 +1,266 @@ +/* + * GPIO Chip driver for Analog Devices + * ADP5588 I/O Expander and QWERTY Keypad Controller + * + * Copyright 2009 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. 
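[Editorial aside, not part of the driver file that follows: since this driver is configured through platform data, a board file would typically declare something like the sketch below. The GPIO base, bus number and 0x34 slave address are illustrative only; check the board schematics and the ADP5588 datasheet.]

	static struct adp5588_gpio_platform_data board_adp5588_gpio = {
		.gpio_start	 = 200,	/* arbitrary example base for the 18 expander GPIOs */
		.pullup_dis_mask = 0,	/* keep all internal pull-ups enabled */
	};

	static struct i2c_board_info board_i2c_devices[] __initdata = {
		{
			I2C_BOARD_INFO("adp5588-gpio", 0x34),	/* example 7-bit address */
			.platform_data = &board_adp5588_gpio,
		},
	};

	/* registered from the board init code, e.g.
	 *	i2c_register_board_info(0, board_i2c_devices,
	 *				ARRAY_SIZE(board_i2c_devices));
	 */
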
+ */ + +#include +#include +#include +#include +#include + +#include + +#define DRV_NAME "adp5588-gpio" +#define MAXGPIO 18 +#define ADP_BANK(offs) ((offs) >> 3) +#define ADP_BIT(offs) (1u << ((offs) & 0x7)) + +struct adp5588_gpio { + struct i2c_client *client; + struct gpio_chip gpio_chip; + struct mutex lock; /* protect cached dir, dat_out */ + unsigned gpio_start; + uint8_t dat_out[3]; + uint8_t dir[3]; +}; + +static int adp5588_gpio_read(struct i2c_client *client, u8 reg) +{ + int ret = i2c_smbus_read_byte_data(client, reg); + + if (ret < 0) + dev_err(&client->dev, "Read Error\n"); + + return ret; +} + +static int adp5588_gpio_write(struct i2c_client *client, u8 reg, u8 val) +{ + int ret = i2c_smbus_write_byte_data(client, reg, val); + + if (ret < 0) + dev_err(&client->dev, "Write Error\n"); + + return ret; +} + +static int adp5588_gpio_get_value(struct gpio_chip *chip, unsigned off) +{ + struct adp5588_gpio *dev = + container_of(chip, struct adp5588_gpio, gpio_chip); + + return !!(adp5588_gpio_read(dev->client, GPIO_DAT_STAT1 + ADP_BANK(off)) + & ADP_BIT(off)); +} + +static void adp5588_gpio_set_value(struct gpio_chip *chip, + unsigned off, int val) +{ + unsigned bank, bit; + struct adp5588_gpio *dev = + container_of(chip, struct adp5588_gpio, gpio_chip); + + bank = ADP_BANK(off); + bit = ADP_BIT(off); + + mutex_lock(&dev->lock); + if (val) + dev->dat_out[bank] |= bit; + else + dev->dat_out[bank] &= ~bit; + + adp5588_gpio_write(dev->client, GPIO_DAT_OUT1 + bank, + dev->dat_out[bank]); + mutex_unlock(&dev->lock); +} + +static int adp5588_gpio_direction_input(struct gpio_chip *chip, unsigned off) +{ + int ret; + unsigned bank; + struct adp5588_gpio *dev = + container_of(chip, struct adp5588_gpio, gpio_chip); + + bank = ADP_BANK(off); + + mutex_lock(&dev->lock); + dev->dir[bank] &= ~ADP_BIT(off); + ret = adp5588_gpio_write(dev->client, GPIO_DIR1 + bank, dev->dir[bank]); + mutex_unlock(&dev->lock); + + return ret; +} + +static int adp5588_gpio_direction_output(struct gpio_chip *chip, + unsigned off, int val) +{ + int ret; + unsigned bank, bit; + struct adp5588_gpio *dev = + container_of(chip, struct adp5588_gpio, gpio_chip); + + bank = ADP_BANK(off); + bit = ADP_BIT(off); + + mutex_lock(&dev->lock); + dev->dir[bank] |= bit; + + if (val) + dev->dat_out[bank] |= bit; + else + dev->dat_out[bank] &= ~bit; + + ret = adp5588_gpio_write(dev->client, GPIO_DAT_OUT1 + bank, + dev->dat_out[bank]); + ret |= adp5588_gpio_write(dev->client, GPIO_DIR1 + bank, + dev->dir[bank]); + mutex_unlock(&dev->lock); + + return ret; +} + +static int __devinit adp5588_gpio_probe(struct i2c_client *client, + const struct i2c_device_id *id) +{ + struct adp5588_gpio_platform_data *pdata = client->dev.platform_data; + struct adp5588_gpio *dev; + struct gpio_chip *gc; + int ret, i, revid; + + if (pdata == NULL) { + dev_err(&client->dev, "missing platform data\n"); + return -ENODEV; + } + + if (!i2c_check_functionality(client->adapter, + I2C_FUNC_SMBUS_BYTE_DATA)) { + dev_err(&client->dev, "SMBUS Byte Data not Supported\n"); + return -EIO; + } + + dev = kzalloc(sizeof(*dev), GFP_KERNEL); + if (dev == NULL) { + dev_err(&client->dev, "failed to alloc memory\n"); + return -ENOMEM; + } + + dev->client = client; + + gc = &dev->gpio_chip; + gc->direction_input = adp5588_gpio_direction_input; + gc->direction_output = adp5588_gpio_direction_output; + gc->get = adp5588_gpio_get_value; + gc->set = adp5588_gpio_set_value; + gc->can_sleep = 1; + + gc->base = pdata->gpio_start; + gc->ngpio = MAXGPIO; + gc->label = client->name; + 
gc->owner = THIS_MODULE; + + mutex_init(&dev->lock); + + + ret = adp5588_gpio_read(dev->client, DEV_ID); + if (ret < 0) + goto err; + + revid = ret & ADP5588_DEVICE_ID_MASK; + + for (i = 0, ret = 0; i <= ADP_BANK(MAXGPIO); i++) { + dev->dat_out[i] = adp5588_gpio_read(client, GPIO_DAT_OUT1 + i); + dev->dir[i] = adp5588_gpio_read(client, GPIO_DIR1 + i); + ret |= adp5588_gpio_write(client, KP_GPIO1 + i, 0); + ret |= adp5588_gpio_write(client, GPIO_PULL1 + i, + (pdata->pullup_dis_mask >> (8 * i)) & 0xFF); + + if (ret) + goto err; + } + + ret = gpiochip_add(&dev->gpio_chip); + if (ret) + goto err; + + dev_info(&client->dev, "gpios %d..%d on a %s Rev. %d\n", + gc->base, gc->base + gc->ngpio - 1, + client->name, revid); + + if (pdata->setup) { + ret = pdata->setup(client, gc->base, gc->ngpio, pdata->context); + if (ret < 0) + dev_warn(&client->dev, "setup failed, %d\n", ret); + } + + i2c_set_clientdata(client, dev); + return 0; + +err: + kfree(dev); + return ret; +} + +static int __devexit adp5588_gpio_remove(struct i2c_client *client) +{ + struct adp5588_gpio_platform_data *pdata = client->dev.platform_data; + struct adp5588_gpio *dev = i2c_get_clientdata(client); + int ret; + + if (pdata->teardown) { + ret = pdata->teardown(client, + dev->gpio_chip.base, dev->gpio_chip.ngpio, + pdata->context); + if (ret < 0) { + dev_err(&client->dev, "teardown failed %d\n", ret); + return ret; + } + } + + ret = gpiochip_remove(&dev->gpio_chip); + if (ret) { + dev_err(&client->dev, "gpiochip_remove failed %d\n", ret); + return ret; + } + + kfree(dev); + return 0; +} + +static const struct i2c_device_id adp5588_gpio_id[] = { + {DRV_NAME, 0}, + {} +}; + +MODULE_DEVICE_TABLE(i2c, adp5588_gpio_id); + +static struct i2c_driver adp5588_gpio_driver = { + .driver = { + .name = DRV_NAME, + }, + .probe = adp5588_gpio_probe, + .remove = __devexit_p(adp5588_gpio_remove), + .id_table = adp5588_gpio_id, +}; + +static int __init adp5588_gpio_init(void) +{ + return i2c_add_driver(&adp5588_gpio_driver); +} + +module_init(adp5588_gpio_init); + +static void __exit adp5588_gpio_exit(void) +{ + i2c_del_driver(&adp5588_gpio_driver); +} + +module_exit(adp5588_gpio_exit); + +MODULE_AUTHOR("Michael Hennerich "); +MODULE_DESCRIPTION("GPIO ADP5588 Driver"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/i2c/adp5588.h b/include/linux/i2c/adp5588.h index fc5db826b48e..02c9af374741 100644 --- a/include/linux/i2c/adp5588.h +++ b/include/linux/i2c/adp5588.h @@ -89,4 +89,16 @@ struct adp5588_kpad_platform_data { unsigned short unlock_key2; /* Unlock Key 2 */ }; +struct adp5588_gpio_platform_data { + unsigned gpio_start; /* GPIO Chip base # */ + unsigned pullup_dis_mask; /* Pull-Up Disable Mask */ + int (*setup)(struct i2c_client *client, + int gpio, unsigned ngpio, + void *context); + int (*teardown)(struct i2c_client *client, + int gpio, unsigned ngpio, + void *context); + void *context; +}; + #endif -- cgit v1.2.3-70-g09d2 From a29815a333c6c6e677294bbe5958e771d0aad3fd Mon Sep 17 00:00:00 2001 From: Avi Kivity Date: Sun, 10 Jan 2010 16:28:09 +0200 Subject: core, x86: make LIST_POISON less deadly The list macros use LIST_POISON1 and LIST_POISON2 as undereferencable pointers in order to trap erronous use of freed list_heads. Unfortunately userspace can arrange for those pointers to actually be dereferencable, potentially turning an oops to an expolit. To avoid this allow architectures (currently x86_64 only) to override the default values for these pointers with truly-undereferencable values. 
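[Editorial aside: the effect of the change can be pictured as below; this is just the new include/linux/poison.h arithmetic written out with the x86_64 value chosen in the patch (the real header takes it from CONFIG_ILLEGAL_POINTER_VALUE via _AC()).]

	/* With CONFIG_ILLEGAL_POINTER_VALUE = 0xdead000000000000 on x86_64: */
	#define POISON_POINTER_DELTA	0xdead000000000000UL
	#define LIST_POISON1		((void *) (0x00100100 + POISON_POINTER_DELTA))
	#define LIST_POISON2		((void *) (0x00200200 + POISON_POINTER_DELTA))
	/*
	 * LIST_POISON1 becomes 0xdead000000100100, a non-canonical address that
	 * user space can never mmap(), so poking a poisoned list_head always
	 * faults instead of landing on attacker-controlled data. On 32-bit the
	 * delta stays 0 and the values remain 0x00100100 / 0x00200200 as before.
	 */
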
This is easy on x86_64 as the virtual address space is large and contains areas that cannot be mapped. Other 64-bit architectures will likely find similar unmapped ranges. [ingo: switch to 0xdead000000000000 as the unmapped area] [ingo: add comments, cleanup] [jaswinder: eliminate sparse warnings] Acked-by: Linus Torvalds Signed-off-by: Jaswinder Singh Rajput Signed-off-by: Ingo Molnar Signed-off-by: Avi Kivity Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 5 +++++ include/linux/poison.h | 16 ++++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 6bf1f1ac478c..cbcbfdee3ee0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1247,6 +1247,11 @@ config ARCH_MEMORY_PROBE def_bool X86_64 depends on MEMORY_HOTPLUG +config ILLEGAL_POINTER_VALUE + hex + default 0 if X86_32 + default 0xdead000000000000 if X86_64 + source "mm/Kconfig" config HIGHPTE diff --git a/include/linux/poison.h b/include/linux/poison.h index 7fc194aef8c2..2110a81c5e2a 100644 --- a/include/linux/poison.h +++ b/include/linux/poison.h @@ -2,13 +2,25 @@ #define _LINUX_POISON_H /********** include/linux/list.h **********/ + +/* + * Architectures might want to move the poison pointer offset + * into some well-recognized area such as 0xdead000000000000, + * that is also not mappable by user-space exploits: + */ +#ifdef CONFIG_ILLEGAL_POINTER_VALUE +# define POISON_POINTER_DELTA _AC(CONFIG_ILLEGAL_POINTER_VALUE, UL) +#else +# define POISON_POINTER_DELTA 0 +#endif + /* * These are non-NULL pointers that will result in page faults * under normal circumstances, used to verify that nobody uses * non-initialized list entries. */ -#define LIST_POISON1 ((void *) 0x00100100) -#define LIST_POISON2 ((void *) 0x00200200) +#define LIST_POISON1 ((void *) 0x00100100 + POISON_POINTER_DELTA) +#define LIST_POISON2 ((void *) 0x00200200 + POISON_POINTER_DELTA) /********** include/linux/timer.h **********/ /* -- cgit v1.2.3-70-g09d2 From d218d11133d888f9745802146a50255a4781d37a Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 11 Jan 2010 16:28:01 -0800 Subject: tcp: Generalized TTL Security Mechanism This patch adds the kernel portions needed to implement RFC 5082 Generalized TTL Security Mechanism (GTSM). It is a lightweight security measure against forged packets causing DoS attacks (for BGP). This is already implemented the same way in BSD kernels. For the necessary Quagga patch http://www.gossamer-threads.com/lists/quagga/dev/17389 Description from Cisco http://www.cisco.com/en/US/docs/ios/12_3t/12_3t7/feature/guide/gt_btsh.html It does add one byte to each socket structure, but I did a little rearrangement to reuse a hole (on 64 bit), but it does grow the structure on 32 bit This should be documented on ip(4) man page and the Glibc in.h file also needs update. IPV6_MINHOPLIMIT should also be added (although BSD doesn't support that). Only TCP is supported, but could also be added to UDP, DCCP, SCTP if desired. Signed-off-by: Stephen Hemminger Signed-off-by: David S. 
Miller --- include/linux/in.h | 2 ++ include/net/inet_sock.h | 4 +++- net/ipv4/ip_sockglue.c | 14 +++++++++++++- net/ipv4/tcp_ipv4.c | 3 +++ 4 files changed, 21 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/in.h b/include/linux/in.h index b615649db129..583c76f9c30f 100644 --- a/include/linux/in.h +++ b/include/linux/in.h @@ -84,6 +84,8 @@ struct in_addr { #define IP_ORIGDSTADDR 20 #define IP_RECVORIGDSTADDR IP_ORIGDSTADDR +#define IP_MINTTL 21 + /* IP_MTU_DISCOVER values */ #define IP_PMTUDISC_DONT 0 /* Never send DF frames */ #define IP_PMTUDISC_WANT 1 /* Use per route hints */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index bd4c53f75ac0..83fd34437cf1 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -122,10 +122,12 @@ struct inet_sock { __be32 inet_saddr; __s16 uc_ttl; __u16 cmsg_flags; - struct ip_options *opt; __be16 inet_sport; __u16 inet_id; + + struct ip_options *opt; __u8 tos; + __u8 min_ttl; __u8 mc_ttl; __u8 pmtudisc; __u8 recverr:1, diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index cafad9baff03..644dc43a55de 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -451,7 +451,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, (1<transparent = !!val; break; + case IP_MINTTL: + if (optlen < 1) + goto e_inval; + if (val < 0 || val > 255) + goto e_inval; + inet->min_ttl = val; + break; + default: err = -ENOPROTOOPT; break; @@ -1198,6 +1207,9 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, case IP_TRANSPARENT: val = inet->transparent; break; + case IP_MINTTL: + val = inet->min_ttl; + break; default: release_sock(sk); return -ENOPROTOOPT; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 65b8ebfd078a..382f667238ec 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1649,6 +1649,9 @@ int tcp_v4_rcv(struct sk_buff *skb) if (!sk) goto no_tcp_socket; + if (iph->ttl < inet_sk(sk)->min_ttl) + goto discard_and_relse; + process: if (sk->sk_state == TCP_TIME_WAIT) goto do_time_wait; -- cgit v1.2.3-70-g09d2 From a393db6f10ef2d4f28257234cfc730e744dfb6a4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 22 Dec 2009 13:35:52 +0100 Subject: drbd: Allow online resizing of DRBD devices while peer not reachable (needs to be explicitly forced) Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_int.h | 5 ++--- drivers/block/drbd/drbd_nl.c | 17 +++++++++++------ drivers/block/drbd/drbd_receiver.c | 4 ++-- include/linux/drbd_nl.h | 1 + 4 files changed, 16 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 79d8e22c4d0d..2bf3a6ef3684 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -1371,10 +1371,9 @@ extern int is_valid_ar_handle(struct drbd_request *, sector_t); extern void drbd_suspend_io(struct drbd_conf *mdev); extern void drbd_resume_io(struct drbd_conf *mdev); extern char *ppsize(char *buf, unsigned long long size); -extern sector_t drbd_new_dev_size(struct drbd_conf *, - struct drbd_backing_dev *); +extern sector_t drbd_new_dev_size(struct drbd_conf *, struct drbd_backing_dev *, int); enum determine_dev_size { dev_size_error = -1, unchanged = 0, shrunk = 1, grew = 2 }; -extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *) __must_hold(local); +extern enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *, int force) __must_hold(local); extern void 
resync_after_online_grow(struct drbd_conf *); extern void drbd_setup_queue_param(struct drbd_conf *mdev, unsigned int) __must_hold(local); extern int drbd_set_role(struct drbd_conf *mdev, enum drbd_role new_role, diff --git a/drivers/block/drbd/drbd_nl.c b/drivers/block/drbd/drbd_nl.c index 3313901a4861..1292e0620663 100644 --- a/drivers/block/drbd/drbd_nl.c +++ b/drivers/block/drbd/drbd_nl.c @@ -510,7 +510,7 @@ void drbd_resume_io(struct drbd_conf *mdev) * Returns 0 on success, negative return values indicate errors. * You should call drbd_md_sync() after calling this function. */ -enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_hold(local) +enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev, int force) __must_hold(local) { sector_t prev_first_sect, prev_size; /* previous meta location */ sector_t la_size; @@ -541,7 +541,7 @@ enum determine_dev_size drbd_determin_dev_size(struct drbd_conf *mdev) __must_ho /* TODO: should only be some assert here, not (re)init... */ drbd_md_set_sector_offsets(mdev, mdev->ldev); - size = drbd_new_dev_size(mdev, mdev->ldev); + size = drbd_new_dev_size(mdev, mdev->ldev, force); if (drbd_get_capacity(mdev->this_bdev) != size || drbd_bm_capacity(mdev) != size) { @@ -596,7 +596,7 @@ out: } sector_t -drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) +drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev, int assume_peer_has_space) { sector_t p_size = mdev->p_size; /* partner's disk size. */ sector_t la_size = bdev->md.la_size_sect; /* last agreed size. */ @@ -606,6 +606,11 @@ drbd_new_dev_size(struct drbd_conf *mdev, struct drbd_backing_dev *bdev) m_size = drbd_get_max_capacity(bdev); + if (mdev->state.conn < C_CONNECTED && assume_peer_has_space) { + dev_warn(DEV, "Resize while not connected was forced by the user!\n"); + p_size = m_size; + } + if (p_size && m_size) { size = min_t(sector_t, p_size, m_size); } else { @@ -965,7 +970,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp /* Prevent shrinking of consistent devices ! */ if (drbd_md_test_flag(nbc, MDF_CONSISTENT) && - drbd_new_dev_size(mdev, nbc) < nbc->md.la_size_sect) { + drbd_new_dev_size(mdev, nbc, 0) < nbc->md.la_size_sect) { dev_warn(DEV, "refusing to truncate a consistent device\n"); retcode = ERR_DISK_TO_SMALL; goto force_diskless_dec; @@ -1052,7 +1057,7 @@ static int drbd_nl_disk_conf(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp !drbd_md_test_flag(mdev->ldev, MDF_CONNECTED_IND)) set_bit(USE_DEGR_WFC_T, &mdev->flags); - dd = drbd_determin_dev_size(mdev); + dd = drbd_determin_dev_size(mdev, 0); if (dd == dev_size_error) { retcode = ERR_NOMEM_BITMAP; goto force_diskless_dec; @@ -1504,7 +1509,7 @@ static int drbd_nl_resize(struct drbd_conf *mdev, struct drbd_nl_cfg_req *nlp, } mdev->ldev->dc.disk_size = (sector_t)rs.resize_size; - dd = drbd_determin_dev_size(mdev); + dd = drbd_determin_dev_size(mdev, rs.resize_force); drbd_md_sync(mdev); put_ldev(mdev); if (dd == dev_size_error) { diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e3716fadc6a5..f22a5283128a 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2870,7 +2870,7 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) /* Never shrink a device with usable data during connect. But allow online shrinking if we are connected. 
*/ - if (drbd_new_dev_size(mdev, mdev->ldev) < + if (drbd_new_dev_size(mdev, mdev->ldev, 0) < drbd_get_capacity(mdev->this_bdev) && mdev->state.disk >= D_OUTDATED && mdev->state.conn < C_CONNECTED) { @@ -2885,7 +2885,7 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) #undef min_not_zero if (get_ldev(mdev)) { - dd = drbd_determin_dev_size(mdev); + dd = drbd_determin_dev_size(mdev, 0); put_ldev(mdev); if (dd == dev_size_error) return FALSE; diff --git a/include/linux/drbd_nl.h b/include/linux/drbd_nl.h index db5721ad50d1..a4d82f895994 100644 --- a/include/linux/drbd_nl.h +++ b/include/linux/drbd_nl.h @@ -69,6 +69,7 @@ NL_PACKET(disconnect, 6, ) NL_PACKET(resize, 7, NL_INT64( 29, T_MAY_IGNORE, resize_size) + NL_BIT( 68, T_MAY_IGNORE, resize_force) ) NL_PACKET(syncer_conf, 8, -- cgit v1.2.3-70-g09d2 From 2ebccd71a71e6078920bc65b40f120e72b71c2b6 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 12 Jan 2010 10:09:07 +0100 Subject: drbd: The kernel code is now equivalent to out of tree release 8.3.7 Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- include/linux/drbd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/drbd.h b/include/linux/drbd.h index e84f4733cb55..78962272338a 100644 --- a/include/linux/drbd.h +++ b/include/linux/drbd.h @@ -53,7 +53,7 @@ extern const char *drbd_buildtag(void); -#define REL_VERSION "8.3.6" +#define REL_VERSION "8.3.7" #define API_VERSION 88 #define PRO_VERSION_MIN 86 #define PRO_VERSION_MAX 91 -- cgit v1.2.3-70-g09d2 From 3ccd4c6167d3b39d52631767ebbf8b5677c5855d Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 12 Jan 2010 02:00:46 -0800 Subject: can: Unify droping of invalid tx skbs and netdev stats To prevent the CAN drivers to operate on invalid socketbuffers the skbs are now checked and silently dropped at the xmit-function consistently. Also the netdev stats are consistently using the CAN data length code (dlc) for [rx|tx]_bytes now. Signed-off-by: Oliver Hartkopp Acked-by: Wolfgang Grandegger Signed-off-by: David S. 
Miller --- drivers/net/can/at91_can.c | 3 +++ drivers/net/can/bfin_can.c | 3 +++ drivers/net/can/mcp251x.c | 6 +----- drivers/net/can/mscan/mscan.c | 5 +---- drivers/net/can/sja1000/sja1000.c | 3 +++ drivers/net/can/ti_hecc.c | 4 +++- drivers/net/can/usb/ems_usb.c | 3 +++ drivers/net/can/vcan.c | 12 +++++++++--- include/linux/can/dev.h | 15 +++++++++++++++ 9 files changed, 41 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index 166cc7e579c0..f7287497ba6e 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -342,6 +342,9 @@ static netdev_tx_t at91_start_xmit(struct sk_buff *skb, struct net_device *dev) unsigned int mb, prio; u32 reg_mid, reg_mcr; + if (can_dropped_invalid_skb(dev, skb)) + return NETDEV_TX_OK; + mb = get_tx_next_mb(priv); prio = get_tx_next_prio(priv); diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c index 0ec1524523cc..7e1926e79e98 100644 --- a/drivers/net/can/bfin_can.c +++ b/drivers/net/can/bfin_can.c @@ -318,6 +318,9 @@ static int bfin_can_start_xmit(struct sk_buff *skb, struct net_device *dev) u16 val; int i; + if (can_dropped_invalid_skb(dev, skb)) + return NETDEV_TX_OK; + netif_stop_queue(dev); /* fill id */ diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c index 1a72ca066a17..afa2fa45fed9 100644 --- a/drivers/net/can/mcp251x.c +++ b/drivers/net/can/mcp251x.c @@ -494,12 +494,8 @@ static netdev_tx_t mcp251x_hard_start_xmit(struct sk_buff *skb, return NETDEV_TX_BUSY; } - if (skb->len != sizeof(struct can_frame)) { - dev_err(&spi->dev, "dropping packet - bad length\n"); - dev_kfree_skb(skb); - net->stats.tx_dropped++; + if (can_dropped_invalid_skb(net, skb)) return NETDEV_TX_OK; - } netif_stop_queue(net); priv->tx_skb = skb; diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c index 500d18918bd5..40827c128b65 100644 --- a/drivers/net/can/mscan/mscan.c +++ b/drivers/net/can/mscan/mscan.c @@ -204,11 +204,8 @@ static netdev_tx_t mscan_start_xmit(struct sk_buff *skb, struct net_device *dev) int i, rtr, buf_id; u32 can_id; - if (skb->len != sizeof(*frame) || frame->can_dlc > 8) { - kfree_skb(skb); - dev->stats.tx_dropped++; + if (can_dropped_invalid_skb(dev, skb)) return NETDEV_TX_OK; - } out_8(®s->cantier, 0); diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 542a4f7255b4..345304d779b9 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -249,6 +249,9 @@ static netdev_tx_t sja1000_start_xmit(struct sk_buff *skb, uint8_t dreg; int i; + if (can_dropped_invalid_skb(dev, skb)) + return NETDEV_TX_OK; + netif_stop_queue(dev); fi = dlc = cf->can_dlc; diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 5c993c2da528..7d370e32a7a8 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -477,6 +477,9 @@ static netdev_tx_t ti_hecc_xmit(struct sk_buff *skb, struct net_device *ndev) u32 mbxno, mbx_mask, data; unsigned long flags; + if (can_dropped_invalid_skb(ndev, skb)) + return NETDEV_TX_OK; + mbxno = get_tx_head_mb(priv); mbx_mask = BIT(mbxno); spin_lock_irqsave(&priv->mbx_lock, flags); @@ -491,7 +494,6 @@ static netdev_tx_t ti_hecc_xmit(struct sk_buff *skb, struct net_device *ndev) spin_unlock_irqrestore(&priv->mbx_lock, flags); /* Prepare mailbox for transmission */ - data = min_t(u8, cf->can_dlc, 8); if (cf->can_id & CAN_RTR_FLAG) /* Remote transmission request */ data |= HECC_CANMCF_RTR; data |= 
get_tx_head_prio(priv) << 8; diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index efbb05c71bf4..ddb17e256656 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -767,6 +767,9 @@ static netdev_tx_t ems_usb_start_xmit(struct sk_buff *skb, struct net_device *ne size_t size = CPC_HEADER_SIZE + CPC_MSG_HEADER_LEN + sizeof(struct cpc_can_msg); + if (can_dropped_invalid_skb(netdev, skb)) + return NETDEV_TX_OK; + /* create a URB, and a buffer for it, and copy the data to the URB */ urb = usb_alloc_urb(0, GFP_ATOMIC); if (!urb) { diff --git a/drivers/net/can/vcan.c b/drivers/net/can/vcan.c index 80ac56313981..d124d837ae58 100644 --- a/drivers/net/can/vcan.c +++ b/drivers/net/can/vcan.c @@ -47,6 +47,7 @@ #include #include #include +#include #include static __initdata const char banner[] = @@ -70,10 +71,11 @@ MODULE_PARM_DESC(echo, "Echo sent frames (for testing). Default: 0 (Off)"); static void vcan_rx(struct sk_buff *skb, struct net_device *dev) { + struct can_frame *cf = (struct can_frame *)skb->data; struct net_device_stats *stats = &dev->stats; stats->rx_packets++; - stats->rx_bytes += skb->len; + stats->rx_bytes += cf->can_dlc; skb->protocol = htons(ETH_P_CAN); skb->pkt_type = PACKET_BROADCAST; @@ -85,11 +87,15 @@ static void vcan_rx(struct sk_buff *skb, struct net_device *dev) static netdev_tx_t vcan_tx(struct sk_buff *skb, struct net_device *dev) { + struct can_frame *cf = (struct can_frame *)skb->data; struct net_device_stats *stats = &dev->stats; int loop; + if (can_dropped_invalid_skb(dev, skb)) + return NETDEV_TX_OK; + stats->tx_packets++; - stats->tx_bytes += skb->len; + stats->tx_bytes += cf->can_dlc; /* set flag whether this packet has to be looped back */ loop = skb->pkt_type == PACKET_LOOPBACK; @@ -103,7 +109,7 @@ static netdev_tx_t vcan_tx(struct sk_buff *skb, struct net_device *dev) * CAN core already did the echo for us */ stats->rx_packets++; - stats->rx_bytes += skb->len; + stats->rx_bytes += cf->can_dlc; } kfree_skb(skb); return NETDEV_TX_OK; diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 3db7767d2a17..7e7c98a3e908 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -60,6 +60,21 @@ struct can_priv { */ #define get_can_dlc(i) (min_t(__u8, (i), 8)) +/* Drop a given socketbuffer if it does not contain a valid CAN frame. */ +static inline int can_dropped_invalid_skb(struct net_device *dev, + struct sk_buff *skb) +{ + const struct can_frame *cf = (struct can_frame *)skb->data; + + if (unlikely(skb->len != sizeof(*cf) || cf->can_dlc > 8)) { + kfree_skb(skb); + dev->stats.tx_dropped++; + return 1; + } + + return 0; +} + struct net_device *alloc_candev(int sizeof_priv, unsigned int echo_skb_max); void free_candev(struct net_device *dev); -- cgit v1.2.3-70-g09d2 From 81077e82c3f591578625805dd6464a27a9ff56ec Mon Sep 17 00:00:00 2001 From: Lukáš Turek <8an@praha12.net> Date: Mon, 21 Dec 2009 22:50:47 +0100 Subject: nl80211: Add new WIPHY attribute COVERAGE_CLASS The new attribute NL80211_ATTR_WIPHY_COVERAGE_CLASS sets IEEE 802.11 Coverage Class, which depends on maximum distance of nodes in a wireless network. It's required for long distance links (more than a few hundred meters). The attribute is now ignored by two non-mac80211 drivers, rndis and iwmc3200wifi, together with WIPHY_PARAM_RETRY_SHORT and WIPHY_PARAM_RETRY_LONG. If it turns out to be a problem, we could split set_wiphy_params callback or add new capability bits. Signed-off-by: Lukas Turek <8an@praha12.net> Signed-off-by: John W. 
Linville --- include/linux/nl80211.h | 4 ++++ include/net/cfg80211.h | 2 ++ net/wireless/core.c | 1 + net/wireless/nl80211.c | 15 +++++++++++++++ 4 files changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 2bfbe88837ef..d4c556de7170 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -430,6 +430,8 @@ enum nl80211_commands { * @NL80211_ATTR_WIPHY_RTS_THRESHOLD: RTS threshold (TX frames with length * larger than or equal to this use RTS/CTS handshake); allowed range: * 0..65536, disable with (u32)-1; dot11RTSThreshold; u32 + * @NL80211_ATTR_WIPHY_COVERAGE_CLASS: Coverage Class as defined by IEEE 802.11 + * section 7.3.2.9; dot11CoverageClass; u8 * * @NL80211_ATTR_IFINDEX: network interface index of the device to operate on * @NL80211_ATTR_IFNAME: network interface name @@ -779,6 +781,8 @@ enum nl80211_attrs { NL80211_ATTR_COOKIE, + NL80211_ATTR_WIPHY_COVERAGE_CLASS, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index add79930f47d..a8d5d04314b9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -837,6 +837,7 @@ enum wiphy_params_flags { WIPHY_PARAM_RETRY_LONG = 1 << 1, WIPHY_PARAM_FRAG_THRESHOLD = 1 << 2, WIPHY_PARAM_RTS_THRESHOLD = 1 << 3, + WIPHY_PARAM_COVERAGE_CLASS = 1 << 4, }; /** @@ -1236,6 +1237,7 @@ struct wiphy { u8 retry_long; u32 frag_threshold; u32 rts_threshold; + u8 coverage_class; char fw_version[ETHTOOL_BUSINFO_LEN]; u32 hw_version; diff --git a/net/wireless/core.c b/net/wireless/core.c index c2a2c563d21a..0a545bb6ed05 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -402,6 +402,7 @@ struct wiphy *wiphy_new(const struct cfg80211_ops *ops, int sizeof_priv) rdev->wiphy.retry_long = 4; rdev->wiphy.frag_threshold = (u32) -1; rdev->wiphy.rts_threshold = (u32) -1; + rdev->wiphy.coverage_class = 0; return &rdev->wiphy; } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e3bee3cecdfa..c09fbcd278fb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -69,6 +69,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { [NL80211_ATTR_WIPHY_RETRY_LONG] = { .type = NLA_U8 }, [NL80211_ATTR_WIPHY_FRAG_THRESHOLD] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_RTS_THRESHOLD] = { .type = NLA_U32 }, + [NL80211_ATTR_WIPHY_COVERAGE_CLASS] = { .type = NLA_U8 }, [NL80211_ATTR_IFTYPE] = { .type = NLA_U32 }, [NL80211_ATTR_IFINDEX] = { .type = NLA_U32 }, @@ -444,6 +445,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, dev->wiphy.frag_threshold); NLA_PUT_U32(msg, NL80211_ATTR_WIPHY_RTS_THRESHOLD, dev->wiphy.rts_threshold); + NLA_PUT_U8(msg, NL80211_ATTR_WIPHY_COVERAGE_CLASS, + dev->wiphy.coverage_class); NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_SCAN_SSIDS, dev->wiphy.max_scan_ssids); @@ -684,6 +687,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) u32 changed; u8 retry_short = 0, retry_long = 0; u32 frag_threshold = 0, rts_threshold = 0; + u8 coverage_class = 0; rtnl_lock(); @@ -806,9 +810,16 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) changed |= WIPHY_PARAM_RTS_THRESHOLD; } + if (info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]) { + coverage_class = nla_get_u8( + info->attrs[NL80211_ATTR_WIPHY_COVERAGE_CLASS]); + changed |= WIPHY_PARAM_COVERAGE_CLASS; + } + if (changed) { u8 old_retry_short, old_retry_long; u32 old_frag_threshold, old_rts_threshold; 
+ u8 old_coverage_class; if (!rdev->ops->set_wiphy_params) { result = -EOPNOTSUPP; @@ -819,6 +830,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) old_retry_long = rdev->wiphy.retry_long; old_frag_threshold = rdev->wiphy.frag_threshold; old_rts_threshold = rdev->wiphy.rts_threshold; + old_coverage_class = rdev->wiphy.coverage_class; if (changed & WIPHY_PARAM_RETRY_SHORT) rdev->wiphy.retry_short = retry_short; @@ -828,6 +840,8 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.frag_threshold = frag_threshold; if (changed & WIPHY_PARAM_RTS_THRESHOLD) rdev->wiphy.rts_threshold = rts_threshold; + if (changed & WIPHY_PARAM_COVERAGE_CLASS) + rdev->wiphy.coverage_class = coverage_class; result = rdev->ops->set_wiphy_params(&rdev->wiphy, changed); if (result) { @@ -835,6 +849,7 @@ static int nl80211_set_wiphy(struct sk_buff *skb, struct genl_info *info) rdev->wiphy.retry_long = old_retry_long; rdev->wiphy.frag_threshold = old_frag_threshold; rdev->wiphy.rts_threshold = old_rts_threshold; + rdev->wiphy.coverage_class = old_coverage_class; } } -- cgit v1.2.3-70-g09d2 From 310bc676e314e92c18257bfc916951879451ee32 Mon Sep 17 00:00:00 2001 From: Lukáš Turek <8an@praha12.net> Date: Mon, 21 Dec 2009 22:50:48 +0100 Subject: mac80211: Add new callback set_coverage_class Mac80211 callback to driver set_coverage_class() sets slot time and ACK timeout for given IEEE 802.11 coverage class. The callback is optional, but it's essential for long distance links. Signed-off-by: Lukas Turek <8an@praha12.net> Signed-off-by: John W. Linville --- include/net/mac80211.h | 5 +++++ net/mac80211/cfg.c | 7 +++++++ net/mac80211/driver-ops.h | 15 +++++++++++++++ net/mac80211/driver-trace.h | 23 +++++++++++++++++++++++ 4 files changed, 50 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f073a2a50574..ad4b70034e77 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1533,6 +1533,10 @@ enum ieee80211_ampdu_mlme_action { * and need to call wiphy_rfkill_set_hw_state() in the callback. * The callback can sleep. * + * @set_coverage_class: Set slot time for given coverage class as specified + * in IEEE 802.11-2007 section 17.3.8.6 and modify ACK timeout + * accordingly. This callback is not required and may sleep. + * * @testmode_cmd: Implement a cfg80211 test mode command. * The callback can sleep. 
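[Editorial aside, not part of the patch: a rough sketch of what a driver hooking the new optional callback could look like. The driver name and the register helper are placeholders, and the exact slot-time and ACK-timeout arithmetic is driver specific; the coverage class only tells the driver how much extra air-propagation time to budget for.]

	static void mydrv_set_coverage_class(struct ieee80211_hw *hw,
					     u8 coverage_class)
	{
		/* A larger coverage class means a longer assumed air-propagation
		 * time, so slot time and ACK/CTS timeouts must be extended
		 * accordingly before being written to the hardware. */
		mydrv_write_timings(hw->priv, coverage_class);	/* placeholder */
	}

	static struct ieee80211_ops mydrv_ops = {
		/* ... the usual mandatory callbacks ... */
		.set_coverage_class = mydrv_set_coverage_class,
	};
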
* @@ -1592,6 +1596,7 @@ struct ieee80211_ops { struct ieee80211_sta *sta, u16 tid, u16 *ssn); void (*rfkill_poll)(struct ieee80211_hw *hw); + void (*set_coverage_class)(struct ieee80211_hw *hw, u8 coverage_class); #ifdef CONFIG_NL80211_TESTMODE int (*testmode_cmd)(struct ieee80211_hw *hw, void *data, int len); #endif diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 2e5e841e9b7b..976014c5e742 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1230,6 +1230,13 @@ static int ieee80211_set_wiphy_params(struct wiphy *wiphy, u32 changed) struct ieee80211_local *local = wiphy_priv(wiphy); int err; + if (changed & WIPHY_PARAM_COVERAGE_CLASS) { + err = drv_set_coverage_class(local, wiphy->coverage_class); + + if (err) + return err; + } + if (changed & WIPHY_PARAM_RTS_THRESHOLD) { err = drv_set_rts_threshold(local, wiphy->rts_threshold); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 8757ea73d544..de91d39e0276 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -214,6 +214,21 @@ static inline int drv_set_rts_threshold(struct ieee80211_local *local, return ret; } +static inline int drv_set_coverage_class(struct ieee80211_local *local, + u8 value) +{ + int ret = 0; + might_sleep(); + + if (local->ops->set_coverage_class) + local->ops->set_coverage_class(&local->hw, value); + else + ret = -EOPNOTSUPP; + + trace_drv_set_coverage_class(local, value, ret); + return ret; +} + static inline void drv_sta_notify(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, enum sta_notify_cmd cmd, diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 977cc7528bc6..0ea258123b8e 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -491,6 +491,29 @@ TRACE_EVENT(drv_set_rts_threshold, ) ); +TRACE_EVENT(drv_set_coverage_class, + TP_PROTO(struct ieee80211_local *local, u8 value, int ret), + + TP_ARGS(local, value, ret), + + TP_STRUCT__entry( + LOCAL_ENTRY + __field(u8, value) + __field(int, ret) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + __entry->ret = ret; + __entry->value = value; + ), + + TP_printk( + LOCAL_PR_FMT " value:%d ret:%d", + LOCAL_PR_ARG, __entry->value, __entry->ret + ) +); + TRACE_EVENT(drv_sta_notify, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, -- cgit v1.2.3-70-g09d2 From e00cfce0cb2a397859607bf515c6de9ce064b64a Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 29 Dec 2009 12:59:19 +0200 Subject: mac80211: Select lowest rate based on basic rate set in AP mode If the basic rate set is configured to not include the lowest rate (e.g., basic rate set = 6, 12, 24 Mbps in IEEE 802.11g mode), the AP should not send out broadcast frames at 1 Mbps. This type of configuration can be used to optimize channel usage in cases where there is no need for backwards compatibility with IEEE 802.11b-only devices. In AP mode, mac80211 was unconditionally using the lowest rate for Beacon frames and similarly, with all rate control algorithms that use rate_control_send_low(), the lowest rate ended up being used for all broadcast frames (and all unicast frames that are sent before association). Change this to take into account the basic rate configuration in AP mode, i.e., use the lowest rate in the basic rate set instead of the lowest supported rate when selecting the rate. Signed-off-by: Jouni Malinen Signed-off-by: John W. 
Linville --- include/net/mac80211.h | 2 ++ net/mac80211/rate.c | 25 +++++++++++++++++++++++++ net/mac80211/tx.c | 24 +++++++++++++----------- 3 files changed, 40 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index ad4b70034e77..39c516c352be 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2299,6 +2299,7 @@ enum rate_control_changed { * @max_rate_idx: user-requested maximum rate (not MCS for now) * @skb: the skb that will be transmitted, the control information in it needs * to be filled in + * @ap: whether this frame is sent out in AP mode */ struct ieee80211_tx_rate_control { struct ieee80211_hw *hw; @@ -2308,6 +2309,7 @@ struct ieee80211_tx_rate_control { struct ieee80211_tx_rate reported_rate; bool rts, short_preamble; u8 max_rate_idx; + bool ap; }; struct rate_control_ops { diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index b9007f80cb92..6349e7f4dcae 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -207,6 +207,27 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc) return ((info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc)); } +static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx) +{ + u8 i; + + if (basic_rates == 0) + return; /* assume basic rates unknown and accept rate */ + if (*idx < 0) + return; + if (basic_rates & (1 << *idx)) + return; /* selected rate is a basic rate */ + + for (i = *idx + 1; i <= max_rate_idx; i++) { + if (basic_rates & (1 << i)) { + *idx = i; + return; + } + } + + /* could not find a basic rate; use original selection */ +} + bool rate_control_send_low(struct ieee80211_sta *sta, void *priv_sta, struct ieee80211_tx_rate_control *txrc) @@ -218,6 +239,10 @@ bool rate_control_send_low(struct ieee80211_sta *sta, info->control.rates[0].count = (info->flags & IEEE80211_TX_CTL_NO_ACK) ? 
1 : txrc->hw->max_rate_tries; + if (!sta && txrc->ap) + rc_send_low_broadcast(&info->control.rates[0].idx, + txrc->bss_conf->basic_rates, + txrc->sband->n_bitrates); return true; } return false; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 140da4a7f13d..4961168f5091 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -520,6 +520,7 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.skb = tx->skb; txrc.reported_rate.idx = -1; txrc.max_rate_idx = tx->sdata->max_ratectrl_rateidx; + txrc.ap = tx->sdata->vif.type == NL80211_IFTYPE_AP; /* set up RTS protection if desired */ if (len > tx->local->hw.wiphy->rts_threshold) { @@ -2060,6 +2061,7 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, struct beacon_data *beacon; struct ieee80211_supported_band *sband; enum ieee80211_band band = local->hw.conf.channel->band; + struct ieee80211_tx_rate_control txrc; sband = local->hw.wiphy->bands[band]; @@ -2167,21 +2169,21 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, info = IEEE80211_SKB_CB(skb); info->flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT; + info->flags |= IEEE80211_TX_CTL_NO_ACK; info->band = band; - /* - * XXX: For now, always use the lowest rate - */ - info->control.rates[0].idx = 0; - info->control.rates[0].count = 1; - info->control.rates[1].idx = -1; - info->control.rates[2].idx = -1; - info->control.rates[3].idx = -1; - info->control.rates[4].idx = -1; - BUILD_BUG_ON(IEEE80211_TX_MAX_RATES != 5); + + memset(&txrc, 0, sizeof(txrc)); + txrc.hw = hw; + txrc.sband = sband; + txrc.bss_conf = &sdata->vif.bss_conf; + txrc.skb = skb; + txrc.reported_rate.idx = -1; + txrc.max_rate_idx = sdata->max_ratectrl_rateidx; + txrc.ap = true; + rate_control_get_rate(sdata, NULL, &txrc); info->control.vif = vif; - info->flags |= IEEE80211_TX_CTL_NO_ACK; info->flags |= IEEE80211_TX_CTL_CLEAR_PS_FILT; info->flags |= IEEE80211_TX_CTL_ASSIGN_SEQ; out: -- cgit v1.2.3-70-g09d2 From 37eb0b164cf9fa9f70c8500926f5cde7c652f48e Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 6 Jan 2010 13:09:08 +0200 Subject: cfg80211/mac80211: Use more generic bitrate mask for rate control Extend struct cfg80211_bitrate_mask to actually use a bitfield mask instead of just a single fixed or maximum rate index. This change itself does not modify the behavior (except for debugfs files), but it prepares cfg80211 and mac80211 for a new nl80211 command for setting which rates can be used in TX rate control. Since frames are now going through the rate control algorithm unconditionally, the internal IEEE80211_TX_INTFL_RCALGO flag can now be removed. The RC implementations can use the rate_idx_mask value to optimize their behavior if only a single rate is enabled. The old max_rate_idx in struct ieee80211_tx_rate_control is maintained (but commented as deprecated) for backwards compatibility with existing RC implementations. Once these implementations have been updated to use the more generic rate_idx_mask, the max_rate_idx value can be removed. Signed-off-by: Jouni Malinen Signed-off-by: John W. 
Linville --- include/net/cfg80211.h | 13 ++------- include/net/mac80211.h | 8 +++--- net/mac80211/cfg.c | 32 +++------------------- net/mac80211/debugfs_netdev.c | 22 ++++++++------- net/mac80211/ieee80211_i.h | 4 +-- net/mac80211/iface.c | 8 ++++-- net/mac80211/rate.c | 63 +++++++++++++++++++++++++++++++++++-------- net/mac80211/rate.h | 5 +--- net/mac80211/tx.c | 12 +++++++-- net/wireless/wext-compat.c | 34 ++++++++++++++++++++--- 10 files changed, 122 insertions(+), 79 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a8d5d04314b9..22e062afb5a1 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -857,20 +857,11 @@ enum tx_power_setting { * cfg80211_bitrate_mask - masks for bitrate control */ struct cfg80211_bitrate_mask { -/* - * As discussed in Berlin, this struct really - * should look like this: - struct { u32 legacy; - u8 mcs[IEEE80211_HT_MCS_MASK_LEN]; + /* TODO: add support for masking MCS rates; e.g.: */ + /* u8 mcs[IEEE80211_HT_MCS_MASK_LEN]; */ } control[IEEE80211_NUM_BANDS]; - - * Since we can always fix in-kernel users, let's keep - * it simpler for now: - */ - u32 fixed; /* fixed bitrate, 0 == not fixed */ - u32 maxrate; /* in kbps, 0 == no limit */ }; /** * struct cfg80211_pmksa - PMK Security Association diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 39c516c352be..7e5af6d90b93 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -255,9 +255,6 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_RATE_CTRL_PROBE: internal to mac80211, can be * set by rate control algorithms to indicate probe rate, will * be cleared for fragmented frames (except on the last fragment) - * @IEEE80211_TX_INTFL_RCALGO: mac80211 internal flag, do not test or - * set this flag in the driver; indicates that the rate control - * algorithm was used and should be notified of TX status * @IEEE80211_TX_INTFL_NEED_TXPROCESSING: completely internal to mac80211, * used to indicate that a pending frame requires TX processing before * it can be sent out. 
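[Editorial aside: with the reworked struct cfg80211_bitrate_mask above, a caller expresses allowed rates as one legacy bitmap per band, where each bit is an index into that band's registered bitrate table. A hedged illustration, assuming the usual 2.4 GHz rate ordering in which 6, 12 and 24 Mbps sit at indexes 4, 6 and 8:]

	/* hypothetical helper: restrict 2.4 GHz TX to the 11g basic rates */
	static void example_build_mask(struct cfg80211_bitrate_mask *mask)
	{
		memset(mask, 0, sizeof(*mask));
		/* bit positions depend on the wiphy's bitrate table ordering */
		mask->control[IEEE80211_BAND_2GHZ].legacy = BIT(4) | BIT(6) | BIT(8);
	}

The mask built this way is then handed to the driver through the existing set_bitrate_mask operation, as the wext-compat and nl80211 callers later in this series do.
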
@@ -287,7 +284,6 @@ enum mac80211_tx_control_flags { IEEE80211_TX_STAT_AMPDU = BIT(10), IEEE80211_TX_STAT_AMPDU_NO_BACK = BIT(11), IEEE80211_TX_CTL_RATE_CTRL_PROBE = BIT(12), - IEEE80211_TX_INTFL_RCALGO = BIT(13), IEEE80211_TX_INTFL_NEED_TXPROCESSING = BIT(14), IEEE80211_TX_INTFL_RETRIED = BIT(15), IEEE80211_TX_INTFL_DONT_ENCRYPT = BIT(16), @@ -2297,6 +2293,9 @@ enum rate_control_changed { * @short_preamble: whether mac80211 will request short-preamble transmission * if the selected rate supports it * @max_rate_idx: user-requested maximum rate (not MCS for now) + * (deprecated; this will be removed once drivers get updated to use + * rate_idx_mask) + * @rate_idx_mask: user-requested rate mask (not MCS for now) * @skb: the skb that will be transmitted, the control information in it needs * to be filled in * @ap: whether this frame is sent out in AP mode @@ -2309,6 +2308,7 @@ struct ieee80211_tx_rate_control { struct ieee80211_tx_rate reported_rate; bool rts, short_preamble; u8 max_rate_idx; + u32 rate_idx_mask; bool ap; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 976014c5e742..e5dda6fb8dff 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1406,8 +1406,6 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = wdev_priv(dev->ieee80211_ptr); int i; - u32 target_rate; - struct ieee80211_supported_band *sband; /* * This _could_ be supported by providing a hook for @@ -1417,35 +1415,11 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy, if (local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL) return -EOPNOTSUPP; - sband = local->hw.wiphy->bands[local->hw.conf.channel->band]; - - /* - * target_rate = -1, rate->fixed = 0 means auto only, so use all rates - * target_rate = X, rate->fixed = 1 means only rate X - * target_rate = X, rate->fixed = 0 means all rates <= X - */ - sdata->max_ratectrl_rateidx = -1; - sdata->force_unicast_rateidx = -1; - if (mask->fixed) - target_rate = mask->fixed / 100; - else if (mask->maxrate) - target_rate = mask->maxrate / 100; - else - return 0; + for (i = 0; i < IEEE80211_NUM_BANDS; i++) + sdata->rc_rateidx_mask[i] = mask->control[i].legacy; - for (i = 0; i< sband->n_bitrates; i++) { - if (target_rate != sband->bitrates[i].bitrate) - continue; - - /* requested bitrate found */ - sdata->max_ratectrl_rateidx = i; - if (mask->fixed) - sdata->force_unicast_rateidx = i; - return 0; - } - - return -EINVAL; + return 0; } static int ieee80211_remain_on_channel(struct wiphy *wiphy, diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c index 59f6e3bcbd09..1481049f0f71 100644 --- a/net/mac80211/debugfs_netdev.c +++ b/net/mac80211/debugfs_netdev.c @@ -127,8 +127,10 @@ __IEEE80211_IF_FILE(name, ieee80211_if_write_##name) /* common attributes */ IEEE80211_IF_FILE(drop_unencrypted, drop_unencrypted, DEC); -IEEE80211_IF_FILE(force_unicast_rateidx, force_unicast_rateidx, DEC); -IEEE80211_IF_FILE(max_ratectrl_rateidx, max_ratectrl_rateidx, DEC); +IEEE80211_IF_FILE(rc_rateidx_mask_2ghz, rc_rateidx_mask[IEEE80211_BAND_2GHZ], + HEX); +IEEE80211_IF_FILE(rc_rateidx_mask_5ghz, rc_rateidx_mask[IEEE80211_BAND_5GHZ], + HEX); /* STA attributes */ IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC); @@ -264,8 +266,8 @@ IEEE80211_IF_FILE(dot11MeshHWMPRootMode, static void add_sta_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted, sta); - DEBUGFS_ADD(force_unicast_rateidx, sta); - DEBUGFS_ADD(max_ratectrl_rateidx, sta); + 
DEBUGFS_ADD(rc_rateidx_mask_2ghz, sta); + DEBUGFS_ADD(rc_rateidx_mask_5ghz, sta); DEBUGFS_ADD(bssid, sta); DEBUGFS_ADD(aid, sta); @@ -275,8 +277,8 @@ static void add_sta_files(struct ieee80211_sub_if_data *sdata) static void add_ap_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted, ap); - DEBUGFS_ADD(force_unicast_rateidx, ap); - DEBUGFS_ADD(max_ratectrl_rateidx, ap); + DEBUGFS_ADD(rc_rateidx_mask_2ghz, ap); + DEBUGFS_ADD(rc_rateidx_mask_5ghz, ap); DEBUGFS_ADD(num_sta_ps, ap); DEBUGFS_ADD(dtim_count, ap); @@ -286,8 +288,8 @@ static void add_ap_files(struct ieee80211_sub_if_data *sdata) static void add_wds_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted, wds); - DEBUGFS_ADD(force_unicast_rateidx, wds); - DEBUGFS_ADD(max_ratectrl_rateidx, wds); + DEBUGFS_ADD(rc_rateidx_mask_2ghz, wds); + DEBUGFS_ADD(rc_rateidx_mask_5ghz, wds); DEBUGFS_ADD(peer, wds); } @@ -295,8 +297,8 @@ static void add_wds_files(struct ieee80211_sub_if_data *sdata) static void add_vlan_files(struct ieee80211_sub_if_data *sdata) { DEBUGFS_ADD(drop_unencrypted, vlan); - DEBUGFS_ADD(force_unicast_rateidx, vlan); - DEBUGFS_ADD(max_ratectrl_rateidx, vlan); + DEBUGFS_ADD(rc_rateidx_mask_2ghz, vlan); + DEBUGFS_ADD(rc_rateidx_mask_5ghz, vlan); } static void add_monitor_files(struct ieee80211_sub_if_data *sdata) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index a27921ee6e63..3e4ac3f30857 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -494,8 +494,8 @@ struct ieee80211_sub_if_data { */ struct ieee80211_if_ap *bss; - int force_unicast_rateidx; /* forced TX rateidx for unicast frames */ - int max_ratectrl_rateidx; /* max TX rateidx for rate control */ + /* bitmap of allowed (non-MCS) rate indexes for rate control */ + u32 rc_rateidx_mask[IEEE80211_NUM_BANDS]; union { struct ieee80211_if_ap ap; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 264a6c975f8b..fe140bf033f9 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -856,8 +856,12 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name, INIT_LIST_HEAD(&sdata->key_list); - sdata->force_unicast_rateidx = -1; - sdata->max_ratectrl_rateidx = -1; + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + struct ieee80211_supported_band *sband; + sband = local->hw.wiphy->bands[i]; + sdata->rc_rateidx_mask[i] = + sband ? (1 << sband->n_bitrates) - 1 : 0; + } /* setup type-dependent data */ ieee80211_setup_sdata(sdata, type); diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 6349e7f4dcae..c74b7c85403c 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -249,6 +249,38 @@ bool rate_control_send_low(struct ieee80211_sta *sta, } EXPORT_SYMBOL(rate_control_send_low); +static void rate_idx_match_mask(struct ieee80211_tx_rate *rate, + int n_bitrates, u32 mask) +{ + int j; + + /* See whether the selected rate or anything below it is allowed. */ + for (j = rate->idx; j >= 0; j--) { + if (mask & (1 << j)) { + /* Okay, found a suitable rate. Use it. */ + rate->idx = j; + return; + } + } + + /* Try to find a higher rate that would be allowed */ + for (j = rate->idx + 1; j < n_bitrates; j++) { + if (mask & (1 << j)) { + /* Okay, found a suitable rate. Use it. */ + rate->idx = j; + return; + } + } + + /* + * Uh.. No suitable rate exists. This should not really happen with + * sane TX rate mask configurations. 
However, should someone manage to + * configure supported rates and TX rate mask in incompatible way, + * allow the frame to be transmitted with whatever the rate control + * selected. + */ +} + void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, struct ieee80211_tx_rate_control *txrc) @@ -258,6 +290,7 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, struct ieee80211_sta *ista = NULL; struct ieee80211_tx_info *info = IEEE80211_SKB_CB(txrc->skb); int i; + u32 mask; if (sta) { ista = &sta->sta; @@ -270,23 +303,31 @@ void rate_control_get_rate(struct ieee80211_sub_if_data *sdata, info->control.rates[i].count = 1; } - if (sta && sdata->force_unicast_rateidx > -1) { - info->control.rates[0].idx = sdata->force_unicast_rateidx; - } else { - ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); - info->flags |= IEEE80211_TX_INTFL_RCALGO; - } + ref->ops->get_rate(ref->priv, ista, priv_sta, txrc); /* - * try to enforce the maximum rate the user wanted + * Try to enforce the rateidx mask the user wanted. skip this if the + * default mask (allow all rates) is used to save some processing for + * the common case. */ - if (sdata->max_ratectrl_rateidx > -1) + mask = sdata->rc_rateidx_mask[info->band]; + if (mask != (1 << txrc->sband->n_bitrates) - 1) { + if (sta) { + /* Filter out rates that the STA does not support */ + mask &= sta->sta.supp_rates[info->band]; + } + /* + * Make sure the rate index selected for each TX rate is + * included in the configured mask and change the rate indexes + * if needed. + */ for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { + /* Rate masking supports only legacy rates for now */ if (info->control.rates[i].flags & IEEE80211_TX_RC_MCS) continue; - info->control.rates[i].idx = - min_t(s8, info->control.rates[i].idx, - sdata->max_ratectrl_rateidx); + rate_idx_match_mask(&info->control.rates[i], + txrc->sband->n_bitrates, mask); + } } BUG_ON(info->control.rates[0].idx < 0); diff --git a/net/mac80211/rate.h b/net/mac80211/rate.h index cb9bd1f65e27..669dddd40521 100644 --- a/net/mac80211/rate.h +++ b/net/mac80211/rate.h @@ -44,10 +44,7 @@ static inline void rate_control_tx_status(struct ieee80211_local *local, struct rate_control_ref *ref = local->rate_ctrl; struct ieee80211_sta *ista = &sta->sta; void *priv_sta = sta->rate_ctrl_priv; - struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - - if (likely(info->flags & IEEE80211_TX_INTFL_RCALGO)) - ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); + ref->ops->tx_status(ref->priv, sband, ista, priv_sta, skb); } diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 4961168f5091..d3a44812f8bf 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -519,7 +519,11 @@ ieee80211_tx_h_rate_ctrl(struct ieee80211_tx_data *tx) txrc.bss_conf = &tx->sdata->vif.bss_conf; txrc.skb = tx->skb; txrc.reported_rate.idx = -1; - txrc.max_rate_idx = tx->sdata->max_ratectrl_rateidx; + txrc.rate_idx_mask = tx->sdata->rc_rateidx_mask[tx->channel->band]; + if (txrc.rate_idx_mask == (1 << sband->n_bitrates) - 1) + txrc.max_rate_idx = -1; + else + txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; txrc.ap = tx->sdata->vif.type == NL80211_IFTYPE_AP; /* set up RTS protection if desired */ @@ -2178,7 +2182,11 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, txrc.bss_conf = &sdata->vif.bss_conf; txrc.skb = skb; txrc.reported_rate.idx = -1; - txrc.max_rate_idx = sdata->max_ratectrl_rateidx; + txrc.rate_idx_mask = sdata->rc_rateidx_mask[band]; + if (txrc.rate_idx_mask == (1 << 
sband->n_bitrates) - 1) + txrc.max_rate_idx = -1; + else + txrc.max_rate_idx = fls(txrc.rate_idx_mask) - 1; txrc.ap = true; rate_control_get_rate(sdata, NULL, &txrc); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 4198243a3dff..966d2f01beac 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1204,21 +1204,47 @@ int cfg80211_wext_siwrate(struct net_device *dev, struct wireless_dev *wdev = dev->ieee80211_ptr; struct cfg80211_registered_device *rdev = wiphy_to_dev(wdev->wiphy); struct cfg80211_bitrate_mask mask; + u32 fixed, maxrate; + struct ieee80211_supported_band *sband; + int band, ridx; + bool match = false; if (!rdev->ops->set_bitrate_mask) return -EOPNOTSUPP; - mask.fixed = 0; - mask.maxrate = 0; + memset(&mask, 0, sizeof(mask)); + fixed = 0; + maxrate = 0; if (rate->value < 0) { /* nothing */ } else if (rate->fixed) { - mask.fixed = rate->value / 1000; /* kbps */ + fixed = rate->value / 100000; } else { - mask.maxrate = rate->value / 1000; /* kbps */ + maxrate = rate->value / 100000; + } + + for (band = 0; band < IEEE80211_NUM_BANDS; band++) { + sband = wdev->wiphy->bands[band]; + if (sband == NULL) + continue; + for (ridx = 0; ridx < sband->n_bitrates; ridx++) { + struct ieee80211_rate *srate = &sband->bitrates[ridx]; + if (fixed == srate->bitrate) { + mask.control[band].legacy = 1 << ridx; + match = true; + break; + } + if (srate->bitrate <= maxrate) { + mask.control[band].legacy |= 1 << ridx; + match = true; + } + } } + if (!match) + return -EINVAL; + return rdev->ops->set_bitrate_mask(wdev->wiphy, dev, NULL, &mask); } EXPORT_SYMBOL_GPL(cfg80211_wext_siwrate); -- cgit v1.2.3-70-g09d2 From 13ae75b103e07304a34ab40c9136e9f53e06475c Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Tue, 29 Dec 2009 12:59:45 +0200 Subject: nl80211: New command for setting TX rate mask for rate control Add a new NL80211_CMD_SET_TX_BITRATE_MASK command and related attributes to provide support for setting TX rate mask for rate control. This uses the existing cfg80211 set_bitrate_mask operation that was previously used only with WEXT compat code (SIOCSIWRATE). The nl80211 command allows more generic configuration of allowed rates as a mask instead of fixed/max rate. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 44 +++++++++++++++++++ include/net/cfg80211.h | 4 +- net/wireless/nl80211.c | 111 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 157 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index d4c556de7170..7a1c8c145b22 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -295,6 +295,10 @@ * This command is also used as an event to notify when a requested * remain-on-channel duration has expired. * + * @NL80211_CMD_SET_TX_BITRATE_MASK: Set the mask of rates to be used in TX + * rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface + * and @NL80211_ATTR_TX_RATES the set of allowed rates. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -381,6 +385,8 @@ enum nl80211_commands { NL80211_CMD_REMAIN_ON_CHANNEL, NL80211_CMD_CANCEL_REMAIN_ON_CHANNEL, + NL80211_CMD_SET_TX_BITRATE_MASK, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -640,6 +646,13 @@ enum nl80211_commands { * * @NL80211_ATTR_COOKIE: Generic 64-bit cookie to identify objects. 
* + * @NL80211_ATTR_TX_RATES: Nested set of attributes + * (enum nl80211_tx_rate_attributes) describing TX rates per band. The + * enum nl80211_band value is used as the index (nla_type() of the nested + * data. If a band is not included, it will be configured to allow all + * rates based on negotiated supported rates information. This attribute + * is used with %NL80211_CMD_SET_TX_BITRATE_MASK. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -783,6 +796,8 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_COVERAGE_CLASS, + NL80211_ATTR_TX_RATES, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1482,4 +1497,33 @@ enum nl80211_key_attributes { NL80211_KEY_MAX = __NL80211_KEY_AFTER_LAST - 1 }; +/** + * enum nl80211_tx_rate_attributes - TX rate set attributes + * @__NL80211_TXRATE_INVALID: invalid + * @NL80211_TXRATE_LEGACY: Legacy (non-MCS) rates allowed for TX rate selection + * in an array of rates as defined in IEEE 802.11 7.3.2.2 (u8 values with + * 1 = 500 kbps) but without the IE length restriction (at most + * %NL80211_MAX_SUPP_RATES in a single array). + * @__NL80211_TXRATE_AFTER_LAST: internal + * @NL80211_TXRATE_MAX: highest TX rate attribute + */ +enum nl80211_tx_rate_attributes { + __NL80211_TXRATE_INVALID, + NL80211_TXRATE_LEGACY, + + /* keep last */ + __NL80211_TXRATE_AFTER_LAST, + NL80211_TXRATE_MAX = __NL80211_TXRATE_AFTER_LAST - 1 +}; + +/** + * enum nl80211_band - Frequency band + * @NL80211_BAND_2GHZ - 2.4 GHz ISM band + * @NL80211_BAND_5GHZ - around 5 GHz band (4.9 - 5.7 GHz) + */ +enum nl80211_band { + NL80211_BAND_2GHZ, + NL80211_BAND_5GHZ, +}; + #endif /* __LINUX_NL80211_H */ diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 22e062afb5a1..0d734413b5fb 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -39,8 +39,8 @@ * @IEEE80211_BAND_5GHZ: around 5GHz band (4.9-5.7) */ enum ieee80211_band { - IEEE80211_BAND_2GHZ, - IEEE80211_BAND_5GHZ, + IEEE80211_BAND_2GHZ = NL80211_BAND_2GHZ, + IEEE80211_BAND_5GHZ = NL80211_BAND_5GHZ, /* keep last */ IEEE80211_NUM_BANDS diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index c09fbcd278fb..b804062e0179 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -144,6 +144,7 @@ static struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] __read_mostly = { .len = WLAN_PMKID_LEN }, [NL80211_ATTR_DURATION] = { .type = NLA_U32 }, [NL80211_ATTR_COOKIE] = { .type = NLA_U64 }, + [NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED }, }; /* policy for the attributes */ @@ -575,6 +576,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(del_pmksa, DEL_PMKSA); CMD(flush_pmksa, FLUSH_PMKSA); CMD(remain_on_channel, REMAIN_ON_CHANNEL); + CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -4438,6 +4440,109 @@ static int nl80211_cancel_remain_on_channel(struct sk_buff *skb, return err; } +static u32 rateset_to_mask(struct ieee80211_supported_band *sband, + u8 *rates, u8 rates_len) +{ + u8 i; + u32 mask = 0; + + for (i = 0; i < rates_len; i++) { + int rate = (rates[i] & 0x7f) * 5; + int ridx; + for (ridx = 0; ridx < sband->n_bitrates; ridx++) { + struct ieee80211_rate *srate = + &sband->bitrates[ridx]; + if (rate == srate->bitrate) { + mask |= 1 << ridx; + break; + } + } + if (ridx == sband->n_bitrates) + return 0; /* rate not found */ + } + + return mask; +} + +static 
struct nla_policy +nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] __read_mostly = { + [NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY, + .len = NL80211_MAX_SUPP_RATES }, +}; + +static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, + struct genl_info *info) +{ + struct nlattr *tb[NL80211_TXRATE_MAX + 1]; + struct cfg80211_registered_device *rdev; + struct cfg80211_bitrate_mask mask; + int err, rem, i; + struct net_device *dev; + struct nlattr *tx_rates; + struct ieee80211_supported_band *sband; + + if (info->attrs[NL80211_ATTR_TX_RATES] == NULL) + return -EINVAL; + + rtnl_lock(); + + err = get_rdev_dev_by_info_ifindex(info, &rdev, &dev); + if (err) + goto unlock_rtnl; + + if (!rdev->ops->set_bitrate_mask) { + err = -EOPNOTSUPP; + goto unlock; + } + + memset(&mask, 0, sizeof(mask)); + /* Default to all rates enabled */ + for (i = 0; i < IEEE80211_NUM_BANDS; i++) { + sband = rdev->wiphy.bands[i]; + mask.control[i].legacy = + sband ? (1 << sband->n_bitrates) - 1 : 0; + } + + /* + * The nested attribute uses enum nl80211_band as the index. This maps + * directly to the enum ieee80211_band values used in cfg80211. + */ + nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) + { + enum ieee80211_band band = nla_type(tx_rates); + if (band < 0 || band >= IEEE80211_NUM_BANDS) { + err = -EINVAL; + goto unlock; + } + sband = rdev->wiphy.bands[band]; + if (sband == NULL) { + err = -EINVAL; + goto unlock; + } + nla_parse(tb, NL80211_TXRATE_MAX, nla_data(tx_rates), + nla_len(tx_rates), nl80211_txattr_policy); + if (tb[NL80211_TXRATE_LEGACY]) { + mask.control[band].legacy = rateset_to_mask( + sband, + nla_data(tb[NL80211_TXRATE_LEGACY]), + nla_len(tb[NL80211_TXRATE_LEGACY])); + if (mask.control[band].legacy == 0) { + err = -EINVAL; + goto unlock; + } + } + } + + err = rdev->ops->set_bitrate_mask(&rdev->wiphy, dev, NULL, &mask); + + unlock: + dev_put(dev); + cfg80211_unlock_rdev(rdev); + unlock_rtnl: + rtnl_unlock(); + return err; +} + static struct genl_ops nl80211_ops[] = { { .cmd = NL80211_CMD_GET_WIPHY, @@ -4712,6 +4817,12 @@ static struct genl_ops nl80211_ops[] = { .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, + { + .cmd = NL80211_CMD_SET_TX_BITRATE_MASK, + .doit = nl80211_set_tx_bitrate_mask, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + }, }; static struct genl_multicast_group nl80211_mlme_mcgrp = { -- cgit v1.2.3-70-g09d2 From 7044cc565b45a898c140fb185174a66f2d68a163 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 5 Jan 2010 20:16:19 +0200 Subject: mac80211: add functions to create PS Poll and Nullfunc templates Some hardware, for example wl1251 and wl1271, handle the transmission of power save related frames in hardware, but the driver is responsible for creating the templates. It's better to create the templates in mac80211, that way all drivers can benefit from this. Add two new functions, ieee80211_pspoll_get() and ieee80211_nullfunc_get() which drivers need to call to get the frame. Drivers are also responsible for updating the templates after each association. Also new struct ieee80211_hdr_3addr is added to ieee80211.h to make it easy to calculate length of the Nullfunc frame. Signed-off-by: Kalle Valo Signed-off-by: John W. 
Linville --- include/linux/ieee80211.h | 9 ++++++ include/net/mac80211.h | 30 ++++++++++++++++++ net/mac80211/tx.c | 78 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 117 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index aeea282bd2fe..602c0692c3fc 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -130,6 +130,15 @@ struct ieee80211_hdr { u8 addr4[6]; } __attribute__ ((packed)); +struct ieee80211_hdr_3addr { + __le16 frame_control; + __le16 duration_id; + u8 addr1[6]; + u8 addr2[6]; + u8 addr3[6]; + __le16 seq_ctrl; +} __attribute__ ((packed)); + /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 7e5af6d90b93..75f46e26ad60 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1874,6 +1874,36 @@ static inline struct sk_buff *ieee80211_beacon_get(struct ieee80211_hw *hw, return ieee80211_beacon_get_tim(hw, vif, NULL, NULL); } +/** + * ieee80211_pspoll_get - retrieve a PS Poll template + * @hw: pointer obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * + * Creates a PS Poll a template which can, for example, uploaded to + * hardware. The template must be updated after association so that correct + * AID, BSSID and MAC address is used. + * + * Note: Caller (or hardware) is responsible for setting the + * &IEEE80211_FCTL_PM bit. + */ +struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); + +/** + * ieee80211_nullfunc_get - retrieve a nullfunc template + * @hw: pointer obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * + * Creates a Nullfunc template which can, for example, uploaded to + * hardware. The template must be updated after association so that correct + * BSSID and address is used. + * + * Note: Caller (or hardware) is responsible for setting the + * &IEEE80211_FCTL_PM bit as well as Duration and Sequence Control fields. + */ +struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif); + /** * ieee80211_rts_get - RTS frame generation function * @hw: pointer obtained from ieee80211_alloc_hw(). 
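
The net/mac80211/tx.c hunk below adds the two template builders themselves. As a rough driver-side illustration (not part of this patch), a driver with firmware-managed power save could refresh its templates after each association along the following lines; wl_upload_template(), the WL_TMPL_* ids and the use of hw->priv are hypothetical stand-ins for whatever upload interface the hardware provides, and only the mac80211 calls are taken from the patch.

/*
 * Illustrative sketch only. wl_upload_template() and the WL_TMPL_* ids
 * are hypothetical; ieee80211_pspoll_get()/ieee80211_nullfunc_get() are
 * the helpers added by this patch.
 */
#include <net/mac80211.h>

enum { WL_TMPL_PSPOLL, WL_TMPL_NULLFUNC };

/* hypothetical firmware upload hook provided by the driver */
int wl_upload_template(void *priv, int tmpl_id, const void *data, size_t len);

static int wl_update_ps_templates(struct ieee80211_hw *hw,
				  struct ieee80211_vif *vif)
{
	struct sk_buff *skb;
	int ret;

	/* PS Poll template; the PM bit is still set by the caller/hardware */
	skb = ieee80211_pspoll_get(hw, vif);
	if (!skb)
		return -ENOMEM;
	ret = wl_upload_template(hw->priv, WL_TMPL_PSPOLL, skb->data, skb->len);
	dev_kfree_skb(skb);
	if (ret)
		return ret;

	/* Nullfunc template; addresses come from the current association */
	skb = ieee80211_nullfunc_get(hw, vif);
	if (!skb)
		return -ENOMEM;
	ret = wl_upload_template(hw->priv, WL_TMPL_NULLFUNC, skb->data, skb->len);
	dev_kfree_skb(skb);
	return ret;
}
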
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index d3a44812f8bf..055b45b146d9 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2200,6 +2200,84 @@ struct sk_buff *ieee80211_beacon_get_tim(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_beacon_get_tim); +struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_managed *ifmgd; + struct ieee80211_pspoll *pspoll; + struct ieee80211_local *local; + struct sk_buff *skb; + + if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) + return NULL; + + sdata = vif_to_sdata(vif); + ifmgd = &sdata->u.mgd; + local = sdata->local; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*pspoll)); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for " + "pspoll template\n", sdata->name); + return NULL; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + pspoll = (struct ieee80211_pspoll *) skb_put(skb, sizeof(*pspoll)); + memset(pspoll, 0, sizeof(*pspoll)); + pspoll->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL | + IEEE80211_STYPE_PSPOLL); + pspoll->aid = cpu_to_le16(ifmgd->aid); + + /* aid in PS-Poll has its two MSBs each set to 1 */ + pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14); + + memcpy(pspoll->bssid, ifmgd->bssid, ETH_ALEN); + memcpy(pspoll->ta, vif->addr, ETH_ALEN); + + return skb; +} +EXPORT_SYMBOL(ieee80211_pspoll_get); + +struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif) +{ + struct ieee80211_hdr_3addr *nullfunc; + struct ieee80211_sub_if_data *sdata; + struct ieee80211_if_managed *ifmgd; + struct ieee80211_local *local; + struct sk_buff *skb; + + if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) + return NULL; + + sdata = vif_to_sdata(vif); + ifmgd = &sdata->u.mgd; + local = sdata->local; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*nullfunc)); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for nullfunc " + "template\n", sdata->name); + return NULL; + } + skb_reserve(skb, local->hw.extra_tx_headroom); + + nullfunc = (struct ieee80211_hdr_3addr *) skb_put(skb, + sizeof(*nullfunc)); + memset(nullfunc, 0, sizeof(*nullfunc)); + nullfunc->frame_control = cpu_to_le16(IEEE80211_FTYPE_DATA | + IEEE80211_STYPE_NULLFUNC | + IEEE80211_FCTL_TODS); + memcpy(nullfunc->addr1, ifmgd->bssid, ETH_ALEN); + memcpy(nullfunc->addr2, vif->addr, ETH_ALEN); + memcpy(nullfunc->addr3, ifmgd->bssid, ETH_ALEN); + + return skb; +} +EXPORT_SYMBOL(ieee80211_nullfunc_get); + void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const void *frame, size_t frame_len, const struct ieee80211_tx_info *frame_txctl, -- cgit v1.2.3-70-g09d2 From 05e54ea6cce400ac34528d705179b45244f61074 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 5 Jan 2010 20:16:38 +0200 Subject: mac80211: create Probe Request template Certain type of hardware, for example wl1251 and wl1271, need a template for the Probe Request. Create a function ieee80211_probereq_get() which creates the template and drivers send it to hardware. Signed-off-by: Kalle Valo Signed-off-by: John W. 
Linville --- include/net/mac80211.h | 17 +++++++++++++++++ net/mac80211/tx.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 75f46e26ad60..e1e73c6abeff 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1904,6 +1904,23 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw, struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif); +/** + * ieee80211_probereq_get - retrieve a Probe Request template + * @hw: pointer obtained from ieee80211_alloc_hw(). + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @ssid: SSID buffer + * @ssid_len: length of SSID + * @ie: buffer containing all IEs except SSID for the template + * @ie_len: length of the IE buffer + * + * Creates a Probe Request template which can, for example, be uploaded to + * hardware. + */ +struct sk_buff *ieee80211_probereq_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len); + /** * ieee80211_rts_get - RTS frame generation function * @hw: pointer obtained from ieee80211_alloc_hw(). diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 055b45b146d9..0661e696a1dd 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -2278,6 +2278,56 @@ struct sk_buff *ieee80211_nullfunc_get(struct ieee80211_hw *hw, } EXPORT_SYMBOL(ieee80211_nullfunc_get); +struct sk_buff *ieee80211_probereq_get(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + const u8 *ssid, size_t ssid_len, + const u8 *ie, size_t ie_len) +{ + struct ieee80211_sub_if_data *sdata; + struct ieee80211_local *local; + struct ieee80211_hdr_3addr *hdr; + struct sk_buff *skb; + size_t ie_ssid_len; + u8 *pos; + + sdata = vif_to_sdata(vif); + local = sdata->local; + ie_ssid_len = 2 + ssid_len; + + skb = dev_alloc_skb(local->hw.extra_tx_headroom + sizeof(*hdr) + + ie_ssid_len + ie_len); + if (!skb) { + printk(KERN_DEBUG "%s: failed to allocate buffer for probe " + "request template\n", sdata->name); + return NULL; + } + + skb_reserve(skb, local->hw.extra_tx_headroom); + + hdr = (struct ieee80211_hdr_3addr *) skb_put(skb, sizeof(*hdr)); + memset(hdr, 0, sizeof(*hdr)); + hdr->frame_control = cpu_to_le16(IEEE80211_FTYPE_MGMT | + IEEE80211_STYPE_PROBE_REQ); + memset(hdr->addr1, 0xff, ETH_ALEN); + memcpy(hdr->addr2, vif->addr, ETH_ALEN); + memset(hdr->addr3, 0xff, ETH_ALEN); + + pos = skb_put(skb, ie_ssid_len); + *pos++ = WLAN_EID_SSID; + *pos++ = ssid_len; + if (ssid) + memcpy(pos, ssid, ssid_len); + pos += ssid_len; + + if (ie) { + pos = skb_put(skb, ie_len); + memcpy(pos, ie, ie_len); + } + + return skb; +} +EXPORT_SYMBOL(ieee80211_probereq_get); + void ieee80211_rts_get(struct ieee80211_hw *hw, struct ieee80211_vif *vif, const void *frame, size_t frame_len, const struct ieee80211_tx_info *frame_txctl, -- cgit v1.2.3-70-g09d2 From 34a6eddbabd704b3c7dae9362234552267573be2 Mon Sep 17 00:00:00 2001 From: Jouni Malinen Date: Wed, 6 Jan 2010 16:19:24 +0200 Subject: cfg80211: Store IEs from both Beacon and Probe Response frames Store information elements from Beacon and Probe Response frames in separate buffers to allow both sets to be made available through nl80211. This allows user space applications to get access to IEs from Beacon frames even if we have received Probe Response frames from the BSS. Previously, the IEs from Probe Response frames would have overridden the IEs from Beacon frames. 
This feature is of somewhat limited use since most protocols include the same (or extended) information in Probe Response frames. However, there are couple of exceptions where the IEs from Beacon frames could be of some use: TIM IE is only included in Beacon frames (and it would be needed to figure out the DTIM period used in the BSS) and at least some implementations of Wireless Provisioning Services seem to include the full IE only in Beacon frames). The new BSS attribute for scan results is added to allow both the IE sets to be delivered. This is done in a way that maintains the previously used behavior for applications that are not aware of the new NL80211_BSS_BEACON_IES attribute. Signed-off-by: Jouni Malinen Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 10 +++- include/net/cfg80211.h | 12 ++++- net/wireless/core.h | 3 +- net/wireless/nl80211.c | 4 ++ net/wireless/scan.c | 120 ++++++++++++++++++++++++++++++++++++------------ 5 files changed, 116 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 7a1c8c145b22..127a73015760 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1378,13 +1378,20 @@ enum nl80211_channel_type { * @NL80211_BSS_BEACON_INTERVAL: beacon interval of the (I)BSS (u16) * @NL80211_BSS_CAPABILITY: capability field (CPU order, u16) * @NL80211_BSS_INFORMATION_ELEMENTS: binary attribute containing the - * raw information elements from the probe response/beacon (bin) + * raw information elements from the probe response/beacon (bin); + * if the %NL80211_BSS_BEACON_IES attribute is present, the IEs here are + * from a Probe Response frame; otherwise they are from a Beacon frame. + * However, if the driver does not indicate the source of the IEs, these + * IEs may be from either frame subtype. * @NL80211_BSS_SIGNAL_MBM: signal strength of probe response/beacon * in mBm (100 * dBm) (s32) * @NL80211_BSS_SIGNAL_UNSPEC: signal strength of the probe response/beacon * in unspecified units, scaled to 0..100 (u8) * @NL80211_BSS_STATUS: status, if this BSS is "used" * @NL80211_BSS_SEEN_MS_AGO: age of this BSS entry in ms + * @NL80211_BSS_BEACON_IES: binary attribute containing the raw information + * elements from a Beacon frame (bin); not present if no Beacon frame has + * yet been received * @__NL80211_BSS_AFTER_LAST: internal * @NL80211_BSS_MAX: highest BSS attribute */ @@ -1400,6 +1407,7 @@ enum nl80211_bss { NL80211_BSS_SIGNAL_UNSPEC, NL80211_BSS_STATUS, NL80211_BSS_SEEN_MS_AGO, + NL80211_BSS_BEACON_IES, /* keep last */ __NL80211_BSS_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0d734413b5fb..2af52704e670 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -626,8 +626,14 @@ enum cfg80211_signal_type { * @beacon_interval: the beacon interval as from the frame * @capability: the capability field in host byte order * @information_elements: the information elements (Note that there - * is no guarantee that these are well-formed!) 
+ * is no guarantee that these are well-formed!); this is a pointer to + * either the beacon_ies or proberesp_ies depending on whether Probe + * Response frame has been received * @len_information_elements: total length of the information elements + * @beacon_ies: the information elements from the last Beacon frame + * @len_beacon_ies: total length of the beacon_ies + * @proberesp_ies: the information elements from the last Probe Response frame + * @len_proberesp_ies: total length of the proberesp_ies * @signal: signal strength value (type depends on the wiphy's signal_type) * @free_priv: function pointer to free private data * @priv: private area for driver use, has at least wiphy->bss_priv_size bytes @@ -641,6 +647,10 @@ struct cfg80211_bss { u16 capability; u8 *information_elements; size_t len_information_elements; + u8 *beacon_ies; + size_t len_beacon_ies; + u8 *proberesp_ies; + size_t len_proberesp_ies; s32 signal; diff --git a/net/wireless/core.h b/net/wireless/core.h index 30ec95f05b52..2d6a6b9c0c43 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -111,7 +111,8 @@ struct cfg80211_internal_bss { unsigned long ts; struct kref ref; atomic_t hold; - bool ies_allocated; + bool beacon_ies_allocated; + bool proberesp_ies_allocated; /* must be last because of priv member */ struct cfg80211_bss pub; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index b804062e0179..4af7991a9ec8 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3163,6 +3163,10 @@ static int nl80211_send_bss(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT(msg, NL80211_BSS_INFORMATION_ELEMENTS, res->len_information_elements, res->information_elements); + if (res->beacon_ies && res->len_beacon_ies && + res->beacon_ies != res->information_elements) + NLA_PUT(msg, NL80211_BSS_BEACON_IES, + res->len_beacon_ies, res->beacon_ies); if (res->tsf) NLA_PUT_U64(msg, NL80211_BSS_TSF, res->tsf); if (res->beacon_interval) diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 0c2cbbebca95..06b0231ee5e3 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -100,8 +100,10 @@ static void bss_release(struct kref *ref) if (bss->pub.free_priv) bss->pub.free_priv(&bss->pub); - if (bss->ies_allocated) - kfree(bss->pub.information_elements); + if (bss->beacon_ies_allocated) + kfree(bss->pub.beacon_ies); + if (bss->proberesp_ies_allocated) + kfree(bss->pub.proberesp_ies); BUG_ON(atomic_read(&bss->hold)); @@ -375,8 +377,7 @@ rb_find_bss(struct cfg80211_registered_device *dev, static struct cfg80211_internal_bss * cfg80211_bss_update(struct cfg80211_registered_device *dev, - struct cfg80211_internal_bss *res, - bool overwrite) + struct cfg80211_internal_bss *res) { struct cfg80211_internal_bss *found = NULL; const u8 *meshid, *meshcfg; @@ -418,28 +419,64 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, found->pub.capability = res->pub.capability; found->ts = res->ts; - /* overwrite IEs */ - if (overwrite) { + /* Update IEs */ + if (res->pub.proberesp_ies) { size_t used = dev->wiphy.bss_priv_size + sizeof(*res); - size_t ielen = res->pub.len_information_elements; + size_t ielen = res->pub.len_proberesp_ies; + + if (found->pub.proberesp_ies && + !found->proberesp_ies_allocated && + ksize(found) >= used + ielen) { + memcpy(found->pub.proberesp_ies, + res->pub.proberesp_ies, ielen); + found->pub.len_proberesp_ies = ielen; + } else { + u8 *ies = found->pub.proberesp_ies; + + if (found->proberesp_ies_allocated) + ies = krealloc(ies, ielen, GFP_ATOMIC); + else + ies = 
kmalloc(ielen, GFP_ATOMIC); + + if (ies) { + memcpy(ies, res->pub.proberesp_ies, + ielen); + found->proberesp_ies_allocated = true; + found->pub.proberesp_ies = ies; + found->pub.len_proberesp_ies = ielen; + } + } - if (!found->ies_allocated && ksize(found) >= used + ielen) { - memcpy(found->pub.information_elements, - res->pub.information_elements, ielen); - found->pub.len_information_elements = ielen; + /* Override possible earlier Beacon frame IEs */ + found->pub.information_elements = + found->pub.proberesp_ies; + found->pub.len_information_elements = + found->pub.len_proberesp_ies; + } + if (res->pub.beacon_ies) { + size_t used = dev->wiphy.bss_priv_size + sizeof(*res); + size_t ielen = res->pub.len_beacon_ies; + + if (found->pub.beacon_ies && + !found->beacon_ies_allocated && + ksize(found) >= used + ielen) { + memcpy(found->pub.beacon_ies, + res->pub.beacon_ies, ielen); + found->pub.len_beacon_ies = ielen; } else { - u8 *ies = found->pub.information_elements; + u8 *ies = found->pub.beacon_ies; - if (found->ies_allocated) + if (found->beacon_ies_allocated) ies = krealloc(ies, ielen, GFP_ATOMIC); else ies = kmalloc(ielen, GFP_ATOMIC); if (ies) { - memcpy(ies, res->pub.information_elements, ielen); - found->ies_allocated = true; - found->pub.information_elements = ies; - found->pub.len_information_elements = ielen; + memcpy(ies, res->pub.beacon_ies, + ielen); + found->beacon_ies_allocated = true; + found->pub.beacon_ies = ies; + found->pub.len_beacon_ies = ielen; } } } @@ -489,14 +526,26 @@ cfg80211_inform_bss(struct wiphy *wiphy, res->pub.tsf = timestamp; res->pub.beacon_interval = beacon_interval; res->pub.capability = capability; - /* point to after the private area */ - res->pub.information_elements = (u8 *)res + sizeof(*res) + privsz; - memcpy(res->pub.information_elements, ie, ielen); - res->pub.len_information_elements = ielen; + /* + * Since we do not know here whether the IEs are from a Beacon or Probe + * Response frame, we need to pick one of the options and only use it + * with the driver that does not provide the full Beacon/Probe Response + * frame. Use Beacon frame pointer to avoid indicating that this should + * override the information_elements pointer should we have received an + * earlier indication of Probe Response data. + * + * The initial buffer for the IEs is allocated with the BSS entry and + * is located after the private area. 
+ */ + res->pub.beacon_ies = (u8 *)res + sizeof(*res) + privsz; + memcpy(res->pub.beacon_ies, ie, ielen); + res->pub.len_beacon_ies = ielen; + res->pub.information_elements = res->pub.beacon_ies; + res->pub.len_information_elements = res->pub.len_beacon_ies; kref_init(&res->ref); - res = cfg80211_bss_update(wiphy_to_dev(wiphy), res, 0); + res = cfg80211_bss_update(wiphy_to_dev(wiphy), res); if (!res) return NULL; @@ -517,7 +566,6 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, struct cfg80211_internal_bss *res; size_t ielen = len - offsetof(struct ieee80211_mgmt, u.probe_resp.variable); - bool overwrite; size_t privsz = wiphy->bss_priv_size; if (WARN_ON(wiphy->signal_type == NL80211_BSS_SIGNAL_UNSPEC && @@ -538,16 +586,28 @@ cfg80211_inform_bss_frame(struct wiphy *wiphy, res->pub.tsf = le64_to_cpu(mgmt->u.probe_resp.timestamp); res->pub.beacon_interval = le16_to_cpu(mgmt->u.probe_resp.beacon_int); res->pub.capability = le16_to_cpu(mgmt->u.probe_resp.capab_info); - /* point to after the private area */ - res->pub.information_elements = (u8 *)res + sizeof(*res) + privsz; - memcpy(res->pub.information_elements, mgmt->u.probe_resp.variable, ielen); - res->pub.len_information_elements = ielen; + /* + * The initial buffer for the IEs is allocated with the BSS entry and + * is located after the private area. + */ + if (ieee80211_is_probe_resp(mgmt->frame_control)) { + res->pub.proberesp_ies = (u8 *) res + sizeof(*res) + privsz; + memcpy(res->pub.proberesp_ies, mgmt->u.probe_resp.variable, + ielen); + res->pub.len_proberesp_ies = ielen; + res->pub.information_elements = res->pub.proberesp_ies; + res->pub.len_information_elements = res->pub.len_proberesp_ies; + } else { + res->pub.beacon_ies = (u8 *) res + sizeof(*res) + privsz; + memcpy(res->pub.beacon_ies, mgmt->u.beacon.variable, ielen); + res->pub.len_beacon_ies = ielen; + res->pub.information_elements = res->pub.beacon_ies; + res->pub.len_information_elements = res->pub.len_beacon_ies; + } kref_init(&res->ref); - overwrite = ieee80211_is_probe_resp(mgmt->frame_control); - - res = cfg80211_bss_update(wiphy_to_dev(wiphy), res, overwrite); + res = cfg80211_bss_update(wiphy_to_dev(wiphy), res); if (!res) return NULL; -- cgit v1.2.3-70-g09d2 From ab13315af97919fae0e014748105fdc2e30afb2d Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 12 Jan 2010 10:42:31 +0200 Subject: mac80211: add U-APSD client support Add Unscheduled Automatic Power-Save Delivery (U-APSD) client support. The idea is that the data frames from the client trigger AP to send the buffered frames with ACs which have U-APSD enabled. This decreases latency and makes it possible to save even more power. Driver needs to use IEEE80211_HW_UAPSD to enable the feature. The current implementation assumes that firmware takes care of the wakeup and hardware needing IEEE80211_HW_PS_NULLFUNC_STACK is not yet supported. Tested with wl1251 on a Nokia N900 and Cisco Aironet 1231G AP and running various test traffic with ping. Signed-off-by: Kalle Valo Signed-off-by: John W. 
Linville --- include/linux/ieee80211.h | 18 ++++++++++++++++++ include/net/mac80211.h | 7 +++++++ net/mac80211/cfg.c | 7 +++++++ net/mac80211/ieee80211_i.h | 13 ++++++++++++- net/mac80211/main.c | 4 ++++ net/mac80211/mlme.c | 31 ++++++++++++++++++++++++++++--- net/mac80211/scan.c | 18 ++++++++++++++++++ net/mac80211/util.c | 2 ++ net/mac80211/work.c | 12 ++++++++++-- 9 files changed, 106 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 602c0692c3fc..a8c6069a0d9f 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -120,6 +120,24 @@ #define IEEE80211_QOS_CTL_TID_MASK 0x000F #define IEEE80211_QOS_CTL_TAG1D_MASK 0x0007 +/* U-APSD queue for WMM IEs sent by AP */ +#define IEEE80211_WMM_IE_AP_QOSINFO_UAPSD (1<<7) + +/* U-APSD queues for WMM IEs sent by STA */ +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_VO (1<<0) +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_VI (1<<1) +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_BK (1<<2) +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_BE (1<<3) +#define IEEE80211_WMM_IE_STA_QOSINFO_AC_MASK 0x0f + +/* U-APSD max SP length for WMM IEs sent by STA */ +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL 0x00 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_2 0x01 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_4 0x02 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_6 0x03 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK 0x03 +#define IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT 5 + struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index e1e73c6abeff..f313a3cbabda 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -113,6 +113,7 @@ struct ieee80211_tx_queue_params { u16 cw_min; u16 cw_max; u8 aifs; + bool uapsd; }; /** @@ -929,6 +930,11 @@ enum ieee80211_tkip_key_type { * Hardware supports dynamic spatial multiplexing powersave, * ie. can turn off all but one chain and then wake the rest * up as required after, for example, rts/cts handshake. + * + * @IEEE80211_HW_SUPPORTS_UAPSD: + * Hardware supports Unscheduled Automatic Power Save Delivery + * (U-APSD) in managed mode. The mode is configured with + * conf_tx() operation. */ enum ieee80211_hw_flags { IEEE80211_HW_HAS_RATE_CONTROL = 1<<0, @@ -948,6 +954,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_BEACON_FILTER = 1<<14, IEEE80211_HW_SUPPORTS_STATIC_SMPS = 1<<15, IEEE80211_HW_SUPPORTS_DYNAMIC_SMPS = 1<<16, + IEEE80211_HW_SUPPORTS_UAPSD = 1<<17, }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index dc12e9466ffd..8286df5822d5 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1128,6 +1128,13 @@ static int ieee80211_set_txq_params(struct wiphy *wiphy, p.cw_max = params->cwmax; p.cw_min = params->cwmin; p.txop = params->txop; + + /* + * Setting tx queue params disables u-apsd because it's only + * called in master mode. 
+ */ + p.uapsd = false; + if (drv_conf_tx(local, params->queue, &p)) { printk(KERN_DEBUG "%s: failed to set TX queue " "parameters for queue %d\n", diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 3e4ac3f30857..3468e378509a 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -58,6 +58,15 @@ struct ieee80211_local; #define TU_TO_EXP_TIME(x) (jiffies + usecs_to_jiffies((x) * 1024)) +#define IEEE80211_DEFAULT_UAPSD_QUEUES \ + (IEEE80211_WMM_IE_STA_QOSINFO_AC_BK | \ + IEEE80211_WMM_IE_STA_QOSINFO_AC_BE | \ + IEEE80211_WMM_IE_STA_QOSINFO_AC_VI | \ + IEEE80211_WMM_IE_STA_QOSINFO_AC_VO) + +#define IEEE80211_DEFAULT_MAX_SP_LEN \ + IEEE80211_WMM_IE_STA_QOSINFO_SP_ALL + struct ieee80211_fragment_entry { unsigned long first_frag_time; unsigned int seq; @@ -78,6 +87,7 @@ struct ieee80211_bss { u8 dtim_period; bool wmm_used; + bool uapsd_supported; unsigned long last_probe_resp; @@ -285,7 +295,7 @@ struct ieee80211_work { u8 ssid[IEEE80211_MAX_SSID_LEN]; u8 ssid_len; u8 supp_rates_len; - bool wmm_used, use_11n; + bool wmm_used, use_11n, uapsd_used; } assoc; struct { u32 duration; @@ -306,6 +316,7 @@ enum ieee80211_sta_flags { IEEE80211_STA_DISABLE_11N = BIT(4), IEEE80211_STA_CSA_RECEIVED = BIT(5), IEEE80211_STA_MFP_ENABLED = BIT(6), + IEEE80211_STA_UAPSD_ENABLED = BIT(7), }; struct ieee80211_if_managed { diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 468829143991..0054bba08ce1 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -491,6 +491,10 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) else if (local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC) local->hw.wiphy->signal_type = CFG80211_SIGNAL_TYPE_UNSPEC; + WARN((local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD) + && (local->hw.flags & IEEE80211_HW_PS_NULLFUNC_STACK), + "U-APSD not supported with HW_PS_NULLFUNC_STACK\n"); + /* * Calculate scan IE length -- we need this to alloc * memory and to subtract from the driver limit. 
It diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 86f025bc9456..39c27d83a4f2 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -569,7 +569,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, struct ieee80211_tx_queue_params params; size_t left; int count; - u8 *pos; + u8 *pos, uapsd_queues = 0; if (local->hw.queues < 4) return; @@ -579,6 +579,10 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, if (wmm_param_len < 8 || wmm_param[5] /* version */ != 1) return; + + if (ifmgd->flags & IEEE80211_STA_UAPSD_ENABLED) + uapsd_queues = IEEE80211_DEFAULT_UAPSD_QUEUES; + count = wmm_param[6] & 0x0f; if (count == ifmgd->wmm_last_param_set) return; @@ -593,6 +597,7 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, for (; left >= 4; left -= 4, pos += 4) { int aci = (pos[0] >> 5) & 0x03; int acm = (pos[0] >> 4) & 0x01; + bool uapsd = false; int queue; switch (aci) { @@ -600,22 +605,30 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, queue = 3; if (acm) local->wmm_acm |= BIT(1) | BIT(2); /* BK/- */ + if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BK) + uapsd = true; break; case 2: /* AC_VI */ queue = 1; if (acm) local->wmm_acm |= BIT(4) | BIT(5); /* CL/VI */ + if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VI) + uapsd = true; break; case 3: /* AC_VO */ queue = 0; if (acm) local->wmm_acm |= BIT(6) | BIT(7); /* VO/NC */ + if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_VO) + uapsd = true; break; case 0: /* AC_BE */ default: queue = 2; if (acm) local->wmm_acm |= BIT(0) | BIT(3); /* BE/EE */ + if (uapsd_queues & IEEE80211_WMM_IE_STA_QOSINFO_AC_BE) + uapsd = true; break; } @@ -623,11 +636,14 @@ static void ieee80211_sta_wmm_params(struct ieee80211_local *local, params.cw_max = ecw2cw((pos[1] & 0xf0) >> 4); params.cw_min = ecw2cw(pos[1] & 0x0f); params.txop = get_unaligned_le16(pos + 2); + params.uapsd = uapsd; + #ifdef CONFIG_MAC80211_VERBOSE_DEBUG printk(KERN_DEBUG "%s: WMM queue=%d aci=%d acm=%d aifs=%d " - "cWmin=%d cWmax=%d txop=%d\n", + "cWmin=%d cWmax=%d txop=%d uapsd=%d\n", wiphy_name(local->hw.wiphy), queue, aci, acm, - params.aifs, params.cw_min, params.cw_max, params.txop); + params.aifs, params.cw_min, params.cw_max, params.txop, + params.uapsd); #endif if (drv_conf_tx(local, queue, ¶ms) && local->ops->conf_tx) printk(KERN_DEBUG "%s: failed to set TX queue " @@ -1906,6 +1922,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata, wk->assoc.ht_information_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_HT_INFORMATION); + if (bss->wmm_used && bss->uapsd_supported && + (sdata->local->hw.flags & IEEE80211_HW_SUPPORTS_UAPSD)) { + wk->assoc.uapsd_used = true; + ifmgd->flags |= IEEE80211_STA_UAPSD_ENABLED; + } else { + wk->assoc.uapsd_used = false; + ifmgd->flags &= ~IEEE80211_STA_UAPSD_ENABLED; + } + ssid = ieee80211_bss_get_ie(req->bss, WLAN_EID_SSID); memcpy(wk->assoc.ssid, ssid + 2, ssid[1]); wk->assoc.ssid_len = ssid[1]; diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 30cb62bb45b3..9afe2f9885dc 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -54,6 +54,23 @@ void ieee80211_rx_bss_put(struct ieee80211_local *local, cfg80211_put_bss(container_of((void *)bss, struct cfg80211_bss, priv)); } +static bool is_uapsd_supported(struct ieee802_11_elems *elems) +{ + u8 qos_info; + + if (elems->wmm_info && elems->wmm_info_len == 7 + && elems->wmm_info[5] == 1) + qos_info = elems->wmm_info[6]; + else if (elems->wmm_param && elems->wmm_param_len == 24 + && 
elems->wmm_param[5] == 1) + qos_info = elems->wmm_param[6]; + else + /* no valid wmm information or parameter element found */ + return false; + + return qos_info & IEEE80211_WMM_IE_AP_QOSINFO_UAPSD; +} + struct ieee80211_bss * ieee80211_bss_info_update(struct ieee80211_local *local, struct ieee80211_rx_status *rx_status, @@ -117,6 +134,7 @@ ieee80211_bss_info_update(struct ieee80211_local *local, } bss->wmm_used = elems->wmm_param || elems->wmm_info; + bss->uapsd_supported = is_uapsd_supported(elems); if (!beacon) bss->last_probe_resp = jiffies; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index a2ba6e29bd9a..e278f97c8305 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -792,6 +792,8 @@ void ieee80211_set_wmm_default(struct ieee80211_sub_if_data *sdata) break; } + qparam.uapsd = false; + drv_conf_tx(local, queue, &qparam); } } diff --git a/net/mac80211/work.c b/net/mac80211/work.c index 7c5d95b1bc04..a74fd6ee0083 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -202,7 +202,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, struct ieee80211_local *local = sdata->local; struct sk_buff *skb; struct ieee80211_mgmt *mgmt; - u8 *pos; + u8 *pos, qos_info; const u8 *ies; size_t offset = 0, noffset; int i, len, count, rates_len, supp_rates_len; @@ -375,6 +375,14 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, } if (wk->assoc.wmm_used && local->hw.queues >= 4) { + if (wk->assoc.uapsd_used) { + qos_info = IEEE80211_DEFAULT_UAPSD_QUEUES; + qos_info |= (IEEE80211_DEFAULT_MAX_SP_LEN << + IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT); + } else { + qos_info = 0; + } + pos = skb_put(skb, 9); *pos++ = WLAN_EID_VENDOR_SPECIFIC; *pos++ = 7; /* len */ @@ -384,7 +392,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata, *pos++ = 2; /* WME */ *pos++ = 0; /* WME info */ *pos++ = 1; /* WME ver */ - *pos++ = 0; + *pos++ = qos_info; } /* add any remaining custom (i.e. vendor specific here) IEs */ -- cgit v1.2.3-70-g09d2 From 558a6669d7cb407fbb0b5aec184b5c3b9a893d30 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Tue, 12 Jan 2010 10:43:00 +0200 Subject: ieee80211: add struct ieee80211_hdr_qos The header can be used to create qos nullfunc frames, for example. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/linux/ieee80211.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index a8c6069a0d9f..842701906ae9 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -157,6 +157,16 @@ struct ieee80211_hdr_3addr { __le16 seq_ctrl; } __attribute__ ((packed)); +struct ieee80211_qos_hdr { + __le16 frame_control; + __le16 duration_id; + u8 addr1[6]; + u8 addr2[6]; + u8 addr3[6]; + __le16 seq_ctrl; + __le16 qos_ctrl; +} __attribute__ ((packed)); + /** * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder -- cgit v1.2.3-70-g09d2 From 5040ab67a2c6d5710ba497dc52a8f7035729d7b0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 11 Jan 2010 11:14:44 +0900 Subject: libata: retry link resume if necessary Interestingly, when SIDPR is used in ata_piix, writes to DET in SControl sometimes get ignored leading to detection failure. Update sata_link_resume() such that it reads back SControl after clearing DET and retry if it's not clear. 
Signed-off-by: Tejun Heo Reported-by: fengxiangjun Reported-by: Jim Faulkner Cc: stable@kernel.org Signed-off-by: Jeff Garzik --- drivers/ata/libata-core.c | 38 +++++++++++++++++++++++++++++++------- include/linux/libata.h | 3 +++ 2 files changed, 34 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/ata/libata-core.c b/drivers/ata/libata-core.c index 22ff51bdbc8a..6728328f3bea 100644 --- a/drivers/ata/libata-core.c +++ b/drivers/ata/libata-core.c @@ -3790,21 +3790,45 @@ int sata_link_debounce(struct ata_link *link, const unsigned long *params, int sata_link_resume(struct ata_link *link, const unsigned long *params, unsigned long deadline) { + int tries = ATA_LINK_RESUME_TRIES; u32 scontrol, serror; int rc; if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol))) return rc; - scontrol = (scontrol & 0x0f0) | 0x300; + /* + * Writes to SControl sometimes get ignored under certain + * controllers (ata_piix SIDPR). Make sure DET actually is + * cleared. + */ + do { + scontrol = (scontrol & 0x0f0) | 0x300; + if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol))) + return rc; + /* + * Some PHYs react badly if SStatus is pounded + * immediately after resuming. Delay 200ms before + * debouncing. + */ + msleep(200); - if ((rc = sata_scr_write(link, SCR_CONTROL, scontrol))) - return rc; + /* is SControl restored correctly? */ + if ((rc = sata_scr_read(link, SCR_CONTROL, &scontrol))) + return rc; + } while ((scontrol & 0xf0f) != 0x300 && --tries); - /* Some PHYs react badly if SStatus is pounded immediately - * after resuming. Delay 200ms before debouncing. - */ - msleep(200); + if ((scontrol & 0xf0f) != 0x300) { + ata_link_printk(link, KERN_ERR, + "failed to resume link (SControl %X)\n", + scontrol); + return 0; + } + + if (tries < ATA_LINK_RESUME_TRIES) + ata_link_printk(link, KERN_WARNING, + "link resume succeeded after %d retries\n", + ATA_LINK_RESUME_TRIES - tries); if ((rc = sata_link_debounce(link, params, deadline))) return rc; diff --git a/include/linux/libata.h b/include/linux/libata.h index 6a9c4ddd3d95..73112250862c 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -354,6 +354,9 @@ enum { /* max tries if error condition is still set after ->error_handler */ ATA_EH_MAX_TRIES = 5, + /* sometimes resuming a link requires several retries */ + ATA_LINK_RESUME_TRIES = 5, + /* how hard are we gonna try to probe/recover devices */ ATA_PROBE_MAX_TRIES = 3, ATA_EH_DEV_TRIES = 3, -- cgit v1.2.3-70-g09d2 From 2c761270d5520dd84ab0b4e47c24d99ff8503c38 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 12 Jan 2010 17:39:16 +1100 Subject: lib: Introduce generic list_sort function There are two copies of list_sort() in the tree already, one in the DRM code, another in ubifs. Now XFS needs this as well. Create a generic list_sort() function from the ubifs version and convert existing users to it so we don't end up with yet another copy in the tree. 
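
As a rough usage illustration (not part of this patch): a caller defines a comparison callback taking the opaque priv pointer plus two list_head pointers, and list_sort() orders the list ascending. struct item, item_cmp() and sort_items() below are made-up example code; only the list_sort() prototype follows the new lib/list_sort.c helper.

/*
 * Usage sketch only; "struct item" is a made-up caller type, the
 * list_sort() call matches the generic helper introduced here.
 */
#include <linux/kernel.h>
#include <linux/list.h>
#include <linux/list_sort.h>

struct item {
	struct list_head list;
	int key;
};

static int item_cmp(void *priv, struct list_head *a, struct list_head *b)
{
	struct item *ia = container_of(a, struct item, list);
	struct item *ib = container_of(b, struct item, list);

	/* negative: a sorts before b; positive: b sorts before a */
	return ia->key - ib->key;
}

static void sort_items(struct list_head *items)
{
	list_sort(NULL, items, item_cmp);
}
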
Signed-off-by: Dave Chinner Acked-by: Dave Airlie Acked-by: Artem Bityutskiy Signed-off-by: Linus Torvalds --- drivers/gpu/drm/drm_modes.c | 90 ++------------------------------------ fs/ubifs/gc.c | 96 +---------------------------------------- include/linux/list_sort.h | 11 +++++ lib/Makefile | 2 +- lib/list_sort.c | 102 ++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 119 insertions(+), 182 deletions(-) create mode 100644 include/linux/list_sort.h create mode 100644 lib/list_sort.c (limited to 'include') diff --git a/drivers/gpu/drm/drm_modes.c b/drivers/gpu/drm/drm_modes.c index 6d81a02463a3..76d63394c776 100644 --- a/drivers/gpu/drm/drm_modes.c +++ b/drivers/gpu/drm/drm_modes.c @@ -1,9 +1,4 @@ /* - * The list_sort function is (presumably) licensed under the GPL (see the - * top level "COPYING" file for details). - * - * The remainder of this file is: - * * Copyright © 1997-2003 by The XFree86 Project, Inc. * Copyright © 2007 Dave Airlie * Copyright © 2007-2008 Intel Corporation @@ -36,6 +31,7 @@ */ #include +#include #include "drmP.h" #include "drm.h" #include "drm_crtc.h" @@ -855,6 +851,7 @@ EXPORT_SYMBOL(drm_mode_prune_invalid); /** * drm_mode_compare - compare modes for favorability + * @priv: unused * @lh_a: list_head for first mode * @lh_b: list_head for second mode * @@ -868,7 +865,7 @@ EXPORT_SYMBOL(drm_mode_prune_invalid); * Negative if @lh_a is better than @lh_b, zero if they're equivalent, or * positive if @lh_b is better than @lh_a. */ -static int drm_mode_compare(struct list_head *lh_a, struct list_head *lh_b) +static int drm_mode_compare(void *priv, struct list_head *lh_a, struct list_head *lh_b) { struct drm_display_mode *a = list_entry(lh_a, struct drm_display_mode, head); struct drm_display_mode *b = list_entry(lh_b, struct drm_display_mode, head); @@ -885,85 +882,6 @@ static int drm_mode_compare(struct list_head *lh_a, struct list_head *lh_b) return diff; } -/* FIXME: what we don't have a list sort function? */ -/* list sort from Mark J Roberts (mjr@znex.org) */ -void list_sort(struct list_head *head, - int (*cmp)(struct list_head *a, struct list_head *b)) -{ - struct list_head *p, *q, *e, *list, *tail, *oldhead; - int insize, nmerges, psize, qsize, i; - - list = head->next; - list_del(head); - insize = 1; - for (;;) { - p = oldhead = list; - list = tail = NULL; - nmerges = 0; - - while (p) { - nmerges++; - q = p; - psize = 0; - for (i = 0; i < insize; i++) { - psize++; - q = q->next == oldhead ? 
NULL : q->next; - if (!q) - break; - } - - qsize = insize; - while (psize > 0 || (qsize > 0 && q)) { - if (!psize) { - e = q; - q = q->next; - qsize--; - if (q == oldhead) - q = NULL; - } else if (!qsize || !q) { - e = p; - p = p->next; - psize--; - if (p == oldhead) - p = NULL; - } else if (cmp(p, q) <= 0) { - e = p; - p = p->next; - psize--; - if (p == oldhead) - p = NULL; - } else { - e = q; - q = q->next; - qsize--; - if (q == oldhead) - q = NULL; - } - if (tail) - tail->next = e; - else - list = e; - e->prev = tail; - tail = e; - } - p = q; - } - - tail->next = list; - list->prev = tail; - - if (nmerges <= 1) - break; - - insize *= 2; - } - - head->next = list; - head->prev = list->prev; - list->prev->next = head; - list->prev = head; -} - /** * drm_mode_sort - sort mode list * @mode_list: list to sort @@ -975,7 +893,7 @@ void list_sort(struct list_head *head, */ void drm_mode_sort(struct list_head *mode_list) { - list_sort(mode_list, drm_mode_compare); + list_sort(NULL, mode_list, drm_mode_compare); } EXPORT_SYMBOL(drm_mode_sort); diff --git a/fs/ubifs/gc.c b/fs/ubifs/gc.c index 618c2701d3a7..e5a3d8e96bb7 100644 --- a/fs/ubifs/gc.c +++ b/fs/ubifs/gc.c @@ -54,6 +54,7 @@ */ #include +#include #include "ubifs.h" /* @@ -107,101 +108,6 @@ static int switch_gc_head(struct ubifs_info *c) return err; } -/** - * list_sort - sort a list. - * @priv: private data, passed to @cmp - * @head: the list to sort - * @cmp: the elements comparison function - * - * This function has been implemented by Mark J Roberts . It - * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted - * in ascending order. - * - * The comparison function @cmp is supposed to return a negative value if @a is - * than @b, and a positive value if @a is greater than @b. If @a and @b are - * equivalent, then it does not matter what this function returns. - */ -static void list_sort(void *priv, struct list_head *head, - int (*cmp)(void *priv, struct list_head *a, - struct list_head *b)) -{ - struct list_head *p, *q, *e, *list, *tail, *oldhead; - int insize, nmerges, psize, qsize, i; - - if (list_empty(head)) - return; - - list = head->next; - list_del(head); - insize = 1; - for (;;) { - p = oldhead = list; - list = tail = NULL; - nmerges = 0; - - while (p) { - nmerges++; - q = p; - psize = 0; - for (i = 0; i < insize; i++) { - psize++; - q = q->next == oldhead ? NULL : q->next; - if (!q) - break; - } - - qsize = insize; - while (psize > 0 || (qsize > 0 && q)) { - if (!psize) { - e = q; - q = q->next; - qsize--; - if (q == oldhead) - q = NULL; - } else if (!qsize || !q) { - e = p; - p = p->next; - psize--; - if (p == oldhead) - p = NULL; - } else if (cmp(priv, p, q) <= 0) { - e = p; - p = p->next; - psize--; - if (p == oldhead) - p = NULL; - } else { - e = q; - q = q->next; - qsize--; - if (q == oldhead) - q = NULL; - } - if (tail) - tail->next = e; - else - list = e; - e->prev = tail; - tail = e; - } - p = q; - } - - tail->next = list; - list->prev = tail; - - if (nmerges <= 1) - break; - - insize *= 2; - } - - head->next = list; - head->prev = list->prev; - list->prev->next = head; - list->prev = head; -} - /** * data_nodes_cmp - compare 2 data nodes. 
* @priv: UBIFS file-system description object diff --git a/include/linux/list_sort.h b/include/linux/list_sort.h new file mode 100644 index 000000000000..1a2df2efb771 --- /dev/null +++ b/include/linux/list_sort.h @@ -0,0 +1,11 @@ +#ifndef _LINUX_LIST_SORT_H +#define _LINUX_LIST_SORT_H + +#include + +struct list_head; + +void list_sort(void *priv, struct list_head *head, + int (*cmp)(void *priv, struct list_head *a, + struct list_head *b)); +#endif diff --git a/lib/Makefile b/lib/Makefile index 911b25aed1e7..3b0b4a696db9 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -21,7 +21,7 @@ lib-y += kobject.o kref.o klist.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - string_helpers.o gcd.o + string_helpers.o gcd.o list_sort.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/list_sort.c b/lib/list_sort.c new file mode 100644 index 000000000000..19d11e0bb958 --- /dev/null +++ b/lib/list_sort.c @@ -0,0 +1,102 @@ +#include +#include +#include +#include +#include + +/** + * list_sort - sort a list. + * @priv: private data, passed to @cmp + * @head: the list to sort + * @cmp: the elements comparison function + * + * This function has been implemented by Mark J Roberts . It + * implements "merge sort" which has O(nlog(n)) complexity. The list is sorted + * in ascending order. + * + * The comparison function @cmp is supposed to return a negative value if @a is + * less than @b, and a positive value if @a is greater than @b. If @a and @b + * are equivalent, then it does not matter what this function returns. + */ +void list_sort(void *priv, struct list_head *head, + int (*cmp)(void *priv, struct list_head *a, + struct list_head *b)) +{ + struct list_head *p, *q, *e, *list, *tail, *oldhead; + int insize, nmerges, psize, qsize, i; + + if (list_empty(head)) + return; + + list = head->next; + list_del(head); + insize = 1; + for (;;) { + p = oldhead = list; + list = tail = NULL; + nmerges = 0; + + while (p) { + nmerges++; + q = p; + psize = 0; + for (i = 0; i < insize; i++) { + psize++; + q = q->next == oldhead ? NULL : q->next; + if (!q) + break; + } + + qsize = insize; + while (psize > 0 || (qsize > 0 && q)) { + if (!psize) { + e = q; + q = q->next; + qsize--; + if (q == oldhead) + q = NULL; + } else if (!qsize || !q) { + e = p; + p = p->next; + psize--; + if (p == oldhead) + p = NULL; + } else if (cmp(priv, p, q) <= 0) { + e = p; + p = p->next; + psize--; + if (p == oldhead) + p = NULL; + } else { + e = q; + q = q->next; + qsize--; + if (q == oldhead) + q = NULL; + } + if (tail) + tail->next = e; + else + list = e; + e->prev = tail; + tail = e; + } + p = q; + } + + tail->next = list; + list->prev = tail; + + if (nmerges <= 1) + break; + + insize *= 2; + } + + head->next = list; + head->prev = list->prev; + list->prev->next = head; + list->prev = head; +} + +EXPORT_SYMBOL(list_sort); -- cgit v1.2.3-70-g09d2 From cd65c3c7d1081290b7365897c2290a84aa967d4d Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 13 Jan 2010 18:10:36 -0800 Subject: net: fix build erros with CONFIG_BUG=n, CONFIG_GENERIC_BUG=n Fixed build errors introduced by commit 7ad6848c (ip: fix mc_loop checks for tunnels with multicast outer addresses) Signed-off-by: Octavian Purdila Signed-off-by: David S. 
Miller --- include/net/ip.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/ip.h b/include/net/ip.h index d9a0e74d8923..fb63371c07a8 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -338,7 +338,7 @@ static inline int sk_mc_loop(struct sock *sk) return inet6_sk(sk)->mc_loop; #endif } - __WARN(); + WARN_ON(1); return 1; } -- cgit v1.2.3-70-g09d2 From 508e14b4a4fb1a824a14f2c5b8d7df67b313f8e4 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 12 Jan 2010 14:27:30 +0000 Subject: netpoll: allow execution of multiple rx_hooks per interface Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netpoll.h | 11 +++- net/core/netpoll.c | 169 ++++++++++++++++++++++++++++++------------------ 2 files changed, 114 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h index 2524267210d3..a765ea898549 100644 --- a/include/linux/netpoll.h +++ b/include/linux/netpoll.h @@ -21,15 +21,20 @@ struct netpoll { __be32 local_ip, remote_ip; u16 local_port, remote_port; u8 remote_mac[ETH_ALEN]; + + struct list_head rx; /* rx_np list element */ }; struct netpoll_info { atomic_t refcnt; + int rx_flags; spinlock_t rx_lock; - struct netpoll *rx_np; /* netpoll that registered an rx_hook */ + struct list_head rx_np; /* netpolls that registered an rx_hook */ + struct sk_buff_head arp_tx; /* list of arp requests to reply to */ struct sk_buff_head txq; + struct delayed_work tx_work; }; @@ -51,7 +56,7 @@ static inline int netpoll_rx(struct sk_buff *skb) unsigned long flags; int ret = 0; - if (!npinfo || (!npinfo->rx_np && !npinfo->rx_flags)) + if (!npinfo || (list_empty(&npinfo->rx_np) && !npinfo->rx_flags)) return 0; spin_lock_irqsave(&npinfo->rx_lock, flags); @@ -67,7 +72,7 @@ static inline int netpoll_rx_on(struct sk_buff *skb) { struct netpoll_info *npinfo = skb->dev->npinfo; - return npinfo && (npinfo->rx_np || npinfo->rx_flags); + return npinfo && (!list_empty(&npinfo->rx_np) || npinfo->rx_flags); } static inline int netpoll_receive_skb(struct sk_buff *skb) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 0b4d0d35ef40..7aa697253765 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -407,11 +407,24 @@ static void arp_reply(struct sk_buff *skb) __be32 sip, tip; unsigned char *sha; struct sk_buff *send_skb; - struct netpoll *np = NULL; + struct netpoll *np, *tmp; + unsigned long flags; + int hits = 0; + + if (list_empty(&npinfo->rx_np)) + return; + + /* Before checking the packet, we do some early + inspection whether this is interesting at all */ + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (np->dev == skb->dev) + hits++; + } + spin_unlock_irqrestore(&npinfo->rx_lock, flags); - if (npinfo->rx_np && npinfo->rx_np->dev == skb->dev) - np = npinfo->rx_np; - if (!np) + /* No netpoll struct is using this dev */ + if (!hits) return; /* No arp on this interface */ @@ -437,77 +450,91 @@ static void arp_reply(struct sk_buff *skb) arp_ptr += skb->dev->addr_len; memcpy(&sip, arp_ptr, 4); arp_ptr += 4; - /* if we actually cared about dst hw addr, it would get copied here */ + /* If we actually cared about dst hw addr, + it would get copied here */ arp_ptr += skb->dev->addr_len; memcpy(&tip, arp_ptr, 4); /* Should we ignore arp? 
*/ - if (tip != np->local_ip || - ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) + if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip)) return; size = arp_hdr_len(skb->dev); - send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev), - LL_RESERVED_SPACE(np->dev)); - if (!send_skb) - return; - - skb_reset_network_header(send_skb); - arp = (struct arphdr *) skb_put(send_skb, size); - send_skb->dev = skb->dev; - send_skb->protocol = htons(ETH_P_ARP); + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (tip != np->local_ip) + continue; - /* Fill the device header for the ARP frame */ - if (dev_hard_header(send_skb, skb->dev, ptype, - sha, np->dev->dev_addr, - send_skb->len) < 0) { - kfree_skb(send_skb); - return; - } + send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev), + LL_RESERVED_SPACE(np->dev)); + if (!send_skb) + continue; - /* - * Fill out the arp protocol part. - * - * we only support ethernet device type, - * which (according to RFC 1390) should always equal 1 (Ethernet). - */ + skb_reset_network_header(send_skb); + arp = (struct arphdr *) skb_put(send_skb, size); + send_skb->dev = skb->dev; + send_skb->protocol = htons(ETH_P_ARP); - arp->ar_hrd = htons(np->dev->type); - arp->ar_pro = htons(ETH_P_IP); - arp->ar_hln = np->dev->addr_len; - arp->ar_pln = 4; - arp->ar_op = htons(type); + /* Fill the device header for the ARP frame */ + if (dev_hard_header(send_skb, skb->dev, ptype, + sha, np->dev->dev_addr, + send_skb->len) < 0) { + kfree_skb(send_skb); + continue; + } - arp_ptr=(unsigned char *)(arp + 1); - memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &tip, 4); - arp_ptr += 4; - memcpy(arp_ptr, sha, np->dev->addr_len); - arp_ptr += np->dev->addr_len; - memcpy(arp_ptr, &sip, 4); + /* + * Fill out the arp protocol part. + * + * we only support ethernet device type, + * which (according to RFC 1390) should + * always equal 1 (Ethernet). 
+ */ - netpoll_send_skb(np, send_skb); + arp->ar_hrd = htons(np->dev->type); + arp->ar_pro = htons(ETH_P_IP); + arp->ar_hln = np->dev->addr_len; + arp->ar_pln = 4; + arp->ar_op = htons(type); + + arp_ptr = (unsigned char *)(arp + 1); + memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len); + arp_ptr += np->dev->addr_len; + memcpy(arp_ptr, &tip, 4); + arp_ptr += 4; + memcpy(arp_ptr, sha, np->dev->addr_len); + arp_ptr += np->dev->addr_len; + memcpy(arp_ptr, &sip, 4); + + netpoll_send_skb(np, send_skb); + + /* If there are several rx_hooks for the same address, + we're fine by sending a single reply */ + break; + } + spin_unlock_irqrestore(&npinfo->rx_lock, flags); } int __netpoll_rx(struct sk_buff *skb) { int proto, len, ulen; + int hits = 0; struct iphdr *iph; struct udphdr *uh; - struct netpoll_info *npi = skb->dev->npinfo; - struct netpoll *np = npi->rx_np; + struct netpoll_info *npinfo = skb->dev->npinfo; + struct netpoll *np, *tmp; - if (!np) + if (list_empty(&npinfo->rx_np)) goto out; + if (skb->dev->type != ARPHRD_ETHER) goto out; /* check if netpoll clients need ARP */ if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) { - skb_queue_tail(&npi->arp_tx, skb); + skb_queue_tail(&npinfo->arp_tx, skb); return 1; } @@ -551,16 +578,23 @@ int __netpoll_rx(struct sk_buff *skb) goto out; if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr)) goto out; - if (np->local_ip && np->local_ip != iph->daddr) - goto out; - if (np->remote_ip && np->remote_ip != iph->saddr) - goto out; - if (np->local_port && np->local_port != ntohs(uh->dest)) - goto out; - np->rx_hook(np, ntohs(uh->source), - (char *)(uh+1), - ulen - sizeof(struct udphdr)); + list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) { + if (np->local_ip && np->local_ip != iph->daddr) + continue; + if (np->remote_ip && np->remote_ip != iph->saddr) + continue; + if (np->local_port && np->local_port != ntohs(uh->dest)) + continue; + + np->rx_hook(np, ntohs(uh->source), + (char *)(uh+1), + ulen - sizeof(struct udphdr)); + hits++; + } + + if (!hits) + goto out; kfree_skb(skb); return 1; @@ -684,6 +718,7 @@ int netpoll_setup(struct netpoll *np) struct net_device *ndev = NULL; struct in_device *in_dev; struct netpoll_info *npinfo; + struct netpoll *npe, *tmp; unsigned long flags; int err; @@ -704,7 +739,7 @@ int netpoll_setup(struct netpoll *np) } npinfo->rx_flags = 0; - npinfo->rx_np = NULL; + INIT_LIST_HEAD(&npinfo->rx_np); spin_lock_init(&npinfo->rx_lock); skb_queue_head_init(&npinfo->arp_tx); @@ -785,7 +820,7 @@ int netpoll_setup(struct netpoll *np) if (np->rx_hook) { spin_lock_irqsave(&npinfo->rx_lock, flags); npinfo->rx_flags |= NETPOLL_RX_ENABLED; - npinfo->rx_np = np; + list_add_tail(&np->rx, &npinfo->rx_np); spin_unlock_irqrestore(&npinfo->rx_lock, flags); } @@ -801,9 +836,16 @@ int netpoll_setup(struct netpoll *np) return 0; release: - if (!ndev->npinfo) + if (!ndev->npinfo) { + spin_lock_irqsave(&npinfo->rx_lock, flags); + list_for_each_entry_safe(npe, tmp, &npinfo->rx_np, rx) { + npe->dev = NULL; + } + spin_unlock_irqrestore(&npinfo->rx_lock, flags); + kfree(npinfo); - np->dev = NULL; + } + dev_put(ndev); return err; } @@ -823,10 +865,11 @@ void netpoll_cleanup(struct netpoll *np) if (np->dev) { npinfo = np->dev->npinfo; if (npinfo) { - if (npinfo->rx_np == np) { + if (!list_empty(&npinfo->rx_np)) { spin_lock_irqsave(&npinfo->rx_lock, flags); - npinfo->rx_np = NULL; - npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; + list_del(&np->rx); + if (list_empty(&npinfo->rx_np)) + npinfo->rx_flags &= ~NETPOLL_RX_ENABLED; 
spin_unlock_irqrestore(&npinfo->rx_lock, flags); } -- cgit v1.2.3-70-g09d2 From 9a58a80a701bdb2d220cdab4914218df5b48d781 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Thu, 14 Jan 2010 03:10:54 -0800 Subject: proc_fops: convert drivers/isdn/ to seq_file Convert code away from ->read_proc/->write_proc interfaces. Switch to proc_create()/proc_create_data() which make addition of proc entries reliable wrt NULL ->proc_fops, NULL ->data and so on. Problem with ->read_proc et al is described here commit 786d7e1612f0b0adb6046f19b906609e4fe8b1ba "Fix rmmod/read/write races in /proc entries" [akpm@linux-foundation.org: CONFIG_PROC_FS=n build fix] Signed-off-by: Alexey Dobriyan Signed-off-by: Tilman Schmidt Signed-off-by: Karsten Keil Signed-off-by: David S. Miller --- Documentation/isdn/INTERFACE.CAPI | 9 +- drivers/isdn/capi/capi.c | 99 ++++++---------- drivers/isdn/capi/capidrv.c | 55 +++------ drivers/isdn/capi/kcapi.c | 8 +- drivers/isdn/gigaset/capi.c | 75 ++++++------ drivers/isdn/hardware/avm/avmcard.h | 6 +- drivers/isdn/hardware/avm/b1.c | 54 +++++---- drivers/isdn/hardware/avm/b1dma.c | 71 ++++++------ drivers/isdn/hardware/avm/b1isa.c | 2 +- drivers/isdn/hardware/avm/b1pci.c | 4 +- drivers/isdn/hardware/avm/b1pcmcia.c | 2 +- drivers/isdn/hardware/avm/c4.c | 53 +++++---- drivers/isdn/hardware/avm/t1isa.c | 2 +- drivers/isdn/hardware/avm/t1pci.c | 2 +- drivers/isdn/hardware/eicon/capimain.c | 40 ++++--- drivers/isdn/hardware/eicon/diva_didd.c | 45 ++++---- drivers/isdn/hardware/eicon/divasi.c | 48 ++++---- drivers/isdn/hardware/eicon/divasproc.c | 198 ++++++++++++++------------------ drivers/isdn/hysdn/hycapi.c | 56 ++++----- include/linux/isdn/capilli.h | 3 +- net/bluetooth/cmtp/capi.c | 37 +++--- 21 files changed, 411 insertions(+), 458 deletions(-) (limited to 'include') diff --git a/Documentation/isdn/INTERFACE.CAPI b/Documentation/isdn/INTERFACE.CAPI index 5fe8de5cc727..f172091fb7cd 100644 --- a/Documentation/isdn/INTERFACE.CAPI +++ b/Documentation/isdn/INTERFACE.CAPI @@ -149,10 +149,11 @@ char *(*procinfo)(struct capi_ctr *ctrlr) pointer to a callback function returning the entry for the device in the CAPI controller info table, /proc/capi/controller -read_proc_t *ctr_read_proc - pointer to the read_proc callback function for the device's proc file - system entry, /proc/capi/controllers/; will be called with a - pointer to the device's capi_ctr structure as the last (data) argument +const struct file_operations *proc_fops + pointers to callback functions for the device's proc file + system entry, /proc/capi/controllers/; pointer to the device's + capi_ctr structure is available from struct proc_dir_entry::data + which is available from struct inode. Note: Callback functions except send_message() are never called in interrupt context. 
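Every per-driver conversion in this patch follows the same seq_file idiom: a show() callback that emits the whole entry with seq_printf(), an open() wrapper built on single_open(), a const file_operations using seq_read/seq_lseek/single_release, and registration through proc_create() or proc_create_data(). A minimal, self-contained sketch of that idiom follows; the entry name, its contents and the init/exit wrappers are hypothetical and not taken from any driver below:

#include <linux/module.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>

/* Emit the whole entry in one pass; seq_file handles offsets,
 * short reads and EOF, which the old read_proc code did by hand. */
static int example_proc_show(struct seq_file *m, void *v)
{
	seq_printf(m, "%-16s %s\n", "status", "ok");
	return 0;
}

static int example_proc_open(struct inode *inode, struct file *file)
{
	return single_open(file, example_proc_show, NULL);
}

static const struct file_operations example_proc_fops = {
	.owner   = THIS_MODULE,
	.open    = example_proc_open,
	.read    = seq_read,
	.llseek  = seq_lseek,
	.release = single_release,
};

static int __init example_init(void)
{
	/* The entry only becomes visible once ->proc_fops is in place. */
	proc_create("driver/example", 0, NULL, &example_proc_fops);
	return 0;
}

static void __exit example_exit(void)
{
	remove_proc_entry("driver/example", NULL);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

Where per-device data is needed, it goes in with proc_create_data() and comes back out of PDE(inode)->data in the open callback, as the capi_ctr conversions below do.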
diff --git a/drivers/isdn/capi/capi.c b/drivers/isdn/capi/capi.c index 65bf91e16a42..79f9364aded6 100644 --- a/drivers/isdn/capi/capi.c +++ b/drivers/isdn/capi/capi.c @@ -33,6 +33,7 @@ #endif /* CONFIG_ISDN_CAPI_MIDDLEWARE */ #include #include +#include #include #include #include @@ -1407,114 +1408,84 @@ static void capinc_tty_exit(void) * /proc/capi/capi20: * minor applid nrecvctlpkt nrecvdatapkt nsendctlpkt nsenddatapkt */ -static int proc_capidev_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int capi20_proc_show(struct seq_file *m, void *v) { struct capidev *cdev; struct list_head *l; - int len = 0; read_lock(&capidev_list_lock); list_for_each(l, &capidev_list) { cdev = list_entry(l, struct capidev, list); - len += sprintf(page+len, "0 %d %lu %lu %lu %lu\n", + seq_printf(m, "0 %d %lu %lu %lu %lu\n", cdev->ap.applid, cdev->ap.nrecvctlpkt, cdev->ap.nrecvdatapkt, cdev->ap.nsentctlpkt, cdev->ap.nsentdatapkt); - if (len <= off) { - off -= len; - len = 0; - } else { - if (len-off > count) - goto endloop; - } } - -endloop: read_unlock(&capidev_list_lock); - if (len < count) - *eof = 1; - if (len > count) len = count; - if (len < 0) len = 0; - return len; + return 0; } +static int capi20_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, capi20_proc_show, NULL); +} + +static const struct file_operations capi20_proc_fops = { + .owner = THIS_MODULE, + .open = capi20_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* * /proc/capi/capi20ncci: * applid ncci */ -static int proc_capincci_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int capi20ncci_proc_show(struct seq_file *m, void *v) { struct capidev *cdev; struct capincci *np; struct list_head *l; - int len = 0; read_lock(&capidev_list_lock); list_for_each(l, &capidev_list) { cdev = list_entry(l, struct capidev, list); for (np=cdev->nccis; np; np = np->next) { - len += sprintf(page+len, "%d 0x%x\n", + seq_printf(m, "%d 0x%x\n", cdev->ap.applid, np->ncci); - if (len <= off) { - off -= len; - len = 0; - } else { - if (len-off > count) - goto endloop; - } } } -endloop: read_unlock(&capidev_list_lock); - *start = page+off; - if (len < count) - *eof = 1; - if (len>count) len = count; - if (len<0) len = 0; - return len; + return 0; } -static struct procfsentries { - char *name; - mode_t mode; - int (*read_proc)(char *page, char **start, off_t off, - int count, int *eof, void *data); - struct proc_dir_entry *procent; -} procfsentries[] = { - /* { "capi", S_IFDIR, 0 }, */ - { "capi/capi20", 0 , proc_capidev_read_proc }, - { "capi/capi20ncci", 0 , proc_capincci_read_proc }, +static int capi20ncci_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, capi20ncci_proc_show, NULL); +} + +static const struct file_operations capi20ncci_proc_fops = { + .owner = THIS_MODULE, + .open = capi20ncci_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; static void __init proc_init(void) { - int nelem = ARRAY_SIZE(procfsentries); - int i; - - for (i=0; i < nelem; i++) { - struct procfsentries *p = procfsentries + i; - p->procent = create_proc_entry(p->name, p->mode, NULL); - if (p->procent) p->procent->read_proc = p->read_proc; - } + proc_create("capi/capi20", 0, NULL, &capi20_proc_fops); + proc_create("capi/capi20ncci", 0, NULL, &capi20ncci_proc_fops); } static void __exit proc_exit(void) { - int nelem = ARRAY_SIZE(procfsentries); - int i; - - for 
(i=nelem-1; i >= 0; i--) { - struct procfsentries *p = procfsentries + i; - if (p->procent) { - remove_proc_entry(p->name, NULL); - p->procent = NULL; - } - } + remove_proc_entry("capi/capi20", NULL); + remove_proc_entry("capi/capi20ncci", NULL); } /* -------- init function and module interface ---------------------- */ diff --git a/drivers/isdn/capi/capidrv.c b/drivers/isdn/capi/capidrv.c index 66b7d7a86474..bb450152fb74 100644 --- a/drivers/isdn/capi/capidrv.c +++ b/drivers/isdn/capi/capidrv.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -2229,59 +2230,37 @@ static void lower_callback(unsigned int cmd, u32 contr, void *data) * /proc/capi/capidrv: * nrecvctlpkt nrecvdatapkt nsendctlpkt nsenddatapkt */ -static int proc_capidrv_read_proc(char *page, char **start, off_t off, - int count, int *eof, void *data) +static int capidrv_proc_show(struct seq_file *m, void *v) { - int len = 0; - - len += sprintf(page+len, "%lu %lu %lu %lu\n", + seq_printf(m, "%lu %lu %lu %lu\n", global.ap.nrecvctlpkt, global.ap.nrecvdatapkt, global.ap.nsentctlpkt, global.ap.nsentdatapkt); - if (off+count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len-off) ? count : len-off); + return 0; +} + +static int capidrv_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, capidrv_proc_show, NULL); } -static struct procfsentries { - char *name; - mode_t mode; - int (*read_proc)(char *page, char **start, off_t off, - int count, int *eof, void *data); - struct proc_dir_entry *procent; -} procfsentries[] = { - /* { "capi", S_IFDIR, 0 }, */ - { "capi/capidrv", 0 , proc_capidrv_read_proc }, +static const struct file_operations capidrv_proc_fops = { + .owner = THIS_MODULE, + .open = capidrv_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, }; static void __init proc_init(void) { - int nelem = ARRAY_SIZE(procfsentries); - int i; - - for (i=0; i < nelem; i++) { - struct procfsentries *p = procfsentries + i; - p->procent = create_proc_entry(p->name, p->mode, NULL); - if (p->procent) p->procent->read_proc = p->read_proc; - } + proc_create("capi/capidrv", 0, NULL, &capidrv_proc_fops); } static void __exit proc_exit(void) { - int nelem = ARRAY_SIZE(procfsentries); - int i; - - for (i=nelem-1; i >= 0; i--) { - struct procfsentries *p = procfsentries + i; - if (p->procent) { - remove_proc_entry(p->name, NULL); - p->procent = NULL; - } - } + remove_proc_entry("capi/capidrv", NULL); } static int __init capidrv_init(void) diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index dc506ab99cac..b0bacf377c18 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -490,13 +490,7 @@ attach_capi_ctr(struct capi_ctr *card) card->traceflag = showcapimsgs; sprintf(card->procfn, "capi/controllers/%d", card->cnr); - card->procent = create_proc_entry(card->procfn, 0, NULL); - if (card->procent) { - card->procent->read_proc = - (int (*)(char *,char **,off_t,int,int *,void *)) - card->ctr_read_proc; - card->procent->data = card; - } + card->procent = proc_create_data(card->procfn, 0, NULL, card->proc_fops, card); ncards++; printk(KERN_NOTICE "kcapi: Controller [%03d]: %s attached\n", diff --git a/drivers/isdn/gigaset/capi.c b/drivers/isdn/gigaset/capi.c index 3f5cd06af104..6f0ae32906bf 100644 --- a/drivers/isdn/gigaset/capi.c +++ b/drivers/isdn/gigaset/capi.c @@ -13,6 +13,8 @@ #include "gigaset.h" #include +#include +#include #include #include #include @@ -2106,35 +2108,22 @@ 
static char *gigaset_procinfo(struct capi_ctr *ctr) return ctr->name; /* ToDo: more? */ } -/** - * gigaset_ctr_read_proc() - build controller proc file entry - * @page: buffer of PAGE_SIZE bytes for receiving the entry. - * @start: unused. - * @off: unused. - * @count: unused. - * @eof: unused. - * @ctr: controller descriptor structure. - * - * Return value: length of generated entry - */ -static int gigaset_ctr_read_proc(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *ctr) +static int gigaset_proc_show(struct seq_file *m, void *v) { + struct capi_ctr *ctr = m->private; struct cardstate *cs = ctr->driverdata; char *s; int i; - int len = 0; - len += sprintf(page+len, "%-16s %s\n", "name", ctr->name); - len += sprintf(page+len, "%-16s %s %s\n", "dev", + + seq_printf(m, "%-16s %s\n", "name", ctr->name); + seq_printf(m, "%-16s %s %s\n", "dev", dev_driver_string(cs->dev), dev_name(cs->dev)); - len += sprintf(page+len, "%-16s %d\n", "id", cs->myid); + seq_printf(m, "%-16s %d\n", "id", cs->myid); if (cs->gotfwver) - len += sprintf(page+len, "%-16s %d.%d.%d.%d\n", "firmware", + seq_printf(m, "%-16s %d.%d.%d.%d\n", "firmware", cs->fwver[0], cs->fwver[1], cs->fwver[2], cs->fwver[3]); - len += sprintf(page+len, "%-16s %d\n", "channels", - cs->channels); - len += sprintf(page+len, "%-16s %s\n", "onechannel", - cs->onechannel ? "yes" : "no"); + seq_printf(m, "%-16s %d\n", "channels", cs->channels); + seq_printf(m, "%-16s %s\n", "onechannel", cs->onechannel ? "yes" : "no"); switch (cs->mode) { case M_UNKNOWN: @@ -2152,7 +2141,7 @@ static int gigaset_ctr_read_proc(char *page, char **start, off_t off, default: s = "??"; } - len += sprintf(page+len, "%-16s %s\n", "mode", s); + seq_printf(m, "%-16s %s\n", "mode", s); switch (cs->mstate) { case MS_UNINITIALIZED: @@ -2176,25 +2165,21 @@ static int gigaset_ctr_read_proc(char *page, char **start, off_t off, default: s = "??"; } - len += sprintf(page+len, "%-16s %s\n", "mstate", s); + seq_printf(m, "%-16s %s\n", "mstate", s); - len += sprintf(page+len, "%-16s %s\n", "running", - cs->running ? "yes" : "no"); - len += sprintf(page+len, "%-16s %s\n", "connected", - cs->connected ? "yes" : "no"); - len += sprintf(page+len, "%-16s %s\n", "isdn_up", - cs->isdn_up ? "yes" : "no"); - len += sprintf(page+len, "%-16s %s\n", "cidmode", - cs->cidmode ? "yes" : "no"); + seq_printf(m, "%-16s %s\n", "running", cs->running ? "yes" : "no"); + seq_printf(m, "%-16s %s\n", "connected", cs->connected ? "yes" : "no"); + seq_printf(m, "%-16s %s\n", "isdn_up", cs->isdn_up ? "yes" : "no"); + seq_printf(m, "%-16s %s\n", "cidmode", cs->cidmode ? 
"yes" : "no"); for (i = 0; i < cs->channels; i++) { - len += sprintf(page+len, "[%d]%-13s %d\n", i, "corrupted", + seq_printf(m, "[%d]%-13s %d\n", i, "corrupted", cs->bcs[i].corrupted); - len += sprintf(page+len, "[%d]%-13s %d\n", i, "trans_down", + seq_printf(m, "[%d]%-13s %d\n", i, "trans_down", cs->bcs[i].trans_down); - len += sprintf(page+len, "[%d]%-13s %d\n", i, "trans_up", + seq_printf(m, "[%d]%-13s %d\n", i, "trans_up", cs->bcs[i].trans_up); - len += sprintf(page+len, "[%d]%-13s %d\n", i, "chstate", + seq_printf(m, "[%d]%-13s %d\n", i, "chstate", cs->bcs[i].chstate); switch (cs->bcs[i].proto2) { case L2_BITSYNC: @@ -2209,11 +2194,23 @@ static int gigaset_ctr_read_proc(char *page, char **start, off_t off, default: s = "??"; } - len += sprintf(page+len, "[%d]%-13s %s\n", i, "proto2", s); + seq_printf(m, "[%d]%-13s %s\n", i, "proto2", s); } - return len; + return 0; } +static int gigaset_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, gigaset_proc_show, PDE(inode)->data); +} + +static const struct file_operations gigaset_proc_fops = { + .owner = THIS_MODULE, + .open = gigaset_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; static struct capi_driver capi_driver_gigaset = { .name = "gigaset", @@ -2256,7 +2253,7 @@ int gigaset_isdn_register(struct cardstate *cs, const char *isdnid) iif->ctr.release_appl = gigaset_release_appl; iif->ctr.send_message = gigaset_send_message; iif->ctr.procinfo = gigaset_procinfo; - iif->ctr.ctr_read_proc = gigaset_ctr_read_proc; + iif->ctr.proc_fops = &gigaset_proc_fops; INIT_LIST_HEAD(&iif->appls); skb_queue_head_init(&iif->sendqueue); atomic_set(&iif->sendqlen, 0); diff --git a/drivers/isdn/hardware/avm/avmcard.h b/drivers/isdn/hardware/avm/avmcard.h index d964f07e4a56..a70e8854461d 100644 --- a/drivers/isdn/hardware/avm/avmcard.h +++ b/drivers/isdn/hardware/avm/avmcard.h @@ -556,8 +556,7 @@ u16 b1_send_message(struct capi_ctr *ctrl, struct sk_buff *skb); void b1_parse_version(avmctrl_info *card); irqreturn_t b1_interrupt(int interrupt, void *devptr); -int b1ctl_read_proc(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *ctrl); +extern const struct file_operations b1ctl_proc_fops; avmcard_dmainfo *avmcard_dma_alloc(char *name, struct pci_dev *, long rsize, long ssize); @@ -577,7 +576,6 @@ void b1dma_register_appl(struct capi_ctr *ctrl, capi_register_params *rp); void b1dma_release_appl(struct capi_ctr *ctrl, u16 appl); u16 b1dma_send_message(struct capi_ctr *ctrl, struct sk_buff *skb); -int b1dmactl_read_proc(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *ctrl); +extern const struct file_operations b1dmactl_proc_fops; #endif /* _AVMCARD_H_ */ diff --git a/drivers/isdn/hardware/avm/b1.c b/drivers/isdn/hardware/avm/b1.c index a7c0083e78a7..c38fa0f4c729 100644 --- a/drivers/isdn/hardware/avm/b1.c +++ b/drivers/isdn/hardware/avm/b1.c @@ -12,6 +12,8 @@ #include #include #include +#include +#include #include #include #include @@ -634,18 +636,17 @@ irqreturn_t b1_interrupt(int interrupt, void *devptr) } /* ------------------------------------------------------------- */ -int b1ctl_read_proc(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *ctrl) +static int b1ctl_proc_show(struct seq_file *m, void *v) { + struct capi_ctr *ctrl = m->private; avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata); avmcard *card = cinfo->card; u8 flag; - int len = 0; char *s; - len += sprintf(page+len, "%-16s %s\n", "name", 
card->name); - len += sprintf(page+len, "%-16s 0x%x\n", "io", card->port); - len += sprintf(page+len, "%-16s %d\n", "irq", card->irq); + seq_printf(m, "%-16s %s\n", "name", card->name); + seq_printf(m, "%-16s 0x%x\n", "io", card->port); + seq_printf(m, "%-16s %d\n", "irq", card->irq); switch (card->cardtype) { case avm_b1isa: s = "B1 ISA"; break; case avm_b1pci: s = "B1 PCI"; break; @@ -658,20 +659,20 @@ int b1ctl_read_proc(char *page, char **start, off_t off, case avm_c2: s = "C2"; break; default: s = "???"; break; } - len += sprintf(page+len, "%-16s %s\n", "type", s); + seq_printf(m, "%-16s %s\n", "type", s); if (card->cardtype == avm_t1isa) - len += sprintf(page+len, "%-16s %d\n", "cardnr", card->cardnr); + seq_printf(m, "%-16s %d\n", "cardnr", card->cardnr); if ((s = cinfo->version[VER_DRIVER]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_driver", s); + seq_printf(m, "%-16s %s\n", "ver_driver", s); if ((s = cinfo->version[VER_CARDTYPE]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_cardtype", s); + seq_printf(m, "%-16s %s\n", "ver_cardtype", s); if ((s = cinfo->version[VER_SERIAL]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_serial", s); + seq_printf(m, "%-16s %s\n", "ver_serial", s); if (card->cardtype != avm_m1) { flag = ((u8 *)(ctrl->profile.manu))[3]; if (flag) - len += sprintf(page+len, "%-16s%s%s%s%s%s%s%s\n", + seq_printf(m, "%-16s%s%s%s%s%s%s%s\n", "protocol", (flag & 0x01) ? " DSS1" : "", (flag & 0x02) ? " CT1" : "", @@ -685,7 +686,7 @@ int b1ctl_read_proc(char *page, char **start, off_t off, if (card->cardtype != avm_m1) { flag = ((u8 *)(ctrl->profile.manu))[5]; if (flag) - len += sprintf(page+len, "%-16s%s%s%s%s\n", + seq_printf(m, "%-16s%s%s%s%s\n", "linetype", (flag & 0x01) ? " point to point" : "", (flag & 0x02) ? " point to multipoint" : "", @@ -693,16 +694,25 @@ int b1ctl_read_proc(char *page, char **start, off_t off, (flag & 0x04) ? " leased line with D-channel" : "" ); } - len += sprintf(page+len, "%-16s %s\n", "cardname", cinfo->cardname); - - if (off+count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len-off) ? 
count : len-off); + seq_printf(m, "%-16s %s\n", "cardname", cinfo->cardname); + + return 0; +} + +static int b1ctl_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, b1ctl_proc_show, PDE(inode)->data); } +const struct file_operations b1ctl_proc_fops = { + .owner = THIS_MODULE, + .open = b1ctl_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +EXPORT_SYMBOL(b1ctl_proc_fops); + /* ------------------------------------------------------------- */ #ifdef CONFIG_PCI @@ -781,8 +791,6 @@ EXPORT_SYMBOL(b1_send_message); EXPORT_SYMBOL(b1_parse_version); EXPORT_SYMBOL(b1_interrupt); -EXPORT_SYMBOL(b1ctl_read_proc); - static int __init b1_init(void) { char *p; diff --git a/drivers/isdn/hardware/avm/b1dma.c b/drivers/isdn/hardware/avm/b1dma.c index 0e84aaae43fd..124550d0dbf3 100644 --- a/drivers/isdn/hardware/avm/b1dma.c +++ b/drivers/isdn/hardware/avm/b1dma.c @@ -11,6 +11,8 @@ #include #include +#include +#include #include #include #include @@ -855,21 +857,20 @@ u16 b1dma_send_message(struct capi_ctr *ctrl, struct sk_buff *skb) /* ------------------------------------------------------------- */ -int b1dmactl_read_proc(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *ctrl) +static int b1dmactl_proc_show(struct seq_file *m, void *v) { + struct capi_ctr *ctrl = m->private; avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata); avmcard *card = cinfo->card; u8 flag; - int len = 0; char *s; u32 txoff, txlen, rxoff, rxlen, csr; unsigned long flags; - len += sprintf(page+len, "%-16s %s\n", "name", card->name); - len += sprintf(page+len, "%-16s 0x%x\n", "io", card->port); - len += sprintf(page+len, "%-16s %d\n", "irq", card->irq); - len += sprintf(page+len, "%-16s 0x%lx\n", "membase", card->membase); + seq_printf(m, "%-16s %s\n", "name", card->name); + seq_printf(m, "%-16s 0x%x\n", "io", card->port); + seq_printf(m, "%-16s %d\n", "irq", card->irq); + seq_printf(m, "%-16s 0x%lx\n", "membase", card->membase); switch (card->cardtype) { case avm_b1isa: s = "B1 ISA"; break; case avm_b1pci: s = "B1 PCI"; break; @@ -882,18 +883,18 @@ int b1dmactl_read_proc(char *page, char **start, off_t off, case avm_c2: s = "C2"; break; default: s = "???"; break; } - len += sprintf(page+len, "%-16s %s\n", "type", s); + seq_printf(m, "%-16s %s\n", "type", s); if ((s = cinfo->version[VER_DRIVER]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_driver", s); + seq_printf(m, "%-16s %s\n", "ver_driver", s); if ((s = cinfo->version[VER_CARDTYPE]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_cardtype", s); + seq_printf(m, "%-16s %s\n", "ver_cardtype", s); if ((s = cinfo->version[VER_SERIAL]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_serial", s); + seq_printf(m, "%-16s %s\n", "ver_serial", s); if (card->cardtype != avm_m1) { flag = ((u8 *)(ctrl->profile.manu))[3]; if (flag) - len += sprintf(page+len, "%-16s%s%s%s%s%s%s%s\n", + seq_printf(m, "%-16s%s%s%s%s%s%s%s\n", "protocol", (flag & 0x01) ? " DSS1" : "", (flag & 0x02) ? " CT1" : "", @@ -907,7 +908,7 @@ int b1dmactl_read_proc(char *page, char **start, off_t off, if (card->cardtype != avm_m1) { flag = ((u8 *)(ctrl->profile.manu))[5]; if (flag) - len += sprintf(page+len, "%-16s%s%s%s%s\n", + seq_printf(m, "%-16s%s%s%s%s\n", "linetype", (flag & 0x01) ? " point to point" : "", (flag & 0x02) ? " point to multipoint" : "", @@ -915,7 +916,7 @@ int b1dmactl_read_proc(char *page, char **start, off_t off, (flag & 0x04) ? 
" leased line with D-channel" : "" ); } - len += sprintf(page+len, "%-16s %s\n", "cardname", cinfo->cardname); + seq_printf(m, "%-16s %s\n", "cardname", cinfo->cardname); spin_lock_irqsave(&card->lock, flags); @@ -930,27 +931,30 @@ int b1dmactl_read_proc(char *page, char **start, off_t off, spin_unlock_irqrestore(&card->lock, flags); - len += sprintf(page+len, "%-16s 0x%lx\n", - "csr (cached)", (unsigned long)card->csr); - len += sprintf(page+len, "%-16s 0x%lx\n", - "csr", (unsigned long)csr); - len += sprintf(page+len, "%-16s %lu\n", - "txoff", (unsigned long)txoff); - len += sprintf(page+len, "%-16s %lu\n", - "txlen", (unsigned long)txlen); - len += sprintf(page+len, "%-16s %lu\n", - "rxoff", (unsigned long)rxoff); - len += sprintf(page+len, "%-16s %lu\n", - "rxlen", (unsigned long)rxlen); - - if (off+count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len-off) ? count : len-off); + seq_printf(m, "%-16s 0x%lx\n", "csr (cached)", (unsigned long)card->csr); + seq_printf(m, "%-16s 0x%lx\n", "csr", (unsigned long)csr); + seq_printf(m, "%-16s %lu\n", "txoff", (unsigned long)txoff); + seq_printf(m, "%-16s %lu\n", "txlen", (unsigned long)txlen); + seq_printf(m, "%-16s %lu\n", "rxoff", (unsigned long)rxoff); + seq_printf(m, "%-16s %lu\n", "rxlen", (unsigned long)rxlen); + + return 0; +} + +static int b1dmactl_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, b1dmactl_proc_show, PDE(inode)->data); } +const struct file_operations b1dmactl_proc_fops = { + .owner = THIS_MODULE, + .open = b1dmactl_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; +EXPORT_SYMBOL(b1dmactl_proc_fops); + /* ------------------------------------------------------------- */ EXPORT_SYMBOL(b1dma_reset); @@ -963,7 +967,6 @@ EXPORT_SYMBOL(b1dma_reset_ctr); EXPORT_SYMBOL(b1dma_register_appl); EXPORT_SYMBOL(b1dma_release_appl); EXPORT_SYMBOL(b1dma_send_message); -EXPORT_SYMBOL(b1dmactl_read_proc); static int __init b1dma_init(void) { diff --git a/drivers/isdn/hardware/avm/b1isa.c b/drivers/isdn/hardware/avm/b1isa.c index 6461a32bc838..ff5390546f92 100644 --- a/drivers/isdn/hardware/avm/b1isa.c +++ b/drivers/isdn/hardware/avm/b1isa.c @@ -121,7 +121,7 @@ static int b1isa_probe(struct pci_dev *pdev) cinfo->capi_ctrl.load_firmware = b1_load_firmware; cinfo->capi_ctrl.reset_ctr = b1_reset_ctr; cinfo->capi_ctrl.procinfo = b1isa_procinfo; - cinfo->capi_ctrl.ctr_read_proc = b1ctl_read_proc; + cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/avm/b1pci.c b/drivers/isdn/hardware/avm/b1pci.c index 5b314a2c4049..c97e4315079d 100644 --- a/drivers/isdn/hardware/avm/b1pci.c +++ b/drivers/isdn/hardware/avm/b1pci.c @@ -112,7 +112,7 @@ static int b1pci_probe(struct capicardparams *p, struct pci_dev *pdev) cinfo->capi_ctrl.load_firmware = b1_load_firmware; cinfo->capi_ctrl.reset_ctr = b1_reset_ctr; cinfo->capi_ctrl.procinfo = b1pci_procinfo; - cinfo->capi_ctrl.ctr_read_proc = b1ctl_read_proc; + cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops; strcpy(cinfo->capi_ctrl.name, card->name); cinfo->capi_ctrl.owner = THIS_MODULE; @@ -251,7 +251,7 @@ static int b1pciv4_probe(struct capicardparams *p, struct pci_dev *pdev) cinfo->capi_ctrl.load_firmware = b1dma_load_firmware; cinfo->capi_ctrl.reset_ctr = b1dma_reset_ctr; cinfo->capi_ctrl.procinfo = b1pciv4_procinfo; - cinfo->capi_ctrl.ctr_read_proc = b1dmactl_read_proc; + 
cinfo->capi_ctrl.proc_fops = &b1dmactl_proc_fops; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/avm/b1pcmcia.c b/drivers/isdn/hardware/avm/b1pcmcia.c index 7740403b40e1..d6391e0afeea 100644 --- a/drivers/isdn/hardware/avm/b1pcmcia.c +++ b/drivers/isdn/hardware/avm/b1pcmcia.c @@ -108,7 +108,7 @@ static int b1pcmcia_add_card(unsigned int port, unsigned irq, cinfo->capi_ctrl.load_firmware = b1_load_firmware; cinfo->capi_ctrl.reset_ctr = b1_reset_ctr; cinfo->capi_ctrl.procinfo = b1pcmcia_procinfo; - cinfo->capi_ctrl.ctr_read_proc = b1ctl_read_proc; + cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/avm/c4.c b/drivers/isdn/hardware/avm/c4.c index 6833301a45fc..de6e6b311819 100644 --- a/drivers/isdn/hardware/avm/c4.c +++ b/drivers/isdn/hardware/avm/c4.c @@ -11,6 +11,8 @@ #include #include +#include +#include #include #include #include @@ -1062,19 +1064,18 @@ static char *c4_procinfo(struct capi_ctr *ctrl) return cinfo->infobuf; } -static int c4_read_proc(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *ctrl) +static int c4_proc_show(struct seq_file *m, void *v) { + struct capi_ctr *ctrl = m->private; avmctrl_info *cinfo = (avmctrl_info *)(ctrl->driverdata); avmcard *card = cinfo->card; u8 flag; - int len = 0; char *s; - len += sprintf(page+len, "%-16s %s\n", "name", card->name); - len += sprintf(page+len, "%-16s 0x%x\n", "io", card->port); - len += sprintf(page+len, "%-16s %d\n", "irq", card->irq); - len += sprintf(page+len, "%-16s 0x%lx\n", "membase", card->membase); + seq_printf(m, "%-16s %s\n", "name", card->name); + seq_printf(m, "%-16s 0x%x\n", "io", card->port); + seq_printf(m, "%-16s %d\n", "irq", card->irq); + seq_printf(m, "%-16s 0x%lx\n", "membase", card->membase); switch (card->cardtype) { case avm_b1isa: s = "B1 ISA"; break; case avm_b1pci: s = "B1 PCI"; break; @@ -1087,18 +1088,18 @@ static int c4_read_proc(char *page, char **start, off_t off, case avm_c2: s = "C2"; break; default: s = "???"; break; } - len += sprintf(page+len, "%-16s %s\n", "type", s); + seq_printf(m, "%-16s %s\n", "type", s); if ((s = cinfo->version[VER_DRIVER]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_driver", s); + seq_printf(m, "%-16s %s\n", "ver_driver", s); if ((s = cinfo->version[VER_CARDTYPE]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_cardtype", s); + seq_printf(m, "%-16s %s\n", "ver_cardtype", s); if ((s = cinfo->version[VER_SERIAL]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_serial", s); + seq_printf(m, "%-16s %s\n", "ver_serial", s); if (card->cardtype != avm_m1) { flag = ((u8 *)(ctrl->profile.manu))[3]; if (flag) - len += sprintf(page+len, "%-16s%s%s%s%s%s%s%s\n", + seq_printf(m, "%-16s%s%s%s%s%s%s%s\n", "protocol", (flag & 0x01) ? " DSS1" : "", (flag & 0x02) ? " CT1" : "", @@ -1112,7 +1113,7 @@ static int c4_read_proc(char *page, char **start, off_t off, if (card->cardtype != avm_m1) { flag = ((u8 *)(ctrl->profile.manu))[5]; if (flag) - len += sprintf(page+len, "%-16s%s%s%s%s\n", + seq_printf(m, "%-16s%s%s%s%s\n", "linetype", (flag & 0x01) ? " point to point" : "", (flag & 0x02) ? " point to multipoint" : "", @@ -1120,16 +1121,24 @@ static int c4_read_proc(char *page, char **start, off_t off, (flag & 0x04) ? 
" leased line with D-channel" : "" ); } - len += sprintf(page+len, "%-16s %s\n", "cardname", cinfo->cardname); - - if (off+count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len-off) ? count : len-off); + seq_printf(m, "%-16s %s\n", "cardname", cinfo->cardname); + + return 0; } +static int c4_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, c4_proc_show, PDE(inode)->data); +} + +static const struct file_operations c4_proc_fops = { + .owner = THIS_MODULE, + .open = c4_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* ------------------------------------------------------------- */ static int c4_add_card(struct capicardparams *p, struct pci_dev *dev, @@ -1201,7 +1210,7 @@ static int c4_add_card(struct capicardparams *p, struct pci_dev *dev, cinfo->capi_ctrl.load_firmware = c4_load_firmware; cinfo->capi_ctrl.reset_ctr = c4_reset_ctr; cinfo->capi_ctrl.procinfo = c4_procinfo; - cinfo->capi_ctrl.ctr_read_proc = c4_read_proc; + cinfo->capi_ctrl.proc_fops = &c4_proc_fops; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/avm/t1isa.c b/drivers/isdn/hardware/avm/t1isa.c index 1c53fd49adb6..baeeb3c2a3ee 100644 --- a/drivers/isdn/hardware/avm/t1isa.c +++ b/drivers/isdn/hardware/avm/t1isa.c @@ -429,7 +429,7 @@ static int t1isa_probe(struct pci_dev *pdev, int cardnr) cinfo->capi_ctrl.load_firmware = t1isa_load_firmware; cinfo->capi_ctrl.reset_ctr = t1isa_reset_ctr; cinfo->capi_ctrl.procinfo = t1isa_procinfo; - cinfo->capi_ctrl.ctr_read_proc = b1ctl_read_proc; + cinfo->capi_ctrl.proc_fops = &b1ctl_proc_fops; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/avm/t1pci.c b/drivers/isdn/hardware/avm/t1pci.c index e6d298d75146..5a3f83098018 100644 --- a/drivers/isdn/hardware/avm/t1pci.c +++ b/drivers/isdn/hardware/avm/t1pci.c @@ -119,7 +119,7 @@ static int t1pci_add_card(struct capicardparams *p, struct pci_dev *pdev) cinfo->capi_ctrl.load_firmware = b1dma_load_firmware; cinfo->capi_ctrl.reset_ctr = b1dma_reset_ctr; cinfo->capi_ctrl.procinfo = t1pci_procinfo; - cinfo->capi_ctrl.ctr_read_proc = b1dmactl_read_proc; + cinfo->capi_ctrl.proc_fops = &b1dmactl_proc_fops; strcpy(cinfo->capi_ctrl.name, card->name); retval = attach_capi_ctr(&cinfo->capi_ctrl); diff --git a/drivers/isdn/hardware/eicon/capimain.c b/drivers/isdn/hardware/eicon/capimain.c index 98fcdfc7ca55..0f073cd73763 100644 --- a/drivers/isdn/hardware/eicon/capimain.c +++ b/drivers/isdn/hardware/eicon/capimain.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include "os_capi.h" @@ -75,25 +76,32 @@ void diva_os_free_message_buffer(diva_os_message_buffer_s * dmb) /* * proc function for controller info */ -static int diva_ctl_read_proc(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *ctrl) +static int diva_ctl_proc_show(struct seq_file *m, void *v) { + struct capi_ctr *ctrl = m->private; diva_card *card = (diva_card *) ctrl->driverdata; - int len = 0; - - len += sprintf(page + len, "%s\n", ctrl->name); - len += sprintf(page + len, "Serial No. : %s\n", ctrl->serial); - len += sprintf(page + len, "Id : %d\n", card->Id); - len += sprintf(page + len, "Channels : %d\n", card->d.channels); - - if (off + count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len - off) ? 
count : len - off); + + seq_printf(m, "%s\n", ctrl->name); + seq_printf(m, "Serial No. : %s\n", ctrl->serial); + seq_printf(m, "Id : %d\n", card->Id); + seq_printf(m, "Channels : %d\n", card->d.channels); + + return 0; +} + +static int diva_ctl_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, diva_ctl_proc_show, NULL); } +static const struct file_operations diva_ctl_proc_fops = { + .owner = THIS_MODULE, + .open = diva_ctl_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /* * set additional os settings in capi_ctr struct */ @@ -102,7 +110,7 @@ void diva_os_set_controller_struct(struct capi_ctr *ctrl) ctrl->driver_name = DRIVERLNAME; ctrl->load_firmware = NULL; ctrl->reset_ctr = NULL; - ctrl->ctr_read_proc = diva_ctl_read_proc; + ctrl->proc_fops = &diva_ctl_proc_fops; ctrl->owner = THIS_MODULE; } diff --git a/drivers/isdn/hardware/eicon/diva_didd.c b/drivers/isdn/hardware/eicon/diva_didd.c index 993b14cf1778..5d06a7437824 100644 --- a/drivers/isdn/hardware/eicon/diva_didd.c +++ b/drivers/isdn/hardware/eicon/diva_didd.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "platform.h" @@ -62,39 +63,41 @@ static char *getrev(const char *revision) return rev; } -static int -proc_read(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int divadidd_proc_show(struct seq_file *m, void *v) { - int len = 0; char tmprev[32]; strcpy(tmprev, main_revision); - len += sprintf(page + len, "%s\n", DRIVERNAME); - len += sprintf(page + len, "name : %s\n", DRIVERLNAME); - len += sprintf(page + len, "release : %s\n", DRIVERRELEASE_DIDD); - len += sprintf(page + len, "build : %s(%s)\n", + seq_printf(m, "%s\n", DRIVERNAME); + seq_printf(m, "name : %s\n", DRIVERLNAME); + seq_printf(m, "release : %s\n", DRIVERRELEASE_DIDD); + seq_printf(m, "build : %s(%s)\n", diva_didd_common_code_build, DIVA_BUILD); - len += sprintf(page + len, "revision : %s\n", getrev(tmprev)); - - if (off + count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len - off) ? 
count : len - off); + seq_printf(m, "revision : %s\n", getrev(tmprev)); + + return 0; } +static int divadidd_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, divadidd_proc_show, NULL); +} + +static const struct file_operations divadidd_proc_fops = { + .owner = THIS_MODULE, + .open = divadidd_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int DIVA_INIT_FUNCTION create_proc(void) { proc_net_eicon = proc_mkdir("eicon", init_net.proc_net); if (proc_net_eicon) { - if ((proc_didd = - create_proc_entry(DRIVERLNAME, S_IFREG | S_IRUGO, - proc_net_eicon))) { - proc_didd->read_proc = proc_read; - } + proc_didd = proc_create(DRIVERLNAME, S_IRUGO, proc_net_eicon, + &divadidd_proc_fops); return (1); } return (0); diff --git a/drivers/isdn/hardware/eicon/divasi.c b/drivers/isdn/hardware/eicon/divasi.c index 69e71ebe7841..f577719ab3fa 100644 --- a/drivers/isdn/hardware/eicon/divasi.c +++ b/drivers/isdn/hardware/eicon/divasi.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -86,39 +87,40 @@ static void diva_um_timer_function(unsigned long data); extern struct proc_dir_entry *proc_net_eicon; static struct proc_dir_entry *um_idi_proc_entry = NULL; -static int -um_idi_proc_read(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int um_idi_proc_show(struct seq_file *m, void *v) { - int len = 0; char tmprev[32]; - len += sprintf(page + len, "%s\n", DRIVERNAME); - len += sprintf(page + len, "name : %s\n", DRIVERLNAME); - len += sprintf(page + len, "release : %s\n", DRIVERRELEASE_IDI); + seq_printf(m, "%s\n", DRIVERNAME); + seq_printf(m, "name : %s\n", DRIVERLNAME); + seq_printf(m, "release : %s\n", DRIVERRELEASE_IDI); strcpy(tmprev, main_revision); - len += sprintf(page + len, "revision : %s\n", getrev(tmprev)); - len += sprintf(page + len, "build : %s\n", DIVA_BUILD); - len += sprintf(page + len, "major : %d\n", major); - - if (off + count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len - off) ? 
count : len - off); + seq_printf(m, "revision : %s\n", getrev(tmprev)); + seq_printf(m, "build : %s\n", DIVA_BUILD); + seq_printf(m, "major : %d\n", major); + + return 0; +} + +static int um_idi_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, um_idi_proc_show, NULL); } +static const struct file_operations um_idi_proc_fops = { + .owner = THIS_MODULE, + .open = um_idi_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + static int DIVA_INIT_FUNCTION create_um_idi_proc(void) { - um_idi_proc_entry = create_proc_entry(DRIVERLNAME, - S_IFREG | S_IRUGO | S_IWUSR, - proc_net_eicon); + um_idi_proc_entry = proc_create(DRIVERLNAME, S_IRUGO, proc_net_eicon, + &um_idi_proc_fops); if (!um_idi_proc_entry) return (0); - - um_idi_proc_entry->read_proc = um_idi_proc_read; - return (1); } diff --git a/drivers/isdn/hardware/eicon/divasproc.c b/drivers/isdn/hardware/eicon/divasproc.c index 040827288ec9..46d44a942624 100644 --- a/drivers/isdn/hardware/eicon/divasproc.c +++ b/drivers/isdn/hardware/eicon/divasproc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -141,14 +142,10 @@ void remove_divas_proc(void) } } -/* -** write group_optimization -*/ -static int -write_grp_opt(struct file *file, const char __user *buffer, unsigned long count, - void *data) +static ssize_t grp_opt_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { - diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data; + diva_os_xdi_adapter_t *a = PDE(file->f_path.dentry->d_inode)->data; PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1]; if ((count == 1) || (count == 2)) { @@ -172,14 +169,10 @@ write_grp_opt(struct file *file, const char __user *buffer, unsigned long count, return (-EINVAL); } -/* -** write dynamic_l1_down -*/ -static int -write_d_l1_down(struct file *file, const char __user *buffer, unsigned long count, - void *data) +static ssize_t d_l1_down_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { - diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data; + diva_os_xdi_adapter_t *a = PDE(file->f_path.dentry->d_inode)->data; PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1]; if ((count == 1) || (count == 2)) { @@ -203,63 +196,62 @@ write_d_l1_down(struct file *file, const char __user *buffer, unsigned long coun return (-EINVAL); } - -/* -** read dynamic_l1_down -*/ -static int -read_d_l1_down(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int d_l1_down_proc_show(struct seq_file *m, void *v) { - int len = 0; - diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data; + diva_os_xdi_adapter_t *a = m->private; PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1]; - len += sprintf(page + len, "%s\n", + seq_printf(m, "%s\n", (IoAdapter->capi_cfg. cfg_1 & DIVA_XDI_CAPI_CFG_1_DYNAMIC_L1_ON) ? "1" : "0"); + return 0; +} - if (off + count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len - off) ? 
count : len - off); +static int d_l1_down_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, d_l1_down_proc_show, PDE(inode)->data); } -/* -** read group_optimization -*/ -static int -read_grp_opt(char *page, char **start, off_t off, int count, int *eof, - void *data) +static const struct file_operations d_l1_down_proc_fops = { + .owner = THIS_MODULE, + .open = d_l1_down_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = d_l1_down_proc_write, +}; + +static int grp_opt_proc_show(struct seq_file *m, void *v) { - int len = 0; - diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data; + diva_os_xdi_adapter_t *a = m->private; PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1]; - len += sprintf(page + len, "%s\n", + seq_printf(m, "%s\n", (IoAdapter->capi_cfg. cfg_1 & DIVA_XDI_CAPI_CFG_1_GROUP_POPTIMIZATION_ON) ? "1" : "0"); + return 0; +} - if (off + count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len - off) ? count : len - off); +static int grp_opt_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, grp_opt_proc_show, PDE(inode)->data); } -/* -** info write -*/ -static int -info_write(struct file *file, const char __user *buffer, unsigned long count, - void *data) +static const struct file_operations grp_opt_proc_fops = { + .owner = THIS_MODULE, + .open = grp_opt_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = grp_opt_proc_write, +}; + +static ssize_t info_proc_write(struct file *file, const char __user *buffer, + size_t count, loff_t *pos) { - diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data; + diva_os_xdi_adapter_t *a = PDE(file->f_path.dentry->d_inode)->data; PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1]; char c[4]; @@ -277,63 +269,46 @@ info_write(struct file *file, const char __user *buffer, unsigned long count, return (-EINVAL); } -/* -** info read -*/ -static int -info_read(char *page, char **start, off_t off, int count, int *eof, - void *data) +static int info_proc_show(struct seq_file *m, void *v) { int i = 0; - int len = 0; char *p; char tmpser[16]; - diva_os_xdi_adapter_t *a = (diva_os_xdi_adapter_t *) data; + diva_os_xdi_adapter_t *a = m->private; PISDN_ADAPTER IoAdapter = IoAdapters[a->controller - 1]; - len += - sprintf(page + len, "Name : %s\n", - IoAdapter->Properties.Name); - len += sprintf(page + len, "DSP state : %08x\n", a->dsp_mask); - len += sprintf(page + len, "Channels : %02d\n", - IoAdapter->Properties.Channels); - len += sprintf(page + len, "E. max/used : %03d/%03d\n", + seq_printf(m, "Name : %s\n", IoAdapter->Properties.Name); + seq_printf(m, "DSP state : %08x\n", a->dsp_mask); + seq_printf(m, "Channels : %02d\n", IoAdapter->Properties.Channels); + seq_printf(m, "E. 
max/used : %03d/%03d\n", IoAdapter->e_max, IoAdapter->e_count); diva_get_vserial_number(IoAdapter, tmpser); - len += sprintf(page + len, "Serial : %s\n", tmpser); - len += - sprintf(page + len, "IRQ : %d\n", - IoAdapter->irq_info.irq_nr); - len += sprintf(page + len, "CardIndex : %d\n", a->CardIndex); - len += sprintf(page + len, "CardOrdinal : %d\n", a->CardOrdinal); - len += sprintf(page + len, "Controller : %d\n", a->controller); - len += sprintf(page + len, "Bus-Type : %s\n", + seq_printf(m, "Serial : %s\n", tmpser); + seq_printf(m, "IRQ : %d\n", IoAdapter->irq_info.irq_nr); + seq_printf(m, "CardIndex : %d\n", a->CardIndex); + seq_printf(m, "CardOrdinal : %d\n", a->CardOrdinal); + seq_printf(m, "Controller : %d\n", a->controller); + seq_printf(m, "Bus-Type : %s\n", (a->Bus == DIVAS_XDI_ADAPTER_BUS_ISA) ? "ISA" : "PCI"); - len += sprintf(page + len, "Port-Name : %s\n", a->port_name); + seq_printf(m, "Port-Name : %s\n", a->port_name); if (a->Bus == DIVAS_XDI_ADAPTER_BUS_PCI) { - len += - sprintf(page + len, "PCI-bus : %d\n", - a->resources.pci.bus); - len += - sprintf(page + len, "PCI-func : %d\n", - a->resources.pci.func); + seq_printf(m, "PCI-bus : %d\n", a->resources.pci.bus); + seq_printf(m, "PCI-func : %d\n", a->resources.pci.func); for (i = 0; i < 8; i++) { if (a->resources.pci.bar[i]) { - len += - sprintf(page + len, + seq_printf(m, "Mem / I/O %d : 0x%x / mapped : 0x%lx", i, a->resources.pci.bar[i], (unsigned long) a->resources. pci.addr[i]); if (a->resources.pci.length[i]) { - len += - sprintf(page + len, + seq_printf(m, " / length : %d", a->resources.pci. length[i]); } - len += sprintf(page + len, "\n"); + seq_putc(m, '\n'); } } } @@ -353,16 +328,25 @@ info_read(char *page, char **start, off_t off, int count, int *eof, } else { p = "ready"; } - len += sprintf(page + len, "State : %s\n", p); + seq_printf(m, "State : %s\n", p); - if (off + count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len - off) ? 
count : len - off); + return 0; +} + +static int info_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, info_proc_show, PDE(inode)->data); } +static const struct file_operations info_proc_fops = { + .owner = THIS_MODULE, + .open = info_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, + .write = info_proc_write, +}; + /* ** adapter proc init/de-init */ @@ -380,28 +364,20 @@ int create_adapter_proc(diva_os_xdi_adapter_t * a) return (0); a->proc_adapter_dir = (void *) de; - if (!(pe = - create_proc_entry(info_proc_name, S_IFREG | S_IRUGO | S_IWUSR, de))) + pe = proc_create_data(info_proc_name, S_IRUGO | S_IWUSR, de, + &info_proc_fops, a); + if (!pe) return (0); a->proc_info = (void *) pe; - pe->write_proc = info_write; - pe->read_proc = info_read; - pe->data = a; - if ((pe = create_proc_entry(grp_opt_proc_name, - S_IFREG | S_IRUGO | S_IWUSR, de))) { + pe = proc_create_data(grp_opt_proc_name, S_IRUGO | S_IWUSR, de, + &grp_opt_proc_fops, a); + if (pe) a->proc_grp_opt = (void *) pe; - pe->write_proc = write_grp_opt; - pe->read_proc = read_grp_opt; - pe->data = a; - } - if ((pe = create_proc_entry(d_l1_down_proc_name, - S_IFREG | S_IRUGO | S_IWUSR, de))) { + pe = proc_create_data(d_l1_down_proc_name, S_IRUGO | S_IWUSR, de, + &d_l1_down_proc_fops, a); + if (pe) a->proc_d_l1_down = (void *) pe; - pe->write_proc = write_d_l1_down; - pe->read_proc = read_d_l1_down; - pe->data = a; - } DBG_TRC(("proc entry %s created", tmp)); diff --git a/drivers/isdn/hysdn/hycapi.c b/drivers/isdn/hysdn/hycapi.c index 4ffaa14b9fc4..fe874afa4f81 100644 --- a/drivers/isdn/hysdn/hycapi.c +++ b/drivers/isdn/hysdn/hycapi.c @@ -11,6 +11,8 @@ */ #include +#include +#include #include #include #include @@ -432,26 +434,16 @@ static u16 hycapi_send_message(struct capi_ctr *ctrl, struct sk_buff *skb) return retval; } -/********************************************************************* -hycapi_read_proc - -Informations provided in the /proc/capi-entries. 
- -*********************************************************************/ - -static int hycapi_read_proc(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *ctrl) +static int hycapi_proc_show(struct seq_file *m, void *v) { + struct capi_ctr *ctrl = m->private; hycapictrl_info *cinfo = (hycapictrl_info *)(ctrl->driverdata); hysdn_card *card = cinfo->card; - int len = 0; char *s; -#ifdef HYCAPI_PRINTFNAMES - printk(KERN_NOTICE "hycapi_read_proc\n"); -#endif - len += sprintf(page+len, "%-16s %s\n", "name", cinfo->cardname); - len += sprintf(page+len, "%-16s 0x%x\n", "io", card->iobase); - len += sprintf(page+len, "%-16s %d\n", "irq", card->irq); + + seq_printf(m, "%-16s %s\n", "name", cinfo->cardname); + seq_printf(m, "%-16s 0x%x\n", "io", card->iobase); + seq_printf(m, "%-16s %d\n", "irq", card->irq); switch (card->brdtype) { case BD_PCCARD: s = "HYSDN Hycard"; break; @@ -461,24 +453,32 @@ static int hycapi_read_proc(char *page, char **start, off_t off, case BD_PLEXUS: s = "HYSDN Plexus30"; break; default: s = "???"; break; } - len += sprintf(page+len, "%-16s %s\n", "type", s); + seq_printf(m, "%-16s %s\n", "type", s); if ((s = cinfo->version[VER_DRIVER]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_driver", s); + seq_printf(m, "%-16s %s\n", "ver_driver", s); if ((s = cinfo->version[VER_CARDTYPE]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_cardtype", s); + seq_printf(m, "%-16s %s\n", "ver_cardtype", s); if ((s = cinfo->version[VER_SERIAL]) != NULL) - len += sprintf(page+len, "%-16s %s\n", "ver_serial", s); + seq_printf(m, "%-16s %s\n", "ver_serial", s); - len += sprintf(page+len, "%-16s %s\n", "cardname", cinfo->cardname); + seq_printf(m, "%-16s %s\n", "cardname", cinfo->cardname); - if (off+count >= len) - *eof = 1; - if (len < off) - return 0; - *start = page + off; - return ((count < len-off) ? 
count : len-off); + return 0; +} + +static int hycapi_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, hycapi_proc_show, PDE(inode)->data); } +static const struct file_operations hycapi_proc_fops = { + .owner = THIS_MODULE, + .open = hycapi_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + /************************************************************** hycapi_load_firmware @@ -774,7 +774,7 @@ hycapi_capi_create(hysdn_card *card) ctrl->load_firmware = hycapi_load_firmware; ctrl->reset_ctr = hycapi_reset_ctr; ctrl->procinfo = hycapi_procinfo; - ctrl->ctr_read_proc = hycapi_read_proc; + ctrl->proc_fops = &hycapi_proc_fops; strcpy(ctrl->name, cinfo->cardname); ctrl->owner = THIS_MODULE; diff --git a/include/linux/isdn/capilli.h b/include/linux/isdn/capilli.h index 7acb87a44872..d3e5e9da0c82 100644 --- a/include/linux/isdn/capilli.h +++ b/include/linux/isdn/capilli.h @@ -50,8 +50,7 @@ struct capi_ctr { u16 (*send_message)(struct capi_ctr *, struct sk_buff *skb); char *(*procinfo)(struct capi_ctr *); - int (*ctr_read_proc)(char *page, char **start, off_t off, - int count, int *eof, struct capi_ctr *card); + const struct file_operations *proc_fops; /* filled in before calling ready callback */ u8 manu[CAPI_MANUFACTURER_LEN]; /* CAPI_GET_MANUFACTURER */ diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 97f8d68d574d..3487cfe74aec 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -21,7 +21,8 @@ */ #include - +#include +#include #include #include #include @@ -516,33 +517,37 @@ static char *cmtp_procinfo(struct capi_ctr *ctrl) return "CAPI Message Transport Protocol"; } -static int cmtp_ctr_read_proc(char *page, char **start, off_t off, int count, int *eof, struct capi_ctr *ctrl) +static int cmtp_proc_show(struct seq_file *m, void *v) { + struct capi_ctr *ctrl = m->private; struct cmtp_session *session = ctrl->driverdata; struct cmtp_application *app; struct list_head *p, *n; - int len = 0; - len += sprintf(page + len, "%s\n\n", cmtp_procinfo(ctrl)); - len += sprintf(page + len, "addr %s\n", session->name); - len += sprintf(page + len, "ctrl %d\n", session->num); + seq_printf(m, "%s\n\n", cmtp_procinfo(ctrl)); + seq_printf(m, "addr %s\n", session->name); + seq_printf(m, "ctrl %d\n", session->num); list_for_each_safe(p, n, &session->applications) { app = list_entry(p, struct cmtp_application, list); - len += sprintf(page + len, "appl %d -> %d\n", app->appl, app->mapping); + seq_printf(m, "appl %d -> %d\n", app->appl, app->mapping); } - if (off + count >= len) - *eof = 1; - - if (len < off) - return 0; - - *start = page + off; + return 0; +} - return ((count < len - off) ? 
count : len - off); +static int cmtp_proc_open(struct inode *inode, struct file *file) +{ + return single_open(file, cmtp_proc_show, PDE(inode)->data); } +static const struct file_operations cmtp_proc_fops = { + .owner = THIS_MODULE, + .open = cmtp_proc_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; int cmtp_attach_device(struct cmtp_session *session) { @@ -582,7 +587,7 @@ int cmtp_attach_device(struct cmtp_session *session) session->ctrl.send_message = cmtp_send_message; session->ctrl.procinfo = cmtp_procinfo; - session->ctrl.ctr_read_proc = cmtp_ctr_read_proc; + session->ctrl.proc_fops = &cmtp_proc_fops; if (attach_capi_ctr(&session->ctrl) < 0) { BT_ERR("Can't attach new controller"); -- cgit v1.2.3-70-g09d2 From 6d125529c6cbfe570ce3bf9a0728548f087499da Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 24 Dec 2009 06:58:56 -0500 Subject: Fix ACC_MODE() for real commit 5300990c0370e804e49d9a59d928c5d53fb73487 had stepped on a rather nasty mess: definitions of ACC_MODE used to be different. Fixed the resulting breakage, converting them to variant that takes O_... value; all callers have that and it actually simplifies life (see tomoyo part of changes). Signed-off-by: Al Viro --- fs/namei.c | 2 +- include/linux/fs.h | 2 +- security/tomoyo/tomoyo.c | 7 +------ 3 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/fs/namei.c b/fs/namei.c index 1b26b1620664..d930f1856ed2 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1620,7 +1620,7 @@ struct file *do_filp_open(int dfd, const char *pathname, open_flag |= O_DSYNC; if (!acc_mode) - acc_mode = MAY_OPEN | ACC_MODE(flag); + acc_mode = MAY_OPEN | ACC_MODE(open_flag); /* O_TRUNC implies we need access checks for write permissions */ if (flag & O_TRUNC) diff --git a/include/linux/fs.h b/include/linux/fs.h index 9147ca88f253..b1bcb275b596 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2463,7 +2463,7 @@ int proc_nr_files(struct ctl_table *table, int write, int __init get_filesystem_list(char *buf); -#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE]) +#define ACC_MODE(x) ("\004\002\006\006"[(x)&O_ACCMODE]) #define OPEN_FMODE(flag) ((__force fmode_t)((flag + 1) & O_ACCMODE)) #endif /* __KERNEL__ */ diff --git a/security/tomoyo/tomoyo.c b/security/tomoyo/tomoyo.c index 8a00ade85166..2aceebf5f354 100644 --- a/security/tomoyo/tomoyo.c +++ b/security/tomoyo/tomoyo.c @@ -80,9 +80,8 @@ static int tomoyo_bprm_check_security(struct linux_binprm *bprm) return tomoyo_find_next_domain(bprm); /* * Read permission is checked against interpreters using next domain. - * '1' is the result of open_to_namei_flags(O_RDONLY). */ - return tomoyo_check_open_permission(domain, &bprm->file->f_path, 1); + return tomoyo_check_open_permission(domain, &bprm->file->f_path, O_RDONLY); } static int tomoyo_path_truncate(struct path *path, loff_t length, @@ -184,10 +183,6 @@ static int tomoyo_file_fcntl(struct file *file, unsigned int cmd, static int tomoyo_dentry_open(struct file *f, const struct cred *cred) { int flags = f->f_flags; - - if ((flags + 1) & O_ACCMODE) - flags++; - flags |= f->f_flags & (O_APPEND | O_TRUNC); /* Don't check read permission here if called from do_execve(). */ if (current->in_execve) return 0; -- cgit v1.2.3-70-g09d2 From 9d173fc5dfa8c1b4578b331ac7ff3ce8af27006e Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Thu, 14 Jan 2010 13:09:14 +0200 Subject: mac80211: fix mac80211.h documentation warnings There were some warnings about missing documentation and a missing reference. 
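The missing pieces are plain kernel-doc: scripts/kernel-doc expects an @name: line for every struct member and enumerator mentioned in a /** ... */ block, and the DocBook template needs a resolvable symbol after the !F directive. A generic sketch of the enum convention, with an invented enum that is not part of mac80211:

/**
 * enum example_smps_mode - example spatial multiplexing modes
 * @EXAMPLE_SMPS_AUTOMATIC: let the stack pick a mode
 * @EXAMPLE_SMPS_OFF: feature disabled
 * @EXAMPLE_SMPS_NUM_MODES: internal, number of modes; don't use
 *
 * Leaving any of these @-lines out produces the "no description found"
 * warnings this patch silences for the real enum.
 */
enum example_smps_mode {
	EXAMPLE_SMPS_AUTOMATIC,
	EXAMPLE_SMPS_OFF,
	EXAMPLE_SMPS_NUM_MODES,
};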
Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- Documentation/DocBook/mac80211.tmpl | 2 +- include/net/mac80211.h | 7 ++++++- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/Documentation/DocBook/mac80211.tmpl b/Documentation/DocBook/mac80211.tmpl index f3f37f141dbd..971d1c0c83e5 100644 --- a/Documentation/DocBook/mac80211.tmpl +++ b/Documentation/DocBook/mac80211.tmpl @@ -144,7 +144,7 @@ usage should require reading the full document. this though and the recommendation to allow only a single interface in STA mode at first! -!Finclude/net/mac80211.h ieee80211_if_init_conf +!Finclude/net/mac80211.h ieee80211_vif diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f313a3cbabda..bbfa4750092e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -107,6 +107,7 @@ enum ieee80211_max_queues { * 2^n-1 in the range 1..32767] * @cw_max: maximum contention window [like @cw_min] * @txop: maximum burst time in units of 32 usecs, 0 meaning disabled + * @uapsd: is U-APSD mode enabled for the queue */ struct ieee80211_tx_queue_params { u16 txop; @@ -608,7 +609,11 @@ enum ieee80211_conf_changed { /** * enum ieee80211_smps_mode - spatial multiplexing power save mode * - * @ + * @IEEE80211_SMPS_AUTOMATIC: automatic + * @IEEE80211_SMPS_OFF: off + * @IEEE80211_SMPS_STATIC: static + * @IEEE80211_SMPS_DYNAMIC: dynamic + * @IEEE80211_SMPS_NUM_MODES: internal, don't use */ enum ieee80211_smps_mode { IEEE80211_SMPS_AUTOMATIC, -- cgit v1.2.3-70-g09d2 From c99445b14054e0c4ed4715df1dad1fc608cbab46 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Thu, 14 Jan 2010 13:09:21 +0200 Subject: mac80211: improve powersave documentation There has been some confusion how drivers should implement powersave support. Improve the documentation a bit to make it more clear what drivers need to do. Also mention about U-APSD. Signed-off-by: Kalle Valo Signed-off-by: John W. Linville --- include/net/mac80211.h | 75 ++++++++++++++++++++++++++++++++++++-------------- 1 file changed, 54 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index bbfa4750092e..c90047de4428 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -569,7 +569,13 @@ struct ieee80211_rx_status { * @IEEE80211_CONF_MONITOR: there's a monitor interface present -- use this * to determine for example whether to calculate timestamps for packets * or not, do not use instead of filter flags! - * @IEEE80211_CONF_PS: Enable 802.11 power save mode (managed mode only) + * @IEEE80211_CONF_PS: Enable 802.11 power save mode (managed mode only). + * This is the power save mode defined by IEEE 802.11-2007 section 11.2, + * meaning that the hardware still wakes up for beacons, is able to + * transmit frames and receive the possible acknowledgment frames. + * Not to be confused with hardware specific wakeup/sleep states, + * driver is responsible for that. See the section "Powersave support" + * for more. * @IEEE80211_CONF_IDLE: The device is running, but idle; if the flag is set * the driver should be prepared to handle configuration requests but * may turn the device off as much as possible. Typically, this flag will @@ -1138,18 +1144,24 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * * mac80211 has support for various powersave implementations. * - * First, it can support hardware that handles all powersaving by - * itself, such hardware should simply set the %IEEE80211_HW_SUPPORTS_PS - * hardware flag. 
In that case, it will be told about the desired - * powersave mode depending on the association status, and the driver - * must take care of sending nullfunc frames when necessary, i.e. when - * entering and leaving powersave mode. The driver is required to look at - * the AID in beacons and signal to the AP that it woke up when it finds - * traffic directed to it. This mode supports dynamic PS by simply - * enabling/disabling PS. - * - * Additionally, such hardware may set the %IEEE80211_HW_SUPPORTS_DYNAMIC_PS - * flag to indicate that it can support dynamic PS mode itself (see below). + * First, it can support hardware that handles all powersaving by itself, + * such hardware should simply set the %IEEE80211_HW_SUPPORTS_PS hardware + * flag. In that case, it will be told about the desired powersave mode + * with the %IEEE80211_CONF_PS flag depending on the association status. + * The hardware must take care of sending nullfunc frames when necessary, + * i.e. when entering and leaving powersave mode. The hardware is required + * to look at the AID in beacons and signal to the AP that it woke up when + * it finds traffic directed to it. + * + * %IEEE80211_CONF_PS flag enabled means that the powersave mode defined in + * IEEE 802.11-2007 section 11.2 is enabled. This is not to be confused + * with hardware wakeup and sleep states. Driver is responsible for waking + * up the hardware before issueing commands to the hardware and putting it + * back to sleep at approriate times. + * + * When PS is enabled, hardware needs to wakeup for beacons and receive the + * buffered multicast/broadcast frames after the beacon. Also it must be + * possible to send frames and receive the acknowledment frame. * * Other hardware designs cannot send nullfunc frames by themselves and also * need software support for parsing the TIM bitmap. This is also supported @@ -1157,14 +1169,35 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * %IEEE80211_HW_PS_NULLFUNC_STACK flags. The hardware is of course still * required to pass up beacons. The hardware is still required to handle * waking up for multicast traffic; if it cannot the driver must handle that - * as best as it can, mac80211 is too slow. - * - * Dynamic powersave mode is an extension to normal powersave mode in which - * the hardware stays awake for a user-specified period of time after sending - * a frame so that reply frames need not be buffered and therefore delayed - * to the next wakeup. This can either be supported by hardware, in which case - * the driver needs to look at the @dynamic_ps_timeout hardware configuration - * value, or by the stack if all nullfunc handling is in the stack. + * as best as it can, mac80211 is too slow to do that. + * + * Dynamic powersave is an extension to normal powersave in which the + * hardware stays awake for a user-specified period of time after sending a + * frame so that reply frames need not be buffered and therefore delayed to + * the next wakeup. It's compromise of getting good enough latency when + * there's data traffic and still saving significantly power in idle + * periods. + * + * Dynamic powersave is supported by simply mac80211 enabling and disabling + * PS based on traffic. Driver needs to only set %IEEE80211_HW_SUPPORTS_PS + * flag and mac80211 will handle everything automatically. Additionally, + * hardware having support for the dynamic PS feature may set the + * %IEEE80211_HW_SUPPORTS_DYNAMIC_PS flag to indicate that it can support + * dynamic PS mode itself. 
The driver needs to look at the + * @dynamic_ps_timeout hardware configuration value and use it that value + * whenever %IEEE80211_CONF_PS is set. In this case mac80211 will disable + * dynamic PS feature in stack and will just keep %IEEE80211_CONF_PS + * enabled whenever user has enabled powersave. + * + * Driver informs U-APSD client support by enabling + * %IEEE80211_HW_SUPPORTS_UAPSD flag. The mode is configured through the + * uapsd paramater in conf_tx() operation. Hardware needs to send the QoS + * Nullfunc frames and stay awake until the service period has ended. To + * utilize U-APSD, dynamic powersave is disabled for voip AC and all frames + * from that AC are transmitted with powersave enabled. + * + * Note: U-APSD client mode is not yet supported with + * %IEEE80211_HW_PS_NULLFUNC_STACK. */ /** -- cgit v1.2.3-70-g09d2 From d5f1fb53353edc38da326445267c1df0c9676df2 Mon Sep 17 00:00:00 2001 From: Li Zefan Date: Thu, 14 Jan 2010 10:53:55 +0800 Subject: lib: Introduce strnstr() It differs strstr() in that it limits the length to be searched in the first string. Signed-off-by: Li Zefan LKML-Reference: <4B4E8743.6030805@cn.fujitsu.com> Acked-by: Frederic Weisbecker Signed-off-by: Steven Rostedt --- include/linux/string.h | 5 ++++- lib/string.c | 27 ++++++++++++++++++++++++++- 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index 651839a2a755..a716ee2a8adb 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -72,7 +72,10 @@ static inline __must_check char *strstrip(char *str) } #ifndef __HAVE_ARCH_STRSTR -extern char * strstr(const char *,const char *); +extern char * strstr(const char *, const char *); +#endif +#ifndef __HAVE_ARCH_STRNSTR +extern char * strnstr(const char *, const char *, size_t); #endif #ifndef __HAVE_ARCH_STRLEN extern __kernel_size_t strlen(const char *); diff --git a/lib/string.c b/lib/string.c index 9f75b4ec50b8..a1cdcfcc42d0 100644 --- a/lib/string.c +++ b/lib/string.c @@ -667,7 +667,7 @@ EXPORT_SYMBOL(memscan); */ char *strstr(const char *s1, const char *s2) { - int l1, l2; + size_t l1, l2; l2 = strlen(s2); if (!l2) @@ -684,6 +684,31 @@ char *strstr(const char *s1, const char *s2) EXPORT_SYMBOL(strstr); #endif +#ifndef __HAVE_ARCH_STRNSTR +/** + * strnstr - Find the first substring in a length-limited string + * @s1: The string to be searched + * @s2: The string to search for + * @len: the maximum number of characters to search + */ +char *strnstr(const char *s1, const char *s2, size_t len) +{ + size_t l1 = len, l2; + + l2 = strlen(s2); + if (!l2) + return (char *)s1; + while (l1 >= l2) { + l1--; + if (!memcmp(s1, s2, l2)) + return (char *)s1; + s1++; + } + return NULL; +} +EXPORT_SYMBOL(strnstr); +#endif + #ifndef __HAVE_ARCH_MEMCHR /** * memchr - Find a character in an area of memory. -- cgit v1.2.3-70-g09d2 From ad72c347e56bf3a0231b9d686e17764157d2961c Mon Sep 17 00:00:00 2001 From: Christian Pellegrin Date: Thu, 14 Jan 2010 07:08:34 +0000 Subject: can: Proper ctrlmode handling for CAN devices This patch adds error checking of ctrlmode values for CAN devices. As an example all availabe bits are implemented in the mcp251x driver. Signed-off-by: Christian Pellegrin Acked-by: Wolfgang Grandegger Signed-off-by: David S. 
Miller --- drivers/net/can/at91_can.c | 1 + drivers/net/can/bfin_can.c | 1 + drivers/net/can/dev.c | 2 ++ drivers/net/can/mcp251x.c | 11 ++++++++++- drivers/net/can/mscan/mscan.c | 1 + drivers/net/can/sja1000/sja1000.c | 1 + drivers/net/can/ti_hecc.c | 1 + drivers/net/can/usb/ems_usb.c | 1 + include/linux/can/dev.h | 1 + 9 files changed, 19 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/net/can/at91_can.c b/drivers/net/can/at91_can.c index f7287497ba6e..a2f29a38798a 100644 --- a/drivers/net/can/at91_can.c +++ b/drivers/net/can/at91_can.c @@ -1073,6 +1073,7 @@ static int __init at91_can_probe(struct platform_device *pdev) priv->can.bittiming_const = &at91_bittiming_const; priv->can.do_set_bittiming = at91_set_bittiming; priv->can.do_set_mode = at91_set_mode; + priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES; priv->reg_base = addr; priv->dev = dev; priv->clk = clk; diff --git a/drivers/net/can/bfin_can.c b/drivers/net/can/bfin_can.c index 7e1926e79e98..bf7f9ba2d903 100644 --- a/drivers/net/can/bfin_can.c +++ b/drivers/net/can/bfin_can.c @@ -603,6 +603,7 @@ struct net_device *alloc_bfin_candev(void) priv->can.bittiming_const = &bfin_can_bittiming_const; priv->can.do_set_bittiming = bfin_can_set_bittiming; priv->can.do_set_mode = bfin_can_set_mode; + priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES; return dev; } diff --git a/drivers/net/can/dev.c b/drivers/net/can/dev.c index c1bb29f0322b..f08f1202ff00 100644 --- a/drivers/net/can/dev.c +++ b/drivers/net/can/dev.c @@ -592,6 +592,8 @@ static int can_changelink(struct net_device *dev, if (dev->flags & IFF_UP) return -EBUSY; cm = nla_data(data[IFLA_CAN_CTRLMODE]); + if (cm->flags & ~priv->ctrlmode_supported) + return -EOPNOTSUPP; priv->ctrlmode &= ~cm->mask; priv->ctrlmode |= cm->flags; } diff --git a/drivers/net/can/mcp251x.c b/drivers/net/can/mcp251x.c index afa2fa45fed9..bbe186b5a0ed 100644 --- a/drivers/net/can/mcp251x.c +++ b/drivers/net/can/mcp251x.c @@ -539,9 +539,14 @@ static void mcp251x_set_normal_mode(struct spi_device *spi) if (priv->can.ctrlmode & CAN_CTRLMODE_LOOPBACK) { /* Put device into loopback mode */ mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_LOOPBACK); + } else if (priv->can.ctrlmode & CAN_CTRLMODE_LISTENONLY) { + /* Put device into listen-only mode */ + mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_LISTEN_ONLY); } else { /* Put device into normal mode */ - mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_NORMAL); + mcp251x_write_reg(spi, CANCTRL, CANCTRL_REQOP_NORMAL | + (priv->can.ctrlmode & CAN_CTRLMODE_ONE_SHOT ? 
+ CANCTRL_OSM : 0)); /* Wait for the device to enter normal mode */ timeout = jiffies + HZ; @@ -948,6 +953,10 @@ static int __devinit mcp251x_can_probe(struct spi_device *spi) priv->can.bittiming_const = &mcp251x_bittiming_const; priv->can.do_set_mode = mcp251x_do_set_mode; priv->can.clock.freq = pdata->oscillator_frequency / 2; + priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES | + CAN_CTRLMODE_LOOPBACK | CAN_CTRLMODE_LISTENONLY; + if (pdata->model == CAN_MCP251X_MCP2515) + priv->can.ctrlmode_supported |= CAN_CTRLMODE_ONE_SHOT; priv->net = net; dev_set_drvdata(&spi->dev, priv); diff --git a/drivers/net/can/mscan/mscan.c b/drivers/net/can/mscan/mscan.c index 40827c128b65..6b7dd578d417 100644 --- a/drivers/net/can/mscan/mscan.c +++ b/drivers/net/can/mscan/mscan.c @@ -686,6 +686,7 @@ struct net_device *alloc_mscandev(void) priv->can.bittiming_const = &mscan_bittiming_const; priv->can.do_set_bittiming = mscan_do_set_bittiming; priv->can.do_set_mode = mscan_do_set_mode; + priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES; for (i = 0; i < TX_QUEUE_SIZE; i++) { priv->tx_queue[i].id = i; diff --git a/drivers/net/can/sja1000/sja1000.c b/drivers/net/can/sja1000/sja1000.c index 345304d779b9..ace103a44833 100644 --- a/drivers/net/can/sja1000/sja1000.c +++ b/drivers/net/can/sja1000/sja1000.c @@ -567,6 +567,7 @@ struct net_device *alloc_sja1000dev(int sizeof_priv) priv->can.bittiming_const = &sja1000_bittiming_const; priv->can.do_set_bittiming = sja1000_set_bittiming; priv->can.do_set_mode = sja1000_set_mode; + priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES; if (sizeof_priv) priv->priv = (void *)priv + sizeof(struct sja1000_priv); diff --git a/drivers/net/can/ti_hecc.c b/drivers/net/can/ti_hecc.c index 7d370e32a7a8..8332e242b0be 100644 --- a/drivers/net/can/ti_hecc.c +++ b/drivers/net/can/ti_hecc.c @@ -909,6 +909,7 @@ static int ti_hecc_probe(struct platform_device *pdev) priv->can.bittiming_const = &ti_hecc_bittiming_const; priv->can.do_set_mode = ti_hecc_do_set_mode; priv->can.do_get_state = ti_hecc_get_state; + priv->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES; ndev->irq = irq->start; ndev->flags |= IFF_ECHO; diff --git a/drivers/net/can/usb/ems_usb.c b/drivers/net/can/usb/ems_usb.c index ddb17e256656..bfab283ba9b1 100644 --- a/drivers/net/can/usb/ems_usb.c +++ b/drivers/net/can/usb/ems_usb.c @@ -1022,6 +1022,7 @@ static int ems_usb_probe(struct usb_interface *intf, dev->can.bittiming_const = &ems_usb_bittiming_const; dev->can.do_set_bittiming = ems_usb_set_bittiming; dev->can.do_set_mode = ems_usb_set_mode; + dev->can.ctrlmode_supported = CAN_CTRLMODE_3_SAMPLES; netdev->flags |= IFF_ECHO; /* we support local echo */ diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 7e7c98a3e908..c8c660a79f90 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -38,6 +38,7 @@ struct can_priv { enum can_state state; u32 ctrlmode; + u32 ctrlmode_supported; int restart_ms; struct timer_list restart_timer; -- cgit v1.2.3-70-g09d2 From 05c2828c72c4eabf62376adfe27bd24797621f62 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 14 Jan 2010 06:17:09 +0000 Subject: tun: export underlying socket Tun device looks similar to a packet socket in that both pass complete frames from/to userspace. This patch fills in enough fields in the socket underlying tun driver to support sendmsg/recvmsg operations, and message flags MSG_TRUNC and MSG_DONTWAIT, and exports access to this socket to modules. Regular read/write behaviour is unchanged. 
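A rough sketch of the intended in-kernel use, assuming hypothetical names and with error handling trimmed (only tun_get_socket() and the socket ops added by this patch are relied on; "file" is the struct file behind an attached tun fd and the caller keeps a reference to it):

	/* Hypothetical helper: push one pre-built iovec into an attached tun device. */
	static int example_tun_inject(struct file *file, struct iovec *iov, size_t len)
	{
		struct socket *sock = tun_get_socket(file);
		struct msghdr msg = {
			.msg_iov	= iov,
			.msg_iovlen	= 1,
			.msg_flags	= MSG_DONTWAIT,
		};

		if (IS_ERR(sock))
			return PTR_ERR(sock);
		return sock->ops->sendmsg(NULL, sock, &msg, len);
	}

This is the same shape of call a raw packet socket user would make.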
This way, code using raw sockets to inject packets into a physical device, can support injecting packets into host network stack almost without modification. First user of this interface will be vhost virtualization accelerator. Signed-off-by: Michael S. Tsirkin Acked-by: Herbert Xu Acked-by: David S. Miller Signed-off-by: David S. Miller --- drivers/net/tun.c | 101 +++++++++++++++++++++++++++++++++++++++---------- include/linux/if_tun.h | 14 +++++++ 2 files changed, 96 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 2834a01bae24..5adb3d150552 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -144,6 +144,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file) err = 0; tfile->tun = tun; tun->tfile = tfile; + tun->socket.file = file; dev_hold(tun->dev); sock_hold(tun->socket.sk); atomic_inc(&tfile->count); @@ -158,6 +159,7 @@ static void __tun_detach(struct tun_struct *tun) /* Detach from net device */ netif_tx_lock_bh(tun->dev); tun->tfile = NULL; + tun->socket.file = NULL; netif_tx_unlock_bh(tun->dev); /* Drop read queue */ @@ -387,7 +389,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) /* Notify and wake up reader process */ if (tun->flags & TUN_FASYNC) kill_fasync(&tun->fasync, SIGIO, POLL_IN); - wake_up_interruptible(&tun->socket.wait); + wake_up_interruptible_poll(&tun->socket.wait, POLLIN | + POLLRDNORM | POLLRDBAND); return NETDEV_TX_OK; drop: @@ -743,7 +746,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, len = min_t(int, skb->len, len); skb_copy_datagram_const_iovec(skb, 0, iv, total, len); - total += len; + total += skb->len; tun->dev->stats.tx_packets++; tun->dev->stats.tx_bytes += len; @@ -751,34 +754,23 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, return total; } -static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, - unsigned long count, loff_t pos) +static ssize_t tun_do_read(struct tun_struct *tun, + struct kiocb *iocb, const struct iovec *iv, + ssize_t len, int noblock) { - struct file *file = iocb->ki_filp; - struct tun_file *tfile = file->private_data; - struct tun_struct *tun = __tun_get(tfile); DECLARE_WAITQUEUE(wait, current); struct sk_buff *skb; - ssize_t len, ret = 0; - - if (!tun) - return -EBADFD; + ssize_t ret = 0; DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name); - len = iov_length(iv, count); - if (len < 0) { - ret = -EINVAL; - goto out; - } - add_wait_queue(&tun->socket.wait, &wait); while (len) { current->state = TASK_INTERRUPTIBLE; /* Read frames from the queue */ if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) { - if (file->f_flags & O_NONBLOCK) { + if (noblock) { ret = -EAGAIN; break; } @@ -805,6 +797,27 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, current->state = TASK_RUNNING; remove_wait_queue(&tun->socket.wait, &wait); + return ret; +} + +static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, + unsigned long count, loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct tun_file *tfile = file->private_data; + struct tun_struct *tun = __tun_get(tfile); + ssize_t len, ret; + + if (!tun) + return -EBADFD; + len = iov_length(iv, count); + if (len < 0) { + ret = -EINVAL; + goto out; + } + + ret = tun_do_read(tun, iocb, iv, len, file->f_flags & O_NONBLOCK); + ret = min_t(ssize_t, ret, len); out: tun_put(tun); return ret; @@ -847,7 +860,8 @@ static void tun_sock_write_space(struct sock *sk) return; if 
(sk->sk_sleep && waitqueue_active(sk->sk_sleep)) - wake_up_interruptible_sync(sk->sk_sleep); + wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT | + POLLWRNORM | POLLWRBAND); tun = tun_sk(sk)->tun; kill_fasync(&tun->fasync, SIGIO, POLL_OUT); @@ -858,6 +872,37 @@ static void tun_sock_destruct(struct sock *sk) free_netdev(tun_sk(sk)->tun->dev); } +static int tun_sendmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len) +{ + struct tun_struct *tun = container_of(sock, struct tun_struct, socket); + return tun_get_user(tun, m->msg_iov, total_len, + m->msg_flags & MSG_DONTWAIT); +} + +static int tun_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len, + int flags) +{ + struct tun_struct *tun = container_of(sock, struct tun_struct, socket); + int ret; + if (flags & ~(MSG_DONTWAIT|MSG_TRUNC)) + return -EINVAL; + ret = tun_do_read(tun, iocb, m->msg_iov, total_len, + flags & MSG_DONTWAIT); + if (ret > total_len) { + m->msg_flags |= MSG_TRUNC; + ret = flags & MSG_TRUNC ? ret : total_len; + } + return ret; +} + +/* Ops structure to mimic raw sockets with tun */ +static const struct proto_ops tun_socket_ops = { + .sendmsg = tun_sendmsg, + .recvmsg = tun_recvmsg, +}; + static struct proto tun_proto = { .name = "tun", .owner = THIS_MODULE, @@ -986,6 +1031,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) goto err_free_dev; init_waitqueue_head(&tun->socket.wait); + tun->socket.ops = &tun_socket_ops; sock_init_data(&tun->socket, sk); sk->sk_write_space = tun_sock_write_space; sk->sk_sndbuf = INT_MAX; @@ -1525,6 +1571,23 @@ static void tun_cleanup(void) rtnl_link_unregister(&tun_link_ops); } +/* Get an underlying socket object from tun file. Returns error unless file is + * attached to a device. The returned object works like a packet socket, it + * can be used for sock_sendmsg/sock_recvmsg. The caller is responsible for + * holding a reference to the file for as long as the socket is in use. */ +struct socket *tun_get_socket(struct file *file) +{ + struct tun_struct *tun; + if (file->f_op != &tun_fops) + return ERR_PTR(-EINVAL); + tun = tun_get(file); + if (!tun) + return ERR_PTR(-EBADFD); + tun_put(tun); + return &tun->socket; +} +EXPORT_SYMBOL_GPL(tun_get_socket); + module_init(tun_init); module_exit(tun_cleanup); MODULE_DESCRIPTION(DRV_DESCRIPTION); diff --git a/include/linux/if_tun.h b/include/linux/if_tun.h index 3f5fd523b49d..404abe00162c 100644 --- a/include/linux/if_tun.h +++ b/include/linux/if_tun.h @@ -86,4 +86,18 @@ struct tun_filter { __u8 addr[0][ETH_ALEN]; }; +#ifdef __KERNEL__ +#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE) +struct socket *tun_get_socket(struct file *); +#else +#include +#include +struct file; +struct socket; +static inline struct socket *tun_get_socket(struct file *f) +{ + return ERR_PTR(-EINVAL); +} +#endif /* CONFIG_TUN */ +#endif /* __KERNEL__ */ #endif /* __IF_TUN_H */ -- cgit v1.2.3-70-g09d2 From 3a4d5c94e959359ece6d6b55045c3f046677f55c Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 14 Jan 2010 06:17:27 +0000 Subject: vhost_net: a kernel-level virtio server What it is: vhost net is a character device that can be used to reduce the number of system calls involved in virtio networking. Existing virtio net code is used in the guest without modification. 
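On the host side a userspace hypervisor drives the device through ioctls; a rough sketch of the setup (illustrative only: tap_fd is assumed to be an already-open tap descriptor, and the memory-table and vring configuration steps are elided) looks like:

	int vhost = open("/dev/vhost-net", O_RDWR);
	struct vhost_vring_file backend = { .index = 0, .fd = tap_fd };

	ioctl(vhost, VHOST_SET_OWNER, NULL);
	/* ... VHOST_SET_MEM_TABLE and VHOST_SET_VRING_* calls go here ... */
	ioctl(vhost, VHOST_NET_SET_BACKEND, &backend);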
There's similarity with vringfd, with some differences and reduced scope - uses eventfd for signalling - structures can be moved around in memory at any time (good for migration, bug work-arounds in userspace) - write logging is supported (good for migration) - support memory table and not just an offset (needed for kvm) common virtio related code has been put in a separate file vhost.c and can be made into a separate module if/when more backends appear. I used Rusty's lguest.c as the source for developing this part : this supplied me with witty comments I wouldn't be able to write myself. What it is not: vhost net is not a bus, and not a generic new system call. No assumptions are made on how guest performs hypercalls. Userspace hypervisors are supported as well as kvm. How it works: Basically, we connect virtio frontend (configured by userspace) to a backend. The backend could be a network device, or a tap device. Backend is also configured by userspace, including vlan/mac etc. Status: This works for me, and I haven't see any crashes. Compared to userspace, people reported improved latency (as I save up to 4 system calls per packet), as well as better bandwidth and CPU utilization. Features that I plan to look at in the future: - mergeable buffers - zero copy - scalability tuning: figure out the best threading model to use Note on RCU usage (this is also documented in vhost.h, near private_pointer which is the value protected by this variant of RCU): what is happening is that the rcu_dereference() is being used in a workqueue item. The role of rcu_read_lock() is taken on by the start of execution of the workqueue item, of rcu_read_unlock() by the end of execution of the workqueue item, and of synchronize_rcu() by flush_workqueue()/flush_work(). In the future we might need to apply some gcc attribute or sparse annotation to the function passed to INIT_WORK(). Paul's ack below is for this RCU usage. (Includes fixes by Alan Cox , David L Stevens , Chris Wright ) Acked-by: Rusty Russell Acked-by: Arnd Bergmann Acked-by: "Paul E. McKenney" Signed-off-by: Michael S. Tsirkin Signed-off-by: David S. Miller --- MAINTAINERS | 9 + arch/ia64/kvm/Kconfig | 1 + arch/powerpc/kvm/Kconfig | 1 + arch/s390/kvm/Kconfig | 1 + arch/x86/kvm/Kconfig | 1 + drivers/Makefile | 1 + drivers/vhost/Kconfig | 11 + drivers/vhost/Makefile | 2 + drivers/vhost/net.c | 661 ++++++++++++++++++++++++++ drivers/vhost/vhost.c | 1098 ++++++++++++++++++++++++++++++++++++++++++++ drivers/vhost/vhost.h | 161 +++++++ include/linux/Kbuild | 1 + include/linux/miscdevice.h | 1 + include/linux/vhost.h | 130 ++++++ 14 files changed, 2079 insertions(+) create mode 100644 drivers/vhost/Kconfig create mode 100644 drivers/vhost/Makefile create mode 100644 drivers/vhost/net.c create mode 100644 drivers/vhost/vhost.c create mode 100644 drivers/vhost/vhost.h create mode 100644 include/linux/vhost.h (limited to 'include') diff --git a/MAINTAINERS b/MAINTAINERS index 745643b8c344..337dffbe9a47 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5803,6 +5803,15 @@ S: Maintained F: Documentation/filesystems/vfat.txt F: fs/fat/ +VIRTIO HOST (VHOST) +M: "Michael S. 
Tsirkin" +L: kvm@vger.kernel.org +L: virtualization@lists.osdl.org +L: netdev@vger.kernel.org +S: Maintained +F: drivers/vhost/ +F: include/linux/vhost.h + VIA RHINE NETWORK DRIVER M: Roger Luethi S: Maintained diff --git a/arch/ia64/kvm/Kconfig b/arch/ia64/kvm/Kconfig index ef3e7be29caf..01c75797119c 100644 --- a/arch/ia64/kvm/Kconfig +++ b/arch/ia64/kvm/Kconfig @@ -47,6 +47,7 @@ config KVM_INTEL Provides support for KVM on Itanium 2 processors equipped with the VT extensions. +source drivers/vhost/Kconfig source drivers/virtio/Kconfig endif # VIRTUALIZATION diff --git a/arch/powerpc/kvm/Kconfig b/arch/powerpc/kvm/Kconfig index 07703f72330e..e28841fbfb8d 100644 --- a/arch/powerpc/kvm/Kconfig +++ b/arch/powerpc/kvm/Kconfig @@ -75,6 +75,7 @@ config KVM_E500 If unsure, say N. +source drivers/vhost/Kconfig source drivers/virtio/Kconfig endif # VIRTUALIZATION diff --git a/arch/s390/kvm/Kconfig b/arch/s390/kvm/Kconfig index 6ee55ae84ce2..a7251580891c 100644 --- a/arch/s390/kvm/Kconfig +++ b/arch/s390/kvm/Kconfig @@ -35,6 +35,7 @@ config KVM # OK, it's a little counter-intuitive to do this, but it puts it neatly under # the virtualization menu. +source drivers/vhost/Kconfig source drivers/virtio/Kconfig endif # VIRTUALIZATION diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 4cd498332466..3c4d0109ad20 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -65,6 +65,7 @@ config KVM_AMD # OK, it's a little counter-intuitive to do this, but it puts it neatly under # the virtualization menu. +source drivers/vhost/Kconfig source drivers/lguest/Kconfig source drivers/virtio/Kconfig diff --git a/drivers/Makefile b/drivers/Makefile index 6ee53c7a57a1..81e36596b1e9 100644 --- a/drivers/Makefile +++ b/drivers/Makefile @@ -106,6 +106,7 @@ obj-$(CONFIG_HID) += hid/ obj-$(CONFIG_PPC_PS3) += ps3/ obj-$(CONFIG_OF) += of/ obj-$(CONFIG_SSB) += ssb/ +obj-$(CONFIG_VHOST_NET) += vhost/ obj-$(CONFIG_VIRTIO) += virtio/ obj-$(CONFIG_VLYNQ) += vlynq/ obj-$(CONFIG_STAGING) += staging/ diff --git a/drivers/vhost/Kconfig b/drivers/vhost/Kconfig new file mode 100644 index 000000000000..9f409f447aea --- /dev/null +++ b/drivers/vhost/Kconfig @@ -0,0 +1,11 @@ +config VHOST_NET + tristate "Host kernel accelerator for virtio net (EXPERIMENTAL)" + depends on NET && EVENTFD && EXPERIMENTAL + ---help--- + This kernel module can be loaded in host kernel to accelerate + guest networking with virtio_net. Not to be confused with virtio_net + module itself which needs to be loaded in guest kernel. + + To compile this driver as a module, choose M here: the module will + be called vhost_net. + diff --git a/drivers/vhost/Makefile b/drivers/vhost/Makefile new file mode 100644 index 000000000000..72dd02050bb9 --- /dev/null +++ b/drivers/vhost/Makefile @@ -0,0 +1,2 @@ +obj-$(CONFIG_VHOST_NET) += vhost_net.o +vhost_net-y := vhost.o net.o diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c new file mode 100644 index 000000000000..4c8928319e1d --- /dev/null +++ b/drivers/vhost/net.c @@ -0,0 +1,661 @@ +/* Copyright (C) 2009 Red Hat, Inc. + * Author: Michael S. Tsirkin + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * virtio-net server in host kernel. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "vhost.h" + +/* Max number of bytes transferred before requeueing the job. + * Using this limit prevents one virtqueue from starving others. 
*/ +#define VHOST_NET_WEIGHT 0x80000 + +enum { + VHOST_NET_VQ_RX = 0, + VHOST_NET_VQ_TX = 1, + VHOST_NET_VQ_MAX = 2, +}; + +enum vhost_net_poll_state { + VHOST_NET_POLL_DISABLED = 0, + VHOST_NET_POLL_STARTED = 1, + VHOST_NET_POLL_STOPPED = 2, +}; + +struct vhost_net { + struct vhost_dev dev; + struct vhost_virtqueue vqs[VHOST_NET_VQ_MAX]; + struct vhost_poll poll[VHOST_NET_VQ_MAX]; + /* Tells us whether we are polling a socket for TX. + * We only do this when socket buffer fills up. + * Protected by tx vq lock. */ + enum vhost_net_poll_state tx_poll_state; +}; + +/* Pop first len bytes from iovec. Return number of segments used. */ +static int move_iovec_hdr(struct iovec *from, struct iovec *to, + size_t len, int iov_count) +{ + int seg = 0; + size_t size; + while (len && seg < iov_count) { + size = min(from->iov_len, len); + to->iov_base = from->iov_base; + to->iov_len = size; + from->iov_len -= size; + from->iov_base += size; + len -= size; + ++from; + ++to; + ++seg; + } + return seg; +} + +/* Caller must have TX VQ lock */ +static void tx_poll_stop(struct vhost_net *net) +{ + if (likely(net->tx_poll_state != VHOST_NET_POLL_STARTED)) + return; + vhost_poll_stop(net->poll + VHOST_NET_VQ_TX); + net->tx_poll_state = VHOST_NET_POLL_STOPPED; +} + +/* Caller must have TX VQ lock */ +static void tx_poll_start(struct vhost_net *net, struct socket *sock) +{ + if (unlikely(net->tx_poll_state != VHOST_NET_POLL_STOPPED)) + return; + vhost_poll_start(net->poll + VHOST_NET_VQ_TX, sock->file); + net->tx_poll_state = VHOST_NET_POLL_STARTED; +} + +/* Expects to be always run from workqueue - which acts as + * read-size critical section for our kind of RCU. */ +static void handle_tx(struct vhost_net *net) +{ + struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_TX]; + unsigned head, out, in, s; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_control = NULL, + .msg_controllen = 0, + .msg_iov = vq->iov, + .msg_flags = MSG_DONTWAIT, + }; + size_t len, total_len = 0; + int err, wmem; + size_t hdr_size; + struct socket *sock = rcu_dereference(vq->private_data); + if (!sock) + return; + + wmem = atomic_read(&sock->sk->sk_wmem_alloc); + if (wmem >= sock->sk->sk_sndbuf) + return; + + use_mm(net->dev.mm); + mutex_lock(&vq->mutex); + vhost_disable_notify(vq); + + if (wmem < sock->sk->sk_sndbuf * 2) + tx_poll_stop(net); + hdr_size = vq->hdr_size; + + for (;;) { + head = vhost_get_vq_desc(&net->dev, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in, + NULL, NULL); + /* Nothing new? Wait for eventfd to tell us they refilled. */ + if (head == vq->num) { + wmem = atomic_read(&sock->sk->sk_wmem_alloc); + if (wmem >= sock->sk->sk_sndbuf * 3 / 4) { + tx_poll_start(net, sock); + set_bit(SOCK_ASYNC_NOSPACE, &sock->flags); + break; + } + if (unlikely(vhost_enable_notify(vq))) { + vhost_disable_notify(vq); + continue; + } + break; + } + if (in) { + vq_err(vq, "Unexpected descriptor format for TX: " + "out %d, int %d\n", out, in); + break; + } + /* Skip header. TODO: support TSO. */ + s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, out); + msg.msg_iovlen = out; + len = iov_length(vq->iov, out); + /* Sanity check */ + if (!len) { + vq_err(vq, "Unexpected header len for TX: " + "%zd expected %zd\n", + iov_length(vq->hdr, s), hdr_size); + break; + } + /* TODO: Check specific error and bomb out unless ENOBUFS? 
*/ + err = sock->ops->sendmsg(NULL, sock, &msg, len); + if (unlikely(err < 0)) { + vhost_discard_vq_desc(vq); + tx_poll_start(net, sock); + break; + } + if (err != len) + pr_err("Truncated TX packet: " + " len %d != %zd\n", err, len); + vhost_add_used_and_signal(&net->dev, vq, head, 0); + total_len += len; + if (unlikely(total_len >= VHOST_NET_WEIGHT)) { + vhost_poll_queue(&vq->poll); + break; + } + } + + mutex_unlock(&vq->mutex); + unuse_mm(net->dev.mm); +} + +/* Expects to be always run from workqueue - which acts as + * read-size critical section for our kind of RCU. */ +static void handle_rx(struct vhost_net *net) +{ + struct vhost_virtqueue *vq = &net->dev.vqs[VHOST_NET_VQ_RX]; + unsigned head, out, in, log, s; + struct vhost_log *vq_log; + struct msghdr msg = { + .msg_name = NULL, + .msg_namelen = 0, + .msg_control = NULL, /* FIXME: get and handle RX aux data. */ + .msg_controllen = 0, + .msg_iov = vq->iov, + .msg_flags = MSG_DONTWAIT, + }; + + struct virtio_net_hdr hdr = { + .flags = 0, + .gso_type = VIRTIO_NET_HDR_GSO_NONE + }; + + size_t len, total_len = 0; + int err; + size_t hdr_size; + struct socket *sock = rcu_dereference(vq->private_data); + if (!sock || skb_queue_empty(&sock->sk->sk_receive_queue)) + return; + + use_mm(net->dev.mm); + mutex_lock(&vq->mutex); + vhost_disable_notify(vq); + hdr_size = vq->hdr_size; + + vq_log = unlikely(vhost_has_feature(&net->dev, VHOST_F_LOG_ALL)) ? + vq->log : NULL; + + for (;;) { + head = vhost_get_vq_desc(&net->dev, vq, vq->iov, + ARRAY_SIZE(vq->iov), + &out, &in, + vq_log, &log); + /* OK, now we need to know about added descriptors. */ + if (head == vq->num) { + if (unlikely(vhost_enable_notify(vq))) { + /* They have slipped one in as we were + * doing that: check again. */ + vhost_disable_notify(vq); + continue; + } + /* Nothing new? Wait for eventfd to tell us + * they refilled. */ + break; + } + /* We don't need to be notified again. */ + if (out) { + vq_err(vq, "Unexpected descriptor format for RX: " + "out %d, int %d\n", + out, in); + break; + } + /* Skip header. TODO: support TSO/mergeable rx buffers. */ + s = move_iovec_hdr(vq->iov, vq->hdr, hdr_size, in); + msg.msg_iovlen = in; + len = iov_length(vq->iov, in); + /* Sanity check */ + if (!len) { + vq_err(vq, "Unexpected header len for RX: " + "%zd expected %zd\n", + iov_length(vq->hdr, s), hdr_size); + break; + } + err = sock->ops->recvmsg(NULL, sock, &msg, + len, MSG_DONTWAIT | MSG_TRUNC); + /* TODO: Check specific error and bomb out unless EAGAIN? */ + if (err < 0) { + vhost_discard_vq_desc(vq); + break; + } + /* TODO: Should check and handle checksum. 
*/ + if (err > len) { + pr_err("Discarded truncated rx packet: " + " len %d > %zd\n", err, len); + vhost_discard_vq_desc(vq); + continue; + } + len = err; + err = memcpy_toiovec(vq->hdr, (unsigned char *)&hdr, hdr_size); + if (err) { + vq_err(vq, "Unable to write vnet_hdr at addr %p: %d\n", + vq->iov->iov_base, err); + break; + } + len += hdr_size; + vhost_add_used_and_signal(&net->dev, vq, head, len); + if (unlikely(vq_log)) + vhost_log_write(vq, vq_log, log, len); + total_len += len; + if (unlikely(total_len >= VHOST_NET_WEIGHT)) { + vhost_poll_queue(&vq->poll); + break; + } + } + + mutex_unlock(&vq->mutex); + unuse_mm(net->dev.mm); +} + +static void handle_tx_kick(struct work_struct *work) +{ + struct vhost_virtqueue *vq; + struct vhost_net *net; + vq = container_of(work, struct vhost_virtqueue, poll.work); + net = container_of(vq->dev, struct vhost_net, dev); + handle_tx(net); +} + +static void handle_rx_kick(struct work_struct *work) +{ + struct vhost_virtqueue *vq; + struct vhost_net *net; + vq = container_of(work, struct vhost_virtqueue, poll.work); + net = container_of(vq->dev, struct vhost_net, dev); + handle_rx(net); +} + +static void handle_tx_net(struct work_struct *work) +{ + struct vhost_net *net; + net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_TX].work); + handle_tx(net); +} + +static void handle_rx_net(struct work_struct *work) +{ + struct vhost_net *net; + net = container_of(work, struct vhost_net, poll[VHOST_NET_VQ_RX].work); + handle_rx(net); +} + +static int vhost_net_open(struct inode *inode, struct file *f) +{ + struct vhost_net *n = kmalloc(sizeof *n, GFP_KERNEL); + int r; + if (!n) + return -ENOMEM; + n->vqs[VHOST_NET_VQ_TX].handle_kick = handle_tx_kick; + n->vqs[VHOST_NET_VQ_RX].handle_kick = handle_rx_kick; + r = vhost_dev_init(&n->dev, n->vqs, VHOST_NET_VQ_MAX); + if (r < 0) { + kfree(n); + return r; + } + + vhost_poll_init(n->poll + VHOST_NET_VQ_TX, handle_tx_net, POLLOUT); + vhost_poll_init(n->poll + VHOST_NET_VQ_RX, handle_rx_net, POLLIN); + n->tx_poll_state = VHOST_NET_POLL_DISABLED; + + f->private_data = n; + + return 0; +} + +static void vhost_net_disable_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + if (!vq->private_data) + return; + if (vq == n->vqs + VHOST_NET_VQ_TX) { + tx_poll_stop(n); + n->tx_poll_state = VHOST_NET_POLL_DISABLED; + } else + vhost_poll_stop(n->poll + VHOST_NET_VQ_RX); +} + +static void vhost_net_enable_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + struct socket *sock = vq->private_data; + if (!sock) + return; + if (vq == n->vqs + VHOST_NET_VQ_TX) { + n->tx_poll_state = VHOST_NET_POLL_STOPPED; + tx_poll_start(n, sock); + } else + vhost_poll_start(n->poll + VHOST_NET_VQ_RX, sock->file); +} + +static struct socket *vhost_net_stop_vq(struct vhost_net *n, + struct vhost_virtqueue *vq) +{ + struct socket *sock; + + mutex_lock(&vq->mutex); + sock = vq->private_data; + vhost_net_disable_vq(n, vq); + rcu_assign_pointer(vq->private_data, NULL); + mutex_unlock(&vq->mutex); + return sock; +} + +static void vhost_net_stop(struct vhost_net *n, struct socket **tx_sock, + struct socket **rx_sock) +{ + *tx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_TX); + *rx_sock = vhost_net_stop_vq(n, n->vqs + VHOST_NET_VQ_RX); +} + +static void vhost_net_flush_vq(struct vhost_net *n, int index) +{ + vhost_poll_flush(n->poll + index); + vhost_poll_flush(&n->dev.vqs[index].poll); +} + +static void vhost_net_flush(struct vhost_net *n) +{ + vhost_net_flush_vq(n, VHOST_NET_VQ_TX); + vhost_net_flush_vq(n, VHOST_NET_VQ_RX); 
+} + +static int vhost_net_release(struct inode *inode, struct file *f) +{ + struct vhost_net *n = f->private_data; + struct socket *tx_sock; + struct socket *rx_sock; + + vhost_net_stop(n, &tx_sock, &rx_sock); + vhost_net_flush(n); + vhost_dev_cleanup(&n->dev); + if (tx_sock) + fput(tx_sock->file); + if (rx_sock) + fput(rx_sock->file); + /* We do an extra flush before freeing memory, + * since jobs can re-queue themselves. */ + vhost_net_flush(n); + kfree(n); + return 0; +} + +static struct socket *get_raw_socket(int fd) +{ + struct { + struct sockaddr_ll sa; + char buf[MAX_ADDR_LEN]; + } uaddr; + int uaddr_len = sizeof uaddr, r; + struct socket *sock = sockfd_lookup(fd, &r); + if (!sock) + return ERR_PTR(-ENOTSOCK); + + /* Parameter checking */ + if (sock->sk->sk_type != SOCK_RAW) { + r = -ESOCKTNOSUPPORT; + goto err; + } + + r = sock->ops->getname(sock, (struct sockaddr *)&uaddr.sa, + &uaddr_len, 0); + if (r) + goto err; + + if (uaddr.sa.sll_family != AF_PACKET) { + r = -EPFNOSUPPORT; + goto err; + } + return sock; +err: + fput(sock->file); + return ERR_PTR(r); +} + +static struct socket *get_tun_socket(int fd) +{ + struct file *file = fget(fd); + struct socket *sock; + if (!file) + return ERR_PTR(-EBADF); + sock = tun_get_socket(file); + if (IS_ERR(sock)) + fput(file); + return sock; +} + +static struct socket *get_socket(int fd) +{ + struct socket *sock; + /* special case to disable backend */ + if (fd == -1) + return NULL; + sock = get_raw_socket(fd); + if (!IS_ERR(sock)) + return sock; + sock = get_tun_socket(fd); + if (!IS_ERR(sock)) + return sock; + return ERR_PTR(-ENOTSOCK); +} + +static long vhost_net_set_backend(struct vhost_net *n, unsigned index, int fd) +{ + struct socket *sock, *oldsock; + struct vhost_virtqueue *vq; + int r; + + mutex_lock(&n->dev.mutex); + r = vhost_dev_check_owner(&n->dev); + if (r) + goto err; + + if (index >= VHOST_NET_VQ_MAX) { + r = -ENOBUFS; + goto err; + } + vq = n->vqs + index; + mutex_lock(&vq->mutex); + + /* Verify that ring has been setup correctly. */ + if (!vhost_vq_access_ok(vq)) { + r = -EFAULT; + goto err; + } + sock = get_socket(fd); + if (IS_ERR(sock)) { + r = PTR_ERR(sock); + goto err; + } + + /* start polling new socket */ + oldsock = vq->private_data; + if (sock == oldsock) + goto done; + + vhost_net_disable_vq(n, vq); + rcu_assign_pointer(vq->private_data, sock); + vhost_net_enable_vq(n, vq); + mutex_unlock(&vq->mutex); +done: + if (oldsock) { + vhost_net_flush_vq(n, index); + fput(oldsock->file); + } +err: + mutex_unlock(&n->dev.mutex); + return r; +} + +static long vhost_net_reset_owner(struct vhost_net *n) +{ + struct socket *tx_sock = NULL; + struct socket *rx_sock = NULL; + long err; + mutex_lock(&n->dev.mutex); + err = vhost_dev_check_owner(&n->dev); + if (err) + goto done; + vhost_net_stop(n, &tx_sock, &rx_sock); + vhost_net_flush(n); + err = vhost_dev_reset_owner(&n->dev); +done: + mutex_unlock(&n->dev.mutex); + if (tx_sock) + fput(tx_sock->file); + if (rx_sock) + fput(rx_sock->file); + return err; +} + +static int vhost_net_set_features(struct vhost_net *n, u64 features) +{ + size_t hdr_size = features & (1 << VHOST_NET_F_VIRTIO_NET_HDR) ? 
+ sizeof(struct virtio_net_hdr) : 0; + int i; + mutex_lock(&n->dev.mutex); + if ((features & (1 << VHOST_F_LOG_ALL)) && + !vhost_log_access_ok(&n->dev)) { + mutex_unlock(&n->dev.mutex); + return -EFAULT; + } + n->dev.acked_features = features; + smp_wmb(); + for (i = 0; i < VHOST_NET_VQ_MAX; ++i) { + mutex_lock(&n->vqs[i].mutex); + n->vqs[i].hdr_size = hdr_size; + mutex_unlock(&n->vqs[i].mutex); + } + vhost_net_flush(n); + mutex_unlock(&n->dev.mutex); + return 0; +} + +static long vhost_net_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + struct vhost_net *n = f->private_data; + void __user *argp = (void __user *)arg; + u64 __user *featurep = argp; + struct vhost_vring_file backend; + u64 features; + int r; + switch (ioctl) { + case VHOST_NET_SET_BACKEND: + r = copy_from_user(&backend, argp, sizeof backend); + if (r < 0) + return r; + return vhost_net_set_backend(n, backend.index, backend.fd); + case VHOST_GET_FEATURES: + features = VHOST_FEATURES; + return copy_to_user(featurep, &features, sizeof features); + case VHOST_SET_FEATURES: + r = copy_from_user(&features, featurep, sizeof features); + if (r < 0) + return r; + if (features & ~VHOST_FEATURES) + return -EOPNOTSUPP; + return vhost_net_set_features(n, features); + case VHOST_RESET_OWNER: + return vhost_net_reset_owner(n); + default: + mutex_lock(&n->dev.mutex); + r = vhost_dev_ioctl(&n->dev, ioctl, arg); + vhost_net_flush(n); + mutex_unlock(&n->dev.mutex); + return r; + } +} + +#ifdef CONFIG_COMPAT +static long vhost_net_compat_ioctl(struct file *f, unsigned int ioctl, + unsigned long arg) +{ + return vhost_net_ioctl(f, ioctl, (unsigned long)compat_ptr(arg)); +} +#endif + +const static struct file_operations vhost_net_fops = { + .owner = THIS_MODULE, + .release = vhost_net_release, + .unlocked_ioctl = vhost_net_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = vhost_net_compat_ioctl, +#endif + .open = vhost_net_open, +}; + +static struct miscdevice vhost_net_misc = { + VHOST_NET_MINOR, + "vhost-net", + &vhost_net_fops, +}; + +int vhost_net_init(void) +{ + int r = vhost_init(); + if (r) + goto err_init; + r = misc_register(&vhost_net_misc); + if (r) + goto err_reg; + return 0; +err_reg: + vhost_cleanup(); +err_init: + return r; + +} +module_init(vhost_net_init); + +void vhost_net_exit(void) +{ + misc_deregister(&vhost_net_misc); + vhost_cleanup(); +} +module_exit(vhost_net_exit); + +MODULE_VERSION("0.0.1"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Michael S. Tsirkin"); +MODULE_DESCRIPTION("Host kernel accelerator for virtio net"); diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c new file mode 100644 index 000000000000..c8c25dbc5857 --- /dev/null +++ b/drivers/vhost/vhost.c @@ -0,0 +1,1098 @@ +/* Copyright (C) 2009 Red Hat, Inc. + * Copyright (C) 2006 Rusty Russell IBM Corporation + * + * Author: Michael S. Tsirkin + * + * Inspiration, some code, and most witty comments come from + * Documentation/lguest/lguest.c, by Rusty Russell + * + * This work is licensed under the terms of the GNU GPL, version 2. + * + * Generic code for virtio server in host kernel. 
+ */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include "vhost.h" + +enum { + VHOST_MEMORY_MAX_NREGIONS = 64, + VHOST_MEMORY_F_LOG = 0x1, +}; + +static struct workqueue_struct *vhost_workqueue; + +static void vhost_poll_func(struct file *file, wait_queue_head_t *wqh, + poll_table *pt) +{ + struct vhost_poll *poll; + poll = container_of(pt, struct vhost_poll, table); + + poll->wqh = wqh; + add_wait_queue(wqh, &poll->wait); +} + +static int vhost_poll_wakeup(wait_queue_t *wait, unsigned mode, int sync, + void *key) +{ + struct vhost_poll *poll; + poll = container_of(wait, struct vhost_poll, wait); + if (!((unsigned long)key & poll->mask)) + return 0; + + queue_work(vhost_workqueue, &poll->work); + return 0; +} + +/* Init poll structure */ +void vhost_poll_init(struct vhost_poll *poll, work_func_t func, + unsigned long mask) +{ + INIT_WORK(&poll->work, func); + init_waitqueue_func_entry(&poll->wait, vhost_poll_wakeup); + init_poll_funcptr(&poll->table, vhost_poll_func); + poll->mask = mask; +} + +/* Start polling a file. We add ourselves to file's wait queue. The caller must + * keep a reference to a file until after vhost_poll_stop is called. */ +void vhost_poll_start(struct vhost_poll *poll, struct file *file) +{ + unsigned long mask; + mask = file->f_op->poll(file, &poll->table); + if (mask) + vhost_poll_wakeup(&poll->wait, 0, 0, (void *)mask); +} + +/* Stop polling a file. After this function returns, it becomes safe to drop the + * file reference. You must also flush afterwards. */ +void vhost_poll_stop(struct vhost_poll *poll) +{ + remove_wait_queue(poll->wqh, &poll->wait); +} + +/* Flush any work that has been scheduled. When calling this, don't hold any + * locks that are also used by the callback. */ +void vhost_poll_flush(struct vhost_poll *poll) +{ + flush_work(&poll->work); +} + +void vhost_poll_queue(struct vhost_poll *poll) +{ + queue_work(vhost_workqueue, &poll->work); +} + +static void vhost_vq_reset(struct vhost_dev *dev, + struct vhost_virtqueue *vq) +{ + vq->num = 1; + vq->desc = NULL; + vq->avail = NULL; + vq->used = NULL; + vq->last_avail_idx = 0; + vq->avail_idx = 0; + vq->last_used_idx = 0; + vq->used_flags = 0; + vq->used_flags = 0; + vq->log_used = false; + vq->log_addr = -1ull; + vq->hdr_size = 0; + vq->private_data = NULL; + vq->log_base = NULL; + vq->error_ctx = NULL; + vq->error = NULL; + vq->kick = NULL; + vq->call_ctx = NULL; + vq->call = NULL; +} + +long vhost_dev_init(struct vhost_dev *dev, + struct vhost_virtqueue *vqs, int nvqs) +{ + int i; + dev->vqs = vqs; + dev->nvqs = nvqs; + mutex_init(&dev->mutex); + dev->log_ctx = NULL; + dev->log_file = NULL; + dev->memory = NULL; + dev->mm = NULL; + + for (i = 0; i < dev->nvqs; ++i) { + dev->vqs[i].dev = dev; + mutex_init(&dev->vqs[i].mutex); + vhost_vq_reset(dev, dev->vqs + i); + if (dev->vqs[i].handle_kick) + vhost_poll_init(&dev->vqs[i].poll, + dev->vqs[i].handle_kick, + POLLIN); + } + return 0; +} + +/* Caller should have device mutex */ +long vhost_dev_check_owner(struct vhost_dev *dev) +{ + /* Are you the owner? If not, I don't think you mean to do that */ + return dev->mm == current->mm ? 0 : -EPERM; +} + +/* Caller should have device mutex */ +static long vhost_dev_set_owner(struct vhost_dev *dev) +{ + /* Is there an owner already? 
*/ + if (dev->mm) + return -EBUSY; + /* No owner, become one */ + dev->mm = get_task_mm(current); + return 0; +} + +/* Caller should have device mutex */ +long vhost_dev_reset_owner(struct vhost_dev *dev) +{ + struct vhost_memory *memory; + + /* Restore memory to default empty mapping. */ + memory = kmalloc(offsetof(struct vhost_memory, regions), GFP_KERNEL); + if (!memory) + return -ENOMEM; + + vhost_dev_cleanup(dev); + + memory->nregions = 0; + dev->memory = memory; + return 0; +} + +/* Caller should have device mutex */ +void vhost_dev_cleanup(struct vhost_dev *dev) +{ + int i; + for (i = 0; i < dev->nvqs; ++i) { + if (dev->vqs[i].kick && dev->vqs[i].handle_kick) { + vhost_poll_stop(&dev->vqs[i].poll); + vhost_poll_flush(&dev->vqs[i].poll); + } + if (dev->vqs[i].error_ctx) + eventfd_ctx_put(dev->vqs[i].error_ctx); + if (dev->vqs[i].error) + fput(dev->vqs[i].error); + if (dev->vqs[i].kick) + fput(dev->vqs[i].kick); + if (dev->vqs[i].call_ctx) + eventfd_ctx_put(dev->vqs[i].call_ctx); + if (dev->vqs[i].call) + fput(dev->vqs[i].call); + vhost_vq_reset(dev, dev->vqs + i); + } + if (dev->log_ctx) + eventfd_ctx_put(dev->log_ctx); + dev->log_ctx = NULL; + if (dev->log_file) + fput(dev->log_file); + dev->log_file = NULL; + /* No one will access memory at this point */ + kfree(dev->memory); + dev->memory = NULL; + if (dev->mm) + mmput(dev->mm); + dev->mm = NULL; +} + +static int log_access_ok(void __user *log_base, u64 addr, unsigned long sz) +{ + u64 a = addr / VHOST_PAGE_SIZE / 8; + /* Make sure 64 bit math will not overflow. */ + if (a > ULONG_MAX - (unsigned long)log_base || + a + (unsigned long)log_base > ULONG_MAX) + return -EFAULT; + + return access_ok(VERIFY_WRITE, log_base + a, + (sz + VHOST_PAGE_SIZE * 8 - 1) / VHOST_PAGE_SIZE / 8); +} + +/* Caller should have vq mutex and device mutex. */ +static int vq_memory_access_ok(void __user *log_base, struct vhost_memory *mem, + int log_all) +{ + int i; + for (i = 0; i < mem->nregions; ++i) { + struct vhost_memory_region *m = mem->regions + i; + unsigned long a = m->userspace_addr; + if (m->memory_size > ULONG_MAX) + return 0; + else if (!access_ok(VERIFY_WRITE, (void __user *)a, + m->memory_size)) + return 0; + else if (log_all && !log_access_ok(log_base, + m->guest_phys_addr, + m->memory_size)) + return 0; + } + return 1; +} + +/* Can we switch to this memory table? */ +/* Caller should have device mutex but not vq mutex */ +static int memory_access_ok(struct vhost_dev *d, struct vhost_memory *mem, + int log_all) +{ + int i; + for (i = 0; i < d->nvqs; ++i) { + int ok; + mutex_lock(&d->vqs[i].mutex); + /* If ring is inactive, will check when it's enabled. */ + if (d->vqs[i].private_data) + ok = vq_memory_access_ok(d->vqs[i].log_base, mem, + log_all); + else + ok = 1; + mutex_unlock(&d->vqs[i].mutex); + if (!ok) + return 0; + } + return 1; +} + +static int vq_access_ok(unsigned int num, + struct vring_desc __user *desc, + struct vring_avail __user *avail, + struct vring_used __user *used) +{ + return access_ok(VERIFY_READ, desc, num * sizeof *desc) && + access_ok(VERIFY_READ, avail, + sizeof *avail + num * sizeof *avail->ring) && + access_ok(VERIFY_WRITE, used, + sizeof *used + num * sizeof *used->ring); +} + +/* Can we log writes? */ +/* Caller should have device mutex but not vq mutex */ +int vhost_log_access_ok(struct vhost_dev *dev) +{ + return memory_access_ok(dev, dev->memory, 1); +} + +/* Verify access for write logging. 
*/ +/* Caller should have vq mutex and device mutex */ +static int vq_log_access_ok(struct vhost_virtqueue *vq, void __user *log_base) +{ + return vq_memory_access_ok(log_base, vq->dev->memory, + vhost_has_feature(vq->dev, VHOST_F_LOG_ALL)) && + (!vq->log_used || log_access_ok(log_base, vq->log_addr, + sizeof *vq->used + + vq->num * sizeof *vq->used->ring)); +} + +/* Can we start vq? */ +/* Caller should have vq mutex and device mutex */ +int vhost_vq_access_ok(struct vhost_virtqueue *vq) +{ + return vq_access_ok(vq->num, vq->desc, vq->avail, vq->used) && + vq_log_access_ok(vq, vq->log_base); +} + +static long vhost_set_memory(struct vhost_dev *d, struct vhost_memory __user *m) +{ + struct vhost_memory mem, *newmem, *oldmem; + unsigned long size = offsetof(struct vhost_memory, regions); + long r; + r = copy_from_user(&mem, m, size); + if (r) + return r; + if (mem.padding) + return -EOPNOTSUPP; + if (mem.nregions > VHOST_MEMORY_MAX_NREGIONS) + return -E2BIG; + newmem = kmalloc(size + mem.nregions * sizeof *m->regions, GFP_KERNEL); + if (!newmem) + return -ENOMEM; + + memcpy(newmem, &mem, size); + r = copy_from_user(newmem->regions, m->regions, + mem.nregions * sizeof *m->regions); + if (r) { + kfree(newmem); + return r; + } + + if (!memory_access_ok(d, newmem, vhost_has_feature(d, VHOST_F_LOG_ALL))) + return -EFAULT; + oldmem = d->memory; + rcu_assign_pointer(d->memory, newmem); + synchronize_rcu(); + kfree(oldmem); + return 0; +} + +static int init_used(struct vhost_virtqueue *vq, + struct vring_used __user *used) +{ + int r = put_user(vq->used_flags, &used->flags); + if (r) + return r; + return get_user(vq->last_used_idx, &used->idx); +} + +static long vhost_set_vring(struct vhost_dev *d, int ioctl, void __user *argp) +{ + struct file *eventfp, *filep = NULL, + *pollstart = NULL, *pollstop = NULL; + struct eventfd_ctx *ctx = NULL; + u32 __user *idxp = argp; + struct vhost_virtqueue *vq; + struct vhost_vring_state s; + struct vhost_vring_file f; + struct vhost_vring_addr a; + u32 idx; + long r; + + r = get_user(idx, idxp); + if (r < 0) + return r; + if (idx > d->nvqs) + return -ENOBUFS; + + vq = d->vqs + idx; + + mutex_lock(&vq->mutex); + + switch (ioctl) { + case VHOST_SET_VRING_NUM: + /* Resizing ring with an active backend? + * You don't want to do that. */ + if (vq->private_data) { + r = -EBUSY; + break; + } + r = copy_from_user(&s, argp, sizeof s); + if (r < 0) + break; + if (!s.num || s.num > 0xffff || (s.num & (s.num - 1))) { + r = -EINVAL; + break; + } + vq->num = s.num; + break; + case VHOST_SET_VRING_BASE: + /* Moving base with an active backend? + * You don't want to do that. */ + if (vq->private_data) { + r = -EBUSY; + break; + } + r = copy_from_user(&s, argp, sizeof s); + if (r < 0) + break; + if (s.num > 0xffff) { + r = -EINVAL; + break; + } + vq->last_avail_idx = s.num; + /* Forget the cached index value. */ + vq->avail_idx = vq->last_avail_idx; + break; + case VHOST_GET_VRING_BASE: + s.index = idx; + s.num = vq->last_avail_idx; + r = copy_to_user(argp, &s, sizeof s); + break; + case VHOST_SET_VRING_ADDR: + r = copy_from_user(&a, argp, sizeof a); + if (r < 0) + break; + if (a.flags & ~(0x1 << VHOST_VRING_F_LOG)) { + r = -EOPNOTSUPP; + break; + } + /* For 32bit, verify that the top 32bits of the user + data are set to zero. 
*/ + if ((u64)(unsigned long)a.desc_user_addr != a.desc_user_addr || + (u64)(unsigned long)a.used_user_addr != a.used_user_addr || + (u64)(unsigned long)a.avail_user_addr != a.avail_user_addr) { + r = -EFAULT; + break; + } + if ((a.avail_user_addr & (sizeof *vq->avail->ring - 1)) || + (a.used_user_addr & (sizeof *vq->used->ring - 1)) || + (a.log_guest_addr & (sizeof *vq->used->ring - 1))) { + r = -EINVAL; + break; + } + + /* We only verify access here if backend is configured. + * If it is not, we don't as size might not have been setup. + * We will verify when backend is configured. */ + if (vq->private_data) { + if (!vq_access_ok(vq->num, + (void __user *)(unsigned long)a.desc_user_addr, + (void __user *)(unsigned long)a.avail_user_addr, + (void __user *)(unsigned long)a.used_user_addr)) { + r = -EINVAL; + break; + } + + /* Also validate log access for used ring if enabled. */ + if ((a.flags & (0x1 << VHOST_VRING_F_LOG)) && + !log_access_ok(vq->log_base, a.log_guest_addr, + sizeof *vq->used + + vq->num * sizeof *vq->used->ring)) { + r = -EINVAL; + break; + } + } + + r = init_used(vq, (struct vring_used __user *)(unsigned long) + a.used_user_addr); + if (r) + break; + vq->log_used = !!(a.flags & (0x1 << VHOST_VRING_F_LOG)); + vq->desc = (void __user *)(unsigned long)a.desc_user_addr; + vq->avail = (void __user *)(unsigned long)a.avail_user_addr; + vq->log_addr = a.log_guest_addr; + vq->used = (void __user *)(unsigned long)a.used_user_addr; + break; + case VHOST_SET_VRING_KICK: + r = copy_from_user(&f, argp, sizeof f); + if (r < 0) + break; + eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); + if (IS_ERR(eventfp)) + return PTR_ERR(eventfp); + if (eventfp != vq->kick) { + pollstop = filep = vq->kick; + pollstart = vq->kick = eventfp; + } else + filep = eventfp; + break; + case VHOST_SET_VRING_CALL: + r = copy_from_user(&f, argp, sizeof f); + if (r < 0) + break; + eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); + if (IS_ERR(eventfp)) + return PTR_ERR(eventfp); + if (eventfp != vq->call) { + filep = vq->call; + ctx = vq->call_ctx; + vq->call = eventfp; + vq->call_ctx = eventfp ? + eventfd_ctx_fileget(eventfp) : NULL; + } else + filep = eventfp; + break; + case VHOST_SET_VRING_ERR: + r = copy_from_user(&f, argp, sizeof f); + if (r < 0) + break; + eventfp = f.fd == -1 ? NULL : eventfd_fget(f.fd); + if (IS_ERR(eventfp)) + return PTR_ERR(eventfp); + if (eventfp != vq->error) { + filep = vq->error; + vq->error = eventfp; + ctx = vq->error_ctx; + vq->error_ctx = eventfp ? 
+ eventfd_ctx_fileget(eventfp) : NULL; + } else + filep = eventfp; + break; + default: + r = -ENOIOCTLCMD; + } + + if (pollstop && vq->handle_kick) + vhost_poll_stop(&vq->poll); + + if (ctx) + eventfd_ctx_put(ctx); + if (filep) + fput(filep); + + if (pollstart && vq->handle_kick) + vhost_poll_start(&vq->poll, vq->kick); + + mutex_unlock(&vq->mutex); + + if (pollstop && vq->handle_kick) + vhost_poll_flush(&vq->poll); + return r; +} + +/* Caller must have device mutex */ +long vhost_dev_ioctl(struct vhost_dev *d, unsigned int ioctl, unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct file *eventfp, *filep = NULL; + struct eventfd_ctx *ctx = NULL; + u64 p; + long r; + int i, fd; + + /* If you are not the owner, you can become one */ + if (ioctl == VHOST_SET_OWNER) { + r = vhost_dev_set_owner(d); + goto done; + } + + /* You must be the owner to do anything else */ + r = vhost_dev_check_owner(d); + if (r) + goto done; + + switch (ioctl) { + case VHOST_SET_MEM_TABLE: + r = vhost_set_memory(d, argp); + break; + case VHOST_SET_LOG_BASE: + r = copy_from_user(&p, argp, sizeof p); + if (r < 0) + break; + if ((u64)(unsigned long)p != p) { + r = -EFAULT; + break; + } + for (i = 0; i < d->nvqs; ++i) { + struct vhost_virtqueue *vq; + void __user *base = (void __user *)(unsigned long)p; + vq = d->vqs + i; + mutex_lock(&vq->mutex); + /* If ring is inactive, will check when it's enabled. */ + if (vq->private_data && !vq_log_access_ok(vq, base)) + r = -EFAULT; + else + vq->log_base = base; + mutex_unlock(&vq->mutex); + } + break; + case VHOST_SET_LOG_FD: + r = get_user(fd, (int __user *)argp); + if (r < 0) + break; + eventfp = fd == -1 ? NULL : eventfd_fget(fd); + if (IS_ERR(eventfp)) { + r = PTR_ERR(eventfp); + break; + } + if (eventfp != d->log_file) { + filep = d->log_file; + ctx = d->log_ctx; + d->log_ctx = eventfp ? + eventfd_ctx_fileget(eventfp) : NULL; + } else + filep = eventfp; + for (i = 0; i < d->nvqs; ++i) { + mutex_lock(&d->vqs[i].mutex); + d->vqs[i].log_ctx = d->log_ctx; + mutex_unlock(&d->vqs[i].mutex); + } + if (ctx) + eventfd_ctx_put(ctx); + if (filep) + fput(filep); + break; + default: + r = vhost_set_vring(d, ioctl, argp); + break; + } +done: + return r; +} + +static const struct vhost_memory_region *find_region(struct vhost_memory *mem, + __u64 addr, __u32 len) +{ + struct vhost_memory_region *reg; + int i; + /* linear search is not brilliant, but we really have on the order of 6 + * regions in practice */ + for (i = 0; i < mem->nregions; ++i) { + reg = mem->regions + i; + if (reg->guest_phys_addr <= addr && + reg->guest_phys_addr + reg->memory_size - 1 >= addr) + return reg; + } + return NULL; +} + +/* TODO: This is really inefficient. We need something like get_user() + * (instruction directly accesses the data, with an exception table entry + * returning -EFAULT). See Documentation/x86/exception-tables.txt. 
+ */ +static int set_bit_to_user(int nr, void __user *addr) +{ + unsigned long log = (unsigned long)addr; + struct page *page; + void *base; + int bit = nr + (log % PAGE_SIZE) * 8; + int r; + r = get_user_pages_fast(log, 1, 1, &page); + if (r) + return r; + base = kmap_atomic(page, KM_USER0); + set_bit(bit, base); + kunmap_atomic(base, KM_USER0); + set_page_dirty_lock(page); + put_page(page); + return 0; +} + +static int log_write(void __user *log_base, + u64 write_address, u64 write_length) +{ + int r; + if (!write_length) + return 0; + write_address /= VHOST_PAGE_SIZE; + for (;;) { + u64 base = (u64)(unsigned long)log_base; + u64 log = base + write_address / 8; + int bit = write_address % 8; + if ((u64)(unsigned long)log != log) + return -EFAULT; + r = set_bit_to_user(bit, (void __user *)(unsigned long)log); + if (r < 0) + return r; + if (write_length <= VHOST_PAGE_SIZE) + break; + write_length -= VHOST_PAGE_SIZE; + write_address += VHOST_PAGE_SIZE; + } + return r; +} + +int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, + unsigned int log_num, u64 len) +{ + int i, r; + + /* Make sure data written is seen before log. */ + wmb(); + for (i = 0; i < log_num; ++i) { + u64 l = min(log[i].len, len); + r = log_write(vq->log_base, log[i].addr, l); + if (r < 0) + return r; + len -= l; + if (!len) + return 0; + } + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + /* Length written exceeds what we have stored. This is a bug. */ + BUG(); + return 0; +} + +int translate_desc(struct vhost_dev *dev, u64 addr, u32 len, + struct iovec iov[], int iov_size) +{ + const struct vhost_memory_region *reg; + struct vhost_memory *mem; + struct iovec *_iov; + u64 s = 0; + int ret = 0; + + rcu_read_lock(); + + mem = rcu_dereference(dev->memory); + while ((u64)len > s) { + u64 size; + if (ret >= iov_size) { + ret = -ENOBUFS; + break; + } + reg = find_region(mem, addr, len); + if (!reg) { + ret = -EFAULT; + break; + } + _iov = iov + ret; + size = reg->memory_size - addr + reg->guest_phys_addr; + _iov->iov_len = min((u64)len, size); + _iov->iov_base = (void *)(unsigned long) + (reg->userspace_addr + addr - reg->guest_phys_addr); + s += size; + addr += size; + ++ret; + } + + rcu_read_unlock(); + return ret; +} + +/* Each buffer in the virtqueues is actually a chain of descriptors. This + * function returns the next descriptor in the chain, + * or -1U if we're at the end. */ +static unsigned next_desc(struct vring_desc *desc) +{ + unsigned int next; + + /* If this descriptor says it doesn't chain, we're done. */ + if (!(desc->flags & VRING_DESC_F_NEXT)) + return -1U; + + /* Check they're not leading us off end of descriptors. */ + next = desc->next; + /* Make sure compiler knows to grab that: we don't want it changing! */ + /* We will use the result as an index in an array, so most + * architectures only need a compiler barrier here. 
*/ + read_barrier_depends(); + + return next; +} + +static unsigned get_indirect(struct vhost_dev *dev, struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num, + struct vring_desc *indirect) +{ + struct vring_desc desc; + unsigned int i = 0, count, found = 0; + int ret; + + /* Sanity check */ + if (indirect->len % sizeof desc) { + vq_err(vq, "Invalid length in indirect descriptor: " + "len 0x%llx not multiple of 0x%zx\n", + (unsigned long long)indirect->len, + sizeof desc); + return -EINVAL; + } + + ret = translate_desc(dev, indirect->addr, indirect->len, vq->indirect, + ARRAY_SIZE(vq->indirect)); + if (ret < 0) { + vq_err(vq, "Translation failure %d in indirect.\n", ret); + return ret; + } + + /* We will use the result as an address to read from, so most + * architectures only need a compiler barrier here. */ + read_barrier_depends(); + + count = indirect->len / sizeof desc; + /* Buffers are chained via a 16 bit next field, so + * we can have at most 2^16 of these. */ + if (count > USHORT_MAX + 1) { + vq_err(vq, "Indirect buffer length too big: %d\n", + indirect->len); + return -E2BIG; + } + + do { + unsigned iov_count = *in_num + *out_num; + if (++found > count) { + vq_err(vq, "Loop detected: last one at %u " + "indirect size %u\n", + i, count); + return -EINVAL; + } + if (memcpy_fromiovec((unsigned char *)&desc, vq->indirect, + sizeof desc)) { + vq_err(vq, "Failed indirect descriptor: idx %d, %zx\n", + i, (size_t)indirect->addr + i * sizeof desc); + return -EINVAL; + } + if (desc.flags & VRING_DESC_F_INDIRECT) { + vq_err(vq, "Nested indirect descriptor: idx %d, %zx\n", + i, (size_t)indirect->addr + i * sizeof desc); + return -EINVAL; + } + + ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count, + iov_size - iov_count); + if (ret < 0) { + vq_err(vq, "Translation failure %d indirect idx %d\n", + ret, i); + return ret; + } + /* If this is an input descriptor, increment that count. */ + if (desc.flags & VRING_DESC_F_WRITE) { + *in_num += ret; + if (unlikely(log)) { + log[*log_num].addr = desc.addr; + log[*log_num].len = desc.len; + ++*log_num; + } + } else { + /* If it's an output descriptor, they're all supposed + * to come before any input descriptors. */ + if (*in_num) { + vq_err(vq, "Indirect descriptor " + "has out after in: idx %d\n", i); + return -EINVAL; + } + *out_num += ret; + } + } while ((i = next_desc(&desc)) != -1); + return 0; +} + +/* This looks in the virtqueue and for the first available buffer, and converts + * it to an iovec for convenient access. Since descriptors consist of some + * number of output then some number of input descriptors, it's actually two + * iovecs, but we pack them into one and note how many of each there were. + * + * This function returns the descriptor number found, or vq->num (which + * is never a valid descriptor number) if none was found. */ +unsigned vhost_get_vq_desc(struct vhost_dev *dev, struct vhost_virtqueue *vq, + struct iovec iov[], unsigned int iov_size, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num) +{ + struct vring_desc desc; + unsigned int i, head, found = 0; + u16 last_avail_idx; + int ret; + + /* Check it isn't doing very strange things with descriptor numbers. 
*/ + last_avail_idx = vq->last_avail_idx; + if (get_user(vq->avail_idx, &vq->avail->idx)) { + vq_err(vq, "Failed to access avail idx at %p\n", + &vq->avail->idx); + return vq->num; + } + + if ((u16)(vq->avail_idx - last_avail_idx) > vq->num) { + vq_err(vq, "Guest moved used index from %u to %u", + last_avail_idx, vq->avail_idx); + return vq->num; + } + + /* If there's nothing new since last we looked, return invalid. */ + if (vq->avail_idx == last_avail_idx) + return vq->num; + + /* Only get avail ring entries after they have been exposed by guest. */ + rmb(); + + /* Grab the next descriptor number they're advertising, and increment + * the index we've seen. */ + if (get_user(head, &vq->avail->ring[last_avail_idx % vq->num])) { + vq_err(vq, "Failed to read head: idx %d address %p\n", + last_avail_idx, + &vq->avail->ring[last_avail_idx % vq->num]); + return vq->num; + } + + /* If their number is silly, that's an error. */ + if (head >= vq->num) { + vq_err(vq, "Guest says index %u > %u is available", + head, vq->num); + return vq->num; + } + + /* When we start there are none of either input nor output. */ + *out_num = *in_num = 0; + if (unlikely(log)) + *log_num = 0; + + i = head; + do { + unsigned iov_count = *in_num + *out_num; + if (i >= vq->num) { + vq_err(vq, "Desc index is %u > %u, head = %u", + i, vq->num, head); + return vq->num; + } + if (++found > vq->num) { + vq_err(vq, "Loop detected: last one at %u " + "vq size %u head %u\n", + i, vq->num, head); + return vq->num; + } + ret = copy_from_user(&desc, vq->desc + i, sizeof desc); + if (ret) { + vq_err(vq, "Failed to get descriptor: idx %d addr %p\n", + i, vq->desc + i); + return vq->num; + } + if (desc.flags & VRING_DESC_F_INDIRECT) { + ret = get_indirect(dev, vq, iov, iov_size, + out_num, in_num, + log, log_num, &desc); + if (ret < 0) { + vq_err(vq, "Failure detected " + "in indirect descriptor at idx %d\n", i); + return vq->num; + } + continue; + } + + ret = translate_desc(dev, desc.addr, desc.len, iov + iov_count, + iov_size - iov_count); + if (ret < 0) { + vq_err(vq, "Translation failure %d descriptor idx %d\n", + ret, i); + return vq->num; + } + if (desc.flags & VRING_DESC_F_WRITE) { + /* If this is an input descriptor, + * increment that count. */ + *in_num += ret; + if (unlikely(log)) { + log[*log_num].addr = desc.addr; + log[*log_num].len = desc.len; + ++*log_num; + } + } else { + /* If it's an output descriptor, they're all supposed + * to come before any input descriptors. */ + if (*in_num) { + vq_err(vq, "Descriptor has out after in: " + "idx %d\n", i); + return vq->num; + } + *out_num += ret; + } + } while ((i = next_desc(&desc)) != -1); + + /* On success, increment avail index. */ + vq->last_avail_idx++; + return head; +} + +/* Reverse the effect of vhost_get_vq_desc. Useful for error handling. */ +void vhost_discard_vq_desc(struct vhost_virtqueue *vq) +{ + vq->last_avail_idx--; +} + +/* After we've used one of their buffers, we tell them about it. We'll then + * want to notify the guest, using eventfd. */ +int vhost_add_used(struct vhost_virtqueue *vq, unsigned int head, int len) +{ + struct vring_used_elem *used; + + /* The virtqueue contains a ring of used buffers. Get a pointer to the + * next entry in that used ring. 
*/ + used = &vq->used->ring[vq->last_used_idx % vq->num]; + if (put_user(head, &used->id)) { + vq_err(vq, "Failed to write used id"); + return -EFAULT; + } + if (put_user(len, &used->len)) { + vq_err(vq, "Failed to write used len"); + return -EFAULT; + } + /* Make sure buffer is written before we update index. */ + wmb(); + if (put_user(vq->last_used_idx + 1, &vq->used->idx)) { + vq_err(vq, "Failed to increment used idx"); + return -EFAULT; + } + if (unlikely(vq->log_used)) { + /* Make sure data is seen before log. */ + wmb(); + log_write(vq->log_base, vq->log_addr + sizeof *vq->used->ring * + (vq->last_used_idx % vq->num), + sizeof *vq->used->ring); + log_write(vq->log_base, vq->log_addr, sizeof *vq->used->ring); + if (vq->log_ctx) + eventfd_signal(vq->log_ctx, 1); + } + vq->last_used_idx++; + return 0; +} + +/* This actually signals the guest, using eventfd. */ +void vhost_signal(struct vhost_dev *dev, struct vhost_virtqueue *vq) +{ + __u16 flags = 0; + if (get_user(flags, &vq->avail->flags)) { + vq_err(vq, "Failed to get flags"); + return; + } + + /* If they don't want an interrupt, don't signal, unless empty. */ + if ((flags & VRING_AVAIL_F_NO_INTERRUPT) && + (vq->avail_idx != vq->last_avail_idx || + !vhost_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY))) + return; + + /* Signal the Guest tell them we used something up. */ + if (vq->call_ctx) + eventfd_signal(vq->call_ctx, 1); +} + +/* And here's the combo meal deal. Supersize me! */ +void vhost_add_used_and_signal(struct vhost_dev *dev, + struct vhost_virtqueue *vq, + unsigned int head, int len) +{ + vhost_add_used(vq, head, len); + vhost_signal(dev, vq); +} + +/* OK, now we need to know about added descriptors. */ +bool vhost_enable_notify(struct vhost_virtqueue *vq) +{ + u16 avail_idx; + int r; + if (!(vq->used_flags & VRING_USED_F_NO_NOTIFY)) + return false; + vq->used_flags &= ~VRING_USED_F_NO_NOTIFY; + r = put_user(vq->used_flags, &vq->used->flags); + if (r) { + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); + return false; + } + /* They could have slipped one in as we were doing that: make + * sure it's written, then check again. */ + mb(); + r = get_user(avail_idx, &vq->avail->idx); + if (r) { + vq_err(vq, "Failed to check avail idx at %p: %d\n", + &vq->avail->idx, r); + return false; + } + + return avail_idx != vq->last_avail_idx; +} + +/* We don't need to be notified again. */ +void vhost_disable_notify(struct vhost_virtqueue *vq) +{ + int r; + if (vq->used_flags & VRING_USED_F_NO_NOTIFY) + return; + vq->used_flags |= VRING_USED_F_NO_NOTIFY; + r = put_user(vq->used_flags, &vq->used->flags); + if (r) + vq_err(vq, "Failed to enable notification at %p: %d\n", + &vq->used->flags, r); +} + +int vhost_init(void) +{ + vhost_workqueue = create_singlethread_workqueue("vhost"); + if (!vhost_workqueue) + return -ENOMEM; + return 0; +} + +void vhost_cleanup(void) +{ + destroy_workqueue(vhost_workqueue); +} diff --git a/drivers/vhost/vhost.h b/drivers/vhost/vhost.h new file mode 100644 index 000000000000..44591ba9b07a --- /dev/null +++ b/drivers/vhost/vhost.h @@ -0,0 +1,161 @@ +#ifndef _VHOST_H +#define _VHOST_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct vhost_device; + +enum { + /* Enough place for all fragments, head, and virtio net header. */ + VHOST_NET_MAX_SG = MAX_SKB_FRAGS + 2, +}; + +/* Poll a file (eventfd or socket) */ +/* Note: there's nothing vhost specific about this structure. 
*/ +struct vhost_poll { + poll_table table; + wait_queue_head_t *wqh; + wait_queue_t wait; + /* struct which will handle all actual work. */ + struct work_struct work; + unsigned long mask; +}; + +void vhost_poll_init(struct vhost_poll *poll, work_func_t func, + unsigned long mask); +void vhost_poll_start(struct vhost_poll *poll, struct file *file); +void vhost_poll_stop(struct vhost_poll *poll); +void vhost_poll_flush(struct vhost_poll *poll); +void vhost_poll_queue(struct vhost_poll *poll); + +struct vhost_log { + u64 addr; + u64 len; +}; + +/* The virtqueue structure describes a queue attached to a device. */ +struct vhost_virtqueue { + struct vhost_dev *dev; + + /* The actual ring of buffers. */ + struct mutex mutex; + unsigned int num; + struct vring_desc __user *desc; + struct vring_avail __user *avail; + struct vring_used __user *used; + struct file *kick; + struct file *call; + struct file *error; + struct eventfd_ctx *call_ctx; + struct eventfd_ctx *error_ctx; + struct eventfd_ctx *log_ctx; + + struct vhost_poll poll; + + /* The routine to call when the Guest pings us, or timeout. */ + work_func_t handle_kick; + + /* Last available index we saw. */ + u16 last_avail_idx; + + /* Caches available index value from user. */ + u16 avail_idx; + + /* Last index we used. */ + u16 last_used_idx; + + /* Used flags */ + u16 used_flags; + + /* Log writes to used structure. */ + bool log_used; + u64 log_addr; + + struct iovec indirect[VHOST_NET_MAX_SG]; + struct iovec iov[VHOST_NET_MAX_SG]; + struct iovec hdr[VHOST_NET_MAX_SG]; + size_t hdr_size; + /* We use a kind of RCU to access private pointer. + * All readers access it from workqueue, which makes it possible to + * flush the workqueue instead of synchronize_rcu. Therefore readers do + * not need to call rcu_read_lock/rcu_read_unlock: the beginning of + * work item execution acts instead of rcu_read_lock() and the end of + * work item execution acts instead of rcu_read_lock(). + * Writers use virtqueue mutex. */ + void *private_data; + /* Log write descriptors */ + void __user *log_base; + struct vhost_log log[VHOST_NET_MAX_SG]; +}; + +struct vhost_dev { + /* Readers use RCU to access memory table pointer + * log base pointer and features. 
+ * Writers use mutex below.*/ + struct vhost_memory *memory; + struct mm_struct *mm; + struct mutex mutex; + unsigned acked_features; + struct vhost_virtqueue *vqs; + int nvqs; + struct file *log_file; + struct eventfd_ctx *log_ctx; +}; + +long vhost_dev_init(struct vhost_dev *, struct vhost_virtqueue *vqs, int nvqs); +long vhost_dev_check_owner(struct vhost_dev *); +long vhost_dev_reset_owner(struct vhost_dev *); +void vhost_dev_cleanup(struct vhost_dev *); +long vhost_dev_ioctl(struct vhost_dev *, unsigned int ioctl, unsigned long arg); +int vhost_vq_access_ok(struct vhost_virtqueue *vq); +int vhost_log_access_ok(struct vhost_dev *); + +unsigned vhost_get_vq_desc(struct vhost_dev *, struct vhost_virtqueue *, + struct iovec iov[], unsigned int iov_count, + unsigned int *out_num, unsigned int *in_num, + struct vhost_log *log, unsigned int *log_num); +void vhost_discard_vq_desc(struct vhost_virtqueue *); + +int vhost_add_used(struct vhost_virtqueue *, unsigned int head, int len); +void vhost_signal(struct vhost_dev *, struct vhost_virtqueue *); +void vhost_add_used_and_signal(struct vhost_dev *, struct vhost_virtqueue *, + unsigned int head, int len); +void vhost_disable_notify(struct vhost_virtqueue *); +bool vhost_enable_notify(struct vhost_virtqueue *); + +int vhost_log_write(struct vhost_virtqueue *vq, struct vhost_log *log, + unsigned int log_num, u64 len); + +int vhost_init(void); +void vhost_cleanup(void); + +#define vq_err(vq, fmt, ...) do { \ + pr_debug(pr_fmt(fmt), ##__VA_ARGS__); \ + if ((vq)->error_ctx) \ + eventfd_signal((vq)->error_ctx, 1);\ + } while (0) + +enum { + VHOST_FEATURES = (1 << VIRTIO_F_NOTIFY_ON_EMPTY) | + (1 << VIRTIO_RING_F_INDIRECT_DESC) | + (1 << VHOST_F_LOG_ALL) | + (1 << VHOST_NET_F_VIRTIO_NET_HDR), +}; + +static inline int vhost_has_feature(struct vhost_dev *dev, int bit) +{ + unsigned acked_features = rcu_dereference(dev->acked_features); + return acked_features & (1 << bit); +} + +#endif diff --git a/include/linux/Kbuild b/include/linux/Kbuild index 756f831cbdd5..d93080748a91 100644 --- a/include/linux/Kbuild +++ b/include/linux/Kbuild @@ -362,6 +362,7 @@ unifdef-y += uio.h unifdef-y += unistd.h unifdef-y += usbdevice_fs.h unifdef-y += utsname.h +unifdef-y += vhost.h unifdef-y += videodev2.h unifdef-y += videodev.h unifdef-y += virtio_config.h diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index adaf3c15e449..8b5f7cc0fba6 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -30,6 +30,7 @@ #define HPET_MINOR 228 #define FUSE_MINOR 229 #define KVM_MINOR 232 +#define VHOST_NET_MINOR 233 #define MISC_DYNAMIC_MINOR 255 struct device; diff --git a/include/linux/vhost.h b/include/linux/vhost.h new file mode 100644 index 000000000000..e847f1e30756 --- /dev/null +++ b/include/linux/vhost.h @@ -0,0 +1,130 @@ +#ifndef _LINUX_VHOST_H +#define _LINUX_VHOST_H +/* Userspace interface for in-kernel virtio accelerators. */ + +/* vhost is used to reduce the number of system calls involved in virtio. + * + * Existing virtio net code is used in the guest without modification. + * + * This header includes interface used by userspace hypervisor for + * device configuration. + */ + +#include +#include +#include +#include +#include + +struct vhost_vring_state { + unsigned int index; + unsigned int num; +}; + +struct vhost_vring_file { + unsigned int index; + int fd; /* Pass -1 to unbind from file. */ + +}; + +struct vhost_vring_addr { + unsigned int index; + /* Option flags. 
*/ + unsigned int flags; + /* Flag values: */ + /* Whether log address is valid. If set enables logging. */ +#define VHOST_VRING_F_LOG 0 + + /* Start of array of descriptors (virtually contiguous) */ + __u64 desc_user_addr; + /* Used structure address. Must be 32 bit aligned */ + __u64 used_user_addr; + /* Available structure address. Must be 16 bit aligned */ + __u64 avail_user_addr; + /* Logging support. */ + /* Log writes to used structure, at offset calculated from specified + * address. Address must be 32 bit aligned. */ + __u64 log_guest_addr; +}; + +struct vhost_memory_region { + __u64 guest_phys_addr; + __u64 memory_size; /* bytes */ + __u64 userspace_addr; + __u64 flags_padding; /* No flags are currently specified. */ +}; + +/* All region addresses and sizes must be 4K aligned. */ +#define VHOST_PAGE_SIZE 0x1000 + +struct vhost_memory { + __u32 nregions; + __u32 padding; + struct vhost_memory_region regions[0]; +}; + +/* ioctls */ + +#define VHOST_VIRTIO 0xAF + +/* Features bitmask for forward compatibility. Transport bits are used for + * vhost specific features. */ +#define VHOST_GET_FEATURES _IOR(VHOST_VIRTIO, 0x00, __u64) +#define VHOST_SET_FEATURES _IOW(VHOST_VIRTIO, 0x00, __u64) + +/* Set current process as the (exclusive) owner of this file descriptor. This + * must be called before any other vhost command. Further calls to + * VHOST_OWNER_SET fail until VHOST_OWNER_RESET is called. */ +#define VHOST_SET_OWNER _IO(VHOST_VIRTIO, 0x01) +/* Give up ownership, and reset the device to default values. + * Allows subsequent call to VHOST_OWNER_SET to succeed. */ +#define VHOST_RESET_OWNER _IO(VHOST_VIRTIO, 0x02) + +/* Set up/modify memory layout */ +#define VHOST_SET_MEM_TABLE _IOW(VHOST_VIRTIO, 0x03, struct vhost_memory) + +/* Write logging setup. */ +/* Memory writes can optionally be logged by setting bit at an offset + * (calculated from the physical address) from specified log base. + * The bit is set using an atomic 32 bit operation. */ +/* Set base address for logging. */ +#define VHOST_SET_LOG_BASE _IOW(VHOST_VIRTIO, 0x04, __u64) +/* Specify an eventfd file descriptor to signal on log write. */ +#define VHOST_SET_LOG_FD _IOW(VHOST_VIRTIO, 0x07, int) + +/* Ring setup. */ +/* Set number of descriptors in ring. This parameter can not + * be modified while ring is running (bound to a device). */ +#define VHOST_SET_VRING_NUM _IOW(VHOST_VIRTIO, 0x10, struct vhost_vring_state) +/* Set addresses for the ring. */ +#define VHOST_SET_VRING_ADDR _IOW(VHOST_VIRTIO, 0x11, struct vhost_vring_addr) +/* Base value where queue looks for available descriptors */ +#define VHOST_SET_VRING_BASE _IOW(VHOST_VIRTIO, 0x12, struct vhost_vring_state) +/* Get accessor: reads index, writes value in num */ +#define VHOST_GET_VRING_BASE _IOWR(VHOST_VIRTIO, 0x12, struct vhost_vring_state) + +/* The following ioctls use eventfd file descriptors to signal and poll + * for events. */ + +/* Set eventfd to poll for added buffers */ +#define VHOST_SET_VRING_KICK _IOW(VHOST_VIRTIO, 0x20, struct vhost_vring_file) +/* Set eventfd to signal when buffers have beed used */ +#define VHOST_SET_VRING_CALL _IOW(VHOST_VIRTIO, 0x21, struct vhost_vring_file) +/* Set eventfd to signal an error */ +#define VHOST_SET_VRING_ERR _IOW(VHOST_VIRTIO, 0x22, struct vhost_vring_file) + +/* VHOST_NET specific defines */ + +/* Attach virtio net ring to a raw socket, or tap device. + * The socket must be already bound to an ethernet device, this device will be + * used for transmit. 
Pass fd -1 to unbind from the socket and the transmit + * device. This can be used to stop the ring (e.g. for migration). */ +#define VHOST_NET_SET_BACKEND _IOW(VHOST_VIRTIO, 0x30, struct vhost_vring_file) + +/* Feature bits */ +/* Log all write descriptors. Can be changed while device is active. */ +#define VHOST_F_LOG_ALL 26 +/* vhost-net should add virtio_net_hdr for RX, and strip for TX packets. */ +#define VHOST_NET_F_VIRTIO_NET_HDR 27 + +#endif -- cgit v1.2.3-70-g09d2 From c084ca704a3661bf77690a05bc6bd2c305d87c34 Mon Sep 17 00:00:00 2001 From: Xiaotian Feng Date: Thu, 10 Dec 2009 19:56:45 +0800 Subject: ACPI: don't cond_resched if irq is disabled commit 8bd108d adds preemption point after each opcode parse, then a sleeping function called from invalid context bug was founded during suspend/resume stage. this was fixed in commit abe1dfa by don't cond_resched when irq_disabled. But recent commit 138d156 changes the behaviour to don't cond_resched when in_atomic. This makes the sleeping function called from invalid context bug happen again, which is reported in http://lkml.org/lkml/2009/12/1/371. This patch also fixes http://bugzilla.kernel.org/show_bug.cgi?id=14483 Reported-and-bisected-by: Larry Finger Reported-and-bisected-by: Justin P. Mattock Signed-off-by: Xiaotian Feng Acked-by: Alexey Starikovskiy Signed-off-by: Len Brown --- include/acpi/platform/aclinux.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/acpi/platform/aclinux.h b/include/acpi/platform/aclinux.h index 9d7febde10a1..09469971472f 100644 --- a/include/acpi/platform/aclinux.h +++ b/include/acpi/platform/aclinux.h @@ -152,7 +152,7 @@ static inline void *acpi_os_acquire_object(acpi_cache_t * cache) #include #define ACPI_PREEMPTION_POINT() \ do { \ - if (!in_atomic_preempt_off()) \ + if (!in_atomic_preempt_off() && !irqs_disabled()) \ cond_resched(); \ } while (0) -- cgit v1.2.3-70-g09d2 From d00c362f1b0ff54161e0a42b4554ac621a9ef92d Mon Sep 17 00:00:00 2001 From: Jarek Poplawski Date: Sat, 16 Jan 2010 01:04:04 -0800 Subject: ax25: netrom: rose: Fix timer oopses Wrong ax25_cb refcounting in ax25_send_frame() and by its callers can cause timer oopses (first reported with 2.6.29.6 kernel). Fixes: http://bugzilla.kernel.org/show_bug.cgi?id=14905 Reported-by: Bernard Pidoux Tested-by: Bernard Pidoux Signed-off-by: Jarek Poplawski Signed-off-by: David S. Miller --- include/net/netrom.h | 2 ++ net/ax25/ax25_out.c | 6 ++++++ net/netrom/nr_route.c | 11 ++++++----- net/rose/rose_link.c | 8 ++++++++ net/rose/rose_route.c | 5 +++++ 5 files changed, 27 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/net/netrom.h b/include/net/netrom.h index 15696b1fd30f..ab170a60e7d3 100644 --- a/include/net/netrom.h +++ b/include/net/netrom.h @@ -132,6 +132,8 @@ static __inline__ void nr_node_put(struct nr_node *nr_node) static __inline__ void nr_neigh_put(struct nr_neigh *nr_neigh) { if (atomic_dec_and_test(&nr_neigh->refcount)) { + if (nr_neigh->ax25) + ax25_cb_put(nr_neigh->ax25); kfree(nr_neigh->digipeat); kfree(nr_neigh); } diff --git a/net/ax25/ax25_out.c b/net/ax25/ax25_out.c index bf706f83a5c9..14912600ec57 100644 --- a/net/ax25/ax25_out.c +++ b/net/ax25/ax25_out.c @@ -92,6 +92,12 @@ ax25_cb *ax25_send_frame(struct sk_buff *skb, int paclen, ax25_address *src, ax2 #endif } + /* + * There is one ref for the state machine; a caller needs + * one more to put it back, just like with the existing one. 
+ */ + ax25_cb_hold(ax25); + ax25_cb_add(ax25); ax25->state = AX25_STATE_1; diff --git a/net/netrom/nr_route.c b/net/netrom/nr_route.c index aacba76070fc..e2e2d33cafdf 100644 --- a/net/netrom/nr_route.c +++ b/net/netrom/nr_route.c @@ -843,12 +843,13 @@ int nr_route_frame(struct sk_buff *skb, ax25_cb *ax25) dptr = skb_push(skb, 1); *dptr = AX25_P_NETROM; - ax25s = ax25_send_frame(skb, 256, (ax25_address *)dev->dev_addr, &nr_neigh->callsign, nr_neigh->digipeat, nr_neigh->dev); - if (nr_neigh->ax25 && ax25s) { - /* We were already holding this ax25_cb */ + ax25s = nr_neigh->ax25; + nr_neigh->ax25 = ax25_send_frame(skb, 256, + (ax25_address *)dev->dev_addr, + &nr_neigh->callsign, + nr_neigh->digipeat, nr_neigh->dev); + if (ax25s) ax25_cb_put(ax25s); - } - nr_neigh->ax25 = ax25s; dev_put(dev); ret = (nr_neigh->ax25 != NULL); diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index bd86a63960ce..5ef5f6988a2e 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -101,13 +101,17 @@ static void rose_t0timer_expiry(unsigned long param) static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh) { ax25_address *rose_call; + ax25_cb *ax25s; if (ax25cmp(&rose_callsign, &null_ax25_address) == 0) rose_call = (ax25_address *)neigh->dev->dev_addr; else rose_call = &rose_callsign; + ax25s = neigh->ax25; neigh->ax25 = ax25_send_frame(skb, 260, rose_call, &neigh->callsign, neigh->digipeat, neigh->dev); + if (ax25s) + ax25_cb_put(ax25s); return (neigh->ax25 != NULL); } @@ -120,13 +124,17 @@ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh) static int rose_link_up(struct rose_neigh *neigh) { ax25_address *rose_call; + ax25_cb *ax25s; if (ax25cmp(&rose_callsign, &null_ax25_address) == 0) rose_call = (ax25_address *)neigh->dev->dev_addr; else rose_call = &rose_callsign; + ax25s = neigh->ax25; neigh->ax25 = ax25_find_cb(rose_call, &neigh->callsign, neigh->digipeat, neigh->dev); + if (ax25s) + ax25_cb_put(ax25s); return (neigh->ax25 != NULL); } diff --git a/net/rose/rose_route.c b/net/rose/rose_route.c index 795c4b025e31..70a0b3b4b4d2 100644 --- a/net/rose/rose_route.c +++ b/net/rose/rose_route.c @@ -235,6 +235,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh) if ((s = rose_neigh_list) == rose_neigh) { rose_neigh_list = rose_neigh->next; + if (rose_neigh->ax25) + ax25_cb_put(rose_neigh->ax25); kfree(rose_neigh->digipeat); kfree(rose_neigh); return; @@ -243,6 +245,8 @@ static void rose_remove_neigh(struct rose_neigh *rose_neigh) while (s != NULL && s->next != NULL) { if (s->next == rose_neigh) { s->next = rose_neigh->next; + if (rose_neigh->ax25) + ax25_cb_put(rose_neigh->ax25); kfree(rose_neigh->digipeat); kfree(rose_neigh); return; @@ -812,6 +816,7 @@ void rose_link_failed(ax25_cb *ax25, int reason) if (rose_neigh != NULL) { rose_neigh->ax25 = NULL; + ax25_cb_put(ax25); rose_del_route_by_neigh(rose_neigh); rose_kill_by_neigh(rose_neigh); -- cgit v1.2.3-70-g09d2 From 7e105057a34c83cea542dacc55ff0528bce67afa Mon Sep 17 00:00:00 2001 From: Stefani Seibold Date: Fri, 15 Jan 2010 17:01:02 -0800 Subject: kfifo: fix kfifo_out_locked race bug Fix a wrong optimization in include/linux/kfifo.h which could cause a race in kfifo_out_locked. 
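For illustration only (not part of this patch; the fifo, lock and function names below are made up), this is the documented one-writer/one-reader pattern that the removed "optimization" could break: the writer never takes the reader's spinlock, so resetting both indices from inside kfifo_out_locked() can wipe out data the writer is adding concurrently.

#include <linux/kfifo.h>
#include <linux/spinlock.h>

static struct kfifo rx_fifo;		/* assume kfifo_alloc()'d at init time */
static DEFINE_SPINLOCK(rx_lock);	/* serializes readers, not the writer */

/* single writer, e.g. an interrupt handler: no lock needed */
static void rx_push(const unsigned char *data, unsigned int len)
{
	/* bytes that do not fit in the fifo are dropped */
	kfifo_in(&rx_fifo, data, len);
}

/* reader running in process context */
static unsigned int rx_pop(unsigned char *buf, unsigned int len)
{
	return kfifo_out_locked(&rx_fifo, buf, len, &rx_lock);
}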
Signed-off-by: Stefani Seibold Reported-by: Johan Hovold Cc: Pete Zaitcev Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 7c6b32a1421c..c4ac88b3c302 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -228,13 +228,6 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo, ret = kfifo_out(fifo, to, n); - /* - * optimization: if the FIFO is empty, set the indices to 0 - * so we don't wrap the next time - */ - if (kfifo_is_empty(fifo)) - kfifo_reset(fifo); - spin_unlock_irqrestore(lock, flags); return ret; -- cgit v1.2.3-70-g09d2 From 2427b8e3eaea3719e53bbed7b3375382c3aa6f13 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Fri, 15 Jan 2010 17:01:11 -0800 Subject: tty.h: make tty_port_get() static inline I get a few dozen of these warnings when using gcc (GCC) 4.4.1 20090725 (Red Hat 4.4.1-2): In file included from mmotm-2010-0113-1217/init/do_mounts.c:5: mmotm-2010-0113-1217/include/linux/tty.h: In function 'tty_port_get': mmotm-2010-0113-1217/include/linux/tty.h:469: warning: '______f' is static but declared in inline function 'tty_port_get' which is not static so make the function static inline. [akpm@linux-foundation.org: may as well convert tty_port_users() also] Signed-off-by: Randy Dunlap Cc: Alan Cox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/tty.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/tty.h b/include/linux/tty.h index ef3a2947b102..6abfcf5b5887 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -464,7 +464,7 @@ extern int tty_port_alloc_xmit_buf(struct tty_port *port); extern void tty_port_free_xmit_buf(struct tty_port *port); extern void tty_port_put(struct tty_port *port); -extern inline struct tty_port *tty_port_get(struct tty_port *port) +static inline struct tty_port *tty_port_get(struct tty_port *port) { if (port) kref_get(&port->kref); @@ -486,7 +486,7 @@ extern void tty_port_close(struct tty_port *port, struct tty_struct *tty, struct file *filp); extern int tty_port_open(struct tty_port *port, struct tty_struct *tty, struct file *filp); -extern inline int tty_port_users(struct tty_port *port) +static inline int tty_port_users(struct tty_port *port) { return port->count + port->blocked_open; } -- cgit v1.2.3-70-g09d2 From 8ecc2951534af10e04ddb5e5ff5c6d217b79f5c2 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 15 Jan 2010 17:01:12 -0800 Subject: kfifo: use void * pointers for user buffers The pointers to user buffers are currently unsigned char *, which requires a lot of casting in the caller for any non-char typed buffers. Use void * instead. 
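As an illustration of what this means for callers (hypothetical struct and fifo names, not taken from the patch), a typed buffer can now be passed without casting:

#include <linux/types.h>
#include <linux/kfifo.h>

struct sample {
	u32 seq;
	u16 value;
};

static struct kfifo sample_fifo;	/* assume kfifo_alloc()'d at init time */

static void push_sample(const struct sample *s)
{
	/* before this change: kfifo_in(&sample_fifo, (const unsigned char *)s, sizeof(*s)); */
	kfifo_in(&sample_fifo, s, sizeof(*s));
}

static bool pop_sample(struct sample *s)
{
	return kfifo_out(&sample_fifo, s, sizeof(*s)) == sizeof(*s);
}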
Signed-off-by: Andi Kleen Acked-by: Stefani Seibold Cc: Roland Dreier Cc: Dmitry Torokhov Cc: Andy Walls Cc: Vikram Dhillon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 10 +++++----- kernel/kfifo.c | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index c4ac88b3c302..6fb495ea956a 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -104,15 +104,15 @@ union { \ #undef __kfifo_initializer -extern void kfifo_init(struct kfifo *fifo, unsigned char *buffer, +extern void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size); extern __must_check int kfifo_alloc(struct kfifo *fifo, unsigned int size, gfp_t gfp_mask); extern void kfifo_free(struct kfifo *fifo); extern unsigned int kfifo_in(struct kfifo *fifo, - const unsigned char *from, unsigned int len); + const void *from, unsigned int len); extern __must_check unsigned int kfifo_out(struct kfifo *fifo, - unsigned char *to, unsigned int len); + void *to, unsigned int len); /** * kfifo_reset - removes the entire FIFO contents @@ -194,7 +194,7 @@ static inline __must_check unsigned int kfifo_avail(struct kfifo *fifo) * bytes copied. */ static inline unsigned int kfifo_in_locked(struct kfifo *fifo, - const unsigned char *from, unsigned int n, spinlock_t *lock) + const void *from, unsigned int n, spinlock_t *lock) { unsigned long flags; unsigned int ret; @@ -219,7 +219,7 @@ static inline unsigned int kfifo_in_locked(struct kfifo *fifo, * @to buffer and returns the number of copied bytes. */ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo, - unsigned char *to, unsigned int n, spinlock_t *lock) + void *to, unsigned int n, spinlock_t *lock) { unsigned long flags; unsigned int ret; diff --git a/kernel/kfifo.c b/kernel/kfifo.c index e92d519f93b1..ab615e695052 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -28,7 +28,7 @@ #include #include -static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer, +static void _kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size) { fifo->buffer = buffer; @@ -44,7 +44,7 @@ static void _kfifo_init(struct kfifo *fifo, unsigned char *buffer, * @size: the size of the internal buffer, this have to be a power of 2. * */ -void kfifo_init(struct kfifo *fifo, unsigned char *buffer, unsigned int size) +void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size) { /* size must be a power of 2 */ BUG_ON(!is_power_of_2(size)); @@ -235,7 +235,7 @@ EXPORT_SYMBOL(__kfifo_in_n); * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. */ -unsigned int kfifo_in(struct kfifo *fifo, const unsigned char *from, +unsigned int kfifo_in(struct kfifo *fifo, const void *from, unsigned int len) { len = min(kfifo_avail(fifo), len); @@ -277,7 +277,7 @@ EXPORT_SYMBOL(__kfifo_out_n); * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. 
*/ -unsigned int kfifo_out(struct kfifo *fifo, unsigned char *to, unsigned int len) +unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len) { len = min(kfifo_len(fifo), len); -- cgit v1.2.3-70-g09d2 From 64ce1037c5434b1d036cd99ecaee6e00496bc2e9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 15 Jan 2010 17:01:15 -0800 Subject: kfifo: sanitize *_user error handling Right now for kfifo_*_user it's not easily possible to distingush between a user copy failing and the FIFO not containing enough data. The problem is that both conditions are multiplexed into the same return code. Avoid this by moving the "copy length" into a separate output parameter and only return 0/-EFAULT in the main return value. I didn't fully adapt the weird "record" variants, those seem to be unused anyways and were rather messy (should they be just removed?) I would appreciate some double checking if I did all the conversions correctly. Signed-off-by: Andi Kleen Cc: Stefani Seibold Cc: Roland Dreier Cc: Dmitry Torokhov Cc: Andy Walls Cc: Vikram Dhillon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 8 +++--- kernel/kfifo.c | 76 +++++++++++++++++++++++++++++++++------------------ 2 files changed, 53 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 6fb495ea956a..86ad50a900c8 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -235,11 +235,11 @@ static inline __must_check unsigned int kfifo_out_locked(struct kfifo *fifo, extern void kfifo_skip(struct kfifo *fifo, unsigned int len); -extern __must_check unsigned int kfifo_from_user(struct kfifo *fifo, - const void __user *from, unsigned int n); +extern __must_check int kfifo_from_user(struct kfifo *fifo, + const void __user *from, unsigned int n, unsigned *lenout); -extern __must_check unsigned int kfifo_to_user(struct kfifo *fifo, - void __user *to, unsigned int n); +extern __must_check int kfifo_to_user(struct kfifo *fifo, + void __user *to, unsigned int n, unsigned *lenout); /* * __kfifo_add_out internal helper function for updating the out offset diff --git a/kernel/kfifo.c b/kernel/kfifo.c index ab615e695052..b50bb622e8b0 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -159,8 +159,9 @@ static inline void __kfifo_out_data(struct kfifo *fifo, memcpy(to + l, fifo->buffer, len - l); } -static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo, - const void __user *from, unsigned int len, unsigned int off) +static inline int __kfifo_from_user_data(struct kfifo *fifo, + const void __user *from, unsigned int len, unsigned int off, + unsigned *lenout) { unsigned int l; int ret; @@ -177,16 +178,20 @@ static inline unsigned int __kfifo_from_user_data(struct kfifo *fifo, /* first put the data starting from fifo->in to buffer end */ l = min(len, fifo->size - off); ret = copy_from_user(fifo->buffer + off, from, l); - - if (unlikely(ret)) - return ret + len - l; + if (unlikely(ret)) { + *lenout = ret; + return -EFAULT; + } + *lenout = l; /* then put the rest (if any) at the beginning of the buffer */ - return copy_from_user(fifo->buffer, from + l, len - l); + ret = copy_from_user(fifo->buffer, from + l, len - l); + *lenout += ret ? ret : len - l; + return ret ? 
-EFAULT : 0; } -static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo, - void __user *to, unsigned int len, unsigned int off) +static inline int __kfifo_to_user_data(struct kfifo *fifo, + void __user *to, unsigned int len, unsigned int off, unsigned *lenout) { unsigned int l; int ret; @@ -203,12 +208,21 @@ static inline unsigned int __kfifo_to_user_data(struct kfifo *fifo, /* first get the data from fifo->out until the end of the buffer */ l = min(len, fifo->size - off); ret = copy_to_user(to, fifo->buffer + off, l); - - if (unlikely(ret)) - return ret + len - l; + *lenout = l; + if (unlikely(ret)) { + *lenout -= ret; + return -EFAULT; + } /* then get the rest (if any) from the beginning of the buffer */ - return copy_to_user(to + l, fifo->buffer, len - l); + len -= l; + ret = copy_to_user(to + l, fifo->buffer, len); + if (unlikely(ret)) { + *lenout += len - ret; + return -EFAULT; + } + *lenout += len; + return 0; } unsigned int __kfifo_in_n(struct kfifo *fifo, @@ -299,10 +313,13 @@ EXPORT_SYMBOL(__kfifo_out_generic); unsigned int __kfifo_from_user_n(struct kfifo *fifo, const void __user *from, unsigned int len, unsigned int recsize) { + unsigned total; + if (kfifo_avail(fifo) < len + recsize) return len + 1; - return __kfifo_from_user_data(fifo, from, len, recsize); + __kfifo_from_user_data(fifo, from, len, recsize, &total); + return total; } EXPORT_SYMBOL(__kfifo_from_user_n); @@ -313,18 +330,21 @@ EXPORT_SYMBOL(__kfifo_from_user_n); * @len: the length of the data to be added. * * This function copies at most @len bytes from the @from into the - * FIFO depending and returns the number of copied bytes. + * FIFO depending and returns -EFAULT/0. * * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. */ -unsigned int kfifo_from_user(struct kfifo *fifo, - const void __user *from, unsigned int len) +int kfifo_from_user(struct kfifo *fifo, + const void __user *from, unsigned int len, unsigned *total) { + int ret; len = min(kfifo_avail(fifo), len); - len -= __kfifo_from_user_data(fifo, from, len, 0); + ret = __kfifo_from_user_data(fifo, from, len, 0, total); + if (ret) + return ret; __kfifo_add_in(fifo, len); - return len; + return 0; } EXPORT_SYMBOL(kfifo_from_user); @@ -339,17 +359,17 @@ unsigned int __kfifo_to_user_n(struct kfifo *fifo, void __user *to, unsigned int len, unsigned int reclen, unsigned int recsize) { - unsigned int ret; + unsigned int ret, total; if (kfifo_len(fifo) < reclen + recsize) return len; - ret = __kfifo_to_user_data(fifo, to, reclen, recsize); + ret = __kfifo_to_user_data(fifo, to, reclen, recsize, &total); if (likely(ret == 0)) __kfifo_add_out(fifo, reclen + recsize); - return ret; + return total; } EXPORT_SYMBOL(__kfifo_to_user_n); @@ -358,20 +378,22 @@ EXPORT_SYMBOL(__kfifo_to_user_n); * @fifo: the fifo to be used. * @to: where the data must be copied. * @len: the size of the destination buffer. + @ @lenout: pointer to output variable with copied data * * This function copies at most @len bytes from the FIFO into the - * @to buffer and returns the number of copied bytes. + * @to buffer and 0 or -EFAULT. * * Note that with only one concurrent reader and one concurrent * writer, you don't need extra locking to use these functions. 
*/ -unsigned int kfifo_to_user(struct kfifo *fifo, - void __user *to, unsigned int len) +int kfifo_to_user(struct kfifo *fifo, + void __user *to, unsigned int len, unsigned *lenout) { + int ret; len = min(kfifo_len(fifo), len); - len -= __kfifo_to_user_data(fifo, to, len, 0); - __kfifo_add_out(fifo, len); - return len; + ret = __kfifo_to_user_data(fifo, to, len, 0, lenout); + __kfifo_add_out(fifo, *lenout); + return ret; } EXPORT_SYMBOL(kfifo_to_user); -- cgit v1.2.3-70-g09d2 From a5b9e2c1063046421ce01dcf5ddd7ec12567f3e1 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 15 Jan 2010 17:01:16 -0800 Subject: kfifo: add kfifo_out_peek In some upcoming code it's useful to peek into a FIFO without permanentely removing data. This patch implements a new kfifo_out_peek() to do this. Signed-off-by: Andi Kleen Acked-by: Stefani Seibold Cc: Roland Dreier Cc: Dmitry Torokhov Cc: Andy Walls Cc: Vikram Dhillon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 3 +++ kernel/kfifo.c | 21 +++++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 86ad50a900c8..7ad6d32dd673 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -113,6 +113,9 @@ extern unsigned int kfifo_in(struct kfifo *fifo, const void *from, unsigned int len); extern __must_check unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len); +extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo, + void *to, unsigned int len, unsigned offset); + /** * kfifo_reset - removes the entire FIFO contents diff --git a/kernel/kfifo.c b/kernel/kfifo.c index b50bb622e8b0..7384f120be87 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -302,6 +302,27 @@ unsigned int kfifo_out(struct kfifo *fifo, void *to, unsigned int len) } EXPORT_SYMBOL(kfifo_out); +/** + * kfifo_out_peek - copy some data from the FIFO, but do not remove it + * @fifo: the fifo to be used. + * @to: where the data must be copied. + * @len: the size of the destination buffer. + * @offset: offset into the fifo + * + * This function copies at most @len bytes at @offset from the FIFO + * into the @to buffer and returns the number of copied bytes. + * The data is not removed from the FIFO. + */ +unsigned int kfifo_out_peek(struct kfifo *fifo, void *to, unsigned int len, + unsigned offset) +{ + len = min(kfifo_len(fifo), len + offset); + + __kfifo_out_data(fifo, to, len, offset); + return len; +} +EXPORT_SYMBOL(kfifo_out_peek); + unsigned int __kfifo_out_generic(struct kfifo *fifo, void *to, unsigned int len, unsigned int recsize, unsigned int *total) -- cgit v1.2.3-70-g09d2 From d994ffc247f7c4a48b848f10c4c01c9b06411ada Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 15 Jan 2010 17:01:17 -0800 Subject: kfifo: add kfifo_initialized Simple inline that checks if kfifo_init() has been executed on a fifo. This is useful for walking all per CPU fifos, when some of them might not have been brought up yet. 
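A sketch of the intended use (the per-CPU fifo and function names are made up, not from the patch): when walking all per-CPU fifos, the ones that have not been kfifo_alloc()'d yet are simply skipped.

#include <linux/kfifo.h>
#include <linux/percpu.h>
#include <linux/cpumask.h>

static DEFINE_PER_CPU(struct kfifo, trace_fifo);

static void drain_trace_fifos(void)
{
	unsigned char buf[128];
	int cpu;

	for_each_possible_cpu(cpu) {
		struct kfifo *fifo = &per_cpu(trace_fifo, cpu);

		/* per-CPU data is zeroed at boot, so the buffer pointer
		 * stays NULL until kfifo_alloc() has run for this CPU */
		if (!kfifo_initialized(fifo))
			continue;
		while (kfifo_out(fifo, buf, sizeof(buf)) > 0)
			;	/* process or discard the drained bytes */
	}
}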
Signed-off-by: Andi Kleen Acked-by: Stefani Seibold Cc: Roland Dreier Cc: Dmitry Torokhov Cc: Andy Walls Cc: Vikram Dhillon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 7ad6d32dd673..c8618243ca5a 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -116,6 +116,16 @@ extern __must_check unsigned int kfifo_out(struct kfifo *fifo, extern __must_check unsigned int kfifo_out_peek(struct kfifo *fifo, void *to, unsigned int len, unsigned offset); +/** + * kfifo_initialized - Check if kfifo is initialized. + * @fifo: fifo to check + * Return %true if FIFO is initialized, otherwise %false. + * Assumes the fifo was 0 before. + */ +static inline bool kfifo_initialized(struct kfifo *fifo) +{ + return fifo->buffer != 0; +} /** * kfifo_reset - removes the entire FIFO contents -- cgit v1.2.3-70-g09d2 From 5dab600e6a153ceb64832f608069e6c08185411a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 15 Jan 2010 17:01:17 -0800 Subject: kfifo: document everywhere that size has to be power of two On my first try using them I missed that the fifos need to be power of two, resulting in a runtime bug. Document that requirement everywhere (and fix one grammar bug) Signed-off-by: Andi Kleen Acked-by: Stefani Seibold Cc: Roland Dreier Cc: Dmitry Torokhov Cc: Andy Walls Cc: Vikram Dhillon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kfifo.h | 4 ++-- kernel/kfifo.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index c8618243ca5a..6f6c5f300af6 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -67,7 +67,7 @@ struct kfifo { /** * DECLARE_KFIFO - macro to declare a kfifo and the associated buffer * @name: name of the declared kfifo datatype - * @size: size of the fifo buffer + * @size: size of the fifo buffer. Must be a power of two. * * Note1: the macro can be used inside struct or union declaration * Note2: the macro creates two objects: @@ -91,7 +91,7 @@ union { \ /** * DEFINE_KFIFO - macro to define and initialize a kfifo * @name: name of the declared kfifo datatype - * @size: size of the fifo buffer + * @size: size of the fifo buffer. Must be a power of two. * * Note1: the macro can be used for global and local kfifo data type variables * Note2: the macro creates two objects: diff --git a/kernel/kfifo.c b/kernel/kfifo.c index 7384f120be87..32c5c15d750d 100644 --- a/kernel/kfifo.c +++ b/kernel/kfifo.c @@ -41,7 +41,7 @@ static void _kfifo_init(struct kfifo *fifo, void *buffer, * kfifo_init - initialize a FIFO using a preallocated buffer * @fifo: the fifo to assign the buffer * @buffer: the preallocated buffer to be used. - * @size: the size of the internal buffer, this have to be a power of 2. + * @size: the size of the internal buffer, this has to be a power of 2. 
* */ void kfifo_init(struct kfifo *fifo, void *buffer, unsigned int size) -- cgit v1.2.3-70-g09d2 From cc8ef6eb21e964b1c5eb97b2d0e8ac9893e1bf86 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Fri, 15 Jan 2010 17:01:22 -0800 Subject: kernel.h: add BUILD_BUG_ON_NOT_POWER_OF_2() Add BUILD_BUG_ON_NOT_POWER_OF_2() When code relies on a constant being a power of 2: #define FOO 512 /* must be a power of 2 */ it would be nice to be able to do: BUILD_BUG_ON(!is_power_of_2(FOO)); However applying an inline function does not result in a compile-time constant that can be used with BUILD_BUG_ON(), so trying that gives results in: error: bit-field '' width not an integer constant As suggested by akpm, rather than monkeying around with is_power_of_2() and risking gcc warts about constant expressions, just create a macro BUILD_BUG_ON_NOT_POWER_OF_2() to encapsulate this common requirement. Signed-off-by: Roland Dreier Cc: Bart Van Assche Cc: David Dillow Cc: "Robert P. J. Day" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 3fc9f5aab5f8..328bca609b9b 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -734,6 +734,10 @@ struct sysinfo { /* Force a compilation error if condition is constant and true */ #define MAYBE_BUILD_BUG_ON(cond) ((void)sizeof(char[1 - 2 * !!(cond)])) +/* Force a compilation error if a constant expression is not a power of 2 */ +#define BUILD_BUG_ON_NOT_POWER_OF_2(n) \ + BUILD_BUG_ON((n) == 0 || (((n) & ((n) - 1)) != 0)) + /* Force a compilation error if condition is true, but also produce a result (of value 0 and type size_t), so the expression can be used e.g. in a structure initializer (or where-ever else comma expressions -- cgit v1.2.3-70-g09d2 From 1e2ae599d37e60958c03ca5e46b1f657619a30cd Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 15 Jan 2010 17:01:33 -0800 Subject: nommu: struct vm_region's vm_usage count need not be atomic The vm_usage count field in struct vm_region does not need to be atomic as it's only even modified whilst nommu_region_sem is write locked. 
Signed-off-by: David Howells Acked-by: Al Viro Cc: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 +- mm/nommu.c | 14 +++++++------- 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 84d020bed083..80cfa78a8cf6 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -122,7 +122,7 @@ struct vm_region { unsigned long vm_pgoff; /* the offset in vm_file corresponding to vm_start */ struct file *vm_file; /* the backing file or NULL */ - atomic_t vm_usage; /* region usage count */ + int vm_usage; /* region usage count (access under nommu_region_sem) */ bool vm_icache_flushed : 1; /* true if the icache has been flushed for * this region */ }; diff --git a/mm/nommu.c b/mm/nommu.c index 17773862619b..5e39294f8ea8 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -552,11 +552,11 @@ static void free_page_series(unsigned long from, unsigned long to) static void __put_nommu_region(struct vm_region *region) __releases(nommu_region_sem) { - kenter("%p{%d}", region, atomic_read(®ion->vm_usage)); + kenter("%p{%d}", region, region->vm_usage); BUG_ON(!nommu_region_tree.rb_node); - if (atomic_dec_and_test(®ion->vm_usage)) { + if (--region->vm_usage == 0) { if (region->vm_top > region->vm_start) delete_nommu_region(region); up_write(&nommu_region_sem); @@ -1205,7 +1205,7 @@ unsigned long do_mmap_pgoff(struct file *file, if (!vma) goto error_getting_vma; - atomic_set(®ion->vm_usage, 1); + region->vm_usage = 1; region->vm_flags = vm_flags; region->vm_pgoff = pgoff; @@ -1272,7 +1272,7 @@ unsigned long do_mmap_pgoff(struct file *file, } /* we've found a region we can share */ - atomic_inc(&pregion->vm_usage); + pregion->vm_usage++; vma->vm_region = pregion; start = pregion->vm_start; start += (pgoff - pregion->vm_pgoff) << PAGE_SHIFT; @@ -1289,7 +1289,7 @@ unsigned long do_mmap_pgoff(struct file *file, vma->vm_region = NULL; vma->vm_start = 0; vma->vm_end = 0; - atomic_dec(&pregion->vm_usage); + pregion->vm_usage--; pregion = NULL; goto error_just_free; } @@ -1444,7 +1444,7 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, /* we're only permitted to split anonymous regions that have a single * owner */ if (vma->vm_file || - atomic_read(&vma->vm_region->vm_usage) != 1) + vma->vm_region->vm_usage != 1) return -ENOMEM; if (mm->map_count >= sysctl_max_map_count) @@ -1518,7 +1518,7 @@ static int shrink_vma(struct mm_struct *mm, /* cut the backing region down to size */ region = vma->vm_region; - BUG_ON(atomic_read(®ion->vm_usage) != 1); + BUG_ON(region->vm_usage != 1); down_write(&nommu_region_sem); delete_nommu_region(region); -- cgit v1.2.3-70-g09d2 From efc1a3b16930c41d64ffefde16b87d82f603a8a0 Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 15 Jan 2010 17:01:35 -0800 Subject: nommu: don't need get_unmapped_area() for NOMMU get_unmapped_area() is unnecessary for NOMMU as no-one calls it. 
Signed-off-by: David Howells Acked-by: Al Viro Cc: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 2 ++ include/linux/sched.h | 7 +++++-- mm/nommu.c | 21 --------------------- mm/util.c | 2 +- 4 files changed, 8 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 80cfa78a8cf6..36f96271306c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -205,10 +205,12 @@ struct mm_struct { struct vm_area_struct * mmap; /* list of VMAs */ struct rb_root mm_rb; struct vm_area_struct * mmap_cache; /* last find_vma result */ +#ifdef CONFIG_MMU unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); void (*unmap_area) (struct mm_struct *mm, unsigned long addr); +#endif unsigned long mmap_base; /* base of mmap area */ unsigned long task_size; /* size of task vm space */ unsigned long cached_hole_size; /* if non-zero, the largest hole below free_area_cache */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 8d4991be9d53..6f7bba93929b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -377,6 +377,8 @@ extern int sysctl_max_map_count; #include +#ifdef CONFIG_MMU +extern void arch_pick_mmap_layout(struct mm_struct *mm); extern unsigned long arch_get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); @@ -386,6 +388,9 @@ arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr, unsigned long flags); extern void arch_unmap_area(struct mm_struct *, unsigned long); extern void arch_unmap_area_topdown(struct mm_struct *, unsigned long); +#else +static inline void arch_pick_mmap_layout(struct mm_struct *mm) {} +#endif #if USE_SPLIT_PTLOCKS /* @@ -2491,8 +2496,6 @@ static inline void set_task_cpu(struct task_struct *p, unsigned int cpu) #endif /* CONFIG_SMP */ -extern void arch_pick_mmap_layout(struct mm_struct *mm); - #ifdef CONFIG_TRACING extern void __trace_special(void *__tr, void *__data, diff --git a/mm/nommu.c b/mm/nommu.c index d6dd656264a2..32be0cf51ba6 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1760,27 +1760,6 @@ void unmap_mapping_range(struct address_space *mapping, } EXPORT_SYMBOL(unmap_mapping_range); -/* - * ask for an unmapped area at which to create a mapping on a file - */ -unsigned long get_unmapped_area(struct file *file, unsigned long addr, - unsigned long len, unsigned long pgoff, - unsigned long flags) -{ - unsigned long (*get_area)(struct file *, unsigned long, unsigned long, - unsigned long, unsigned long); - - get_area = current->mm->get_unmapped_area; - if (file && file->f_op && file->f_op->get_unmapped_area) - get_area = file->f_op->get_unmapped_area; - - if (!get_area) - return -ENOSYS; - - return get_area(file, addr, len, pgoff, flags); -} -EXPORT_SYMBOL(get_unmapped_area); - /* * Check that a process has enough memory to allocate a new virtual * mapping. 
0 means there is enough memory for the allocation to diff --git a/mm/util.c b/mm/util.c index 7c35ad95f927..834db7be240f 100644 --- a/mm/util.c +++ b/mm/util.c @@ -220,7 +220,7 @@ char *strndup_user(const char __user *s, long n) } EXPORT_SYMBOL(strndup_user); -#ifndef HAVE_ARCH_PICK_MMAP_LAYOUT +#if defined(CONFIG_MMU) && !defined(HAVE_ARCH_PICK_MMAP_LAYOUT) void arch_pick_mmap_layout(struct mm_struct *mm) { mm->mmap_base = TASK_UNMAPPED_BASE; -- cgit v1.2.3-70-g09d2 From 7e6608724c640924aad1d556d17df33ebaa6124d Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 15 Jan 2010 17:01:39 -0800 Subject: nommu: fix shared mmap after truncate shrinkage problems Fix a problem in NOMMU mmap with ramfs whereby a shared mmap can happen over the end of a truncation. The problem is that ramfs_nommu_check_mappings() checks that the reduced file size against the VMA tree, but not the vm_region tree. The following sequence of events can cause the problem: fd = open("/tmp/x", O_RDWR|O_TRUNC|O_CREAT, 0600); ftruncate(fd, 32 * 1024); a = mmap(NULL, 32 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); b = mmap(NULL, 16 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); munmap(a, 32 * 1024); ftruncate(fd, 16 * 1024); c = mmap(NULL, 32 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); Mapping 'a' creates a vm_region covering 32KB of the file. Mapping 'b' sees that the vm_region from 'a' is covering the region it wants and so shares it, pinning it in memory. Mapping 'a' then goes away and the file is truncated to the end of VMA 'b'. However, the region allocated by 'a' is still in effect, and has _not_ been reduced. Mapping 'c' is then created, and because there's a vm_region covering the desired region, get_unmapped_area() is _not_ called to repeat the check, and the mapping is granted, even though the pages from the latter half of the mapping have been discarded. However: d = mmap(NULL, 16 * 1024, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0); Mapping 'd' should work, and should end up sharing the region allocated by 'a'. To deal with this, we shrink the vm_region struct during the truncation, lest do_mmap_pgoff() take it as licence to share the full region automatically without calling the get_unmapped_area() file op again. 
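
The sequence above translates directly into a small userspace test. This is only a sketch of the scenario from the changelog, assuming a NOMMU kernel with a writable ramfs/tmpfs at /tmp; on such a kernel the final mapping should now be refused, whereas before this patch it was granted over discarded pages:

    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/mman.h>
    #include <unistd.h>

    int main(void)
    {
            void *a, *b, *c;
            int fd = open("/tmp/x", O_RDWR | O_TRUNC | O_CREAT, 0600);

            if (fd < 0) {
                    perror("open");
                    return 1;
            }

            if (ftruncate(fd, 32 * 1024) < 0)
                    return 1;

            /* 'a' creates a 32KB vm_region; 'b' shares it and pins it */
            a = mmap(NULL, 32 * 1024, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
            b = mmap(NULL, 16 * 1024, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);

            munmap(a, 32 * 1024);
            if (ftruncate(fd, 16 * 1024) < 0)       /* shrink past the pinned region */
                    return 1;

            /* before the fix this was granted despite the discarded tail pages */
            c = mmap(NULL, 32 * 1024, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
            printf("oversized mmap after truncate: %s\n",
                   c == MAP_FAILED ? "refused (fixed)" : "granted (buggy)");

            (void)b;
            return 0;
    }
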
Signed-off-by: David Howells Acked-by: Al Viro Cc: Greg Ungerer Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ramfs/file-nommu.c | 31 +------------------------- include/linux/mm.h | 1 + mm/nommu.c | 62 +++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/fs/ramfs/file-nommu.c b/fs/ramfs/file-nommu.c index 266531343aae..1739a4aba25f 100644 --- a/fs/ramfs/file-nommu.c +++ b/fs/ramfs/file-nommu.c @@ -121,35 +121,6 @@ add_error: return ret; } -/*****************************************************************************/ -/* - * check that file shrinkage doesn't leave any VMAs dangling in midair - */ -static int ramfs_nommu_check_mappings(struct inode *inode, - size_t newsize, size_t size) -{ - struct vm_area_struct *vma; - struct prio_tree_iter iter; - - down_write(&nommu_region_sem); - - /* search for VMAs that fall within the dead zone */ - vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, - newsize >> PAGE_SHIFT, - (size + PAGE_SIZE - 1) >> PAGE_SHIFT - ) { - /* found one - only interested if it's shared out of the page - * cache */ - if (vma->vm_flags & VM_SHARED) { - up_write(&nommu_region_sem); - return -ETXTBSY; /* not quite true, but near enough */ - } - } - - up_write(&nommu_region_sem); - return 0; -} - /*****************************************************************************/ /* * @@ -169,7 +140,7 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size) /* check that a decrease in size doesn't cut off any shared mappings */ if (newsize < size) { - ret = ramfs_nommu_check_mappings(inode, newsize, size); + ret = nommu_shrink_inode_mappings(inode, size, newsize); if (ret < 0) return ret; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 2265f28eb47a..60c467bfbabd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1089,6 +1089,7 @@ extern void zone_pcp_update(struct zone *zone); /* nommu.c */ extern atomic_long_t mmap_pages_allocated; +extern int nommu_shrink_inode_mappings(struct inode *, size_t, size_t); /* prio_tree.c */ void vma_prio_tree_add(struct vm_area_struct *, struct vm_area_struct *old); diff --git a/mm/nommu.c b/mm/nommu.c index 32be0cf51ba6..48a2ecfaf059 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1914,3 +1914,65 @@ int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, in mmput(mm); return len; } + +/** + * nommu_shrink_inode_mappings - Shrink the shared mappings on an inode + * @inode: The inode to check + * @size: The current filesize of the inode + * @newsize: The proposed filesize of the inode + * + * Check the shared mappings on an inode on behalf of a shrinking truncate to + * make sure that that any outstanding VMAs aren't broken and then shrink the + * vm_regions that extend that beyond so that do_mmap_pgoff() doesn't + * automatically grant mappings that are too large. 
+ */ +int nommu_shrink_inode_mappings(struct inode *inode, size_t size, + size_t newsize) +{ + struct vm_area_struct *vma; + struct prio_tree_iter iter; + struct vm_region *region; + pgoff_t low, high; + size_t r_size, r_top; + + low = newsize >> PAGE_SHIFT; + high = (size + PAGE_SIZE - 1) >> PAGE_SHIFT; + + down_write(&nommu_region_sem); + + /* search for VMAs that fall within the dead zone */ + vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, + low, high) { + /* found one - only interested if it's shared out of the page + * cache */ + if (vma->vm_flags & VM_SHARED) { + up_write(&nommu_region_sem); + return -ETXTBSY; /* not quite true, but near enough */ + } + } + + /* reduce any regions that overlap the dead zone - if in existence, + * these will be pointed to by VMAs that don't overlap the dead zone + * + * we don't check for any regions that start beyond the EOF as there + * shouldn't be any + */ + vma_prio_tree_foreach(vma, &iter, &inode->i_mapping->i_mmap, + 0, ULONG_MAX) { + if (!(vma->vm_flags & VM_SHARED)) + continue; + + region = vma->vm_region; + r_size = region->vm_top - region->vm_start; + r_top = (region->vm_pgoff << PAGE_SHIFT) + r_size; + + if (r_top > newsize) { + region->vm_top -= r_top - newsize; + if (region->vm_end > region->vm_top) + region->vm_end = region->vm_top; + } + } + + up_write(&nommu_region_sem); + return 0; +} -- cgit v1.2.3-70-g09d2 From 72659ecce68588b74f6c46862c2b4cec137d7a5a Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Sun, 17 Jan 2010 19:09:39 -0800 Subject: tcp: account SYN-ACK timeouts & retransmissions Currently we don't increment SYN-ACK timeouts & retransmissions although we do increment the same stats for SYN. We seem to have lost the SYN-ACK accounting with the introduction of tcp_syn_recv_timer (commit 2248761e in the netdev-vger-cvs tree). This patch fixes this issue. In the process we also rename the v4/v6 syn/ack retransmit functions for clarity. We also add a new request_socket operations (syn_ack_timeout) so we can keep code in inet_connection_sock.c protocol agnostic. Signed-off-by: Octavian Purdila Signed-off-by: David S. 
Miller --- include/net/request_sock.h | 2 ++ include/net/tcp.h | 2 ++ net/ipv4/inet_connection_sock.c | 2 ++ net/ipv4/tcp_ipv4.c | 18 ++++++++++-------- net/ipv4/tcp_timer.c | 6 ++++++ net/ipv6/tcp_ipv6.c | 12 ++++++++++-- 6 files changed, 32 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index c9b50ebd9ce9..99e6e19b57c2 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -45,6 +45,8 @@ struct request_sock_ops { void (*send_reset)(struct sock *sk, struct sk_buff *skb); void (*destructor)(struct request_sock *req); + void (*syn_ack_timeout)(struct sock *sk, + struct request_sock *req); }; /* struct request_sock - mini sock to represent a connection request diff --git a/include/net/tcp.h b/include/net/tcp.h index 788c99f98597..87d164b9bd8f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -400,6 +400,8 @@ extern int compat_tcp_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen); extern void tcp_set_keepalive(struct sock *sk, int val); +extern void tcp_syn_ack_timeout(struct sock *sk, + struct request_sock *req); extern int tcp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, size_t len, int nonblock, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index ee16475f8fc3..8da6429269dd 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -529,6 +529,8 @@ void inet_csk_reqsk_queue_prune(struct sock *parent, syn_ack_recalc(req, thresh, max_retries, queue->rskq_defer_accept, &expire, &resend); + if (req->rsk_ops->syn_ack_timeout) + req->rsk_ops->syn_ack_timeout(parent, req); if (!expire && (!resend || !req->rsk_ops->rtx_syn_ack(parent, req, NULL) || diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 382f667238ec..356f544c4c10 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -742,9 +742,9 @@ static void tcp_v4_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, * This still operates on a request_sock only, not on a big * socket. 
*/ -static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, - struct request_sock *req, - struct request_values *rvp) +static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, + struct request_sock *req, + struct request_values *rvp) { const struct inet_request_sock *ireq = inet_rsk(req); int err = -1; @@ -775,10 +775,11 @@ static int __tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, return err; } -static int tcp_v4_send_synack(struct sock *sk, struct request_sock *req, +static int tcp_v4_rtx_synack(struct sock *sk, struct request_sock *req, struct request_values *rvp) { - return __tcp_v4_send_synack(sk, NULL, req, rvp); + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + return tcp_v4_send_synack(sk, NULL, req, rvp); } /* @@ -1192,10 +1193,11 @@ static int tcp_v4_inbound_md5_hash(struct sock *sk, struct sk_buff *skb) struct request_sock_ops tcp_request_sock_ops __read_mostly = { .family = PF_INET, .obj_size = sizeof(struct tcp_request_sock), - .rtx_syn_ack = tcp_v4_send_synack, + .rtx_syn_ack = tcp_v4_rtx_synack, .send_ack = tcp_v4_reqsk_send_ack, .destructor = tcp_v4_reqsk_destructor, .send_reset = tcp_v4_send_reset, + .syn_ack_timeout = tcp_syn_ack_timeout, }; #ifdef CONFIG_TCP_MD5SIG @@ -1373,8 +1375,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) } tcp_rsk(req)->snt_isn = isn; - if (__tcp_v4_send_synack(sk, dst, req, - (struct request_values *)&tmp_ext) || + if (tcp_v4_send_synack(sk, dst, req, + (struct request_values *)&tmp_ext) || want_cookie) goto drop_and_free; diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 8816a20c2597..de7d1bf9114f 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -474,6 +474,12 @@ static void tcp_synack_timer(struct sock *sk) TCP_TIMEOUT_INIT, TCP_RTO_MAX); } +void tcp_syn_ack_timeout(struct sock *sk, struct request_sock *req) +{ + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPTIMEOUTS); +} +EXPORT_SYMBOL(tcp_syn_ack_timeout); + void tcp_set_keepalive(struct sock *sk, int val) { if ((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN)) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1c832bf198b3..82f2dea0e39e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -520,6 +520,13 @@ done: return err; } +static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req, + struct request_values *rvp) +{ + TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS); + return tcp_v6_send_synack(sk, req, rvp); +} + static inline void syn_flood_warning(struct sk_buff *skb) { #ifdef CONFIG_SYN_COOKIES @@ -890,10 +897,11 @@ static int tcp_v6_inbound_md5_hash (struct sock *sk, struct sk_buff *skb) struct request_sock_ops tcp6_request_sock_ops __read_mostly = { .family = AF_INET6, .obj_size = sizeof(struct tcp6_request_sock), - .rtx_syn_ack = tcp_v6_send_synack, + .rtx_syn_ack = tcp_v6_rtx_synack, .send_ack = tcp_v6_reqsk_send_ack, .destructor = tcp_v6_reqsk_destructor, - .send_reset = tcp_v6_send_reset + .send_reset = tcp_v6_send_reset, + .syn_ack_timeout = tcp_syn_ack_timeout, }; #ifdef CONFIG_TCP_MD5SIG -- cgit v1.2.3-70-g09d2 From 9dffe2a32b0deef52605d50527c0d240b15cabf7 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Mon, 4 Jan 2010 18:05:00 +0000 Subject: mfd: Correct WM835x ISINK ramp time defines The constants used to specify ISINK ramp times for WM835x had the wrong shifts so that the on times applied to the off ramp and vice versa. The masks for the bitfields are correct. 
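
In other words, the on-ramp codes belong in bits 1:0 and the off-ramp codes in bits 5:4; with the old defines each value landed in the other field. A small standalone illustration, with the two corrected defines copied from the hunk below (illustrative only, not a substitute for the full header):

    #include <stdio.h>

    /* corrected values from the patch: on-ramp in bits 1:0, off-ramp in bits 5:4 */
    #define WM8350_ISINK_FLASH_ON_0_25S     (1 << 0)
    #define WM8350_ISINK_FLASH_OFF_1_00S    (3 << 4)

    int main(void)
    {
            unsigned int ramp = WM8350_ISINK_FLASH_ON_0_25S |
                                WM8350_ISINK_FLASH_OFF_1_00S;

            /* prints 0x31; the old swapped shifts gave 0x13 for the same
             * expression, i.e. the on time programmed into the off field */
            printf("ramp field value: 0x%02x\n", ramp);
            return 0;
    }
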
Signed-off-by: Mark Brown Cc: stable@kernel.org Signed-off-by: Samuel Ortiz --- include/linux/mfd/wm8350/pmic.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include') diff --git a/include/linux/mfd/wm8350/pmic.h b/include/linux/mfd/wm8350/pmic.h index be3264e286e0..e786fe9841ef 100644 --- a/include/linux/mfd/wm8350/pmic.h +++ b/include/linux/mfd/wm8350/pmic.h @@ -666,20 +666,20 @@ #define WM8350_ISINK_FLASH_DUR_64MS (1 << 8) #define WM8350_ISINK_FLASH_DUR_96MS (2 << 8) #define WM8350_ISINK_FLASH_DUR_1024MS (3 << 8) -#define WM8350_ISINK_FLASH_ON_INSTANT (0 << 4) -#define WM8350_ISINK_FLASH_ON_0_25S (1 << 4) -#define WM8350_ISINK_FLASH_ON_0_50S (2 << 4) -#define WM8350_ISINK_FLASH_ON_1_00S (3 << 4) -#define WM8350_ISINK_FLASH_ON_1_95S (1 << 4) -#define WM8350_ISINK_FLASH_ON_3_91S (2 << 4) -#define WM8350_ISINK_FLASH_ON_7_80S (3 << 4) -#define WM8350_ISINK_FLASH_OFF_INSTANT (0 << 0) -#define WM8350_ISINK_FLASH_OFF_0_25S (1 << 0) -#define WM8350_ISINK_FLASH_OFF_0_50S (2 << 0) -#define WM8350_ISINK_FLASH_OFF_1_00S (3 << 0) -#define WM8350_ISINK_FLASH_OFF_1_95S (1 << 0) -#define WM8350_ISINK_FLASH_OFF_3_91S (2 << 0) -#define WM8350_ISINK_FLASH_OFF_7_80S (3 << 0) +#define WM8350_ISINK_FLASH_ON_INSTANT (0 << 0) +#define WM8350_ISINK_FLASH_ON_0_25S (1 << 0) +#define WM8350_ISINK_FLASH_ON_0_50S (2 << 0) +#define WM8350_ISINK_FLASH_ON_1_00S (3 << 0) +#define WM8350_ISINK_FLASH_ON_1_95S (1 << 0) +#define WM8350_ISINK_FLASH_ON_3_91S (2 << 0) +#define WM8350_ISINK_FLASH_ON_7_80S (3 << 0) +#define WM8350_ISINK_FLASH_OFF_INSTANT (0 << 4) +#define WM8350_ISINK_FLASH_OFF_0_25S (1 << 4) +#define WM8350_ISINK_FLASH_OFF_0_50S (2 << 4) +#define WM8350_ISINK_FLASH_OFF_1_00S (3 << 4) +#define WM8350_ISINK_FLASH_OFF_1_95S (1 << 4) +#define WM8350_ISINK_FLASH_OFF_3_91S (2 << 4) +#define WM8350_ISINK_FLASH_OFF_7_80S (3 << 4) /* * Regulator Interrupts. -- cgit v1.2.3-70-g09d2 From 64e8867ba8098b69889c1af94997a5ba2348fb26 Mon Sep 17 00:00:00 2001 From: Ian Molton Date: Wed, 6 Jan 2010 13:51:48 +0100 Subject: mfd: tmio_mmc hardware abstraction for CNF area This patch abstracts out the CNF area code from tmio_mmc which is not present in all hardware that can use this driver. This is required so that we can support non-toshiba based hardware. 
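
The shape of the split is simple: the MMC host driver stops writing the CNF window itself and instead calls optional set_pwr()/set_clk_div() callbacks supplied through its platform data, while each MFD driver maps the CNF area and drives it through the shared tmio_core_mmc_*() helpers from its enable/resume hooks. A minimal plain-C analogue of that callback wiring (names are illustrative, not the real kernel API):

    #include <stdio.h>

    struct toy_mmc_pdata {
            unsigned long hclk;
            void (*set_pwr)(void *ctx, int state);          /* optional hooks owned */
            void (*set_clk_div)(void *ctx, int state);      /* by the platform glue  */
            void *ctx;
    };

    /* "host driver": knows nothing about where (or whether) a CNF area exists */
    static void host_set_power(struct toy_mmc_pdata *pdata, int on)
    {
            if (pdata->set_pwr)
                    pdata->set_pwr(pdata->ctx, on);
    }

    /* "platform glue": the only place that would touch the CNF registers */
    static void board_set_pwr(void *ctx, int state)
    {
            printf("%s: CNF power %s\n", (const char *)ctx, state ? "on" : "off");
    }

    int main(void)
    {
            struct toy_mmc_pdata pdata = {
                    .hclk = 24000000,
                    .set_pwr = board_set_pwr,
                    .ctx = "asic3-like board",
            };

            host_set_power(&pdata, 1);
            host_set_power(&pdata, 0);
            return 0;
    }
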
ASIC3 support by Philipp Zabel Signed-off-by: Ian Molton Signed-off-by: Magnus Damm Signed-off-by: Samuel Ortiz --- drivers/mfd/Makefile | 6 +-- drivers/mfd/asic3.c | 40 ++++++++++++--- drivers/mfd/t7l66xb.c | 55 +++++++++++++------- drivers/mfd/tc6387xb.c | 119 ++++++++++++++++++++++++++++++++------------ drivers/mfd/tc6393xb.c | 56 +++++++++++++++++---- drivers/mfd/tmio_core.c | 52 +++++++++++++++++++ drivers/mmc/host/tmio_mmc.c | 59 +++++++--------------- drivers/mmc/host/tmio_mmc.h | 46 +++-------------- include/linux/mfd/tmio.h | 39 +++++++++++++++ 9 files changed, 323 insertions(+), 149 deletions(-) create mode 100644 drivers/mfd/tmio_core.c (limited to 'include') diff --git a/drivers/mfd/Makefile b/drivers/mfd/Makefile index ca2f2c4ff05e..8f0d18409ede 100644 --- a/drivers/mfd/Makefile +++ b/drivers/mfd/Makefile @@ -11,9 +11,9 @@ obj-$(CONFIG_HTC_PASIC3) += htc-pasic3.o obj-$(CONFIG_MFD_DM355EVM_MSP) += dm355evm_msp.o -obj-$(CONFIG_MFD_T7L66XB) += t7l66xb.o -obj-$(CONFIG_MFD_TC6387XB) += tc6387xb.o -obj-$(CONFIG_MFD_TC6393XB) += tc6393xb.o +obj-$(CONFIG_MFD_T7L66XB) += t7l66xb.o tmio_core.o +obj-$(CONFIG_MFD_TC6387XB) += tc6387xb.o tmio_core.o +obj-$(CONFIG_MFD_TC6393XB) += tc6393xb.o tmio_core.o obj-$(CONFIG_MFD_WM8400) += wm8400-core.o wm831x-objs := wm831x-core.o wm831x-irq.o wm831x-otp.o diff --git a/drivers/mfd/asic3.c b/drivers/mfd/asic3.c index e22128c3e9a8..95c1e6bd1729 100644 --- a/drivers/mfd/asic3.c +++ b/drivers/mfd/asic3.c @@ -80,6 +80,7 @@ struct asic3 { u16 irq_bothedge[4]; struct gpio_chip gpio; struct device *dev; + void __iomem *tmio_cnf; struct asic3_clk clocks[ARRAY_SIZE(asic3_clk_init)]; }; @@ -685,8 +686,24 @@ static struct mfd_cell asic3_cell_ds1wm = { .resources = ds1wm_resources, }; +static void asic3_mmc_pwr(struct platform_device *pdev, int state) +{ + struct asic3 *asic = dev_get_drvdata(pdev->dev.parent); + + tmio_core_mmc_pwr(asic->tmio_cnf, 1 - asic->bus_shift, state); +} + +static void asic3_mmc_clk_div(struct platform_device *pdev, int state) +{ + struct asic3 *asic = dev_get_drvdata(pdev->dev.parent); + + tmio_core_mmc_clk_div(asic->tmio_cnf, 1 - asic->bus_shift, state); +} + static struct tmio_mmc_data asic3_mmc_data = { - .hclk = 24576000, + .hclk = 24576000, + .set_pwr = asic3_mmc_pwr, + .set_clk_div = asic3_mmc_clk_div, }; static struct resource asic3_mmc_resources[] = { @@ -695,11 +712,6 @@ static struct resource asic3_mmc_resources[] = { .end = ASIC3_SD_CTRL_BASE + 0x3ff, .flags = IORESOURCE_MEM, }, - { - .start = ASIC3_SD_CONFIG_BASE, - .end = ASIC3_SD_CONFIG_BASE + 0x1ff, - .flags = IORESOURCE_MEM, - }, { .start = 0, .end = 0, @@ -743,6 +755,10 @@ static int asic3_mmc_enable(struct platform_device *pdev) asic3_set_register(asic, ASIC3_OFFSET(SDHWCTRL, SDCONF), ASIC3_SDHWCTRL_SDPWR, 1); + /* ASIC3_SD_CTRL_BASE assumes 32-bit addressing, TMIO is 16-bit */ + tmio_core_mmc_enable(asic->tmio_cnf, 1 - asic->bus_shift, + ASIC3_SD_CTRL_BASE >> 1); + return 0; } @@ -797,10 +813,15 @@ static int __init asic3_mfd_probe(struct platform_device *pdev, asic3_cell_ds1wm.data_size = sizeof(asic3_cell_ds1wm); /* MMC */ + asic->tmio_cnf = ioremap((ASIC3_SD_CONFIG_BASE >> asic->bus_shift) + + mem_sdio->start, 0x400 >> asic->bus_shift); + if (!asic->tmio_cnf) { + ret = -ENOMEM; + dev_dbg(asic->dev, "Couldn't ioremap SD_CONFIG\n"); + goto out; + } asic3_mmc_resources[0].start >>= asic->bus_shift; asic3_mmc_resources[0].end >>= asic->bus_shift; - asic3_mmc_resources[1].start >>= asic->bus_shift; - asic3_mmc_resources[1].end >>= asic->bus_shift; 
asic3_cell_mmc.platform_data = &asic3_cell_mmc; asic3_cell_mmc.data_size = sizeof(asic3_cell_mmc); @@ -820,7 +841,10 @@ static int __init asic3_mfd_probe(struct platform_device *pdev, static void asic3_mfd_remove(struct platform_device *pdev) { + struct asic3 *asic = platform_get_drvdata(pdev); + mfd_remove_devices(&pdev->dev); + iounmap(asic->tmio_cnf); } /* Core */ diff --git a/drivers/mfd/t7l66xb.c b/drivers/mfd/t7l66xb.c index 0a255c1f1ce7..bcf4687d4af5 100644 --- a/drivers/mfd/t7l66xb.c +++ b/drivers/mfd/t7l66xb.c @@ -38,6 +38,19 @@ enum { T7L66XB_CELL_MMC, }; +static const struct resource t7l66xb_mmc_resources[] = { + { + .start = 0x800, + .end = 0x9ff, + .flags = IORESOURCE_MEM, + }, + { + .start = IRQ_T7L66XB_MMC, + .end = IRQ_T7L66XB_MMC, + .flags = IORESOURCE_IRQ, + }, +}; + #define SCR_REVID 0x08 /* b Revision ID */ #define SCR_IMR 0x42 /* b Interrupt Mask */ #define SCR_DEV_CTL 0xe0 /* b Device control */ @@ -83,6 +96,9 @@ static int t7l66xb_mmc_enable(struct platform_device *mmc) spin_unlock_irqrestore(&t7l66xb->lock, flags); + tmio_core_mmc_enable(t7l66xb->scr + 0x200, 0, + t7l66xb_mmc_resources[0].start & 0xfffe); + return 0; } @@ -106,28 +122,28 @@ static int t7l66xb_mmc_disable(struct platform_device *mmc) return 0; } +static void t7l66xb_mmc_pwr(struct platform_device *mmc, int state) +{ + struct platform_device *dev = to_platform_device(mmc->dev.parent); + struct t7l66xb *t7l66xb = platform_get_drvdata(dev); + + tmio_core_mmc_pwr(t7l66xb->scr + 0x200, 0, state); +} + +static void t7l66xb_mmc_clk_div(struct platform_device *mmc, int state) +{ + struct platform_device *dev = to_platform_device(mmc->dev.parent); + struct t7l66xb *t7l66xb = platform_get_drvdata(dev); + + tmio_core_mmc_clk_div(t7l66xb->scr + 0x200, 0, state); +} + /*--------------------------------------------------------------------------*/ static struct tmio_mmc_data t7166xb_mmc_data = { .hclk = 24000000, -}; - -static const struct resource t7l66xb_mmc_resources[] = { - { - .start = 0x800, - .end = 0x9ff, - .flags = IORESOURCE_MEM, - }, - { - .start = 0x200, - .end = 0x2ff, - .flags = IORESOURCE_MEM, - }, - { - .start = IRQ_T7L66XB_MMC, - .end = IRQ_T7L66XB_MMC, - .flags = IORESOURCE_IRQ, - }, + .set_pwr = t7l66xb_mmc_pwr, + .set_clk_div = t7l66xb_mmc_clk_div, }; static const struct resource t7l66xb_nand_resources[] = { @@ -282,6 +298,9 @@ static int t7l66xb_resume(struct platform_device *dev) if (pdata && pdata->resume) pdata->resume(dev); + tmio_core_mmc_enable(t7l66xb->scr + 0x200, 0, + t7l66xb_mmc_resources[0].start & 0xfffe); + return 0; } #else diff --git a/drivers/mfd/tc6387xb.c b/drivers/mfd/tc6387xb.c index 3280ab33f88a..5c7f04343d5c 100644 --- a/drivers/mfd/tc6387xb.c +++ b/drivers/mfd/tc6387xb.c @@ -22,28 +22,52 @@ enum { TC6387XB_CELL_MMC, }; +struct tc6387xb { + void __iomem *scr; + struct clk *clk32k; + struct resource rscr; +}; + +static struct resource tc6387xb_mmc_resources[] = { + { + .start = 0x800, + .end = 0x9ff, + .flags = IORESOURCE_MEM, + }, + { + .start = 0, + .end = 0, + .flags = IORESOURCE_IRQ, + }, +}; + +/*--------------------------------------------------------------------------*/ + #ifdef CONFIG_PM static int tc6387xb_suspend(struct platform_device *dev, pm_message_t state) { - struct clk *clk32k = platform_get_drvdata(dev); + struct tc6387xb *tc6387xb = platform_get_drvdata(dev); struct tc6387xb_platform_data *pdata = dev->dev.platform_data; if (pdata && pdata->suspend) pdata->suspend(dev); - clk_disable(clk32k); + clk_disable(tc6387xb->clk32k); return 0; } static int 
tc6387xb_resume(struct platform_device *dev) { - struct clk *clk32k = platform_get_drvdata(dev); + struct tc6387xb *tc6387xb = platform_get_drvdata(dev); struct tc6387xb_platform_data *pdata = dev->dev.platform_data; - clk_enable(clk32k); + clk_enable(tc6387xb->clk32k); if (pdata && pdata->resume) pdata->resume(dev); + tmio_core_mmc_resume(tc6387xb->scr + 0x200, 0, + tc6387xb_mmc_resources[0].start & 0xfffe); + return 0; } #else @@ -53,12 +77,32 @@ static int tc6387xb_resume(struct platform_device *dev) /*--------------------------------------------------------------------------*/ +static void tc6387xb_mmc_pwr(struct platform_device *mmc, int state) +{ + struct platform_device *dev = to_platform_device(mmc->dev.parent); + struct tc6387xb *tc6387xb = platform_get_drvdata(dev); + + tmio_core_mmc_pwr(tc6387xb->scr + 0x200, 0, state); +} + +static void tc6387xb_mmc_clk_div(struct platform_device *mmc, int state) +{ + struct platform_device *dev = to_platform_device(mmc->dev.parent); + struct tc6387xb *tc6387xb = platform_get_drvdata(dev); + + tmio_core_mmc_clk_div(tc6387xb->scr + 0x200, 0, state); +} + + static int tc6387xb_mmc_enable(struct platform_device *mmc) { struct platform_device *dev = to_platform_device(mmc->dev.parent); - struct clk *clk32k = platform_get_drvdata(dev); + struct tc6387xb *tc6387xb = platform_get_drvdata(dev); - clk_enable(clk32k); + clk_enable(tc6387xb->clk32k); + + tmio_core_mmc_enable(tc6387xb->scr + 0x200, 0, + tc6387xb_mmc_resources[0].start & 0xfffe); return 0; } @@ -66,36 +110,20 @@ static int tc6387xb_mmc_enable(struct platform_device *mmc) static int tc6387xb_mmc_disable(struct platform_device *mmc) { struct platform_device *dev = to_platform_device(mmc->dev.parent); - struct clk *clk32k = platform_get_drvdata(dev); + struct tc6387xb *tc6387xb = platform_get_drvdata(dev); - clk_disable(clk32k); + clk_disable(tc6387xb->clk32k); return 0; } -/*--------------------------------------------------------------------------*/ - static struct tmio_mmc_data tc6387xb_mmc_data = { .hclk = 24000000, + .set_pwr = tc6387xb_mmc_pwr, + .set_clk_div = tc6387xb_mmc_clk_div, }; -static struct resource tc6387xb_mmc_resources[] = { - { - .start = 0x800, - .end = 0x9ff, - .flags = IORESOURCE_MEM, - }, - { - .start = 0x200, - .end = 0x2ff, - .flags = IORESOURCE_MEM, - }, - { - .start = 0, - .end = 0, - .flags = IORESOURCE_IRQ, - }, -}; +/*--------------------------------------------------------------------------*/ static struct mfd_cell tc6387xb_cells[] = { [TC6387XB_CELL_MMC] = { @@ -111,8 +139,9 @@ static struct mfd_cell tc6387xb_cells[] = { static int tc6387xb_probe(struct platform_device *dev) { struct tc6387xb_platform_data *pdata = dev->dev.platform_data; - struct resource *iomem; + struct resource *iomem, *rscr; struct clk *clk32k; + struct tc6387xb *tc6387xb; int irq, ret; iomem = platform_get_resource(dev, IORESOURCE_MEM, 0); @@ -120,18 +149,40 @@ static int tc6387xb_probe(struct platform_device *dev) return -EINVAL; } + tc6387xb = kzalloc(sizeof *tc6387xb, GFP_KERNEL); + if (!tc6387xb) + return -ENOMEM; + ret = platform_get_irq(dev, 0); if (ret >= 0) irq = ret; else - goto err_resource; + goto err_no_irq; clk32k = clk_get(&dev->dev, "CLK_CK32K"); if (IS_ERR(clk32k)) { ret = PTR_ERR(clk32k); + goto err_no_clk; + } + + rscr = &tc6387xb->rscr; + rscr->name = "tc6387xb-core"; + rscr->start = iomem->start; + rscr->end = iomem->start + 0xff; + rscr->flags = IORESOURCE_MEM; + + ret = request_resource(iomem, rscr); + if (ret) goto err_resource; + + tc6387xb->scr = 
ioremap(rscr->start, rscr->end - rscr->start + 1); + if (!tc6387xb->scr) { + ret = -ENOMEM; + goto err_ioremap; } - platform_set_drvdata(dev, clk32k); + + tc6387xb->clk32k = clk32k; + platform_set_drvdata(dev, tc6387xb); if (pdata && pdata->enable) pdata->enable(dev); @@ -149,8 +200,13 @@ static int tc6387xb_probe(struct platform_device *dev) if (!ret) return 0; - clk_put(clk32k); +err_ioremap: + release_resource(&tc6387xb->rscr); err_resource: + clk_put(clk32k); +err_no_clk: +err_no_irq: + kfree(tc6387xb); return ret; } @@ -195,3 +251,4 @@ MODULE_DESCRIPTION("Toshiba TC6387XB core driver"); MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Ian Molton"); MODULE_ALIAS("platform:tc6387xb"); + diff --git a/drivers/mfd/tc6393xb.c b/drivers/mfd/tc6393xb.c index 1429a7341a9a..4bc5a08a2b09 100644 --- a/drivers/mfd/tc6393xb.c +++ b/drivers/mfd/tc6393xb.c @@ -136,10 +136,6 @@ static int tc6393xb_nand_enable(struct platform_device *nand) return 0; } -static struct tmio_mmc_data tc6393xb_mmc_data = { - .hclk = 24000000, -}; - static struct resource __devinitdata tc6393xb_nand_resources[] = { { .start = 0x1000, @@ -164,11 +160,6 @@ static struct resource __devinitdata tc6393xb_mmc_resources[] = { .end = 0x9ff, .flags = IORESOURCE_MEM, }, - { - .start = 0x200, - .end = 0x2ff, - .flags = IORESOURCE_MEM, - }, { .start = IRQ_TC6393_MMC, .end = IRQ_TC6393_MMC, @@ -346,6 +337,50 @@ int tc6393xb_lcd_mode(struct platform_device *fb, } EXPORT_SYMBOL(tc6393xb_lcd_mode); +static int tc6393xb_mmc_enable(struct platform_device *mmc) +{ + struct platform_device *dev = to_platform_device(mmc->dev.parent); + struct tc6393xb *tc6393xb = platform_get_drvdata(dev); + + tmio_core_mmc_enable(tc6393xb->scr + 0x200, 0, + tc6393xb_mmc_resources[0].start & 0xfffe); + + return 0; +} + +static int tc6393xb_mmc_resume(struct platform_device *mmc) +{ + struct platform_device *dev = to_platform_device(mmc->dev.parent); + struct tc6393xb *tc6393xb = platform_get_drvdata(dev); + + tmio_core_mmc_resume(tc6393xb->scr + 0x200, 0, + tc6393xb_mmc_resources[0].start & 0xfffe); + + return 0; +} + +static void tc6393xb_mmc_pwr(struct platform_device *mmc, int state) +{ + struct platform_device *dev = to_platform_device(mmc->dev.parent); + struct tc6393xb *tc6393xb = platform_get_drvdata(dev); + + tmio_core_mmc_pwr(tc6393xb->scr + 0x200, 0, state); +} + +static void tc6393xb_mmc_clk_div(struct platform_device *mmc, int state) +{ + struct platform_device *dev = to_platform_device(mmc->dev.parent); + struct tc6393xb *tc6393xb = platform_get_drvdata(dev); + + tmio_core_mmc_clk_div(tc6393xb->scr + 0x200, 0, state); +} + +static struct tmio_mmc_data tc6393xb_mmc_data = { + .hclk = 24000000, + .set_pwr = tc6393xb_mmc_pwr, + .set_clk_div = tc6393xb_mmc_clk_div, +}; + static struct mfd_cell __devinitdata tc6393xb_cells[] = { [TC6393XB_CELL_NAND] = { .name = "tmio-nand", @@ -355,6 +390,8 @@ static struct mfd_cell __devinitdata tc6393xb_cells[] = { }, [TC6393XB_CELL_MMC] = { .name = "tmio-mmc", + .enable = tc6393xb_mmc_enable, + .resume = tc6393xb_mmc_resume, .driver_data = &tc6393xb_mmc_data, .num_resources = ARRAY_SIZE(tc6393xb_mmc_resources), .resources = tc6393xb_mmc_resources, @@ -836,3 +873,4 @@ MODULE_LICENSE("GPL v2"); MODULE_AUTHOR("Ian Molton, Dmitry Baryshkov and Dirk Opfer"); MODULE_DESCRIPTION("tc6393xb Toshiba Mobile IO Controller"); MODULE_ALIAS("platform:tc6393xb"); + diff --git a/drivers/mfd/tmio_core.c b/drivers/mfd/tmio_core.c new file mode 100644 index 000000000000..eddc19ae464b --- /dev/null +++ b/drivers/mfd/tmio_core.c @@ -0,0 +1,52 @@ 
+/* + * Copyright(c) 2009 Ian Molton + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include + +int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base) +{ + /* Enable the MMC/SD Control registers */ + sd_config_write16(cnf, shift, CNF_CMD, SDCREN); + sd_config_write32(cnf, shift, CNF_CTL_BASE, base & 0xfffe); + + /* Disable SD power during suspend */ + sd_config_write8(cnf, shift, CNF_PWR_CTL_3, 0x01); + + /* The below is required but why? FIXME */ + sd_config_write8(cnf, shift, CNF_STOP_CLK_CTL, 0x1f); + + /* Power down SD bus */ + sd_config_write8(cnf, shift, CNF_PWR_CTL_2, 0x00); + + return 0; +} +EXPORT_SYMBOL(tmio_core_mmc_enable); + +int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base) +{ + + /* Enable the MMC/SD Control registers */ + sd_config_write16(cnf, shift, CNF_CMD, SDCREN); + sd_config_write32(cnf, shift, CNF_CTL_BASE, base & 0xfffe); + + return 0; +} +EXPORT_SYMBOL(tmio_core_mmc_resume); + +void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state) +{ + sd_config_write8(cnf, shift, CNF_PWR_CTL_2, state ? 0x02 : 0x00); +} +EXPORT_SYMBOL(tmio_core_mmc_pwr); + +void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state) +{ + sd_config_write8(cnf, shift, CNF_SD_CLK_MODE, state ? 1 : 0); +} +EXPORT_SYMBOL(tmio_core_mmc_clk_div); + diff --git a/drivers/mmc/host/tmio_mmc.c b/drivers/mmc/host/tmio_mmc.c index 7cccc8523747..e22c3fa3516a 100644 --- a/drivers/mmc/host/tmio_mmc.c +++ b/drivers/mmc/host/tmio_mmc.c @@ -46,7 +46,9 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host, int new_clock) clk |= 0x100; } - sd_config_write8(host, CNF_SD_CLK_MODE, clk >> 22); + if (host->set_clk_div) + host->set_clk_div(host->pdev, (clk>>22) & 1); + sd_ctrl_write16(host, CTL_SD_CARD_CLK_CTL, clk & 0x1ff); } @@ -427,12 +429,13 @@ static void tmio_mmc_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) /* Power sequence - OFF -> ON -> UP */ switch (ios->power_mode) { case MMC_POWER_OFF: /* power down SD bus */ - sd_config_write8(host, CNF_PWR_CTL_2, 0x00); + if (host->set_pwr) + host->set_pwr(host->pdev, 0); tmio_mmc_clk_stop(host); break; case MMC_POWER_ON: /* power up SD bus */ - - sd_config_write8(host, CNF_PWR_CTL_2, 0x02); + if (host->set_pwr) + host->set_pwr(host->pdev, 1); break; case MMC_POWER_UP: /* start bus clock */ tmio_mmc_clk_start(host); @@ -485,21 +488,15 @@ static int tmio_mmc_resume(struct platform_device *dev) { struct mfd_cell *cell = (struct mfd_cell *)dev->dev.platform_data; struct mmc_host *mmc = platform_get_drvdata(dev); - struct tmio_mmc_host *host = mmc_priv(mmc); int ret = 0; /* Tell the MFD core we are ready to be enabled */ - if (cell->enable) { - ret = cell->enable(dev); + if (cell->resume) { + ret = cell->resume(dev); if (ret) goto out; } - /* Enable the MMC/SD Control registers */ - sd_config_write16(host, CNF_CMD, SDCREN); - sd_config_write32(host, CNF_CTL_BASE, - (dev->resource[0].start >> host->bus_shift) & 0xfffe); - mmc_resume_host(mmc); out: @@ -514,17 +511,16 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev) { struct mfd_cell *cell = (struct mfd_cell *)dev->dev.platform_data; struct tmio_mmc_data *pdata; - struct resource *res_ctl, *res_cnf; + struct resource *res_ctl; struct tmio_mmc_host *host; struct mmc_host *mmc; int ret = -EINVAL; - if (dev->num_resources != 3) + if (dev->num_resources != 2) goto out; res_ctl = 
platform_get_resource(dev, IORESOURCE_MEM, 0); - res_cnf = platform_get_resource(dev, IORESOURCE_MEM, 1); - if (!res_ctl || !res_cnf) + if (!res_ctl) goto out; pdata = cell->driver_data; @@ -539,8 +535,12 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev) host = mmc_priv(mmc); host->mmc = mmc; + host->pdev = dev; platform_set_drvdata(dev, mmc); + host->set_pwr = pdata->set_pwr; + host->set_clk_div = pdata->set_clk_div; + /* SD control register space size is 0x200, 0x400 for bus_shift=1 */ host->bus_shift = resource_size(res_ctl) >> 10; @@ -548,10 +548,6 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev) if (!host->ctl) goto host_free; - host->cnf = ioremap(res_cnf->start, resource_size(res_cnf)); - if (!host->cnf) - goto unmap_ctl; - mmc->ops = &tmio_mmc_ops; mmc->caps = MMC_CAP_4_BIT_DATA; mmc->f_max = pdata->hclk; @@ -562,23 +558,9 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev) if (cell->enable) { ret = cell->enable(dev); if (ret) - goto unmap_cnf; + goto unmap_ctl; } - /* Enable the MMC/SD Control registers */ - sd_config_write16(host, CNF_CMD, SDCREN); - sd_config_write32(host, CNF_CTL_BASE, - (dev->resource[0].start >> host->bus_shift) & 0xfffe); - - /* Disable SD power during suspend */ - sd_config_write8(host, CNF_PWR_CTL_3, 0x01); - - /* The below is required but why? FIXME */ - sd_config_write8(host, CNF_STOP_CLK_CTL, 0x1f); - - /* Power down SD bus*/ - sd_config_write8(host, CNF_PWR_CTL_2, 0x00); - tmio_mmc_clk_stop(host); reset(host); @@ -586,14 +568,14 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev) if (ret >= 0) host->irq = ret; else - goto unmap_cnf; + goto unmap_ctl; disable_mmc_irqs(host, TMIO_MASK_ALL); ret = request_irq(host->irq, tmio_mmc_irq, IRQF_DISABLED | IRQF_TRIGGER_FALLING, dev_name(&dev->dev), host); if (ret) - goto unmap_cnf; + goto unmap_ctl; mmc_add_host(mmc); @@ -605,8 +587,6 @@ static int __devinit tmio_mmc_probe(struct platform_device *dev) return 0; -unmap_cnf: - iounmap(host->cnf); unmap_ctl: iounmap(host->ctl); host_free: @@ -626,7 +606,6 @@ static int __devexit tmio_mmc_remove(struct platform_device *dev) mmc_remove_host(mmc); free_irq(host->irq, host); iounmap(host->ctl); - iounmap(host->cnf); mmc_free_host(mmc); } diff --git a/drivers/mmc/host/tmio_mmc.h b/drivers/mmc/host/tmio_mmc.h index 9fa998594974..692dc23363b9 100644 --- a/drivers/mmc/host/tmio_mmc.h +++ b/drivers/mmc/host/tmio_mmc.h @@ -11,26 +11,6 @@ #include -#define CNF_CMD 0x04 -#define CNF_CTL_BASE 0x10 -#define CNF_INT_PIN 0x3d -#define CNF_STOP_CLK_CTL 0x40 -#define CNF_GCLK_CTL 0x41 -#define CNF_SD_CLK_MODE 0x42 -#define CNF_PIN_STATUS 0x44 -#define CNF_PWR_CTL_1 0x48 -#define CNF_PWR_CTL_2 0x49 -#define CNF_PWR_CTL_3 0x4a -#define CNF_CARD_DETECT_MODE 0x4c -#define CNF_SD_SLOT 0x50 -#define CNF_EXT_GCLK_CTL_1 0xf0 -#define CNF_EXT_GCLK_CTL_2 0xf1 -#define CNF_EXT_GCLK_CTL_3 0xf9 -#define CNF_SD_LED_EN_1 0xfa -#define CNF_SD_LED_EN_2 0xfe - -#define SDCREN 0x2 /* Enable access to MMC CTL regs. 
(flag in COMMAND_REG)*/ - #define CTL_SD_CMD 0x00 #define CTL_ARG_REG 0x04 #define CTL_STOP_INTERNAL_ACTION 0x08 @@ -110,7 +90,6 @@ struct tmio_mmc_host { - void __iomem *cnf; void __iomem *ctl; unsigned long bus_shift; struct mmc_command *cmd; @@ -119,10 +98,16 @@ struct tmio_mmc_host { struct mmc_host *mmc; int irq; + /* Callbacks for clock / power control */ + void (*set_pwr)(struct platform_device *host, int state); + void (*set_clk_div)(struct platform_device *host, int state); + /* pio related stuff */ struct scatterlist *sg_ptr; unsigned int sg_len; unsigned int sg_off; + + struct platform_device *pdev; }; #include @@ -163,25 +148,6 @@ static inline void sd_ctrl_write32(struct tmio_mmc_host *host, int addr, writew(val >> 16, host->ctl + ((addr + 2) << host->bus_shift)); } -static inline void sd_config_write8(struct tmio_mmc_host *host, int addr, - u8 val) -{ - writeb(val, host->cnf + (addr << host->bus_shift)); -} - -static inline void sd_config_write16(struct tmio_mmc_host *host, int addr, - u16 val) -{ - writew(val, host->cnf + (addr << host->bus_shift)); -} - -static inline void sd_config_write32(struct tmio_mmc_host *host, int addr, - u32 val) -{ - writew(val, host->cnf + (addr << host->bus_shift)); - writew(val >> 16, host->cnf + ((addr + 2) << host->bus_shift)); -} - #include #include diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 6b9c5d06690c..9cb1834deffa 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -2,6 +2,8 @@ #define MFD_TMIO_H #include +#include +#include #define tmio_ioread8(addr) readb(addr) #define tmio_ioread16(addr) readw(addr) @@ -18,11 +20,48 @@ writew((val) >> 16, (addr) + 2); \ } while (0) +#define CNF_CMD 0x04 +#define CNF_CTL_BASE 0x10 +#define CNF_INT_PIN 0x3d +#define CNF_STOP_CLK_CTL 0x40 +#define CNF_GCLK_CTL 0x41 +#define CNF_SD_CLK_MODE 0x42 +#define CNF_PIN_STATUS 0x44 +#define CNF_PWR_CTL_1 0x48 +#define CNF_PWR_CTL_2 0x49 +#define CNF_PWR_CTL_3 0x4a +#define CNF_CARD_DETECT_MODE 0x4c +#define CNF_SD_SLOT 0x50 +#define CNF_EXT_GCLK_CTL_1 0xf0 +#define CNF_EXT_GCLK_CTL_2 0xf1 +#define CNF_EXT_GCLK_CTL_3 0xf9 +#define CNF_SD_LED_EN_1 0xfa +#define CNF_SD_LED_EN_2 0xfe + +#define SDCREN 0x2 /* Enable access to MMC CTL regs. 
(flag in COMMAND_REG)*/ + +#define sd_config_write8(base, shift, reg, val) \ + tmio_iowrite8((val), (base) + ((reg) << (shift))) +#define sd_config_write16(base, shift, reg, val) \ + tmio_iowrite16((val), (base) + ((reg) << (shift))) +#define sd_config_write32(base, shift, reg, val) \ + do { \ + tmio_iowrite16((val), (base) + ((reg) << (shift))); \ + tmio_iowrite16((val) >> 16, (base) + ((reg + 2) << (shift))); \ + } while (0) + +int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); +int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); +void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); +void tmio_core_mmc_clk_div(void __iomem *cnf, int shift, int state); + /* * data for the MMC controller */ struct tmio_mmc_data { const unsigned int hclk; + void (*set_pwr)(struct platform_device *host, int state); + void (*set_clk_div)(struct platform_device *host, int state); }; /* -- cgit v1.2.3-70-g09d2 From 4f9c85a1b03bfa5c0a0d8488a3a7766f3c9fb756 Mon Sep 17 00:00:00 2001 From: Anton Vorontsov Date: Mon, 18 Jan 2010 05:37:16 +0000 Subject: phylib: Move workqueue initialization to a proper place commit 541cd3ee00a4fe975b22fac6a3bc846bacef37f7 ("phylib: Fix deadlock on resume") caused TI DaVinci EMAC ethernet driver to oops upon resume: PM: resume of devices complete after 237.098 msecs Restarting tasks ... done. kernel BUG at kernel/workqueue.c:354! Unable to handle kernel NULL pointer dereference at virtual address 00000000 [...] Backtrace: [] (__bug+0x0/0x2c) from [] (queue_delayed_work_on+0x74/0xf8) [] (queue_delayed_work_on+0x0/0xf8) from [] (queue_delayed_work+0x2c/0x30) The oops pops up because TI DaVinci EMAC driver detaches PHY on suspend and attaches it back on resume. Attaching makes phylib call phy_start_machine() that initializes a workqueue. On the other hand, PHY's resume routine will call phy_start_machine() again, and that will cause the oops since we just destroyed the already scheduled workqueue. This patch fixes the issue by moving workqueue initialization to phy_device_create(). p.s. We don't see this oops with ucc_geth and gianfar drivers because they perform a fine-grained suspend, i.e. they just stop the PHYs without detaching. Reported-by: Sekhar Nori Signed-off-by: Anton Vorontsov Tested-by: Sekhar Nori Signed-off-by: David S. 
Miller --- drivers/net/phy/phy.c | 4 +--- drivers/net/phy/phy_device.c | 1 + include/linux/phy.h | 1 + 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index b0e9f9c51721..0295097d6c44 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -410,7 +410,6 @@ EXPORT_SYMBOL(phy_start_aneg); static void phy_change(struct work_struct *work); -static void phy_state_machine(struct work_struct *work); /** * phy_start_machine - start PHY state machine tracking @@ -430,7 +429,6 @@ void phy_start_machine(struct phy_device *phydev, { phydev->adjust_state = handler; - INIT_DELAYED_WORK(&phydev->state_queue, phy_state_machine); schedule_delayed_work(&phydev->state_queue, HZ); } @@ -761,7 +759,7 @@ EXPORT_SYMBOL(phy_start); * phy_state_machine - Handle the state machine * @work: work_struct that describes the work to be done */ -static void phy_state_machine(struct work_struct *work) +void phy_state_machine(struct work_struct *work) { struct delayed_work *dwork = to_delayed_work(work); struct phy_device *phydev = diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 8212b2b93422..adbc0fded130 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -177,6 +177,7 @@ struct phy_device* phy_device_create(struct mii_bus *bus, int addr, int phy_id) dev->state = PHY_DOWN; mutex_init(&dev->lock); + INIT_DELAYED_WORK(&dev->state_queue, phy_state_machine); return dev; } diff --git a/include/linux/phy.h b/include/linux/phy.h index 7968defd2fa7..6a7eb402165d 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -485,6 +485,7 @@ void phy_driver_unregister(struct phy_driver *drv); int phy_driver_register(struct phy_driver *new_driver); void phy_prepare_link(struct phy_device *phydev, void (*adjust_link)(struct net_device *)); +void phy_state_machine(struct work_struct *work); void phy_start_machine(struct phy_device *phydev, void (*handler)(struct net_device *)); void phy_stop_machine(struct phy_device *phydev); -- cgit v1.2.3-70-g09d2 From c6fcf6bcfc3cfc1c00cc7fd9610cfa2b1a18041f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sun, 17 Jan 2010 01:47:59 +0100 Subject: mac80211: re-enable re-transmission of filtered frames In an earlier commit, mac80211: disable software retry for now Pavel Roskin reported a problem that seems to be due to software retry of already transmitted frames. It turns out that we've never done that correctly, but due to some recent changes it now crashes in the TX code. I've added a comment in the patch that explains the problem better and also points to possible solutions -- which I can't implement right now. I disabled software retry of failed/filtered frames because it was broken. With the work of the previous patches, it now becomes fairly easy to re-enable it by adding a flag indicating that the frame shouldn't be modified, but still running it through the transmit handlers to populate the control information. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- include/net/mac80211.h | 4 ++++ net/mac80211/status.c | 32 +++++--------------------------- net/mac80211/tx.c | 7 ++++++- 3 files changed, 15 insertions(+), 28 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index c90047de4428..f03f97b627fe 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -271,6 +271,9 @@ struct ieee80211_bss_conf { * transmit function after the current frame, this can be used * by drivers to kick the DMA queue only if unset or when the * queue gets full. + * @IEEE80211_TX_INTFL_RETRANSMISSION: This frame is being retransmitted + * after TX status because the destination was asleep, it must not + * be modified again (no seqno assignment, crypto, etc.) */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), @@ -291,6 +294,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_INTFL_DONT_ENCRYPT = BIT(16), IEEE80211_TX_CTL_PSPOLL_RESPONSE = BIT(17), IEEE80211_TX_CTL_MORE_FRAMES = BIT(18), + IEEE80211_TX_INTFL_RETRANSMISSION = BIT(19), }; /** diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 9e171b178276..800b6777e0ed 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -44,38 +44,17 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, { struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); - /* - * XXX: This is temporary! - * - * The problem here is that when we get here, the driver will - * quite likely have pretty much overwritten info->control by - * using info->driver_data or info->rate_driver_data. Thus, - * when passing out the frame to the driver again, we would be - * passing completely bogus data since the driver would then - * expect a properly filled info->control. In mac80211 itself - * the same problem occurs, since we need info->control.vif - * internally. - * - * To fix this, we should send the frame through TX processing - * again. However, it's not that simple, since the frame will - * have been software-encrypted (if applicable) already, and - * encrypting it again doesn't do much good. So to properly do - * that, we not only have to skip the actual 'raw' encryption - * (key selection etc. still has to be done!) but also the - * sequence number assignment since that impacts the crypto - * encapsulation, of course. - * - * Hence, for now, fix the bug by just dropping the frame. - */ - goto drop; - /* * This skb 'survived' a round-trip through the driver, and * hopefully the driver didn't mangle it too badly. However, * we can definitely not rely on the the control information - * being correct. Clear it so we don't get junk there. + * being correct. Clear it so we don't get junk there, and + * indicate that it needs new processing, but must not be + * modified/encrypted again. 
*/ memset(&info->control, 0, sizeof(info->control)); + info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING | + IEEE80211_TX_INTFL_RETRANSMISSION; sta->tx_filtered_count++; @@ -130,7 +109,6 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, return; } - drop: #ifdef CONFIG_MAC80211_VERBOSE_DEBUG if (net_ratelimit()) printk(KERN_DEBUG "%s: dropped TX filtered frame, " diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index e3d8ff533ee6..da557b0d0114 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1285,6 +1285,7 @@ static int __ieee80211_tx(struct ieee80211_local *local, static int invoke_tx_handlers(struct ieee80211_tx_data *tx) { struct sk_buff *skb = tx->skb; + struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb); ieee80211_tx_result res = TX_DROP; #define CALL_TXH(txh) \ @@ -1299,9 +1300,13 @@ static int invoke_tx_handlers(struct ieee80211_tx_data *tx) CALL_TXH(ieee80211_tx_h_ps_buf); CALL_TXH(ieee80211_tx_h_select_key); CALL_TXH(ieee80211_tx_h_sta); - CALL_TXH(ieee80211_tx_h_michael_mic_add); if (!(tx->local->hw.flags & IEEE80211_HW_HAS_RATE_CONTROL)) CALL_TXH(ieee80211_tx_h_rate_ctrl); + + if (unlikely(info->flags & IEEE80211_TX_INTFL_RETRANSMISSION)) + goto txh_done; + + CALL_TXH(ieee80211_tx_h_michael_mic_add); CALL_TXH(ieee80211_tx_h_sequence); CALL_TXH(ieee80211_tx_h_fragment); /* handlers after fragment must be aware of tx info fragmentation! */ -- cgit v1.2.3-70-g09d2 From 11380a4b2d86fae9a6bce75c9373668cc323fe57 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 19 Jan 2010 13:46:10 -0800 Subject: net: Unexport napi_gro_flush(). Nothing outside of net/core/dev.c uses it. Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 - net/core/dev.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a3fccc85b1a0..468a11dea58c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1527,7 +1527,6 @@ extern int netif_rx(struct sk_buff *skb); extern int netif_rx_ni(struct sk_buff *skb); #define HAVE_NETIF_RECEIVE_SKB 1 extern int netif_receive_skb(struct sk_buff *skb); -extern void napi_gro_flush(struct napi_struct *napi); extern gro_result_t dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb); extern gro_result_t napi_skb_finish(gro_result_t ret, struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index a008f6987a95..5747b9edc1bb 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2582,7 +2582,7 @@ out: return netif_receive_skb(skb); } -void napi_gro_flush(struct napi_struct *napi) +static void napi_gro_flush(struct napi_struct *napi) { struct sk_buff *skb, *next; @@ -2595,7 +2595,6 @@ void napi_gro_flush(struct napi_struct *napi) napi->gro_count = 0; napi->gro_list = NULL; } -EXPORT_SYMBOL(napi_gro_flush); enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff *skb) { -- cgit v1.2.3-70-g09d2 From 04a723ea9c53ba608b0411aa36948bb57c51a08e Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Wed, 6 Jan 2010 10:16:51 -0800 Subject: USB: Fix duplicate sysfs problem after device reset. Borislav Petkov reports issues with duplicate sysfs endpoint files after a resume from a hibernate. It turns out that the code to support alternate settings under xHCI has issues when a device with a non-default alternate setting is reset during the hibernate: [ 427.681810] Restarting tasks ... 
[ 427.681995] hub 1-0:1.0: state 7 ports 6 chg 0004 evt 0000 [ 427.682019] usb usb3: usb resume [ 427.682030] ohci_hcd 0000:00:12.0: wakeup root hub [ 427.682191] hub 1-0:1.0: port 2, status 0501, change 0000, 480 Mb/s [ 427.682205] usb 1-2: usb wakeup-resume [ 427.682226] usb 1-2: finish reset-resume [ 427.682886] done. [ 427.734658] ehci_hcd 0000:00:12.2: port 2 high speed [ 427.734663] ehci_hcd 0000:00:12.2: GetStatus port 2 status 001005 POWER sig=se0 PE CONNECT [ 427.746682] hub 3-0:1.0: hub_reset_resume [ 427.746693] hub 3-0:1.0: trying to enable port power on non-switchable hub [ 427.786715] usb 1-2: reset high speed USB device using ehci_hcd and address 2 [ 427.839653] ehci_hcd 0000:00:12.2: port 2 high speed [ 427.839666] ehci_hcd 0000:00:12.2: GetStatus port 2 status 001005 POWER sig=se0 PE CONNECT [ 427.847717] ohci_hcd 0000:00:12.0: GetStatus roothub.portstatus [1] = 0x00010100 CSC PPS [ 427.915497] hub 1-2:1.0: remove_intf_ep_devs: if: ffff88022f9e8800 ->ep_devs_created: 1 [ 427.915774] hub 1-2:1.0: remove_intf_ep_devs: bNumEndpoints: 1 [ 427.915934] hub 1-2:1.0: if: ffff88022f9e8800: endpoint devs removed. [ 427.916158] hub 1-2:1.0: create_intf_ep_devs: if: ffff88022f9e8800 ->ep_devs_created: 0, ->unregistering: 0 [ 427.916434] hub 1-2:1.0: create_intf_ep_devs: bNumEndpoints: 1 [ 427.916609] ep_81: create, parent hub [ 427.916632] ------------[ cut here ]------------ [ 427.916644] WARNING: at fs/sysfs/dir.c:477 sysfs_add_one+0x82/0x96() [ 427.916649] Hardware name: System Product Name [ 427.916653] sysfs: cannot create duplicate filename '/devices/pci0000:00/0000:00:12.2/usb1/1-2/1-2:1.0/ep_81' [ 427.916658] Modules linked in: binfmt_misc kvm_amd kvm powernow_k8 cpufreq_ondemand cpufreq_powersave cpufreq_userspace freq_table cpufreq_conservative ipv6 vfat fat +8250_pnp 8250 pcspkr ohci_hcd serial_core k10temp edac_core [ 427.916694] Pid: 278, comm: khubd Not tainted 2.6.33-rc2-00187-g08d869a-dirty #13 [ 427.916699] Call Trace: The problem is caused by a mismatch between the USB core's view of the device state and the USB device and xHCI host's view of the device state. After the device reset and re-configuration, the device and the xHCI host think they are using alternate setting 0 of all interfaces. However, the USB core keeps track of the old state, which may include non-zero alternate settings. It uses intf->cur_altsetting to keep the endpoint sysfs files for the old state across the reset. The bandwidth allocation functions need to know what the xHCI host thinks the current alternate settings are, so original patch set intf->cur_altsetting to the alternate setting 0. This caused duplicate endpoint files to be created. The solution is to not set intf->cur_altsetting before calling usb_set_interface() in usb_reset_and_verify_device(). Instead, we add a new flag to struct usb_interface to tell usb_hcd_alloc_bandwidth() to use alternate setting 0 as the currently installed alternate setting. 
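
A toy model of the state mismatch may make the flag's role clearer: after the reset the hardware is back on alternate setting 0, but the core's bookkeeping still records the old setting, and the new flag simply tells the bandwidth code which view to trust while the restore runs. Everything below is illustrative shorthand, not the real USB core API:

    #include <stdio.h>

    struct toy_intf {
            int cur_alt;            /* the core's idea of the installed setting */
            int resetting_device;   /* set around the post-reset restore */
    };

    static void alloc_bandwidth(struct toy_intf *intf, int new_alt)
    {
            /* while a reset-restore runs, the hardware really is on alt 0 */
            int installed = intf->resetting_device ? 0 : intf->cur_alt;

            printf("drop endpoints of alt %d, add endpoints of alt %d\n",
                   installed, new_alt);
    }

    int main(void)
    {
            struct toy_intf intf = { .cur_alt = 2, .resetting_device = 0 };

            /* device was reset: restore the old setting without rewriting cur_alt */
            intf.resetting_device = 1;
            alloc_bandwidth(&intf, intf.cur_alt);
            intf.resetting_device = 0;

            return 0;
    }
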
Signed-off-by: Sarah Sharp Tested-by: Borislav Petkov Cc: Alan Stern Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hcd.c | 18 ++++++++++++++++++ drivers/usb/core/hub.c | 15 +++++---------- include/linux/usb.h | 1 + 3 files changed, 24 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/usb/core/hcd.c b/drivers/usb/core/hcd.c index 0495fa651225..80995ef0868c 100644 --- a/drivers/usb/core/hcd.c +++ b/drivers/usb/core/hcd.c @@ -1684,6 +1684,24 @@ int usb_hcd_alloc_bandwidth(struct usb_device *udev, } } if (cur_alt && new_alt) { + struct usb_interface *iface = usb_ifnum_to_if(udev, + cur_alt->desc.bInterfaceNumber); + + if (iface->resetting_device) { + /* + * The USB core just reset the device, so the xHCI host + * and the device will think alt setting 0 is installed. + * However, the USB core will pass in the alternate + * setting installed before the reset as cur_alt. Dig + * out the alternate setting 0 structure, or the first + * alternate setting if a broken device doesn't have alt + * setting 0. + */ + cur_alt = usb_altnum_to_altsetting(iface, 0); + if (!cur_alt) + cur_alt = &iface->altsetting[0]; + } + /* Drop all the endpoints in the current alt setting */ for (i = 0; i < cur_alt->desc.bNumEndpoints; i++) { ret = hcd->driver->drop_endpoint(hcd, udev, diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index b9f5fcd713e2..35cc8b9ba1f5 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -3695,19 +3695,14 @@ static int usb_reset_and_verify_device(struct usb_device *udev) usb_enable_interface(udev, intf, true); ret = 0; } else { - /* We've just reset the device, so it will think alt - * setting 0 is installed. For usb_set_interface() to - * work properly, we need to set the current alternate - * interface setting to 0 (or the first alt setting, if - * the device doesn't have alt setting 0). + /* Let the bandwidth allocation function know that this + * device has been reset, and it will have to use + * alternate setting 0 as the current alternate setting. */ - intf->cur_altsetting = - usb_find_alt_setting(config, i, 0); - if (!intf->cur_altsetting) - intf->cur_altsetting = - &config->intf_cache[i]->altsetting[0]; + intf->resetting_device = 1; ret = usb_set_interface(udev, desc->bInterfaceNumber, desc->bAlternateSetting); + intf->resetting_device = 0; } if (ret < 0) { dev_err(&udev->dev, "failed to restore interface %d " diff --git a/include/linux/usb.h b/include/linux/usb.h index e101a2d04d75..d7ace1b80f09 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -192,6 +192,7 @@ struct usb_interface { unsigned needs_altsetting0:1; /* switch to altsetting 0 is pending */ unsigned needs_binding:1; /* needs delayed unbind/rebind */ unsigned reset_running:1; + unsigned resetting_device:1; /* true: bandwidth alloc after reset */ struct device dev; /* interface specific device info */ struct device *usb_dev; -- cgit v1.2.3-70-g09d2 From 50b926e439620c469565e8be0f28be78f5fca1ce Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Mon, 4 Jan 2010 14:44:56 +0100 Subject: sched: Fix vmark regression on big machines SD_PREFER_SIBLING is set at the CPU domain level if power saving isn't enabled, leading to many cache misses on large machines as we traverse looking for an idle shared cache to wake to. Change the enabler of select_idle_sibling() to SD_SHARE_PKG_RESOURCES, and enable same at the sibling domain level. 
Reported-by: Lin Ming Signed-off-by: Mike Galbraith Signed-off-by: Peter Zijlstra LKML-Reference: <1262612696.15495.15.camel@marge.simson.net> Signed-off-by: Ingo Molnar --- include/linux/topology.h | 2 +- kernel/sched_fair.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/topology.h b/include/linux/topology.h index 57e63579bfdd..5b81156780b1 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -99,7 +99,7 @@ int arch_update_cpu_topology(void); | 1*SD_WAKE_AFFINE \ | 1*SD_SHARE_CPUPOWER \ | 0*SD_POWERSAVINGS_BALANCE \ - | 0*SD_SHARE_PKG_RESOURCES \ + | 1*SD_SHARE_PKG_RESOURCES \ | 0*SD_SERIALIZE \ | 0*SD_PREFER_SIBLING \ , \ diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 42ac3c9f66f6..8fe7ee81c552 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1508,7 +1508,7 @@ static int select_task_rq_fair(struct task_struct *p, int sd_flag, int wake_flag * If there's an idle sibling in this domain, make that * the wake_affine target instead of the current cpu. */ - if (tmp->flags & SD_PREFER_SIBLING) + if (tmp->flags & SD_SHARE_PKG_RESOURCES) target = select_idle_sibling(p, tmp, target); if (target >= 0) { -- cgit v1.2.3-70-g09d2 From 92b6759857ea3ad19bc6871044e373f6251841d3 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 18 Jan 2010 14:02:16 +0100 Subject: perf: Change the is_software_event() definition The is_software_event() definition always confuses me because its an exclusive expression, make it an inclusive one. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index c66b34f75eea..8fa71874113f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -814,9 +814,14 @@ extern int perf_event_overflow(struct perf_event *event, int nmi, */ static inline int is_software_event(struct perf_event *event) { - return (event->attr.type != PERF_TYPE_RAW) && - (event->attr.type != PERF_TYPE_HARDWARE) && - (event->attr.type != PERF_TYPE_HW_CACHE); + switch (event->attr.type) { + case PERF_TYPE_SOFTWARE: + case PERF_TYPE_TRACEPOINT: + /* for now the breakpoint stuff also works as software event */ + case PERF_TYPE_BREAKPOINT: + return 1; + } + return 0; } extern atomic_t perf_swevent_enabled[PERF_COUNT_SW_MAX]; -- cgit v1.2.3-70-g09d2 From b3fbdcf49f940d0703c356441e0daf045e64e076 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 21 Jan 2010 11:40:47 +0100 Subject: mac80211: pass vif and station to update_tkip_key When a TKIP key is updated, we should pass the station pointer instead of just the address, since drivers can use that to store their own data. We also need to pass the virtual interface pointer. Signed-off-by: Johannes Berg Signed-off-by: John W. 
Linville --- drivers/net/wireless/b43/main.c | 11 ++++++++--- drivers/net/wireless/iwlwifi/iwl-agn.c | 10 +++++++--- include/net/mac80211.h | 6 ++++-- net/mac80211/driver-ops.h | 14 ++++++++++---- net/mac80211/driver-trace.h | 15 +++++++++------ net/mac80211/tkip.c | 12 +++++------- 6 files changed, 43 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index c238468bca7f..c699e46534dc 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -844,8 +844,10 @@ static void rx_tkip_phase1_write(struct b43_wldev *dev, u8 index, u32 iv32, } static void b43_op_update_tkip_key(struct ieee80211_hw *hw, - struct ieee80211_key_conf *keyconf, const u8 *addr, - u32 iv32, u16 *phase1key) + struct ieee80211_vif *vif, + struct ieee80211_key_conf *keyconf, + struct ieee80211_sta *sta, + u32 iv32, u16 *phase1key) { struct b43_wl *wl = hw_to_b43_wl(hw); struct b43_wldev *dev; @@ -863,7 +865,10 @@ static void b43_op_update_tkip_key(struct ieee80211_hw *hw, keymac_write(dev, index, NULL); /* First zero out mac to avoid race */ rx_tkip_phase1_write(dev, index, iv32, phase1key); - keymac_write(dev, index, addr); + /* only pairwise TKIP keys are supported right now */ + if (WARN_ON(!sta)) + goto out_unlock; + keymac_write(dev, index, sta->addr); out_unlock: mutex_unlock(&wl->mutex); diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 8db86239bd6a..62b6939df52e 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -2839,14 +2839,18 @@ void iwl_config_ap(struct iwl_priv *priv) } static void iwl_mac_update_tkip_key(struct ieee80211_hw *hw, - struct ieee80211_key_conf *keyconf, const u8 *addr, - u32 iv32, u16 *phase1key) + struct ieee80211_vif *vif, + struct ieee80211_key_conf *keyconf, + struct ieee80211_sta *sta, + u32 iv32, u16 *phase1key) { struct iwl_priv *priv = hw->priv; IWL_DEBUG_MAC80211(priv, "enter\n"); - iwl_update_tkip_key(priv, keyconf, addr, iv32, phase1key); + iwl_update_tkip_key(priv, keyconf, + sta ? 
sta->addr : iwl_bcast_addr, + iv32, phase1key); IWL_DEBUG_MAC80211(priv, "leave\n"); } diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f03f97b627fe..f56d6f479532 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1614,8 +1614,10 @@ struct ieee80211_ops { struct ieee80211_vif *vif, struct ieee80211_sta *sta, struct ieee80211_key_conf *key); void (*update_tkip_key)(struct ieee80211_hw *hw, - struct ieee80211_key_conf *conf, const u8 *address, - u32 iv32, u16 *phase1key); + struct ieee80211_vif *vif, + struct ieee80211_key_conf *conf, + struct ieee80211_sta *sta, + u32 iv32, u16 *phase1key); int (*hw_scan)(struct ieee80211_hw *hw, struct cfg80211_scan_request *req); void (*sw_scan_start)(struct ieee80211_hw *hw); diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index de91d39e0276..40c6e9a89864 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -137,16 +137,22 @@ static inline int drv_set_key(struct ieee80211_local *local, } static inline void drv_update_tkip_key(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, struct ieee80211_key_conf *conf, - const u8 *address, u32 iv32, + struct sta_info *sta, u32 iv32, u16 *phase1key) { + struct ieee80211_sta *ista = NULL; + might_sleep(); + if (sta) + ista = &sta->sta; + if (local->ops->update_tkip_key) - local->ops->update_tkip_key(&local->hw, conf, address, - iv32, phase1key); - trace_drv_update_tkip_key(local, conf, address, iv32); + local->ops->update_tkip_key(&local->hw, &sdata->vif, conf, + ista, iv32, phase1key); + trace_drv_update_tkip_key(local, sdata, conf, ista, iv32); } static inline int drv_hw_scan(struct ieee80211_local *local, diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 0ea258123b8e..fefa6e6b01bc 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -331,26 +331,29 @@ TRACE_EVENT(drv_set_key, TRACE_EVENT(drv_update_tkip_key, TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, struct ieee80211_key_conf *conf, - const u8 *address, u32 iv32), + struct ieee80211_sta *sta, u32 iv32), - TP_ARGS(local, conf, address, iv32), + TP_ARGS(local, sdata, conf, sta, iv32), TP_STRUCT__entry( LOCAL_ENTRY - __array(u8, addr, 6) + VIF_ENTRY + STA_ENTRY __field(u32, iv32) ), TP_fast_assign( LOCAL_ASSIGN; - memcpy(__entry->addr, address, 6); + VIF_ASSIGN; + STA_ASSIGN; __entry->iv32 = iv32; ), TP_printk( - LOCAL_PR_FMT " addr:%pM iv32:%#x", - LOCAL_PR_ARG, __entry->addr, __entry->iv32 + LOCAL_PR_FMT VIF_PR_FMT STA_PR_FMT " iv32:%#x", + LOCAL_PR_ARG,VIF_PR_ARG,STA_PR_ARG, __entry->iv32 ) ); diff --git a/net/mac80211/tkip.c b/net/mac80211/tkip.c index 14fe49332c02..7ef491e9d66d 100644 --- a/net/mac80211/tkip.c +++ b/net/mac80211/tkip.c @@ -304,14 +304,12 @@ int ieee80211_tkip_decrypt_data(struct crypto_blkcipher *tfm, if (key->local->ops->update_tkip_key && key->flags & KEY_FLAG_UPLOADED_TO_HARDWARE && key->u.tkip.rx[queue].state != TKIP_STATE_PHASE1_HW_UPLOADED) { - static const u8 bcast[ETH_ALEN] = - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff}; - const u8 *sta_addr = key->sta->sta.addr; + struct ieee80211_sub_if_data *sdata = key->sdata; - if (is_multicast_ether_addr(ra)) - sta_addr = bcast; - - drv_update_tkip_key(key->local, &key->conf, sta_addr, + if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) + sdata = container_of(key->sdata->bss, + struct ieee80211_sub_if_data, u.ap); + drv_update_tkip_key(key->local, sdata, &key->conf, key->sta, iv32, key->u.tkip.rx[queue].p1k); 
key->u.tkip.rx[queue].state = TKIP_STATE_PHASE1_HW_UPLOADED; } -- cgit v1.2.3-70-g09d2 From ef15aac6073b27fd4f70007784d2d52ed394bf43 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 20 Jan 2010 12:02:33 +0100 Subject: cfg80211: export multiple MAC addresses in sysfs If a device has multiple MAC addresses, userspace will need to know about that. Similarly, if it allows the MAC addresses to vary by a bitmask. If a driver exports multiple addresses, it is assumed that it will be able to deal with that many different addresses, which need not necessarily match the ones programmed into the device; if a mask is set then the device should deal addresses within that mask based on an arbitrary "base address". To test it all and show how it is used, add support to hwsim even though it can't actually deal with addresses different from the default. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/mac80211_hwsim.c | 8 +++++++- include/net/cfg80211.h | 22 +++++++++++++++++++++- net/wireless/core.c | 12 ++++++++++++ net/wireless/sysfs.c | 20 ++++++++++++++++++++ 4 files changed, 60 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 84df3fcf37b3..0dbda8dfbd99 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -281,6 +281,8 @@ struct mac80211_hwsim_data { struct ieee80211_channel channels_5ghz[ARRAY_SIZE(hwsim_channels_5ghz)]; struct ieee80211_rate rates[ARRAY_SIZE(hwsim_rates)]; + struct mac_address addresses[2]; + struct ieee80211_channel *channel; unsigned long beacon_int; /* in jiffies unit */ unsigned int rx_filter; @@ -1154,7 +1156,11 @@ static int __init init_mac80211_hwsim(void) SET_IEEE80211_DEV(hw, data->dev); addr[3] = i >> 8; addr[4] = i; - SET_IEEE80211_PERM_ADDR(hw, addr); + memcpy(data->addresses[0].addr, addr, ETH_ALEN); + memcpy(data->addresses[1].addr, addr, ETH_ALEN); + data->addresses[1].addr[0] |= 0x40; + hw->wiphy->n_addresses = 2; + hw->wiphy->addresses = data->addresses; hw->channel_change_time = 1; hw->queues = 4; diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2af52704e670..c5d16f299d6f 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1195,6 +1195,10 @@ enum wiphy_flags { WIPHY_FLAG_4ADDR_STATION = BIT(6), }; +struct mac_address { + u8 addr[ETH_ALEN]; +}; + /** * struct wiphy - wireless hardware description * @idx: the wiphy index assigned to this item @@ -1213,12 +1217,28 @@ enum wiphy_flags { * -1 = fragmentation disabled, only odd values >= 256 used * @rts_threshold: RTS threshold (dot11RTSThreshold); -1 = RTS/CTS disabled * @net: the network namespace this wiphy currently lives in + * @perm_addr: permanent MAC address of this device + * @addr_mask: If the device supports multiple MAC addresses by masking, + * set this to a mask with variable bits set to 1, e.g. if the last + * four bits are variable then set it to 00:...:00:0f. The actual + * variable bits shall be determined by the interfaces added, with + * interfaces not matching the mask being rejected to be brought up. + * @n_addresses: number of addresses in @addresses. + * @addresses: If the device has more than one address, set this pointer + * to a list of addresses (6 bytes each). The first one will be used + * by default for perm_addr. In this case, the mask should be set to + * all-zeroes. In this case it is assumed that the device can handle + * the same number of arbitrary MAC addresses. 
*/ struct wiphy { /* assign these fields before you register the wiphy */ - /* permanent MAC address */ + /* permanent MAC address(es) */ u8 perm_addr[ETH_ALEN]; + u8 addr_mask[ETH_ALEN]; + + u16 n_addresses; + struct mac_address *addresses; /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ u16 interface_modes; diff --git a/net/wireless/core.c b/net/wireless/core.c index d07f57c906db..71b6b3a9cf1f 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -413,6 +413,18 @@ int wiphy_register(struct wiphy *wiphy) int i; u16 ifmodes = wiphy->interface_modes; + if (WARN_ON(wiphy->addresses && !wiphy->n_addresses)) + return -EINVAL; + + if (WARN_ON(wiphy->addresses && + !is_zero_ether_addr(wiphy->perm_addr) && + memcmp(wiphy->perm_addr, wiphy->addresses[0].addr, + ETH_ALEN))) + return -EINVAL; + + if (wiphy->addresses) + memcpy(wiphy->perm_addr, wiphy->addresses[0].addr, ETH_ALEN); + /* sanity check ifmodes */ WARN_ON(!ifmodes); ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; diff --git a/net/wireless/sysfs.c b/net/wireless/sysfs.c index efe3c5c92b2d..9f2cef3e0ca0 100644 --- a/net/wireless/sysfs.c +++ b/net/wireless/sysfs.c @@ -33,10 +33,30 @@ static ssize_t name ## _show(struct device *dev, \ SHOW_FMT(index, "%d", wiphy_idx); SHOW_FMT(macaddress, "%pM", wiphy.perm_addr); +SHOW_FMT(address_mask, "%pM", wiphy.addr_mask); + +static ssize_t addresses_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct wiphy *wiphy = &dev_to_rdev(dev)->wiphy; + char *start = buf; + int i; + + if (!wiphy->addresses) + return sprintf(buf, "%pM\n", wiphy->perm_addr); + + for (i = 0; i < wiphy->n_addresses; i++) + buf += sprintf(buf, "%pM\n", &wiphy->addresses[i].addr); + + return buf - start; +} static struct device_attribute ieee80211_dev_attrs[] = { __ATTR_RO(index), __ATTR_RO(macaddress), + __ATTR_RO(address_mask), + __ATTR_RO(addresses), {} }; -- cgit v1.2.3-70-g09d2 From d0dd2de0d055f0ffb1e2ecdc21380de9d12a85e2 Mon Sep 17 00:00:00 2001 From: Andriy Tkachuk Date: Wed, 20 Jan 2010 13:55:06 +0200 Subject: mac80211: Account HT Control field in Data frame hdrlen according to 802.11n-2009 ieee80211_hdrlen() should account account new HT Control field in 802.11 data frame header introduced by IEEE 802.11n standard. According to 802.11n-2009 HT Control field is present in data frames when both of following are met: 1. It is QoS data frame. 2. Order bit is set in Frame Control field. The change might be totally compatible with legacy non-11n aware frames, because 802.11-2007 standard states that "all QoS STAs set this subfield to 0". Signed-off-by: Andriy V. Tkachuk Acked-by : Benoit Papillault Signed-off-by: John W. 
Linville --- include/linux/ieee80211.h | 2 ++ net/wireless/util.c | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 842701906ae9..19984958ab7b 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -138,6 +138,8 @@ #define IEEE80211_WMM_IE_STA_QOSINFO_SP_MASK 0x03 #define IEEE80211_WMM_IE_STA_QOSINFO_SP_SHIFT 5 +#define IEEE80211_HT_CTL_LEN 4 + struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; diff --git a/net/wireless/util.c b/net/wireless/util.c index 23557c1d0a9c..be2ab8c59e3a 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -227,8 +227,11 @@ unsigned int ieee80211_hdrlen(__le16 fc) if (ieee80211_is_data(fc)) { if (ieee80211_has_a4(fc)) hdrlen = 30; - if (ieee80211_is_data_qos(fc)) + if (ieee80211_is_data_qos(fc)) { hdrlen += IEEE80211_QOS_CTL_LEN; + if (ieee80211_has_order(fc)) + hdrlen += IEEE80211_HT_CTL_LEN; + } goto out; } -- cgit v1.2.3-70-g09d2 From a271623f871dda970319ca15dfad3a8c8c36249f Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 22 Jan 2010 10:13:10 +0000 Subject: netdev: remove certain HAVE_ macros After netdev_ops compat code HAVE_* macros aren't needed, in fact they _will_ result in compile breakage for out of tree drivers. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 468a11dea58c..b5fb51d0b8b1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -621,30 +621,21 @@ struct net_device_ops { struct net_device *dev); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb); -#define HAVE_CHANGE_RX_FLAGS void (*ndo_change_rx_flags)(struct net_device *dev, int flags); -#define HAVE_SET_RX_MODE void (*ndo_set_rx_mode)(struct net_device *dev); -#define HAVE_MULTICAST void (*ndo_set_multicast_list)(struct net_device *dev); -#define HAVE_SET_MAC_ADDR int (*ndo_set_mac_address)(struct net_device *dev, void *addr); -#define HAVE_VALIDATE_ADDR int (*ndo_validate_addr)(struct net_device *dev); -#define HAVE_PRIVATE_IOCTL int (*ndo_do_ioctl)(struct net_device *dev, struct ifreq *ifr, int cmd); -#define HAVE_SET_CONFIG int (*ndo_set_config)(struct net_device *dev, struct ifmap *map); -#define HAVE_CHANGE_MTU int (*ndo_change_mtu)(struct net_device *dev, int new_mtu); int (*ndo_neigh_setup)(struct net_device *dev, struct neigh_parms *); -#define HAVE_TX_TIMEOUT void (*ndo_tx_timeout) (struct net_device *dev); struct net_device_stats* (*ndo_get_stats)(struct net_device *dev); @@ -656,7 +647,6 @@ struct net_device_ops { void (*ndo_vlan_rx_kill_vid)(struct net_device *dev, unsigned short vid); #ifdef CONFIG_NET_POLL_CONTROLLER -#define HAVE_NETDEV_POLL void (*ndo_poll_controller)(struct net_device *dev); #endif #if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) -- cgit v1.2.3-70-g09d2 From 5833929cc2ad2b3064b4fac8c44e293972d240d8 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 22 Jan 2010 10:17:26 +0000 Subject: net: constify MIB name tables Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- include/net/snmp.h | 2 +- net/ipv4/proc.c | 4 ++-- net/ipv6/proc.c | 12 ++++++------ net/sctp/proc.c | 2 +- net/xfrm/xfrm_proc.c | 2 +- 5 files changed, 11 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/snmp.h b/include/net/snmp.h index f0d756f2ac99..da02ee027d69 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -32,7 +32,7 @@ * - name of entries. */ struct snmp_mib { - char *name; + const char *name; int entry; }; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index f25542c48b7d..1b09a6dde7c0 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -127,8 +127,8 @@ static const struct snmp_mib snmp4_ipextstats_list[] = { SNMP_MIB_SENTINEL }; -static struct { - char *name; +static const struct { + const char *name; int index; } icmpmibmap[] = { { "DestUnreachs", ICMP_DEST_UNREACH }, diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index 02f20016b4c9..bfe2598dd563 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -59,7 +59,7 @@ static const struct file_operations sockstat6_seq_fops = { .release = single_release_net, }; -static struct snmp_mib snmp6_ipstats_list[] = { +static const struct snmp_mib snmp6_ipstats_list[] = { /* ipv6 mib according to RFC 2465 */ SNMP_MIB_ITEM("Ip6InReceives", IPSTATS_MIB_INPKTS), SNMP_MIB_ITEM("Ip6InHdrErrors", IPSTATS_MIB_INHDRERRORS), @@ -92,7 +92,7 @@ static struct snmp_mib snmp6_ipstats_list[] = { SNMP_MIB_SENTINEL }; -static struct snmp_mib snmp6_icmp6_list[] = { +static const struct snmp_mib snmp6_icmp6_list[] = { /* icmpv6 mib according to RFC 2466 */ SNMP_MIB_ITEM("Icmp6InMsgs", ICMP6_MIB_INMSGS), SNMP_MIB_ITEM("Icmp6InErrors", ICMP6_MIB_INERRORS), @@ -120,7 +120,7 @@ static const char *const icmp6type2name[256] = { }; -static struct snmp_mib snmp6_udp6_list[] = { +static const struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_ITEM("Udp6InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("Udp6NoPorts", UDP_MIB_NOPORTS), SNMP_MIB_ITEM("Udp6InErrors", UDP_MIB_INERRORS), @@ -128,7 +128,7 @@ static struct snmp_mib snmp6_udp6_list[] = { SNMP_MIB_SENTINEL }; -static struct snmp_mib snmp6_udplite6_list[] = { +static const struct snmp_mib snmp6_udplite6_list[] = { SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS), SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS), SNMP_MIB_ITEM("UdpLite6InErrors", UDP_MIB_INERRORS), @@ -170,8 +170,8 @@ static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib) return; } -static inline void -snmp6_seq_show_item(struct seq_file *seq, void **mib, struct snmp_mib *itemlist) +static void snmp6_seq_show_item(struct seq_file *seq, void **mib, + const struct snmp_mib *itemlist) { int i; for (i=0; itemlist[i].name; i++) diff --git a/net/sctp/proc.c b/net/sctp/proc.c index d093cbfeaac4..a5ac6e0a8d9c 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -40,7 +40,7 @@ #include #include /* for snmp_fold_field */ -static struct snmp_mib sctp_snmp_list[] = { +static const struct snmp_mib sctp_snmp_list[] = { SNMP_MIB_ITEM("SctpCurrEstab", SCTP_MIB_CURRESTAB), SNMP_MIB_ITEM("SctpActiveEstabs", SCTP_MIB_ACTIVEESTABS), SNMP_MIB_ITEM("SctpPassiveEstabs", SCTP_MIB_PASSIVEESTABS), diff --git a/net/xfrm/xfrm_proc.c b/net/xfrm/xfrm_proc.c index fef8db553e8d..c083a4e4e796 100644 --- a/net/xfrm/xfrm_proc.c +++ b/net/xfrm/xfrm_proc.c @@ -15,7 +15,7 @@ #include #include -static struct snmp_mib xfrm_mib_list[] = { +static const struct snmp_mib xfrm_mib_list[] = { SNMP_MIB_ITEM("XfrmInError", LINUX_MIB_XFRMINERROR), SNMP_MIB_ITEM("XfrmInBufferError", LINUX_MIB_XFRMINBUFFERERROR), 
SNMP_MIB_ITEM("XfrmInHdrError", LINUX_MIB_XFRMINHDRERROR), -- cgit v1.2.3-70-g09d2 From e754834e65220b2b674c55c3b6dfb2fb1a2804d0 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Fri, 22 Jan 2010 10:18:25 +0000 Subject: icmp: move icmp_err_convert[] to .rodata Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/net/icmp.h | 2 +- net/ipv4/icmp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/icmp.h b/include/net/icmp.h index dfa72d4e8907..15b3dfe9fce8 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -28,7 +28,7 @@ struct icmp_err { unsigned fatal:1; }; -extern struct icmp_err icmp_err_convert[]; +extern const struct icmp_err icmp_err_convert[]; #define ICMP_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.icmp_statistics, field) #define ICMP_INC_STATS_BH(net, field) SNMP_INC_STATS_BH((net)->mib.icmp_statistics, field) #define ICMPMSGOUT_INC_STATS(net, field) SNMP_INC_STATS((net)->mib.icmpmsg_statistics, field+256) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index fe11f60ce41b..4b4c2bcd15db 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -114,7 +114,7 @@ struct icmp_bxm { /* An array of errno for error messages from dest unreach. */ /* RFC 1122: 3.2.2.1 States that NET_UNREACH, HOST_UNREACH and SR_FAILED MUST be considered 'transient errs'. */ -struct icmp_err icmp_err_convert[] = { +const struct icmp_err icmp_err_convert[] = { { .errno = ENETUNREACH, /* ICMP_NET_UNREACH */ .fatal = 0, -- cgit v1.2.3-70-g09d2 From e071041be037eca208b62b84469a06bdfc692bea Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sat, 23 Jan 2010 13:37:10 +0000 Subject: netns xfrm: fix "ip xfrm state|policy count" misreport "ip xfrm state|policy count" report SA/SP count from init_net, not from netns of caller process. Signed-off-by: Alexey Dobriyan Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 4 ++-- net/xfrm/xfrm_policy.c | 16 ++++++++-------- net/xfrm/xfrm_state.c | 6 +++--- net/xfrm/xfrm_user.c | 14 ++++++++------ 4 files changed, 21 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6d85861ab990..60c27706e7b9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1367,8 +1367,8 @@ struct xfrmk_spdinfo { extern struct xfrm_state *xfrm_find_acq_byseq(struct net *net, u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern int xfrm_state_flush(struct net *net, u8 proto, struct xfrm_audit *audit_info); -extern void xfrm_sad_getinfo(struct xfrmk_sadinfo *si); -extern void xfrm_spd_getinfo(struct xfrmk_spdinfo *si); +extern void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si); +extern void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si); extern int xfrm_replay_check(struct xfrm_state *x, struct sk_buff *skb, __be32 seq); extern void xfrm_replay_advance(struct xfrm_state *x, __be32 seq); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 4725a549ad4d..d2c8cb57ee4c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -469,16 +469,16 @@ static inline int xfrm_byidx_should_resize(struct net *net, int total) return 0; } -void xfrm_spd_getinfo(struct xfrmk_spdinfo *si) +void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) { read_lock_bh(&xfrm_policy_lock); - si->incnt = init_net.xfrm.policy_count[XFRM_POLICY_IN]; - si->outcnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT]; - si->fwdcnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD]; - si->inscnt = init_net.xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; - si->outscnt = init_net.xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; - si->fwdscnt = init_net.xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; - si->spdhcnt = init_net.xfrm.policy_idx_hmask; + si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; + si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; + si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; + si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX]; + si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX]; + si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; + si->spdhcnt = net->xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; read_unlock_bh(&xfrm_policy_lock); } diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index d847f1a52b44..b36cc344474b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -641,11 +641,11 @@ out: } EXPORT_SYMBOL(xfrm_state_flush); -void xfrm_sad_getinfo(struct xfrmk_sadinfo *si) +void xfrm_sad_getinfo(struct net *net, struct xfrmk_sadinfo *si) { spin_lock_bh(&xfrm_state_lock); - si->sadcnt = init_net.xfrm.state_num; - si->sadhcnt = init_net.xfrm.state_hmask; + si->sadcnt = net->xfrm.state_num; + si->sadhcnt = net->xfrm.state_hmask; si->sadhmcnt = xfrm_state_hashmax; spin_unlock_bh(&xfrm_state_lock); } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 1ada6186933c..d5a712976004 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -781,7 +781,8 @@ static inline size_t xfrm_spdinfo_msgsize(void) + nla_total_size(sizeof(struct xfrmu_spdhinfo)); } -static int build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) +static int build_spdinfo(struct sk_buff *skb, struct net *net, + u32 pid, u32 seq, u32 flags) { struct xfrmk_spdinfo si; struct xfrmu_spdinfo spc; @@ -795,7 +796,7 @@ static int 
build_spdinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) f = nlmsg_data(nlh); *f = flags; - xfrm_spd_getinfo(&si); + xfrm_spd_getinfo(net, &si); spc.incnt = si.incnt; spc.outcnt = si.outcnt; spc.fwdcnt = si.fwdcnt; @@ -828,7 +829,7 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - if (build_spdinfo(r_skb, spid, seq, *flags) < 0) + if (build_spdinfo(r_skb, net, spid, seq, *flags) < 0) BUG(); return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); @@ -841,7 +842,8 @@ static inline size_t xfrm_sadinfo_msgsize(void) + nla_total_size(4); /* XFRMA_SAD_CNT */ } -static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) +static int build_sadinfo(struct sk_buff *skb, struct net *net, + u32 pid, u32 seq, u32 flags) { struct xfrmk_sadinfo si; struct xfrmu_sadhinfo sh; @@ -854,7 +856,7 @@ static int build_sadinfo(struct sk_buff *skb, u32 pid, u32 seq, u32 flags) f = nlmsg_data(nlh); *f = flags; - xfrm_sad_getinfo(&si); + xfrm_sad_getinfo(net, &si); sh.sadhmcnt = si.sadhmcnt; sh.sadhcnt = si.sadhcnt; @@ -882,7 +884,7 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, if (r_skb == NULL) return -ENOMEM; - if (build_sadinfo(r_skb, spid, seq, *flags) < 0) + if (build_sadinfo(r_skb, net, spid, seq, *flags) < 0) BUG(); return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); -- cgit v1.2.3-70-g09d2 From d7c7544c3d5f59033d1bf3236bc7b289f5f26b75 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Sun, 24 Jan 2010 22:47:53 -0800 Subject: netns xfrm: deal with dst entries in netns GC is non-existent in netns, so after you hit GC threshold, no new dst entries will be created until someone triggers cleanup in init_net. Make xfrm4_dst_ops and xfrm6_dst_ops per-netns. This is not done in a generic way, because it woule waste (AF_MAX - 2) * sizeof(struct dst_ops) bytes per-netns. Reorder GC threshold initialization so it'd be done before registering XFRM policies. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- include/net/netns/xfrm.h | 6 +++++ net/ipv4/xfrm4_policy.c | 14 +++++++----- net/ipv6/xfrm6_policy.c | 25 +++++++++++--------- net/xfrm/xfrm_policy.c | 59 +++++++++++++++++++++++++++++++++++++++++++++--- 4 files changed, 84 insertions(+), 20 deletions(-) (limited to 'include') diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 56f8e5585df7..74f119a2829a 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -5,6 +5,7 @@ #include #include #include +#include struct ctl_table_header; @@ -42,6 +43,11 @@ struct netns_xfrm { unsigned int policy_count[XFRM_POLICY_MAX * 2]; struct work_struct policy_hash_work; + struct dst_ops xfrm4_dst_ops; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct dst_ops xfrm6_dst_ops; +#endif + struct sock *nlsk; struct sock *nlsk_stash; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8c08a28d8f83..67107d63c1cd 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -15,7 +15,6 @@ #include #include -static struct dst_ops xfrm4_dst_ops; static struct xfrm_policy_afinfo xfrm4_policy_afinfo; static struct dst_entry *xfrm4_dst_lookup(struct net *net, int tos, @@ -190,8 +189,10 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm4_garbage_collect(struct dst_ops *ops) { - xfrm4_policy_afinfo.garbage_collect(&init_net); - return (atomic_read(&xfrm4_dst_ops.entries) > xfrm4_dst_ops.gc_thresh*2); + struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); + + xfrm4_policy_afinfo.garbage_collect(net); + return (atomic_read(&ops->entries) > ops->gc_thresh * 2); } static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -268,7 +269,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { static struct ctl_table xfrm4_policy_table[] = { { .procname = "xfrm4_gc_thresh", - .data = &xfrm4_dst_ops.gc_thresh, + .data = &init_net.xfrm.xfrm4_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, @@ -295,8 +296,6 @@ static void __exit xfrm4_policy_fini(void) void __init xfrm4_init(int rt_max_size) { - xfrm4_state_init(); - xfrm4_policy_init(); /* * Select a default value for the gc_thresh based on the main route * table hash size. 
It seems to me the worst case scenario is when @@ -308,6 +307,9 @@ void __init xfrm4_init(int rt_max_size) * and start cleaning when were 1/2 full */ xfrm4_dst_ops.gc_thresh = rt_max_size/2; + + xfrm4_state_init(); + xfrm4_policy_init(); #ifdef CONFIG_SYSCTL sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv4_ctl_path, xfrm4_policy_table); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 7254e3f899a7..dbdc696f5fc5 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -24,7 +24,6 @@ #include #endif -static struct dst_ops xfrm6_dst_ops; static struct xfrm_policy_afinfo xfrm6_policy_afinfo; static struct dst_entry *xfrm6_dst_lookup(struct net *net, int tos, @@ -224,8 +223,10 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl, int reverse) static inline int xfrm6_garbage_collect(struct dst_ops *ops) { - xfrm6_policy_afinfo.garbage_collect(&init_net); - return (atomic_read(&xfrm6_dst_ops.entries) > xfrm6_dst_ops.gc_thresh*2); + struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); + + xfrm6_policy_afinfo.garbage_collect(net); + return (atomic_read(&ops->entries) > ops->gc_thresh * 2); } static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -310,7 +311,7 @@ static void xfrm6_policy_fini(void) static struct ctl_table xfrm6_policy_table[] = { { .procname = "xfrm6_gc_thresh", - .data = &xfrm6_dst_ops.gc_thresh, + .data = &init_net.xfrm.xfrm6_dst_ops.gc_thresh, .maxlen = sizeof(int), .mode = 0644, .proc_handler = proc_dointvec, @@ -326,13 +327,6 @@ int __init xfrm6_init(void) int ret; unsigned int gc_thresh; - ret = xfrm6_policy_init(); - if (ret) - goto out; - - ret = xfrm6_state_init(); - if (ret) - goto out_policy; /* * We need a good default value for the xfrm6 gc threshold. * In ipv4 we set it to the route hash table size * 8, which @@ -346,6 +340,15 @@ int __init xfrm6_init(void) */ gc_thresh = FIB6_TABLE_HASHSZ * 8; xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 
1024 : gc_thresh; + + ret = xfrm6_policy_init(); + if (ret) + goto out; + + ret = xfrm6_state_init(); + if (ret) + goto out_policy; + #ifdef CONFIG_SYSCTL sysctl_hdr = register_net_sysctl_table(&init_net, net_ipv6_ctl_path, xfrm6_policy_table); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d2c8cb57ee4c..0ecb16a9a883 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1309,15 +1309,28 @@ static inline int xfrm_get_tos(struct flowi *fl, int family) return tos; } -static inline struct xfrm_dst *xfrm_alloc_dst(int family) +static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + struct dst_ops *dst_ops; struct xfrm_dst *xdst; if (!afinfo) return ERR_PTR(-EINVAL); - xdst = dst_alloc(afinfo->dst_ops) ?: ERR_PTR(-ENOBUFS); + switch (family) { + case AF_INET: + dst_ops = &net->xfrm.xfrm4_dst_ops; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + dst_ops = &net->xfrm.xfrm6_dst_ops; + break; +#endif + default: + BUG(); + } + xdst = dst_alloc(dst_ops) ?: ERR_PTR(-ENOBUFS); xfrm_policy_put_afinfo(afinfo); @@ -1366,6 +1379,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, struct flowi *fl, struct dst_entry *dst) { + struct net *net = xp_net(policy); unsigned long now = jiffies; struct net_device *dev; struct dst_entry *dst_prev = NULL; @@ -1389,7 +1403,7 @@ static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy, dst_hold(dst); for (; i < nx; i++) { - struct xfrm_dst *xdst = xfrm_alloc_dst(family); + struct xfrm_dst *xdst = xfrm_alloc_dst(net, family); struct dst_entry *dst1 = &xdst->u.dst; err = PTR_ERR(xdst); @@ -2279,6 +2293,7 @@ EXPORT_SYMBOL(xfrm_bundle_ok); int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { + struct net *net; int err = 0; if (unlikely(afinfo == NULL)) return -EINVAL; @@ -2302,6 +2317,27 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) xfrm_policy_afinfo[afinfo->family] = afinfo; } write_unlock_bh(&xfrm_policy_afinfo_lock); + + rtnl_lock(); + for_each_net(net) { + struct dst_ops *xfrm_dst_ops; + + switch (afinfo->family) { + case AF_INET: + xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case AF_INET6: + xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops; + break; +#endif + default: + BUG(); + } + *xfrm_dst_ops = *afinfo->dst_ops; + } + rtnl_unlock(); + return err; } EXPORT_SYMBOL(xfrm_policy_register_afinfo); @@ -2332,6 +2368,22 @@ int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo) } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); +static void __net_init xfrm_dst_ops_init(struct net *net) +{ + struct xfrm_policy_afinfo *afinfo; + + read_lock_bh(&xfrm_policy_afinfo_lock); + afinfo = xfrm_policy_afinfo[AF_INET]; + if (afinfo) + net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + afinfo = xfrm_policy_afinfo[AF_INET6]; + if (afinfo) + net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; +#endif + read_unlock_bh(&xfrm_policy_afinfo_lock); +} + static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family) { struct xfrm_policy_afinfo *afinfo; @@ -2494,6 +2546,7 @@ static int __net_init xfrm_net_init(struct net *net) rv = xfrm_policy_init(net); if (rv < 0) goto out_policy; + xfrm_dst_ops_init(net); rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; -- cgit v1.2.3-70-g09d2 From 32e7bfc41110bc8f29ec0f293c3bcee6645fef34 
Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Mon, 25 Jan 2010 13:36:10 -0800 Subject: net: use helpers to access uc list V2 This patch introduces three macros to work with uc list from net drivers. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/bnx2.c | 5 ++--- drivers/net/e1000/e1000_main.c | 4 ++-- drivers/net/igb/igb_main.c | 7 ++++--- drivers/net/ixgbe/ixgbe_common.c | 7 +++---- drivers/net/ixgbe/ixgbe_common.h | 2 +- drivers/net/ixgbe/ixgbe_main.c | 2 +- drivers/net/ixgbe/ixgbe_type.h | 4 ++-- drivers/net/mv643xx_eth.c | 3 +-- drivers/net/niu.c | 4 ++-- drivers/net/stmmac/dwmac1000_core.c | 10 +++++----- drivers/net/virtio_net.c | 12 +++++++----- drivers/s390/net/qeth_l2_main.c | 2 +- include/linux/netdevice.h | 5 +++++ net/core/dev.c | 4 ++-- 14 files changed, 38 insertions(+), 33 deletions(-) (limited to 'include') diff --git a/drivers/net/bnx2.c b/drivers/net/bnx2.c index d83512d3e02f..a7b6b12c1c05 100644 --- a/drivers/net/bnx2.c +++ b/drivers/net/bnx2.c @@ -48,7 +48,6 @@ #include #include #include -#include #if defined(CONFIG_CNIC) || defined(CONFIG_CNIC_MODULE) #define BCM_CNIC 1 @@ -3579,14 +3578,14 @@ bnx2_set_rx_mode(struct net_device *dev) sort_mode |= BNX2_RPM_SORT_USER0_MC_HSH_EN; } - if (dev->uc.count > BNX2_MAX_UNICAST_ADDRESSES) { + if (netdev_uc_count(dev) > BNX2_MAX_UNICAST_ADDRESSES) { rx_mode |= BNX2_EMAC_RX_MODE_PROMISCUOUS; sort_mode |= BNX2_RPM_SORT_USER0_PROM_EN | BNX2_RPM_SORT_USER0_PROM_VLAN; } else if (!(dev->flags & IFF_PROMISC)) { /* Add all entries into to the match filter list */ i = 0; - list_for_each_entry(ha, &dev->uc.list, list) { + netdev_for_each_uc_addr(ha, dev) { bnx2_set_mac_addr(bp, ha->addr, i + BNX2_START_UNICAST_ADDRESS_INDEX); sort_mode |= (1 << diff --git a/drivers/net/e1000/e1000_main.c b/drivers/net/e1000/e1000_main.c index 87f575ca427d..2ce88c5f75c5 100644 --- a/drivers/net/e1000/e1000_main.c +++ b/drivers/net/e1000/e1000_main.c @@ -2139,7 +2139,7 @@ static void e1000_set_rx_mode(struct net_device *netdev) rctl |= E1000_RCTL_VFE; } - if (netdev->uc.count > rar_entries - 1) { + if (netdev_uc_count(netdev) > rar_entries - 1) { rctl |= E1000_RCTL_UPE; } else if (!(netdev->flags & IFF_PROMISC)) { rctl &= ~E1000_RCTL_UPE; @@ -2162,7 +2162,7 @@ static void e1000_set_rx_mode(struct net_device *netdev) */ i = 1; if (use_uc) - list_for_each_entry(ha, &netdev->uc.list, list) { + netdev_for_each_uc_addr(ha, netdev) { if (i == rar_entries) break; e1000_rar_set(hw, ha->addr, i++); diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index d9679493c635..01cc29483e2f 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -2905,12 +2905,13 @@ static int igb_write_uc_addr_list(struct net_device *netdev) int count = 0; /* return ENOMEM indicating insufficient memory for addresses */ - if (netdev->uc.count > rar_entries) + if (netdev_uc_count(netdev) > rar_entries) return -ENOMEM; - if (netdev->uc.count && rar_entries) { + if (!netdev_uc_empty(netdev) && rar_entries) { struct netdev_hw_addr *ha; - list_for_each_entry(ha, &netdev->uc.list, list) { + + netdev_for_each_uc_addr(ha, netdev) { if (!rar_entries) break; igb_rar_set_qsel(adapter, ha->addr, diff --git a/drivers/net/ixgbe/ixgbe_common.c b/drivers/net/ixgbe/ixgbe_common.c index 276c2aaa800b..eb49020903c1 100644 --- a/drivers/net/ixgbe/ixgbe_common.c +++ b/drivers/net/ixgbe/ixgbe_common.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include "ixgbe.h" @@ -1347,7 +1346,7 @@ static void ixgbe_add_uc_addr(struct ixgbe_hw 
*hw, u8 *addr, u32 vmdq) /** * ixgbe_update_uc_addr_list_generic - Updates MAC list of secondary addresses * @hw: pointer to hardware structure - * @uc_list: the list of new addresses + * @netdev: pointer to net device structure * * The given list replaces any existing list. Clears the secondary addrs from * receive address registers. Uses unused receive address registers for the @@ -1357,7 +1356,7 @@ static void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq) * manually putting the device into promiscuous mode. **/ s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, - struct list_head *uc_list) + struct net_device *netdev) { u32 i; u32 old_promisc_setting = hw->addr_ctrl.overflow_promisc; @@ -1381,7 +1380,7 @@ s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, } /* Add the new addresses */ - list_for_each_entry(ha, uc_list, list) { + netdev_for_each_uc_addr(ha, netdev) { hw_dbg(hw, " Adding the secondary addresses:\n"); ixgbe_add_uc_addr(hw, ha->addr, 0); } diff --git a/drivers/net/ixgbe/ixgbe_common.h b/drivers/net/ixgbe/ixgbe_common.h index dfff0ffaa502..13606d4809c9 100644 --- a/drivers/net/ixgbe/ixgbe_common.h +++ b/drivers/net/ixgbe/ixgbe_common.h @@ -60,7 +60,7 @@ s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list, u32 mc_addr_count, ixgbe_mc_addr_itr func); s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, - struct list_head *uc_list); + struct net_device *netdev); s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw); s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw); s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval); diff --git a/drivers/net/ixgbe/ixgbe_main.c b/drivers/net/ixgbe/ixgbe_main.c index ee41d331a35f..439645d2aeef 100644 --- a/drivers/net/ixgbe/ixgbe_main.c +++ b/drivers/net/ixgbe/ixgbe_main.c @@ -2568,7 +2568,7 @@ void ixgbe_set_rx_mode(struct net_device *netdev) IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); /* reprogram secondary unicast list */ - hw->mac.ops.update_uc_addr_list(hw, &netdev->uc.list); + hw->mac.ops.update_uc_addr_list(hw, netdev); /* reprogram multicast list */ addr_count = netdev->mc_count; diff --git a/drivers/net/ixgbe/ixgbe_type.h b/drivers/net/ixgbe/ixgbe_type.h index b4caa7011a2b..0db67c19b2c4 100644 --- a/drivers/net/ixgbe/ixgbe_type.h +++ b/drivers/net/ixgbe/ixgbe_type.h @@ -30,7 +30,7 @@ #include #include -#include +#include /* Vendor ID */ #define IXGBE_INTEL_VENDOR_ID 0x8086 @@ -2405,7 +2405,7 @@ struct ixgbe_mac_operations { s32 (*set_vmdq)(struct ixgbe_hw *, u32, u32); s32 (*clear_vmdq)(struct ixgbe_hw *, u32, u32); s32 (*init_rx_addrs)(struct ixgbe_hw *); - s32 (*update_uc_addr_list)(struct ixgbe_hw *, struct list_head *); + s32 (*update_uc_addr_list)(struct ixgbe_hw *, struct net_device *); s32 (*update_mc_addr_list)(struct ixgbe_hw *, u8 *, u32, ixgbe_mc_addr_itr); s32 (*enable_mc)(struct ixgbe_hw *); diff --git a/drivers/net/mv643xx_eth.c b/drivers/net/mv643xx_eth.c index af67af55efe7..e24072a9a979 100644 --- a/drivers/net/mv643xx_eth.c +++ b/drivers/net/mv643xx_eth.c @@ -55,7 +55,6 @@ #include #include #include -#include static char mv643xx_eth_driver_name[] = "mv643xx_eth"; static char mv643xx_eth_driver_version[] = "1.4"; @@ -1697,7 +1696,7 @@ static u32 uc_addr_filter_mask(struct net_device *dev) return 0; nibbles = 1 << (dev->dev_addr[5] & 0x0f); - list_for_each_entry(ha, &dev->uc.list, list) { + netdev_for_each_uc_addr(ha, dev) { if (memcmp(dev->dev_addr, ha->addr, 5)) return 0; if ((dev->dev_addr[5] ^ ha->addr[5]) & 0xf0) diff --git a/drivers/net/niu.c 
b/drivers/net/niu.c index 0e260cfbff7b..af9a8647c7e8 100644 --- a/drivers/net/niu.c +++ b/drivers/net/niu.c @@ -6372,7 +6372,7 @@ static void niu_set_rx_mode(struct net_device *dev) if ((dev->flags & IFF_ALLMULTI) || (dev->mc_count > 0)) np->flags |= NIU_FLAGS_MCAST; - alt_cnt = dev->uc.count; + alt_cnt = netdev_uc_count(dev); if (alt_cnt > niu_num_alt_addr(np)) { alt_cnt = 0; np->flags |= NIU_FLAGS_PROMISC; @@ -6381,7 +6381,7 @@ static void niu_set_rx_mode(struct net_device *dev) if (alt_cnt) { int index = 0; - list_for_each_entry(ha, &dev->uc.list, list) { + netdev_for_each_uc_addr(ha, dev) { err = niu_set_alt_mac(np, index, ha->addr); if (err) printk(KERN_WARNING PFX "%s: Error %d " diff --git a/drivers/net/stmmac/dwmac1000_core.c b/drivers/net/stmmac/dwmac1000_core.c index 928eac05b912..d812e9cdb3db 100644 --- a/drivers/net/stmmac/dwmac1000_core.c +++ b/drivers/net/stmmac/dwmac1000_core.c @@ -83,7 +83,7 @@ static void dwmac1000_set_filter(struct net_device *dev) unsigned int value = 0; DBG(KERN_INFO "%s: # mcasts %d, # unicast %d\n", - __func__, dev->mc_count, dev->uc.count); + __func__, dev->mc_count, netdev_uc_count(dev)); if (dev->flags & IFF_PROMISC) value = GMAC_FRAME_FILTER_PR; @@ -117,7 +117,7 @@ static void dwmac1000_set_filter(struct net_device *dev) } /* Handle multiple unicast addresses (perfect filtering)*/ - if (dev->uc.count > GMAC_MAX_UNICAST_ADDRESSES) + if (netdev_uc_count(dev) > GMAC_MAX_UNICAST_ADDRESSES) /* Switch to promiscuous mode is more than 16 addrs are required */ value |= GMAC_FRAME_FILTER_PR; @@ -125,9 +125,9 @@ static void dwmac1000_set_filter(struct net_device *dev) int reg = 1; struct netdev_hw_addr *ha; - list_for_each_entry(ha, &dev->uc.list, list) { - dwmac1000_set_umac_addr(ioaddr, ha->addr, reg); - reg++; + netdev_for_each_uc_addr(ha, dev) { + dwmac1000_set_umac_addr(ioaddr, ha->addr, reg); + reg++; } } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index c708ecc3cb2e..088332a943f7 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -675,6 +675,7 @@ static void virtnet_set_rx_mode(struct net_device *dev) struct virtio_net_ctrl_mac *mac_data; struct dev_addr_list *addr; struct netdev_hw_addr *ha; + int uc_count; void *buf; int i; @@ -701,8 +702,9 @@ static void virtnet_set_rx_mode(struct net_device *dev) dev_warn(&dev->dev, "Failed to %sable allmulti mode.\n", allmulti ? 
"en" : "dis"); + uc_count = netdev_uc_count(dev); /* MAC filter - use one buffer for both lists */ - mac_data = buf = kzalloc(((dev->uc.count + dev->mc_count) * ETH_ALEN) + + mac_data = buf = kzalloc(((uc_count + dev->mc_count) * ETH_ALEN) + (2 * sizeof(mac_data->entries)), GFP_ATOMIC); if (!buf) { dev_warn(&dev->dev, "No memory for MAC address buffer\n"); @@ -712,16 +714,16 @@ static void virtnet_set_rx_mode(struct net_device *dev) sg_init_table(sg, 2); /* Store the unicast list and count in the front of the buffer */ - mac_data->entries = dev->uc.count; + mac_data->entries = uc_count; i = 0; - list_for_each_entry(ha, &dev->uc.list, list) + netdev_for_each_uc_addr(ha, dev) memcpy(&mac_data->macs[i++][0], ha->addr, ETH_ALEN); sg_set_buf(&sg[0], mac_data, - sizeof(mac_data->entries) + (dev->uc.count * ETH_ALEN)); + sizeof(mac_data->entries) + (uc_count * ETH_ALEN)); /* multicast list and count fill the end */ - mac_data = (void *)&mac_data->macs[dev->uc.count][0]; + mac_data = (void *)&mac_data->macs[uc_count][0]; mac_data->entries = dev->mc_count; addr = dev->mc_list; diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index c3258b0dd649..51fde6f2e0b8 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -622,7 +622,7 @@ static void qeth_l2_set_multicast_list(struct net_device *dev) for (dm = dev->mc_list; dm; dm = dm->next) qeth_l2_add_mc(card, dm->da_addr, 0); - list_for_each_entry(ha, &dev->uc.list, list) + netdev_for_each_uc_addr(ha, dev) qeth_l2_add_mc(card, ha->addr, 1); spin_unlock_bh(&card->mclock); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b5fb51d0b8b1..93a32a5ca74f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -263,6 +263,11 @@ struct netdev_hw_addr_list { int count; }; +#define netdev_uc_count(dev) ((dev)->uc.count) +#define netdev_uc_empty(dev) ((dev)->uc.count == 0) +#define netdev_for_each_uc_addr(ha, dev) \ + list_for_each_entry(ha, &dev->uc.list, list) + struct hh_cache { struct hh_cache *hh_next; /* Next entry */ atomic_t hh_refcnt; /* number of users */ diff --git a/net/core/dev.c b/net/core/dev.c index 4fad9db417b1..2cba5c521e56 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3665,10 +3665,10 @@ void __dev_set_rx_mode(struct net_device *dev) /* Unicast addresses changes may only happen under the rtnl, * therefore calling __dev_set_promiscuity here is safe. */ - if (dev->uc.count > 0 && !dev->uc_promisc) { + if (!netdev_uc_empty(dev) && !dev->uc_promisc) { __dev_set_promiscuity(dev, 1); dev->uc_promisc = 1; - } else if (dev->uc.count == 0 && dev->uc_promisc) { + } else if (netdev_uc_empty(dev) && dev->uc_promisc) { __dev_set_promiscuity(dev, -1); dev->uc_promisc = 0; } -- cgit v1.2.3-70-g09d2 From eb807fb23878bc319e029ed8ce3d835d239723a5 Mon Sep 17 00:00:00 2001 From: Kalle Valo Date: Sun, 24 Jan 2010 14:55:12 +0200 Subject: mac80211: fix update_tkip_key() documentation about the context Johannes noticed that I had incorrectly documented the context of update_tkip_key() driver operation. It must be atomic because all RX code is run inside rcu critical section. Reported-by: Johannes Berg Signed-off-by: Kalle Valo Signed-off-by: John W. 
Linville --- include/net/mac80211.h | 2 +- net/mac80211/driver-ops.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f56d6f479532..f64402f6312b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1489,7 +1489,7 @@ enum ieee80211_ampdu_mlme_action { * @update_tkip_key: See the section "Hardware crypto acceleration" * This callback will be called in the context of Rx. Called for drivers * which set IEEE80211_KEY_FLAG_TKIP_REQ_RX_P1_KEY. - * The callback can sleep. + * The callback must be atomic. * * @hw_scan: Ask the hardware to service the scan request, no need to start * the scan state machine in stack. The scan must honour the channel diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 40c6e9a89864..6c31f38ac7f5 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -144,8 +144,6 @@ static inline void drv_update_tkip_key(struct ieee80211_local *local, { struct ieee80211_sta *ista = NULL; - might_sleep(); - if (sta) ista = &sta->sta; -- cgit v1.2.3-70-g09d2 From c21dbf9214bce129f92e1af05552553ff0e318ed Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Jan 2010 14:15:46 +0100 Subject: cfg80211: export cfg80211_find_ie This new function (previously a static function called just "find_ie" can be used to find a specific IE in a buffer of IEs. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 16 ++++++++++++++++ net/wireless/scan.c | 38 ++++++++++++++++++++------------------ 2 files changed, 36 insertions(+), 18 deletions(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c5d16f299d6f..a3f0a7ed31ac 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1658,6 +1658,22 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list, */ unsigned int cfg80211_classify8021d(struct sk_buff *skb); +/** + * cfg80211_find_ie - find information element in data + * + * @eid: element ID + * @ies: data consisting of IEs + * @len: length of data + * + * This function will return %NULL if the element ID could + * not be found or if the element is invalid (claims to be + * longer than the given data), or a pointer to the first byte + * of the requested element, that is the byte containing the + * element ID. There are no checks on the element length + * other than having to fit into the given data. 
+ */ +const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len); + /* * Regulatory helper functions for wiphys */ diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 06b0231ee5e3..978cac3414b5 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -143,9 +143,9 @@ void cfg80211_bss_expire(struct cfg80211_registered_device *dev) dev->bss_generation++; } -static u8 *find_ie(u8 num, u8 *ies, int len) +const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len) { - while (len > 2 && ies[0] != num) { + while (len > 2 && ies[0] != eid) { len -= ies[1] + 2; ies += ies[1] + 2; } @@ -155,11 +155,12 @@ static u8 *find_ie(u8 num, u8 *ies, int len) return NULL; return ies; } +EXPORT_SYMBOL(cfg80211_find_ie); static int cmp_ies(u8 num, u8 *ies1, size_t len1, u8 *ies2, size_t len2) { - const u8 *ie1 = find_ie(num, ies1, len1); - const u8 *ie2 = find_ie(num, ies2, len2); + const u8 *ie1 = cfg80211_find_ie(num, ies1, len1); + const u8 *ie2 = cfg80211_find_ie(num, ies2, len2); int r; if (!ie1 && !ie2) @@ -185,9 +186,9 @@ static bool is_bss(struct cfg80211_bss *a, if (!ssid) return true; - ssidie = find_ie(WLAN_EID_SSID, - a->information_elements, - a->len_information_elements); + ssidie = cfg80211_find_ie(WLAN_EID_SSID, + a->information_elements, + a->len_information_elements); if (!ssidie) return false; if (ssidie[1] != ssid_len) @@ -204,9 +205,9 @@ static bool is_mesh(struct cfg80211_bss *a, if (!is_zero_ether_addr(a->bssid)) return false; - ie = find_ie(WLAN_EID_MESH_ID, - a->information_elements, - a->len_information_elements); + ie = cfg80211_find_ie(WLAN_EID_MESH_ID, + a->information_elements, + a->len_information_elements); if (!ie) return false; if (ie[1] != meshidlen) @@ -214,9 +215,9 @@ static bool is_mesh(struct cfg80211_bss *a, if (memcmp(ie + 2, meshid, meshidlen)) return false; - ie = find_ie(WLAN_EID_MESH_CONFIG, - a->information_elements, - a->len_information_elements); + ie = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, + a->information_elements, + a->len_information_elements); if (!ie) return false; if (ie[1] != sizeof(struct ieee80211_meshconf_ie)) @@ -395,11 +396,12 @@ cfg80211_bss_update(struct cfg80211_registered_device *dev, if (is_zero_ether_addr(res->pub.bssid)) { /* must be mesh, verify */ - meshid = find_ie(WLAN_EID_MESH_ID, res->pub.information_elements, - res->pub.len_information_elements); - meshcfg = find_ie(WLAN_EID_MESH_CONFIG, - res->pub.information_elements, - res->pub.len_information_elements); + meshid = cfg80211_find_ie(WLAN_EID_MESH_ID, + res->pub.information_elements, + res->pub.len_information_elements); + meshcfg = cfg80211_find_ie(WLAN_EID_MESH_CONFIG, + res->pub.information_elements, + res->pub.len_information_elements); if (!meshid || !meshcfg || meshcfg[1] != sizeof(struct ieee80211_meshconf_ie)) { /* bogus mesh */ -- cgit v1.2.3-70-g09d2 From 56007a028c51cbf800a6c969d6f6431d23443b99 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 26 Jan 2010 14:19:52 +0100 Subject: mac80211: wait for beacon before enabling powersave Because DTIM information is required for powersave but is only conveyed in beacons, wait for a beacon before enabling powersave, and change the way the information is conveyed to the driver accordingly. mwl8k doesn't currently seem to implement PS but requires the DTIM period in a different way; after talking to Lennert we agreed to just have mwl8k do the parsing itself in the finalize_join work. Signed-off-by: Johannes Berg Acked-by: Lennert Buytenhek Signed-off-by: John W. 
Linville --- drivers/net/wireless/iwlwifi/iwl-power.c | 2 +- drivers/net/wireless/mwl8k.c | 14 +++++++++----- drivers/net/wireless/wl12xx/wl1251.h | 3 --- drivers/net/wireless/wl12xx/wl1251_main.c | 25 +++++++------------------ include/net/mac80211.h | 7 ++++++- net/mac80211/mlme.c | 27 ++++++++++++++++++++++++--- net/mac80211/scan.c | 4 ---- 7 files changed, 47 insertions(+), 35 deletions(-) (limited to 'include') diff --git a/drivers/net/wireless/iwlwifi/iwl-power.c b/drivers/net/wireless/iwlwifi/iwl-power.c index 8599444bef01..9e3ca0641451 100644 --- a/drivers/net/wireless/iwlwifi/iwl-power.c +++ b/drivers/net/wireless/iwlwifi/iwl-power.c @@ -319,7 +319,7 @@ int iwl_power_update_mode(struct iwl_priv *priv, bool force) priv->chain_noise_data.state == IWL_CHAIN_NOISE_ALIVE; if (priv->vif) - dtimper = priv->vif->bss_conf.dtim_period; + dtimper = priv->hw->conf.ps_dtim_period; else dtimper = 1; diff --git a/drivers/net/wireless/mwl8k.c b/drivers/net/wireless/mwl8k.c index 68546ca0ba37..f0f08f3919cc 100644 --- a/drivers/net/wireless/mwl8k.c +++ b/drivers/net/wireless/mwl8k.c @@ -3881,12 +3881,16 @@ static void mwl8k_finalize_join_worker(struct work_struct *work) struct mwl8k_priv *priv = container_of(work, struct mwl8k_priv, finalize_join_worker); struct sk_buff *skb = priv->beacon_skb; - struct mwl8k_vif *mwl8k_vif; + struct ieee80211_mgmt *mgmt = (void *)skb->data; + int len = skb->len - offsetof(struct ieee80211_mgmt, u.beacon.variable); + const u8 *tim = cfg80211_find_ie(WLAN_EID_TIM, + mgmt->u.beacon.variable, len); + int dtim_period = 1; + + if (tim && tim[1] >= 2) + dtim_period = tim[3]; - mwl8k_vif = mwl8k_first_vif(priv); - if (mwl8k_vif != NULL) - mwl8k_cmd_finalize_join(priv->hw, skb->data, skb->len, - mwl8k_vif->vif->bss_conf.dtim_period); + mwl8k_cmd_finalize_join(priv->hw, skb->data, skb->len, dtim_period); dev_kfree_skb(skb); priv->beacon_skb = NULL; diff --git a/drivers/net/wireless/wl12xx/wl1251.h b/drivers/net/wireless/wl12xx/wl1251.h index 6301578d1565..37c61c19cae5 100644 --- a/drivers/net/wireless/wl12xx/wl1251.h +++ b/drivers/net/wireless/wl12xx/wl1251.h @@ -341,9 +341,6 @@ struct wl1251 { /* Are we currently scanning */ bool scanning; - /* Our association ID */ - u16 aid; - /* Default key (for WEP) */ u32 default_key; diff --git a/drivers/net/wireless/wl12xx/wl1251_main.c b/drivers/net/wireless/wl12xx/wl1251_main.c index 595f0f94d16e..a717dde4822e 100644 --- a/drivers/net/wireless/wl12xx/wl1251_main.c +++ b/drivers/net/wireless/wl12xx/wl1251_main.c @@ -617,10 +617,13 @@ static int wl1251_op_config(struct ieee80211_hw *hw, u32 changed) wl->psm_requested = true; + wl->dtim_period = conf->ps_dtim_period; + + ret = wl1251_acx_wr_tbtt_and_dtim(wl, wl->beacon_int, + wl->dtim_period); + /* - * We enter PSM only if we're already associated. - * If we're not, we'll enter it when joining an SSID, - * through the bss_info_changed() hook. + * mac80211 enables PSM only if we're already associated. 
*/ ret = wl1251_ps_set_mode(wl, STATION_POWER_SAVE_MODE); if (ret < 0) @@ -943,7 +946,6 @@ static void wl1251_op_bss_info_changed(struct ieee80211_hw *hw, struct ieee80211_bss_conf *bss_conf, u32 changed) { - enum wl1251_cmd_ps_mode mode; struct wl1251 *wl = hw->priv; struct sk_buff *beacon, *skb; int ret; @@ -984,11 +986,6 @@ static void wl1251_op_bss_info_changed(struct ieee80211_hw *hw, if (changed & BSS_CHANGED_ASSOC) { if (bss_conf->assoc) { wl->beacon_int = bss_conf->beacon_int; - wl->dtim_period = bss_conf->dtim_period; - - ret = wl1251_acx_wr_tbtt_and_dtim(wl, wl->beacon_int, - wl->dtim_period); - wl->aid = bss_conf->aid; skb = ieee80211_pspoll_get(wl->hw, wl->vif); if (!skb) @@ -1001,17 +998,9 @@ static void wl1251_op_bss_info_changed(struct ieee80211_hw *hw, if (ret < 0) goto out_sleep; - ret = wl1251_acx_aid(wl, wl->aid); + ret = wl1251_acx_aid(wl, bss_conf->aid); if (ret < 0) goto out_sleep; - - /* If we want to go in PSM but we're not there yet */ - if (wl->psm_requested && !wl->psm) { - mode = STATION_POWER_SAVE_MODE; - ret = wl1251_ps_set_mode(wl, mode); - if (ret < 0) - goto out_sleep; - } } else { /* use defaults when not associated */ wl->beacon_int = WL1251_DEFAULT_BEACON_INT; diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f64402f6312b..1e9c93024cf2 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -186,7 +186,8 @@ enum ieee80211_bss_change { * @use_short_slot: use short slot time (only relevant for ERP); * if the hardware cannot handle this it must set the * IEEE80211_HW_2GHZ_SHORT_SLOT_INCAPABLE hardware flag - * @dtim_period: num of beacons before the next DTIM, for PSM + * @dtim_period: num of beacons before the next DTIM, for beaconing, + * not valid in station mode (cf. hw conf ps_dtim_period) * @timestamp: beacon timestamp * @beacon_int: beacon interval * @assoc_capability: capabilities taken from assoc resp @@ -648,6 +649,9 @@ enum ieee80211_smps_mode { * value will be only achievable between DTIM frames, the hardware * needs to check for the multicast traffic bit in DTIM beacons. * This variable is valid only when the CONF_PS flag is set. + * @ps_dtim_period: The DTIM period of the AP we're connected to, for use + * in power saving. Power saving will not be enabled until a beacon + * has been received and the DTIM period is known. * @dynamic_ps_timeout: The dynamic powersave timeout (in ms), see the * powersave documentation below. This variable is valid only when * the CONF_PS flag is set. 
@@ -674,6 +678,7 @@ struct ieee80211_conf { int max_sleep_period; u16 listen_interval; + u8 ps_dtim_period; u8 long_frame_max_tx_count, short_frame_max_tx_count; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 1e1d16c55ee5..86c6ad1b058d 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -484,6 +484,7 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) if (count == 1 && found->u.mgd.powersave && found->u.mgd.associated && + found->u.mgd.associated->beacon_ies && !(found->u.mgd.flags & (IEEE80211_STA_BEACON_POLL | IEEE80211_STA_CONNECTION_POLL))) { s32 beaconint_us; @@ -497,14 +498,22 @@ void ieee80211_recalc_ps(struct ieee80211_local *local, s32 latency) if (beaconint_us > latency) { local->ps_sdata = NULL; } else { - u8 dtimper = found->vif.bss_conf.dtim_period; + struct ieee80211_bss *bss; int maxslp = 1; + u8 dtimper; - if (dtimper > 1) + bss = (void *)found->u.mgd.associated->priv; + dtimper = bss->dtim_period; + + /* If the TIM IE is invalid, pretend the value is 1 */ + if (!dtimper) + dtimper = 1; + else if (dtimper > 1) maxslp = min_t(int, dtimper, latency / beaconint_us); local->hw.conf.max_sleep_period = maxslp; + local->hw.conf.ps_dtim_period = dtimper; local->ps_sdata = found; } } else { @@ -702,7 +711,6 @@ static void ieee80211_set_associated(struct ieee80211_sub_if_data *sdata, /* set timing information */ sdata->vif.bss_conf.beacon_int = cbss->beacon_interval; sdata->vif.bss_conf.timestamp = cbss->tsf; - sdata->vif.bss_conf.dtim_period = bss->dtim_period; bss_info_changed |= BSS_CHANGED_BEACON_INT; bss_info_changed |= ieee80211_handle_bss_capability(sdata, @@ -1168,6 +1176,13 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, int freq; struct ieee80211_bss *bss; struct ieee80211_channel *channel; + bool need_ps = false; + + if (sdata->u.mgd.associated) { + bss = (void *)sdata->u.mgd.associated->priv; + /* not previously set so we may need to recalc */ + need_ps = !bss->dtim_period; + } if (elems->ds_params && elems->ds_params_len == 1) freq = ieee80211_channel_to_frequency(elems->ds_params[0]); @@ -1187,6 +1202,12 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata, if (!sdata->u.mgd.associated) return; + if (need_ps) { + mutex_lock(&local->iflist_mtx); + ieee80211_recalc_ps(local, -1); + mutex_unlock(&local->iflist_mtx); + } + if (elems->ch_switch_elem && (elems->ch_switch_elem_len == 3) && (memcmp(mgmt->bssid, sdata->u.mgd.associated->bssid, ETH_ALEN) == 0)) { diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 9afe2f9885dc..bc061f629674 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -111,10 +111,6 @@ ieee80211_bss_info_update(struct ieee80211_local *local, bss->dtim_period = tim_ie->dtim_period; } - /* set default value for buggy AP/no TIM element */ - if (bss->dtim_period == 0) - bss->dtim_period = 1; - bss->supp_rates_len = 0; if (elems->supp_rates) { clen = IEEE80211_MAX_SUPP_RATES - bss->supp_rates_len; -- cgit v1.2.3-70-g09d2 From a1664773907a2b69e2a3019598dcbeffa6bc724b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 25 Jan 2010 10:37:54 +0000 Subject: netns xfrm: xfrm6_tunnel in netns I'm not sure about rcu stuff near kmem cache destruction: * checks for non-empty hashes look bogus, they're done _before_ rcu_berrier() * unregistering netns ops is done before kmem_cache destoy (as it should), and unregistering involves rcu barriers by itself So it looks nothing should be done. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. 
Miller --- include/net/xfrm.h | 6 +-- net/ipv6/ipcomp6.c | 4 +- net/ipv6/xfrm6_tunnel.c | 140 ++++++++++++++++++++++++++++-------------------- 3 files changed, 88 insertions(+), 62 deletions(-) (limited to 'include') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 60c27706e7b9..fcee547ca7e3 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1408,9 +1408,9 @@ extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family); extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family); -extern __be32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr); -extern void xfrm6_tunnel_free_spi(xfrm_address_t *saddr); -extern __be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr); +extern __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr); +extern void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr); +extern __be32 xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr); extern int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_output(struct sk_buff *skb); diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 2f2a5ca2c878..1d1faf757c9a 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -81,7 +81,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) goto out; t->id.proto = IPPROTO_IPV6; - t->id.spi = xfrm6_tunnel_alloc_spi((xfrm_address_t *)&x->props.saddr); + t->id.spi = xfrm6_tunnel_alloc_spi(&init_net, (xfrm_address_t *)&x->props.saddr); if (!t->id.spi) goto error; @@ -112,7 +112,7 @@ static int ipcomp6_tunnel_attach(struct xfrm_state *x) struct xfrm_state *t = NULL; __be32 spi; - spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr); + spi = xfrm6_tunnel_spi_lookup(&init_net, (xfrm_address_t *)&x->props.saddr); if (spi) t = xfrm_state_lookup(&init_net, (xfrm_address_t *)&x->id.daddr, spi, IPPROTO_IPV6, AF_INET6); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 23fb1002124c..d6f9aeec69f7 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -30,6 +30,25 @@ #include #include #include +#include + +#define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256 +#define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256 + +#define XFRM6_TUNNEL_SPI_MIN 1 +#define XFRM6_TUNNEL_SPI_MAX 0xffffffff + +struct xfrm6_tunnel_net { + struct hlist_head spi_byaddr[XFRM6_TUNNEL_SPI_BYADDR_HSIZE]; + struct hlist_head spi_byspi[XFRM6_TUNNEL_SPI_BYSPI_HSIZE]; + u32 spi; +}; + +static int xfrm6_tunnel_net_id __read_mostly; +static inline struct xfrm6_tunnel_net *xfrm6_tunnel_pernet(struct net *net) +{ + return net_generic(net, xfrm6_tunnel_net_id); +} /* * xfrm_tunnel_spi things are for allocating unique id ("spi") @@ -46,19 +65,8 @@ struct xfrm6_tunnel_spi { static DEFINE_SPINLOCK(xfrm6_tunnel_spi_lock); -static u32 xfrm6_tunnel_spi; - -#define XFRM6_TUNNEL_SPI_MIN 1 -#define XFRM6_TUNNEL_SPI_MAX 0xffffffff - static struct kmem_cache *xfrm6_tunnel_spi_kmem __read_mostly; -#define XFRM6_TUNNEL_SPI_BYADDR_HSIZE 256 -#define XFRM6_TUNNEL_SPI_BYSPI_HSIZE 256 - -static struct hlist_head xfrm6_tunnel_spi_byaddr[XFRM6_TUNNEL_SPI_BYADDR_HSIZE]; -static struct hlist_head xfrm6_tunnel_spi_byspi[XFRM6_TUNNEL_SPI_BYSPI_HSIZE]; - static inline unsigned xfrm6_tunnel_spi_hash_byaddr(xfrm_address_t *addr) { unsigned h; @@ -77,49 +85,30 @@ static inline unsigned 
xfrm6_tunnel_spi_hash_byspi(u32 spi) } -static int xfrm6_tunnel_spi_init(void) +static int __init xfrm6_tunnel_spi_init(void) { - int i; - - xfrm6_tunnel_spi = 0; xfrm6_tunnel_spi_kmem = kmem_cache_create("xfrm6_tunnel_spi", sizeof(struct xfrm6_tunnel_spi), 0, SLAB_HWCACHE_ALIGN, NULL); if (!xfrm6_tunnel_spi_kmem) return -ENOMEM; - - for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) - INIT_HLIST_HEAD(&xfrm6_tunnel_spi_byaddr[i]); - for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) - INIT_HLIST_HEAD(&xfrm6_tunnel_spi_byspi[i]); return 0; } static void xfrm6_tunnel_spi_fini(void) { - int i; - - for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) { - if (!hlist_empty(&xfrm6_tunnel_spi_byaddr[i])) - return; - } - for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) { - if (!hlist_empty(&xfrm6_tunnel_spi_byspi[i])) - return; - } - rcu_barrier(); kmem_cache_destroy(xfrm6_tunnel_spi_kmem); - xfrm6_tunnel_spi_kmem = NULL; } -static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) +static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr) { + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; struct hlist_node *pos; hlist_for_each_entry_rcu(x6spi, pos, - &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], + &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) return x6spi; @@ -128,13 +117,13 @@ static struct xfrm6_tunnel_spi *__xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) return NULL; } -__be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) +__be32 xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr) { struct xfrm6_tunnel_spi *x6spi; u32 spi; rcu_read_lock_bh(); - x6spi = __xfrm6_tunnel_spi_lookup(saddr); + x6spi = __xfrm6_tunnel_spi_lookup(net, saddr); spi = x6spi ? 
x6spi->spi : 0; rcu_read_unlock_bh(); return htonl(spi); @@ -142,14 +131,15 @@ __be32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr) EXPORT_SYMBOL(xfrm6_tunnel_spi_lookup); -static int __xfrm6_tunnel_spi_check(u32 spi) +static int __xfrm6_tunnel_spi_check(struct net *net, u32 spi) { + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; int index = xfrm6_tunnel_spi_hash_byspi(spi); struct hlist_node *pos; hlist_for_each_entry(x6spi, pos, - &xfrm6_tunnel_spi_byspi[index], + &xfrm6_tn->spi_byspi[index], list_byspi) { if (x6spi->spi == spi) return -1; @@ -157,32 +147,33 @@ static int __xfrm6_tunnel_spi_check(u32 spi) return index; } -static u32 __xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) +static u32 __xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr) { + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); u32 spi; struct xfrm6_tunnel_spi *x6spi; int index; - if (xfrm6_tunnel_spi < XFRM6_TUNNEL_SPI_MIN || - xfrm6_tunnel_spi >= XFRM6_TUNNEL_SPI_MAX) - xfrm6_tunnel_spi = XFRM6_TUNNEL_SPI_MIN; + if (xfrm6_tn->spi < XFRM6_TUNNEL_SPI_MIN || + xfrm6_tn->spi >= XFRM6_TUNNEL_SPI_MAX) + xfrm6_tn->spi = XFRM6_TUNNEL_SPI_MIN; else - xfrm6_tunnel_spi++; + xfrm6_tn->spi++; - for (spi = xfrm6_tunnel_spi; spi <= XFRM6_TUNNEL_SPI_MAX; spi++) { - index = __xfrm6_tunnel_spi_check(spi); + for (spi = xfrm6_tn->spi; spi <= XFRM6_TUNNEL_SPI_MAX; spi++) { + index = __xfrm6_tunnel_spi_check(net, spi); if (index >= 0) goto alloc_spi; } - for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tunnel_spi; spi++) { - index = __xfrm6_tunnel_spi_check(spi); + for (spi = XFRM6_TUNNEL_SPI_MIN; spi < xfrm6_tn->spi; spi++) { + index = __xfrm6_tunnel_spi_check(net, spi); if (index >= 0) goto alloc_spi; } spi = 0; goto out; alloc_spi: - xfrm6_tunnel_spi = spi; + xfrm6_tn->spi = spi; x6spi = kmem_cache_alloc(xfrm6_tunnel_spi_kmem, GFP_ATOMIC); if (!x6spi) goto out; @@ -192,26 +183,26 @@ alloc_spi: x6spi->spi = spi; atomic_set(&x6spi->refcnt, 1); - hlist_add_head_rcu(&x6spi->list_byspi, &xfrm6_tunnel_spi_byspi[index]); + hlist_add_head_rcu(&x6spi->list_byspi, &xfrm6_tn->spi_byspi[index]); index = xfrm6_tunnel_spi_hash_byaddr(saddr); - hlist_add_head_rcu(&x6spi->list_byaddr, &xfrm6_tunnel_spi_byaddr[index]); + hlist_add_head_rcu(&x6spi->list_byaddr, &xfrm6_tn->spi_byaddr[index]); out: return spi; } -__be32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr) +__be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr) { struct xfrm6_tunnel_spi *x6spi; u32 spi; spin_lock_bh(&xfrm6_tunnel_spi_lock); - x6spi = __xfrm6_tunnel_spi_lookup(saddr); + x6spi = __xfrm6_tunnel_spi_lookup(net, saddr); if (x6spi) { atomic_inc(&x6spi->refcnt); spi = x6spi->spi; } else - spi = __xfrm6_tunnel_alloc_spi(saddr); + spi = __xfrm6_tunnel_alloc_spi(net, saddr); spin_unlock_bh(&xfrm6_tunnel_spi_lock); return htonl(spi); @@ -225,15 +216,16 @@ static void x6spi_destroy_rcu(struct rcu_head *head) container_of(head, struct xfrm6_tunnel_spi, rcu_head)); } -void xfrm6_tunnel_free_spi(xfrm_address_t *saddr) +void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) { + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; struct hlist_node *pos, *n; spin_lock_bh(&xfrm6_tunnel_spi_lock); hlist_for_each_entry_safe(x6spi, pos, n, - &xfrm6_tunnel_spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], + &xfrm6_tn->spi_byaddr[xfrm6_tunnel_spi_hash_byaddr(saddr)], list_byaddr) { if (memcmp(&x6spi->addr, saddr, sizeof(x6spi->addr)) == 0) { @@ -263,10 +255,11 @@ static 
int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_tunnel_rcv(struct sk_buff *skb) { + struct net *net = dev_net(skb->dev); struct ipv6hdr *iph = ipv6_hdr(skb); __be32 spi; - spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); + spi = xfrm6_tunnel_spi_lookup(net, (xfrm_address_t *)&iph->saddr); return xfrm6_rcv_spi(skb, IPPROTO_IPV6, spi) > 0 ? : 0; } @@ -326,7 +319,9 @@ static int xfrm6_tunnel_init_state(struct xfrm_state *x) static void xfrm6_tunnel_destroy(struct xfrm_state *x) { - xfrm6_tunnel_free_spi((xfrm_address_t *)&x->props.saddr); + struct net *net = xs_net(x); + + xfrm6_tunnel_free_spi(net, (xfrm_address_t *)&x->props.saddr); } static const struct xfrm_type xfrm6_tunnel_type = { @@ -351,6 +346,31 @@ static struct xfrm6_tunnel xfrm46_tunnel_handler = { .priority = 2, }; +static int __net_init xfrm6_tunnel_net_init(struct net *net) +{ + struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); + unsigned int i; + + for (i = 0; i < XFRM6_TUNNEL_SPI_BYADDR_HSIZE; i++) + INIT_HLIST_HEAD(&xfrm6_tn->spi_byaddr[i]); + for (i = 0; i < XFRM6_TUNNEL_SPI_BYSPI_HSIZE; i++) + INIT_HLIST_HEAD(&xfrm6_tn->spi_byspi[i]); + xfrm6_tn->spi = 0; + + return 0; +} + +static void __net_exit xfrm6_tunnel_net_exit(struct net *net) +{ +} + +static struct pernet_operations xfrm6_tunnel_net_ops = { + .init = xfrm6_tunnel_net_init, + .exit = xfrm6_tunnel_net_exit, + .id = &xfrm6_tunnel_net_id, + .size = sizeof(struct xfrm6_tunnel_net), +}; + static int __init xfrm6_tunnel_init(void) { int rv; @@ -367,8 +387,13 @@ static int __init xfrm6_tunnel_init(void) rv = xfrm6_tunnel_spi_init(); if (rv < 0) goto dereg46; + rv = register_pernet_subsys(&xfrm6_tunnel_net_ops); + if (rv < 0) + goto deregspi; return 0; +deregspi: + xfrm6_tunnel_spi_fini(); dereg46: xfrm6_tunnel_deregister(&xfrm46_tunnel_handler, AF_INET); dereg6: @@ -381,6 +406,7 @@ err: static void __exit xfrm6_tunnel_fini(void) { + unregister_pernet_subsys(&xfrm6_tunnel_net_ops); xfrm6_tunnel_spi_fini(); xfrm6_tunnel_deregister(&xfrm46_tunnel_handler, AF_INET); xfrm6_tunnel_deregister(&xfrm6_tunnel_handler, AF_INET6); -- cgit v1.2.3-70-g09d2 From 57dbb2d83d100ea601c54fe129bfde0678db5dee Mon Sep 17 00:00:00 2001 From: Hagen Paul Pfeifer Date: Sun, 24 Jan 2010 12:30:59 +0000 Subject: sched: add head drop fifo queue This adds an additional queuing strategy, called pfifo_head_drop, to remove the oldest skb in the case of an overflow within the queue - the head element - instead of the last skb (tail). To remove the oldest skb in congested situations is useful for sensor network environments where newer packets reflect the superior information. Reviewed-by: Florian Westphal Acked-by: Patrick McHardy Signed-off-by: Hagen Paul Pfeifer Signed-off-by: David S. 
Miller --- include/net/pkt_sched.h | 1 + include/net/sch_generic.h | 19 +++++++++++++++++++ net/sched/sch_api.c | 1 + net/sched/sch_fifo.c | 34 ++++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+) (limited to 'include') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index 2d567265363e..b6cdc33b39c1 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -71,6 +71,7 @@ extern void qdisc_watchdog_cancel(struct qdisc_watchdog *wd); extern struct Qdisc_ops pfifo_qdisc_ops; extern struct Qdisc_ops bfifo_qdisc_ops; +extern struct Qdisc_ops pfifo_head_drop_qdisc_ops; extern int fifo_set_limit(struct Qdisc *q, unsigned int limit); extern struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index dad558bc06fa..67dc08eaaa45 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -427,6 +427,25 @@ static inline struct sk_buff *qdisc_dequeue_head(struct Qdisc *sch) return __qdisc_dequeue_head(sch, &sch->q); } +static inline unsigned int __qdisc_queue_drop_head(struct Qdisc *sch, + struct sk_buff_head *list) +{ + struct sk_buff *skb = __qdisc_dequeue_head(sch, list); + + if (likely(skb != NULL)) { + unsigned int len = qdisc_pkt_len(skb); + kfree_skb(skb); + return len; + } + + return 0; +} + +static inline unsigned int qdisc_queue_drop_head(struct Qdisc *sch) +{ + return __qdisc_queue_drop_head(sch, &sch->q); +} + static inline struct sk_buff *__qdisc_dequeue_tail(struct Qdisc *sch, struct sk_buff_head *list) { diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 75fd1c672c61..6cd491013b50 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1707,6 +1707,7 @@ static int __init pktsched_init(void) { register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); + register_qdisc(&pfifo_head_drop_qdisc_ops); register_qdisc(&mq_qdisc_ops); proc_net_fops_create(&init_net, "psched", 0, &psched_fops); diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 69188e8358b4..4b0a6cc44c77 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -43,6 +43,26 @@ static int pfifo_enqueue(struct sk_buff *skb, struct Qdisc* sch) return qdisc_reshape_fail(skb, sch); } +static int pfifo_tail_enqueue(struct sk_buff *skb, struct Qdisc* sch) +{ + struct sk_buff *skb_head; + struct fifo_sched_data *q = qdisc_priv(sch); + + if (likely(skb_queue_len(&sch->q) < q->limit)) + return qdisc_enqueue_tail(skb, sch); + + /* queue full, remove one skb to fulfill the limit */ + skb_head = qdisc_dequeue_head(sch); + sch->bstats.bytes -= qdisc_pkt_len(skb_head); + sch->bstats.packets--; + sch->qstats.drops++; + kfree_skb(skb_head); + + qdisc_enqueue_tail(skb, sch); + + return NET_XMIT_CN; +} + static int fifo_init(struct Qdisc *sch, struct nlattr *opt) { struct fifo_sched_data *q = qdisc_priv(sch); @@ -108,6 +128,20 @@ struct Qdisc_ops bfifo_qdisc_ops __read_mostly = { }; EXPORT_SYMBOL(bfifo_qdisc_ops); +struct Qdisc_ops pfifo_head_drop_qdisc_ops __read_mostly = { + .id = "pfifo_head_drop", + .priv_size = sizeof(struct fifo_sched_data), + .enqueue = pfifo_tail_enqueue, + .dequeue = qdisc_dequeue_head, + .peek = qdisc_peek_head, + .drop = qdisc_queue_drop_head, + .init = fifo_init, + .reset = qdisc_reset_queue, + .change = fifo_init, + .dump = fifo_dump, + .owner = THIS_MODULE, +}; + /* Pass size change message down to embedded FIFO */ int fifo_set_limit(struct Qdisc *q, unsigned int limit) { -- cgit v1.2.3-70-g09d2 From 
09d989d179d0c679043556dda77c51b41a2dae7e Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Fri, 29 Jan 2010 19:58:57 -0500 Subject: cfg80211: add regulatory hint disconnect support This adds a new regulatory hint to be used when we know all devices have been disconnected and idle. This can happen when we suspend, for instance. When we disconnect we can no longer assume the same regulatory rules learned from a country IE or beacon hints are applicable so restore regulatory settings to an initial state. Since driver hints are cached on the wiphy that called the hint, those hints are not reproduced onto cfg80211 as the wiphy will respect its own wiphy->regd regardless. Signed-off-by: Luis R. Rodriguez Signed-off-by: John W. Linville --- include/net/regulatory.h | 1 + net/wireless/reg.c | 157 ++++++++++++++++++++++++++++++++++++++++++++++- net/wireless/reg.h | 18 ++++++ net/wireless/sme.c | 40 ++++++++++++ 4 files changed, 213 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/net/regulatory.h b/include/net/regulatory.h index 47995b81c5d7..f873ee37f7e4 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -39,6 +39,7 @@ enum environment_cap { * 00 - World regulatory domain * 99 - built by driver but a specific alpha2 cannot be determined * 98 - result of an intersection between two regulatory domains + * 97 - regulatory domain has not yet been configured * @intersect: indicates whether the wireless core should intersect * the requested regulatory domain with the presently set regulatory * domain. diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 5dcda28b6f04..ed89c59bb431 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -134,6 +134,7 @@ static const struct ieee80211_regdomain *cfg80211_world_regdom = &world_regdom; static char *ieee80211_regdom = "00"; +static char user_alpha2[2]; module_param(ieee80211_regdom, charp, 0444); MODULE_PARM_DESC(ieee80211_regdom, "IEEE 802.11 regulatory domain code"); @@ -252,6 +253,27 @@ static bool regdom_changes(const char *alpha2) return true; } +/* + * The NL80211_REGDOM_SET_BY_USER regdom alpha2 is cached, this lets + * you know if a valid regulatory hint with NL80211_REGDOM_SET_BY_USER + * has ever been issued. 
+ */ +static bool is_user_regdom_saved(void) +{ + if (user_alpha2[0] == '9' && user_alpha2[1] == '7') + return false; + + /* This would indicate a mistake on the design */ + if (WARN((!is_world_regdom(user_alpha2) && + !is_an_alpha2(user_alpha2)), + "Unexpected user alpha2: %c%c\n", + user_alpha2[0], + user_alpha2[1])) + return false; + + return true; +} + /** * country_ie_integrity_changes - tells us if the country IE has changed * @checksum: checksum of country IE of fields we are interested in @@ -1646,7 +1668,7 @@ static int ignore_request(struct wiphy *wiphy, switch (pending_request->initiator) { case NL80211_REGDOM_SET_BY_CORE: - return -EINVAL; + return 0; case NL80211_REGDOM_SET_BY_COUNTRY_IE: last_wiphy = wiphy_idx_to_wiphy(last_request->wiphy_idx); @@ -1785,6 +1807,11 @@ new_request: pending_request = NULL; + if (last_request->initiator == NL80211_REGDOM_SET_BY_USER) { + user_alpha2[0] = last_request->alpha2[0]; + user_alpha2[1] = last_request->alpha2[1]; + } + /* When r == REG_INTERSECT we do need to call CRDA */ if (r < 0) { /* @@ -1904,12 +1931,16 @@ static void queue_regulatory_request(struct regulatory_request *request) schedule_work(®_work); } -/* Core regulatory hint -- happens once during cfg80211_init() */ +/* + * Core regulatory hint -- happens during cfg80211_init() + * and when we restore regulatory settings. + */ static int regulatory_hint_core(const char *alpha2) { struct regulatory_request *request; - BUG_ON(last_request); + kfree(last_request); + last_request = NULL; request = kzalloc(sizeof(struct regulatory_request), GFP_KERNEL); @@ -2107,6 +2138,123 @@ out: mutex_unlock(®_mutex); } +static void restore_alpha2(char *alpha2, bool reset_user) +{ + /* indicates there is no alpha2 to consider for restoration */ + alpha2[0] = '9'; + alpha2[1] = '7'; + + /* The user setting has precedence over the module parameter */ + if (is_user_regdom_saved()) { + /* Unless we're asked to ignore it and reset it */ + if (reset_user) { + REG_DBG_PRINT("cfg80211: Restoring regulatory settings " + "including user preference\n"); + user_alpha2[0] = '9'; + user_alpha2[1] = '7'; + + /* + * If we're ignoring user settings, we still need to + * check the module parameter to ensure we put things + * back as they were for a full restore. + */ + if (!is_world_regdom(ieee80211_regdom)) { + REG_DBG_PRINT("cfg80211: Keeping preference on " + "module parameter ieee80211_regdom: %c%c\n", + ieee80211_regdom[0], + ieee80211_regdom[1]); + alpha2[0] = ieee80211_regdom[0]; + alpha2[1] = ieee80211_regdom[1]; + } + } else { + REG_DBG_PRINT("cfg80211: Restoring regulatory settings " + "while preserving user preference for: %c%c\n", + user_alpha2[0], + user_alpha2[1]); + alpha2[0] = user_alpha2[0]; + alpha2[1] = user_alpha2[1]; + } + } else if (!is_world_regdom(ieee80211_regdom)) { + REG_DBG_PRINT("cfg80211: Keeping preference on " + "module parameter ieee80211_regdom: %c%c\n", + ieee80211_regdom[0], + ieee80211_regdom[1]); + alpha2[0] = ieee80211_regdom[0]; + alpha2[1] = ieee80211_regdom[1]; + } else + REG_DBG_PRINT("cfg80211: Restoring regulatory settings\n"); +} + +/* + * Restoring regulatory settings involves ingoring any + * possibly stale country IE information and user regulatory + * settings if so desired, this includes any beacon hints + * learned as we could have traveled outside to another country + * after disconnection. 
To restore regulatory settings we do + * exactly what we did at bootup: + * + * - send a core regulatory hint + * - send a user regulatory hint if applicable + * + * Device drivers that send a regulatory hint for a specific country + * keep their own regulatory domain on wiphy->regd so that does does + * not need to be remembered. + */ +static void restore_regulatory_settings(bool reset_user) +{ + char alpha2[2]; + struct reg_beacon *reg_beacon, *btmp; + + mutex_lock(&cfg80211_mutex); + mutex_lock(®_mutex); + + reset_regdomains(); + restore_alpha2(alpha2, reset_user); + + /* Clear beacon hints */ + spin_lock_bh(®_pending_beacons_lock); + if (!list_empty(®_pending_beacons)) { + list_for_each_entry_safe(reg_beacon, btmp, + ®_pending_beacons, list) { + list_del(®_beacon->list); + kfree(reg_beacon); + } + } + spin_unlock_bh(®_pending_beacons_lock); + + if (!list_empty(®_beacon_list)) { + list_for_each_entry_safe(reg_beacon, btmp, + ®_beacon_list, list) { + list_del(®_beacon->list); + kfree(reg_beacon); + } + } + + /* First restore to the basic regulatory settings */ + cfg80211_regdomain = cfg80211_world_regdom; + + mutex_unlock(®_mutex); + mutex_unlock(&cfg80211_mutex); + + regulatory_hint_core(cfg80211_regdomain->alpha2); + + /* + * This restores the ieee80211_regdom module parameter + * preference or the last user requested regulatory + * settings, user regulatory settings takes precedence. + */ + if (is_an_alpha2(alpha2)) + regulatory_hint_user(user_alpha2); +} + + +void regulatory_hint_disconnect(void) +{ + REG_DBG_PRINT("cfg80211: All devices are disconnected, going to " + "restore regulatory settings\n"); + restore_regulatory_settings(false); +} + static bool freq_is_chan_12_13_14(u16 freq) { if (freq == ieee80211_channel_to_frequency(12) || @@ -2496,6 +2644,9 @@ int regulatory_init(void) cfg80211_regdomain = cfg80211_world_regdom; + user_alpha2[0] = '9'; + user_alpha2[1] = '7'; + /* We always try to get an update for the static regdomain */ err = regulatory_hint_core(cfg80211_regdomain->alpha2); if (err) { diff --git a/net/wireless/reg.h b/net/wireless/reg.h index 3018508226ab..b26224a9f3bc 100644 --- a/net/wireless/reg.h +++ b/net/wireless/reg.h @@ -63,4 +63,22 @@ void regulatory_hint_11d(struct wiphy *wiphy, u8 *country_ie, u8 country_ie_len); +/** + * regulatory_hint_disconnect - informs all devices have been disconneted + * + * Regulotory rules can be enhanced further upon scanning and upon + * connection to an AP. These rules become stale if we disconnect + * and go to another country, whether or not we suspend and resume. + * If we suspend, go to another country and resume we'll automatically + * get disconnected shortly after resuming and things will be reset as well. + * This routine is a helper to restore regulatory settings to how they were + * prior to our first connect attempt. This includes ignoring country IE and + * beacon regulatory hints. The ieee80211_regdom module parameter will always + * be respected but if a user had set the regulatory domain that will take + * precedence. + * + * Must be called from process context. 
+ */ +void regulatory_hint_disconnect(void); + #endif /* __NET_WIRELESS_REG_H */ diff --git a/net/wireless/sme.c b/net/wireless/sme.c index 745c37e7992e..17fde0da1b08 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -34,6 +34,44 @@ struct cfg80211_conn { bool auto_auth, prev_bssid_valid; }; +bool cfg80211_is_all_idle(void) +{ + struct cfg80211_registered_device *rdev; + struct wireless_dev *wdev; + bool is_all_idle = true; + + mutex_lock(&cfg80211_mutex); + + /* + * All devices must be idle as otherwise if you are actively + * scanning some new beacon hints could be learned and would + * count as new regulatory hints. + */ + list_for_each_entry(rdev, &cfg80211_rdev_list, list) { + cfg80211_lock_rdev(rdev); + list_for_each_entry(wdev, &rdev->netdev_list, list) { + wdev_lock(wdev); + if (wdev->sme_state != CFG80211_SME_IDLE) + is_all_idle = false; + wdev_unlock(wdev); + } + cfg80211_unlock_rdev(rdev); + } + + mutex_unlock(&cfg80211_mutex); + + return is_all_idle; +} + +static void disconnect_work(struct work_struct *work) +{ + if (!cfg80211_is_all_idle()) + return; + + regulatory_hint_disconnect(); +} + +static DECLARE_WORK(cfg80211_disconnect_work, disconnect_work); static int cfg80211_conn_scan(struct wireless_dev *wdev) { @@ -658,6 +696,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, wireless_send_event(dev, SIOCGIWAP, &wrqu, NULL); wdev->wext.connect.ssid_len = 0; #endif + + schedule_work(&cfg80211_disconnect_work); } void cfg80211_disconnected(struct net_device *dev, u16 reason, -- cgit v1.2.3-70-g09d2 From 17ad353b8d9843731258b5d23556667b764939e9 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sun, 31 Jan 2010 21:56:25 +0100 Subject: mac80211: fix monitor mode tx radiotap header handling When an injected frame gets buffered for a powersave STA or filtered and retransmitted, mac80211 attempts to parse the radiotap header again, which doesn't work because it's gone at that point. This patch adds a new flag for checking the availability of a radiotap header, so that it only attempts to parse it once, reusing the tx info on the next call to ieee80211_tx(). This fixes severe issues with rekeying in AP mode. Signed-off-by: Felix Fietkau Cc: stable@kernel.org Signed-off-by: John W. Linville --- include/net/mac80211.h | 3 +++ net/mac80211/tx.c | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 1e9c93024cf2..74ccf30fdf8e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -275,6 +275,8 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_INTFL_RETRANSMISSION: This frame is being retransmitted * after TX status because the destination was asleep, it must not * be modified again (no seqno assignment, crypto, etc.) + * @IEEE80211_TX_INTFL_HAS_RADIOTAP: This frame was injected and still + * has a radiotap header at skb->data. 
*/ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), @@ -296,6 +298,7 @@ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_PSPOLL_RESPONSE = BIT(17), IEEE80211_TX_CTL_MORE_FRAMES = BIT(18), IEEE80211_TX_INTFL_RETRANSMISSION = BIT(19), + IEEE80211_TX_INTFL_HAS_RADIOTAP = BIT(20), }; /** diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 14c70452c245..e7b1cdc7651b 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -1108,7 +1108,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, tx->flags |= IEEE80211_TX_FRAGMENTED; /* process and remove the injection radiotap header */ - if (unlikely(info->flags & IEEE80211_TX_CTL_INJECTED)) { + if (unlikely(info->flags & IEEE80211_TX_INTFL_HAS_RADIOTAP)) { if (!__ieee80211_parse_tx_radiotap(tx, skb)) return TX_DROP; @@ -1117,6 +1117,7 @@ ieee80211_tx_prepare(struct ieee80211_sub_if_data *sdata, * the radiotap header that was present and pre-filled * 'tx' with tx control information. */ + info->flags &= ~IEEE80211_TX_INTFL_HAS_RADIOTAP; } /* @@ -1499,7 +1500,8 @@ static void ieee80211_xmit(struct ieee80211_sub_if_data *sdata, int hdrlen; u16 len_rthdr; - info->flags |= IEEE80211_TX_CTL_INJECTED; + info->flags |= IEEE80211_TX_CTL_INJECTED | + IEEE80211_TX_INTFL_HAS_RADIOTAP; len_rthdr = ieee80211_get_radiotap_len(skb->data); hdr = (struct ieee80211_hdr *)(skb->data + len_rthdr); -- cgit v1.2.3-70-g09d2 From c85bb41e93184bf5494dde6d8fe5a81b564c84c8 Mon Sep 17 00:00:00 2001 From: Flavio Leitner Date: Tue, 2 Feb 2010 07:32:29 -0800 Subject: igmp: fix ip_mc_sf_allow race [v5] Almost all igmp functions accessing inet->mc_list are protected by rtnl_lock(), but there is one exception which is ip_mc_sf_allow(), so there is a chance of either ip_mc_drop_socket or ip_mc_leave_group remove an entry while ip_mc_sf_allow is running causing a crash. Signed-off-by: Flavio Leitner Signed-off-by: David S. 
Miller --- include/linux/igmp.h | 2 ++ net/ipv4/igmp.c | 83 +++++++++++++++++++++++++++++++++++++++------------- 2 files changed, 64 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 724c27e5d173..93fc2449af10 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -153,6 +153,7 @@ extern int sysctl_igmp_max_msf; struct ip_sf_socklist { unsigned int sl_max; unsigned int sl_count; + struct rcu_head rcu; __be32 sl_addr[0]; }; @@ -170,6 +171,7 @@ struct ip_mc_socklist { struct ip_mreqn multi; unsigned int sfmode; /* MCAST_{INCLUDE,EXCLUDE} */ struct ip_sf_socklist *sflist; + struct rcu_head rcu; }; struct ip_sf_list { diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 8f5468393f01..d28363998743 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1799,7 +1799,7 @@ int ip_mc_join_group(struct sock *sk , struct ip_mreqn *imr) iml->next = inet->mc_list; iml->sflist = NULL; iml->sfmode = MCAST_EXCLUDE; - inet->mc_list = iml; + rcu_assign_pointer(inet->mc_list, iml); ip_mc_inc_group(in_dev, addr); err = 0; done: @@ -1807,24 +1807,46 @@ done: return err; } +static void ip_sf_socklist_reclaim(struct rcu_head *rp) +{ + struct ip_sf_socklist *psf; + + psf = container_of(rp, struct ip_sf_socklist, rcu); + /* sk_omem_alloc should have been decreased by the caller*/ + kfree(psf); +} + static int ip_mc_leave_src(struct sock *sk, struct ip_mc_socklist *iml, struct in_device *in_dev) { + struct ip_sf_socklist *psf = iml->sflist; int err; - if (iml->sflist == NULL) { + if (psf == NULL) { /* any-source empty exclude case */ return ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, iml->sfmode, 0, NULL, 0); } err = ip_mc_del_src(in_dev, &iml->multi.imr_multiaddr.s_addr, - iml->sfmode, iml->sflist->sl_count, - iml->sflist->sl_addr, 0); - sock_kfree_s(sk, iml->sflist, IP_SFLSIZE(iml->sflist->sl_max)); - iml->sflist = NULL; + iml->sfmode, psf->sl_count, psf->sl_addr, 0); + rcu_assign_pointer(iml->sflist, NULL); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(IP_SFLSIZE(psf->sl_max), &sk->sk_omem_alloc); + call_rcu(&psf->rcu, ip_sf_socklist_reclaim); return err; } + +static void ip_mc_socklist_reclaim(struct rcu_head *rp) +{ + struct ip_mc_socklist *iml; + + iml = container_of(rp, struct ip_mc_socklist, rcu); + /* sk_omem_alloc should have been decreased by the caller*/ + kfree(iml); +} + + /* * Ask a socket to leave a group. 
*/ @@ -1854,12 +1876,14 @@ int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr) (void) ip_mc_leave_src(sk, iml, in_dev); - *imlp = iml->next; + rcu_assign_pointer(*imlp, iml->next); if (in_dev) ip_mc_dec_group(in_dev, group); rtnl_unlock(); - sock_kfree_s(sk, iml, sizeof(*iml)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); + call_rcu(&iml->rcu, ip_mc_socklist_reclaim); return 0; } if (!in_dev) @@ -1974,9 +1998,12 @@ int ip_mc_source(int add, int omode, struct sock *sk, struct if (psl) { for (i=0; isl_count; i++) newpsl->sl_addr[i] = psl->sl_addr[i]; - sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + call_rcu(&psl->rcu, ip_sf_socklist_reclaim); } - pmc->sflist = psl = newpsl; + rcu_assign_pointer(pmc->sflist, newpsl); + psl = newpsl; } rv = 1; /* > 0 for insert logic below if sl_count is 0 */ for (i=0; isl_count; i++) { @@ -2072,11 +2099,13 @@ int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf, int ifindex) if (psl) { (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, psl->sl_count, psl->sl_addr, 0); - sock_kfree_s(sk, psl, IP_SFLSIZE(psl->sl_max)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(IP_SFLSIZE(psl->sl_max), &sk->sk_omem_alloc); + call_rcu(&psl->rcu, ip_sf_socklist_reclaim); } else (void) ip_mc_del_src(in_dev, &msf->imsf_multiaddr, pmc->sfmode, 0, NULL, 0); - pmc->sflist = newpsl; + rcu_assign_pointer(pmc->sflist, newpsl); pmc->sfmode = msf->imsf_fmode; err = 0; done: @@ -2209,30 +2238,40 @@ int ip_mc_sf_allow(struct sock *sk, __be32 loc_addr, __be32 rmt_addr, int dif) struct ip_mc_socklist *pmc; struct ip_sf_socklist *psl; int i; + int ret; + ret = 1; if (!ipv4_is_multicast(loc_addr)) - return 1; + goto out; - for (pmc=inet->mc_list; pmc; pmc=pmc->next) { + rcu_read_lock(); + for (pmc=rcu_dereference(inet->mc_list); pmc; pmc=rcu_dereference(pmc->next)) { if (pmc->multi.imr_multiaddr.s_addr == loc_addr && pmc->multi.imr_ifindex == dif) break; } + ret = inet->mc_all; if (!pmc) - return inet->mc_all; + goto unlock; psl = pmc->sflist; + ret = (pmc->sfmode == MCAST_EXCLUDE); if (!psl) - return pmc->sfmode == MCAST_EXCLUDE; + goto unlock; for (i=0; isl_count; i++) { if (psl->sl_addr[i] == rmt_addr) break; } + ret = 0; if (pmc->sfmode == MCAST_INCLUDE && i >= psl->sl_count) - return 0; + goto unlock; if (pmc->sfmode == MCAST_EXCLUDE && i < psl->sl_count) - return 0; - return 1; + goto unlock; + ret = 1; +unlock: + rcu_read_unlock(); +out: + return ret; } /* @@ -2251,7 +2290,7 @@ void ip_mc_drop_socket(struct sock *sk) rtnl_lock(); while ((iml = inet->mc_list) != NULL) { struct in_device *in_dev; - inet->mc_list = iml->next; + rcu_assign_pointer(inet->mc_list, iml->next); in_dev = inetdev_by_index(net, iml->multi.imr_ifindex); (void) ip_mc_leave_src(sk, iml, in_dev); @@ -2259,7 +2298,9 @@ void ip_mc_drop_socket(struct sock *sk) ip_mc_dec_group(in_dev, iml->multi.imr_multiaddr.s_addr); in_dev_put(in_dev); } - sock_kfree_s(sk, iml, sizeof(*iml)); + /* decrease mem now to avoid the memleak warning */ + atomic_sub(sizeof(*iml), &sk->sk_omem_alloc); + call_rcu(&iml->rcu, ip_mc_socklist_reclaim); } rtnl_unlock(); } -- cgit v1.2.3-70-g09d2 From f9bfbebf34eab707b065116cdc9699d25ba4252a Mon Sep 17 00:00:00 2001 From: Shirley Ma Date: Fri, 29 Jan 2010 03:19:05 +0000 Subject: virtio: Add ability to detach unused buffers from vrings There's currently no way for a virtio driver to 
ask for unused buffers, so it has to keep a list itself to reclaim them at shutdown. This is redundant, since virtio_ring stores that information. So add a new hook to do this. Signed-off-by: Shirley Ma Signed-off-by: Amit Shah Signed-off-by: Rusty Russell Signed-off-by: David S. Miller --- drivers/virtio/virtio_ring.c | 25 +++++++++++++++++++++++++ include/linux/virtio.h | 4 ++++ 2 files changed, 29 insertions(+) (limited to 'include') diff --git a/drivers/virtio/virtio_ring.c b/drivers/virtio/virtio_ring.c index fbd2ecde93e4..71929ee00d69 100644 --- a/drivers/virtio/virtio_ring.c +++ b/drivers/virtio/virtio_ring.c @@ -334,6 +334,30 @@ static bool vring_enable_cb(struct virtqueue *_vq) return true; } +static void *vring_detach_unused_buf(struct virtqueue *_vq) +{ + struct vring_virtqueue *vq = to_vvq(_vq); + unsigned int i; + void *buf; + + START_USE(vq); + + for (i = 0; i < vq->vring.num; i++) { + if (!vq->data[i]) + continue; + /* detach_buf clears data, so grab it now. */ + buf = vq->data[i]; + detach_buf(vq, i); + END_USE(vq); + return buf; + } + /* That should have freed everything. */ + BUG_ON(vq->num_free != vq->vring.num); + + END_USE(vq); + return NULL; +} + irqreturn_t vring_interrupt(int irq, void *_vq) { struct vring_virtqueue *vq = to_vvq(_vq); @@ -360,6 +384,7 @@ static struct virtqueue_ops vring_vq_ops = { .kick = vring_kick, .disable_cb = vring_disable_cb, .enable_cb = vring_enable_cb, + .detach_unused_buf = vring_detach_unused_buf, }; struct virtqueue *vring_new_virtqueue(unsigned int num, diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 057a2e010758..f508c651e53d 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -51,6 +51,9 @@ struct virtqueue { * This re-enables callbacks; it returns "false" if there are pending * buffers in the queue, to detect a possible race between the driver * checking for more work, and enabling callbacks. + * @detach_unused_buf: detach first unused buffer + * vq: the struct virtqueue we're talking about. + * Returns NULL or the "data" token handed to add_buf * * Locking rules are straightforward: the driver is responsible for * locking. No two operations may be invoked simultaneously, with the exception @@ -71,6 +74,7 @@ struct virtqueue_ops { void (*disable_cb)(struct virtqueue *vq); bool (*enable_cb)(struct virtqueue *vq); + void *(*detach_unused_buf)(struct virtqueue *vq); }; /** -- cgit v1.2.3-70-g09d2 From f98bfbd78c37c5946cc53089da32a5f741efdeb7 Mon Sep 17 00:00:00 2001 From: Evgeniy Polyakov Date: Tue, 2 Feb 2010 15:58:48 -0800 Subject: connector: Delete buggy notification code. On Tue, Feb 02, 2010 at 02:57:14PM -0800, Greg KH (gregkh@suse.de) wrote: > > There are at least two ways to fix it: using a big cannon and a small > > one. The former way is to disable notification registration, since it is > > not used by anyone at all. Second way is to check whether calling > > process is root and its destination group is -1 (kind of priveledged > > one) before command is dispatched to workqueue. > > Well if no one is using it, removing it makes the most sense, right? > > No objection from me, care to make up a patch either way for this? Getting it is not used, let's drop support for notifications about (un)registered events from connector. Another option was to check credentials on receiving, but we can always restore it without bugs if needed, but genetlink has a wider code base and none complained, that userspace can not get notification when some other clients were (un)registered. 
Kudos for Sebastian Krahmer , who found a bug in the code. Signed-off-by: Evgeniy Polyakov Acked-by: Greg Kroah-Hartman Signed-off-by: David S. Miller --- drivers/connector/connector.c | 175 ------------------------------------------ include/linux/connector.h | 32 -------- 2 files changed, 207 deletions(-) (limited to 'include') diff --git a/drivers/connector/connector.c b/drivers/connector/connector.c index f06024668f99..537c29ac4487 100644 --- a/drivers/connector/connector.c +++ b/drivers/connector/connector.c @@ -36,17 +36,6 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Evgeniy Polyakov "); MODULE_DESCRIPTION("Generic userspace <-> kernelspace connector."); -static u32 cn_idx = CN_IDX_CONNECTOR; -static u32 cn_val = CN_VAL_CONNECTOR; - -module_param(cn_idx, uint, 0); -module_param(cn_val, uint, 0); -MODULE_PARM_DESC(cn_idx, "Connector's main device idx."); -MODULE_PARM_DESC(cn_val, "Connector's main device val."); - -static DEFINE_MUTEX(notify_lock); -static LIST_HEAD(notify_list); - static struct cn_dev cdev; static int cn_already_initialized; @@ -209,54 +198,6 @@ static void cn_rx_skb(struct sk_buff *__skb) } } -/* - * Notification routing. - * - * Gets id and checks if there are notification request for it's idx - * and val. If there are such requests notify the listeners with the - * given notify event. - * - */ -static void cn_notify(struct cb_id *id, u32 notify_event) -{ - struct cn_ctl_entry *ent; - - mutex_lock(¬ify_lock); - list_for_each_entry(ent, ¬ify_list, notify_entry) { - int i; - struct cn_notify_req *req; - struct cn_ctl_msg *ctl = ent->msg; - int idx_found, val_found; - - idx_found = val_found = 0; - - req = (struct cn_notify_req *)ctl->data; - for (i = 0; i < ctl->idx_notify_num; ++i, ++req) { - if (id->idx >= req->first && - id->idx < req->first + req->range) { - idx_found = 1; - break; - } - } - - for (i = 0; i < ctl->val_notify_num; ++i, ++req) { - if (id->val >= req->first && - id->val < req->first + req->range) { - val_found = 1; - break; - } - } - - if (idx_found && val_found) { - struct cn_msg m = { .ack = notify_event, }; - - memcpy(&m.id, id, sizeof(m.id)); - cn_netlink_send(&m, ctl->group, GFP_KERNEL); - } - } - mutex_unlock(¬ify_lock); -} - /* * Callback add routing - adds callback with given ID and name. * If there is registered callback with the same ID it will not be added. @@ -276,8 +217,6 @@ int cn_add_callback(struct cb_id *id, char *name, if (err) return err; - cn_notify(id, 0); - return 0; } EXPORT_SYMBOL_GPL(cn_add_callback); @@ -295,111 +234,9 @@ void cn_del_callback(struct cb_id *id) struct cn_dev *dev = &cdev; cn_queue_del_callback(dev->cbdev, id); - cn_notify(id, 1); } EXPORT_SYMBOL_GPL(cn_del_callback); -/* - * Checks two connector's control messages to be the same. - * Returns 1 if they are the same or if the first one is corrupted. 
- */ -static int cn_ctl_msg_equals(struct cn_ctl_msg *m1, struct cn_ctl_msg *m2) -{ - int i; - struct cn_notify_req *req1, *req2; - - if (m1->idx_notify_num != m2->idx_notify_num) - return 0; - - if (m1->val_notify_num != m2->val_notify_num) - return 0; - - if (m1->len != m2->len) - return 0; - - if ((m1->idx_notify_num + m1->val_notify_num) * sizeof(*req1) != - m1->len) - return 1; - - req1 = (struct cn_notify_req *)m1->data; - req2 = (struct cn_notify_req *)m2->data; - - for (i = 0; i < m1->idx_notify_num; ++i) { - if (req1->first != req2->first || req1->range != req2->range) - return 0; - req1++; - req2++; - } - - for (i = 0; i < m1->val_notify_num; ++i) { - if (req1->first != req2->first || req1->range != req2->range) - return 0; - req1++; - req2++; - } - - return 1; -} - -/* - * Main connector device's callback. - * - * Used for notification of a request's processing. - */ -static void cn_callback(struct cn_msg *msg, struct netlink_skb_parms *nsp) -{ - struct cn_ctl_msg *ctl; - struct cn_ctl_entry *ent; - u32 size; - - if (msg->len < sizeof(*ctl)) - return; - - ctl = (struct cn_ctl_msg *)msg->data; - - size = (sizeof(*ctl) + ((ctl->idx_notify_num + - ctl->val_notify_num) * - sizeof(struct cn_notify_req))); - - if (msg->len != size) - return; - - if (ctl->len + sizeof(*ctl) != msg->len) - return; - - /* - * Remove notification. - */ - if (ctl->group == 0) { - struct cn_ctl_entry *n; - - mutex_lock(¬ify_lock); - list_for_each_entry_safe(ent, n, ¬ify_list, notify_entry) { - if (cn_ctl_msg_equals(ent->msg, ctl)) { - list_del(&ent->notify_entry); - kfree(ent); - } - } - mutex_unlock(¬ify_lock); - - return; - } - - size += sizeof(*ent); - - ent = kzalloc(size, GFP_KERNEL); - if (!ent) - return; - - ent->msg = (struct cn_ctl_msg *)(ent + 1); - - memcpy(ent->msg, ctl, size - sizeof(*ent)); - - mutex_lock(¬ify_lock); - list_add(&ent->notify_entry, ¬ify_list); - mutex_unlock(¬ify_lock); -} - static int cn_proc_show(struct seq_file *m, void *v) { struct cn_queue_dev *dev = cdev.cbdev; @@ -437,11 +274,8 @@ static const struct file_operations cn_file_ops = { static int __devinit cn_init(void) { struct cn_dev *dev = &cdev; - int err; dev->input = cn_rx_skb; - dev->id.idx = cn_idx; - dev->id.val = cn_val; dev->nls = netlink_kernel_create(&init_net, NETLINK_CONNECTOR, CN_NETLINK_USERS + 0xf, @@ -457,14 +291,6 @@ static int __devinit cn_init(void) cn_already_initialized = 1; - err = cn_add_callback(&dev->id, "connector", &cn_callback); - if (err) { - cn_already_initialized = 0; - cn_queue_free_dev(dev->cbdev); - netlink_kernel_release(dev->nls); - return -EINVAL; - } - proc_net_fops_create(&init_net, "connector", S_IRUGO, &cn_file_ops); return 0; @@ -478,7 +304,6 @@ static void __devexit cn_fini(void) proc_net_remove(&init_net, "connector"); - cn_del_callback(&dev->id); cn_queue_free_dev(dev->cbdev); netlink_kernel_release(dev->nls); } diff --git a/include/linux/connector.h b/include/linux/connector.h index 72ba63eb83c5..3a779ffba60b 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -24,9 +24,6 @@ #include -#define CN_IDX_CONNECTOR 0xffffffff -#define CN_VAL_CONNECTOR 0xffffffff - /* * Process Events connector unique ids -- used for message routing */ @@ -75,30 +72,6 @@ struct cn_msg { __u8 data[0]; }; -/* - * Notify structure - requests notification about - * registering/unregistering idx/val in range [first, first+range]. 
- */ -struct cn_notify_req { - __u32 first; - __u32 range; -}; - -/* - * Main notification control message - * *_notify_num - number of appropriate cn_notify_req structures after - * this struct. - * group - notification receiver's idx. - * len - total length of the attached data. - */ -struct cn_ctl_msg { - __u32 idx_notify_num; - __u32 val_notify_num; - __u32 group; - __u32 len; - __u8 data[0]; -}; - #ifdef __KERNEL__ #include @@ -151,11 +124,6 @@ struct cn_callback_entry { u32 seq, group; }; -struct cn_ctl_entry { - struct list_head notify_entry; - struct cn_ctl_msg *msg; -}; - struct cn_dev { struct cb_id id; -- cgit v1.2.3-70-g09d2 From 8a83a00b0735190384a348156837918271034144 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 30 Jan 2010 12:23:03 +0000 Subject: net: maintain namespace isolation between vlan and real device In the vlan and macvlan drivers, the start_xmit function forwards data to the dev_queue_xmit function for another device, which may potentially belong to a different namespace. To make sure that classification stays within a single namespace, this resets the potentially critical fields. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 2 +- include/linux/netdevice.h | 9 +++++++++ net/8021q/vlan_dev.c | 2 +- net/core/dev.c | 35 +++++++++++++++++++++++++++++++---- 4 files changed, 42 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index fa0dc514dbaf..d32e0bdfc5e9 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -269,7 +269,7 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) } xmit_world: - skb->dev = vlan->lowerdev; + skb_set_dev(skb, vlan->lowerdev); return dev_queue_xmit(skb); } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 93a32a5ca74f..622ba5aa93c4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1004,6 +1004,15 @@ static inline bool netdev_uses_dsa_tags(struct net_device *dev) return 0; } +#ifndef CONFIG_NET_NS +static inline void skb_set_dev(struct sk_buff *skb, struct net_device *dev) +{ + skb->dev = dev; +} +#else /* CONFIG_NET_NS */ +void skb_set_dev(struct sk_buff *skb, struct net_device *dev); +#endif + static inline bool netdev_uses_trailer_tags(struct net_device *dev) { #ifdef CONFIG_NET_DSA_TAG_TRAILER diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c index a9e1f1785614..9e83272fc5b0 100644 --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -322,7 +322,7 @@ static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb, } - skb->dev = vlan_dev_info(dev)->real_dev; + skb_set_dev(skb, vlan_dev_info(dev)->real_dev); len = skb->len; ret = dev_queue_xmit(skb); diff --git a/net/core/dev.c b/net/core/dev.c index 2cba5c521e56..94c1eeed25e5 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1448,13 +1448,10 @@ int dev_forward_skb(struct net_device *dev, struct sk_buff *skb) if (skb->len > (dev->mtu + dev->hard_header_len)) return NET_RX_DROP; - skb_dst_drop(skb); + skb_set_dev(skb, dev); skb->tstamp.tv64 = 0; skb->pkt_type = PACKET_HOST; skb->protocol = eth_type_trans(skb, dev); - skb->mark = 0; - secpath_reset(skb); - nf_reset(skb); return netif_rx(skb); } EXPORT_SYMBOL_GPL(dev_forward_skb); @@ -1614,6 +1611,36 @@ static bool dev_can_checksum(struct net_device *dev, struct sk_buff *skb) return false; } +/** + * skb_dev_set -- assign a new device to a buffer + * @skb: buffer for the new device + * @dev: network device + * + * If an skb is owned 
by a device already, we have to reset + * all data private to the namespace a device belongs to + * before assigning it a new device. + */ +#ifdef CONFIG_NET_NS +void skb_set_dev(struct sk_buff *skb, struct net_device *dev) +{ + skb_dst_drop(skb); + if (skb->dev && !net_eq(dev_net(skb->dev), dev_net(dev))) { + secpath_reset(skb); + nf_reset(skb); + skb_init_secmark(skb); + skb->mark = 0; + skb->priority = 0; + skb->nf_trace = 0; + skb->ipvs_property = 0; +#ifdef CONFIG_NET_SCHED + skb->tc_index = 0; +#endif + } + skb->dev = dev; +} +EXPORT_SYMBOL(skb_set_dev); +#endif /* CONFIG_NET_NS */ + /* * Invalidate hardware checksum when packet is to be mangled, and * complete checksum manually on outgoing path. -- cgit v1.2.3-70-g09d2 From fc0663d6b5e6d8e9b57f872a644c0aafd82361b7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 30 Jan 2010 12:23:40 +0000 Subject: macvlan: allow multiple driver backends This makes it possible to hook into the macvlan driver from another kernel module. In particular, the goal is to extend it with the macvtap backend that provides a tun/tap compatible interface directly on the macvlan device. Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 113 ++++++++++++++++++++------------------------- include/linux/if_macvlan.h | 70 ++++++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 64 deletions(-) (limited to 'include') diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index d32e0bdfc5e9..40faa368b07a 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -39,31 +39,6 @@ struct macvlan_port { struct list_head vlans; }; -/** - * struct macvlan_rx_stats - MACVLAN percpu rx stats - * @rx_packets: number of received packets - * @rx_bytes: number of received bytes - * @multicast: number of received multicast packets - * @rx_errors: number of errors - */ -struct macvlan_rx_stats { - unsigned long rx_packets; - unsigned long rx_bytes; - unsigned long multicast; - unsigned long rx_errors; -}; - -struct macvlan_dev { - struct net_device *dev; - struct list_head list; - struct hlist_node hlist; - struct macvlan_port *port; - struct net_device *lowerdev; - struct macvlan_rx_stats *rx_stats; - enum macvlan_mode mode; -}; - - static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port, const unsigned char *addr) { @@ -118,31 +93,17 @@ static int macvlan_addr_busy(const struct macvlan_port *port, return 0; } -static inline void macvlan_count_rx(const struct macvlan_dev *vlan, - unsigned int len, bool success, - bool multicast) -{ - struct macvlan_rx_stats *rx_stats; - - rx_stats = per_cpu_ptr(vlan->rx_stats, smp_processor_id()); - if (likely(success)) { - rx_stats->rx_packets++;; - rx_stats->rx_bytes += len; - if (multicast) - rx_stats->multicast++; - } else { - rx_stats->rx_errors++; - } -} -static int macvlan_broadcast_one(struct sk_buff *skb, struct net_device *dev, +static int macvlan_broadcast_one(struct sk_buff *skb, + const struct macvlan_dev *vlan, const struct ethhdr *eth, bool local) { + struct net_device *dev = vlan->dev; if (!skb) return NET_RX_DROP; if (local) - return dev_forward_skb(dev, skb); + return vlan->forward(dev, skb); skb->dev = dev; if (!compare_ether_addr_64bits(eth->h_dest, @@ -151,7 +112,7 @@ static int macvlan_broadcast_one(struct sk_buff *skb, struct net_device *dev, else skb->pkt_type = PACKET_MULTICAST; - return netif_rx(skb); + return vlan->receive(skb); } static void macvlan_broadcast(struct sk_buff *skb, @@ -175,7 +136,7 @@ static void macvlan_broadcast(struct 
sk_buff *skb, continue; nskb = skb_clone(skb, GFP_ATOMIC); - err = macvlan_broadcast_one(nskb, vlan->dev, eth, + err = macvlan_broadcast_one(nskb, vlan, eth, mode == MACVLAN_MODE_BRIDGE); macvlan_count_rx(vlan, skb->len + ETH_HLEN, err == NET_RX_SUCCESS, 1); @@ -238,7 +199,7 @@ static struct sk_buff *macvlan_handle_frame(struct sk_buff *skb) skb->dev = dev; skb->pkt_type = PACKET_HOST; - netif_rx(skb); + vlan->receive(skb); return NULL; } @@ -260,7 +221,7 @@ static int macvlan_queue_xmit(struct sk_buff *skb, struct net_device *dev) dest = macvlan_hash_lookup(port, eth->h_dest); if (dest && dest->mode == MACVLAN_MODE_BRIDGE) { unsigned int length = skb->len + ETH_HLEN; - int ret = dev_forward_skb(dest->dev, skb); + int ret = dest->forward(dest->dev, skb); macvlan_count_rx(dest, length, ret == NET_RX_SUCCESS, 0); @@ -273,8 +234,8 @@ xmit_world: return dev_queue_xmit(skb); } -static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, - struct net_device *dev) +netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, + struct net_device *dev) { int i = skb_get_queue_mapping(skb); struct netdev_queue *txq = netdev_get_tx_queue(dev, i); @@ -290,6 +251,7 @@ static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, return ret; } +EXPORT_SYMBOL_GPL(macvlan_start_xmit); static int macvlan_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, @@ -623,8 +585,11 @@ static int macvlan_get_tx_queues(struct net *net, return 0; } -static int macvlan_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]) +int macvlan_common_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + int (*receive)(struct sk_buff *skb), + int (*forward)(struct net_device *dev, + struct sk_buff *skb)) { struct macvlan_dev *vlan = netdev_priv(dev); struct macvlan_port *port; @@ -664,6 +629,8 @@ static int macvlan_newlink(struct net *src_net, struct net_device *dev, vlan->lowerdev = lowerdev; vlan->dev = dev; vlan->port = port; + vlan->receive = receive; + vlan->forward = forward; vlan->mode = MACVLAN_MODE_VEPA; if (data && data[IFLA_MACVLAN_MODE]) @@ -677,8 +644,17 @@ static int macvlan_newlink(struct net *src_net, struct net_device *dev, netif_stacked_transfer_operstate(lowerdev, dev); return 0; } +EXPORT_SYMBOL_GPL(macvlan_common_newlink); -static void macvlan_dellink(struct net_device *dev, struct list_head *head) +static int macvlan_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + return macvlan_common_newlink(src_net, dev, tb, data, + netif_rx, + dev_forward_skb); +} + +void macvlan_dellink(struct net_device *dev, struct list_head *head) { struct macvlan_dev *vlan = netdev_priv(dev); struct macvlan_port *port = vlan->port; @@ -689,6 +665,7 @@ static void macvlan_dellink(struct net_device *dev, struct list_head *head) if (list_empty(&port->vlans)) macvlan_port_destroy(port->dev); } +EXPORT_SYMBOL_GPL(macvlan_dellink); static int macvlan_changelink(struct net_device *dev, struct nlattr *tb[], struct nlattr *data[]) @@ -720,19 +697,27 @@ static const struct nla_policy macvlan_policy[IFLA_MACVLAN_MAX + 1] = { [IFLA_MACVLAN_MODE] = { .type = NLA_U32 }, }; -static struct rtnl_link_ops macvlan_link_ops __read_mostly = { +int macvlan_link_register(struct rtnl_link_ops *ops) +{ + /* common fields */ + ops->priv_size = sizeof(struct macvlan_dev); + ops->get_tx_queues = macvlan_get_tx_queues; + ops->setup = macvlan_setup; + ops->validate = macvlan_validate; 
+ ops->maxtype = IFLA_MACVLAN_MAX; + ops->policy = macvlan_policy; + ops->changelink = macvlan_changelink; + ops->get_size = macvlan_get_size; + ops->fill_info = macvlan_fill_info; + + return rtnl_link_register(ops); +}; +EXPORT_SYMBOL_GPL(macvlan_link_register); + +static struct rtnl_link_ops macvlan_link_ops = { .kind = "macvlan", - .priv_size = sizeof(struct macvlan_dev), - .get_tx_queues = macvlan_get_tx_queues, - .setup = macvlan_setup, - .validate = macvlan_validate, .newlink = macvlan_newlink, .dellink = macvlan_dellink, - .maxtype = IFLA_MACVLAN_MAX, - .policy = macvlan_policy, - .changelink = macvlan_changelink, - .get_size = macvlan_get_size, - .fill_info = macvlan_fill_info, }; static int macvlan_device_event(struct notifier_block *unused, @@ -761,7 +746,7 @@ static int macvlan_device_event(struct notifier_block *unused, break; case NETDEV_UNREGISTER: list_for_each_entry_safe(vlan, next, &port->vlans, list) - macvlan_dellink(vlan->dev, NULL); + vlan->dev->rtnl_link_ops->dellink(vlan->dev, NULL); break; } return NOTIFY_DONE; @@ -778,7 +763,7 @@ static int __init macvlan_init_module(void) register_netdevice_notifier(&macvlan_notifier_block); macvlan_handle_frame_hook = macvlan_handle_frame; - err = rtnl_link_register(&macvlan_link_ops); + err = macvlan_link_register(&macvlan_link_ops); if (err < 0) goto err1; return 0; diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 5f200bac3749..9a11544bb0b1 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -1,6 +1,76 @@ #ifndef _LINUX_IF_MACVLAN_H #define _LINUX_IF_MACVLAN_H +#include +#include +#include +#include +#include + +struct macvlan_port; +struct macvtap_queue; + +/** + * struct macvlan_rx_stats - MACVLAN percpu rx stats + * @rx_packets: number of received packets + * @rx_bytes: number of received bytes + * @multicast: number of received multicast packets + * @rx_errors: number of errors + */ +struct macvlan_rx_stats { + unsigned long rx_packets; + unsigned long rx_bytes; + unsigned long multicast; + unsigned long rx_errors; +}; + +struct macvlan_dev { + struct net_device *dev; + struct list_head list; + struct hlist_node hlist; + struct macvlan_port *port; + struct net_device *lowerdev; + struct macvlan_rx_stats *rx_stats; + enum macvlan_mode mode; + int (*receive)(struct sk_buff *skb); + int (*forward)(struct net_device *dev, struct sk_buff *skb); +}; + +static inline void macvlan_count_rx(const struct macvlan_dev *vlan, + unsigned int len, bool success, + bool multicast) +{ + struct macvlan_rx_stats *rx_stats; + + rx_stats = per_cpu_ptr(vlan->rx_stats, smp_processor_id()); + if (likely(success)) { + rx_stats->rx_packets++;; + rx_stats->rx_bytes += len; + if (multicast) + rx_stats->multicast++; + } else { + rx_stats->rx_errors++; + } +} + +extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[], + int (*receive)(struct sk_buff *skb), + int (*forward)(struct net_device *dev, + struct sk_buff *skb)); + +extern void macvlan_count_rx(const struct macvlan_dev *vlan, + unsigned int len, bool success, + bool multicast); + +extern void macvlan_dellink(struct net_device *dev, struct list_head *head); + +extern int macvlan_link_register(struct rtnl_link_ops *ops); + +extern netdev_tx_t macvlan_start_xmit(struct sk_buff *skb, + struct net_device *dev); + + extern struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *); #endif /* _LINUX_IF_MACVLAN_H */ -- cgit v1.2.3-70-g09d2 From 20d29d7a916a47bf533b5709437fe735b6b5b79e 
Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Sat, 30 Jan 2010 12:24:26 +0000 Subject: net: macvtap driver In order to use macvlan with qemu and other tools that require a tap file descriptor, the macvtap driver adds a small backend with a character device with the same interface as the tun driver, with a minimum set of features. Macvtap interfaces are created in the same way as macvlan interfaces using ip link, but the netif is just used as a handle for configuration and accounting, while the data goes through the chardev. Each macvtap interface has its own character device, simplifying permission management significantly over the generic tun/tap driver. Cc: Patrick McHardy Cc: Stephen Hemminger Cc: David S. Miller" Cc: "Michael S. Tsirkin" Cc: Herbert Xu Cc: Or Gerlitz Cc: netdev@vger.kernel.org Cc: bridge@lists.linux-foundation.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- drivers/net/Kconfig | 12 + drivers/net/Makefile | 1 + drivers/net/macvtap.c | 581 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/if_macvlan.h | 1 + 4 files changed, 595 insertions(+) create mode 100644 drivers/net/macvtap.c (limited to 'include') diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index cb0e534418e3..411e20703110 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -90,6 +90,18 @@ config MACVLAN To compile this driver as a module, choose M here: the module will be called macvlan. +config MACVTAP + tristate "MAC-VLAN based tap driver (EXPERIMENTAL)" + depends on MACVLAN + help + This adds a specialized tap character device driver that is based + on the MAC-VLAN network interface, called macvtap. A macvtap device + can be added in the same way as a macvlan device, using 'type + macvlan', and then be accessed through the tap user space interface. + + To compile this driver as a module, choose M here: the module + will be called macvtap. + config EQUALIZER tristate "EQL (serial line load balancing) support" ---help--- diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 0b763cbe9b1f..95958032cd31 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -169,6 +169,7 @@ obj-$(CONFIG_XEN_NETDEV_FRONTEND) += xen-netfront.o obj-$(CONFIG_DUMMY) += dummy.o obj-$(CONFIG_IFB) += ifb.o obj-$(CONFIG_MACVLAN) += macvlan.o +obj-$(CONFIG_MACVTAP) += macvtap.o obj-$(CONFIG_DE600) += de600.o obj-$(CONFIG_DE620) += de620.o obj-$(CONFIG_LANCE) += lance.o diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c new file mode 100644 index 000000000000..ad1f6ef89308 --- /dev/null +++ b/drivers/net/macvtap.c @@ -0,0 +1,581 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +/* + * A macvtap queue is the central object of this driver, it connects + * an open character device to a macvlan interface. There can be + * multiple queues on one interface, which map back to queues + * implemented in hardware on the underlying device. + * + * macvtap_proto is used to allocate queues through the sock allocation + * mechanism. + * + * TODO: multiqueue support is currently not implemented, even though + * macvtap is basically prepared for that. We will need to add this + * here as well as in virtio-net and qemu to get line rate on 10gbit + * adapters from a guest. 
+ */ +struct macvtap_queue { + struct sock sk; + struct socket sock; + struct macvlan_dev *vlan; + struct file *file; +}; + +static struct proto macvtap_proto = { + .name = "macvtap", + .owner = THIS_MODULE, + .obj_size = sizeof (struct macvtap_queue), +}; + +/* + * Minor number matches netdev->ifindex, so need a potentially + * large value. This also makes it possible to split the + * tap functionality out again in the future by offering it + * from other drivers besides macvtap. As long as every device + * only has one tap, the interface numbers assure that the + * device nodes are unique. + */ +static unsigned int macvtap_major; +#define MACVTAP_NUM_DEVS 65536 +static struct class *macvtap_class; +static struct cdev macvtap_cdev; + +/* + * RCU usage: + * The macvtap_queue is referenced both from the chardev struct file + * and from the struct macvlan_dev using rcu_read_lock. + * + * We never actually update the contents of a macvtap_queue atomically + * with RCU but it is used for race-free destruction of a queue when + * either the file or the macvlan_dev goes away. Pointers back to + * the dev and the file are implicitly valid as long as the queue + * exists. + * + * The callbacks from macvlan are always done with rcu_read_lock held + * already, while in the file_operations, we get it ourselves. + * + * When destroying a queue, we remove the pointers from the file and + * from the dev and then synchronize_rcu to make sure no thread is + * still using the queue. There may still be references to the struct + * sock inside of the queue from outbound SKBs, but these never + * reference back to the file or the dev. The data structure is freed + * through __sk_free when both our references and any pending SKBs + * are gone. + * + * macvtap_lock is only used to prevent multiple concurrent open() + * calls to assign a new vlan->tap pointer. It could be moved into + * the macvlan_dev itself but is extremely rarely used. + */ +static DEFINE_SPINLOCK(macvtap_lock); + +/* + * Choose the next free queue, for now there is only one + */ +static int macvtap_set_queue(struct net_device *dev, struct file *file, + struct macvtap_queue *q) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + int err = -EBUSY; + + spin_lock(&macvtap_lock); + if (rcu_dereference(vlan->tap)) + goto out; + + err = 0; + q->vlan = vlan; + rcu_assign_pointer(vlan->tap, q); + + q->file = file; + rcu_assign_pointer(file->private_data, q); + +out: + spin_unlock(&macvtap_lock); + return err; +} + +/* + * We must destroy each queue exactly once, when either + * the netdev or the file go away. + * + * Using the spinlock makes sure that we don't get + * to the queue again after destroying it. + * + * synchronize_rcu serializes with the packet flow + * that uses rcu_read_lock. + */ +static void macvtap_del_queue(struct macvtap_queue **qp) +{ + struct macvtap_queue *q; + + spin_lock(&macvtap_lock); + q = rcu_dereference(*qp); + if (!q) { + spin_unlock(&macvtap_lock); + return; + } + + rcu_assign_pointer(q->vlan->tap, NULL); + rcu_assign_pointer(q->file->private_data, NULL); + spin_unlock(&macvtap_lock); + + synchronize_rcu(); + sock_put(&q->sk); +} + +/* + * Since we only support one queue, just dereference the pointer. 
+ */ +static struct macvtap_queue *macvtap_get_queue(struct net_device *dev, + struct sk_buff *skb) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + + return rcu_dereference(vlan->tap); +} + +static void macvtap_del_queues(struct net_device *dev) +{ + struct macvlan_dev *vlan = netdev_priv(dev); + macvtap_del_queue(&vlan->tap); +} + +static inline struct macvtap_queue *macvtap_file_get_queue(struct file *file) +{ + rcu_read_lock_bh(); + return rcu_dereference(file->private_data); +} + +static inline void macvtap_file_put_queue(void) +{ + rcu_read_unlock_bh(); +} + +/* + * Forward happens for data that gets sent from one macvlan + * endpoint to another one in bridge mode. We just take + * the skb and put it into the receive queue. + */ +static int macvtap_forward(struct net_device *dev, struct sk_buff *skb) +{ + struct macvtap_queue *q = macvtap_get_queue(dev, skb); + if (!q) + return -ENOLINK; + + skb_queue_tail(&q->sk.sk_receive_queue, skb); + wake_up(q->sk.sk_sleep); + return 0; +} + +/* + * Receive is for data from the external interface (lowerdev), + * in case of macvtap, we can treat that the same way as + * forward, which macvlan cannot. + */ +static int macvtap_receive(struct sk_buff *skb) +{ + skb_push(skb, ETH_HLEN); + return macvtap_forward(skb->dev, skb); +} + +static int macvtap_newlink(struct net *src_net, + struct net_device *dev, + struct nlattr *tb[], + struct nlattr *data[]) +{ + struct device *classdev; + dev_t devt; + int err; + + err = macvlan_common_newlink(src_net, dev, tb, data, + macvtap_receive, macvtap_forward); + if (err) + goto out; + + devt = MKDEV(MAJOR(macvtap_major), dev->ifindex); + + classdev = device_create(macvtap_class, &dev->dev, devt, + dev, "tap%d", dev->ifindex); + if (IS_ERR(classdev)) { + err = PTR_ERR(classdev); + macvtap_del_queues(dev); + } + +out: + return err; +} + +static void macvtap_dellink(struct net_device *dev, + struct list_head *head) +{ + device_destroy(macvtap_class, + MKDEV(MAJOR(macvtap_major), dev->ifindex)); + + macvtap_del_queues(dev); + macvlan_dellink(dev, head); +} + +static struct rtnl_link_ops macvtap_link_ops __read_mostly = { + .kind = "macvtap", + .newlink = macvtap_newlink, + .dellink = macvtap_dellink, +}; + + +static void macvtap_sock_write_space(struct sock *sk) +{ + if (!sock_writeable(sk) || + !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) + return; + + if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) + wake_up_interruptible_sync(sk->sk_sleep); +} + +static int macvtap_open(struct inode *inode, struct file *file) +{ + struct net *net = current->nsproxy->net_ns; + struct net_device *dev = dev_get_by_index(net, iminor(inode)); + struct macvtap_queue *q; + int err; + + err = -ENODEV; + if (!dev) + goto out; + + /* check if this is a macvtap device */ + err = -EINVAL; + if (dev->rtnl_link_ops != &macvtap_link_ops) + goto out; + + err = -ENOMEM; + q = (struct macvtap_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, + &macvtap_proto); + if (!q) + goto out; + + init_waitqueue_head(&q->sock.wait); + q->sock.type = SOCK_RAW; + q->sock.state = SS_CONNECTED; + sock_init_data(&q->sock, &q->sk); + q->sk.sk_allocation = GFP_ATOMIC; /* for now */ + q->sk.sk_write_space = macvtap_sock_write_space; + + err = macvtap_set_queue(dev, file, q); + if (err) + sock_put(&q->sk); + +out: + if (dev) + dev_put(dev); + + return err; +} + +static int macvtap_release(struct inode *inode, struct file *file) +{ + macvtap_del_queue((struct macvtap_queue **)&file->private_data); + return 0; +} + +static unsigned int 
macvtap_poll(struct file *file, poll_table * wait) +{ + struct macvtap_queue *q = macvtap_file_get_queue(file); + unsigned int mask = POLLERR; + + if (!q) + goto out; + + mask = 0; + poll_wait(file, &q->sock.wait, wait); + + if (!skb_queue_empty(&q->sk.sk_receive_queue)) + mask |= POLLIN | POLLRDNORM; + + if (sock_writeable(&q->sk) || + (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock.flags) && + sock_writeable(&q->sk))) + mask |= POLLOUT | POLLWRNORM; + +out: + macvtap_file_put_queue(); + return mask; +} + +/* Get packet from user space buffer */ +static ssize_t macvtap_get_user(struct macvtap_queue *q, + const struct iovec *iv, size_t count, + int noblock) +{ + struct sk_buff *skb; + size_t len = count; + int err; + + if (unlikely(len < ETH_HLEN)) + return -EINVAL; + + skb = sock_alloc_send_skb(&q->sk, NET_IP_ALIGN + len, noblock, &err); + + if (!skb) { + macvlan_count_rx(q->vlan, 0, false, false); + return err; + } + + skb_reserve(skb, NET_IP_ALIGN); + skb_put(skb, count); + + if (skb_copy_datagram_from_iovec(skb, 0, iv, 0, len)) { + macvlan_count_rx(q->vlan, 0, false, false); + kfree_skb(skb); + return -EFAULT; + } + + skb_set_network_header(skb, ETH_HLEN); + + macvlan_start_xmit(skb, q->vlan->dev); + + return count; +} + +static ssize_t macvtap_aio_write(struct kiocb *iocb, const struct iovec *iv, + unsigned long count, loff_t pos) +{ + struct file *file = iocb->ki_filp; + ssize_t result = -ENOLINK; + struct macvtap_queue *q = macvtap_file_get_queue(file); + + if (!q) + goto out; + + result = macvtap_get_user(q, iv, iov_length(iv, count), + file->f_flags & O_NONBLOCK); +out: + macvtap_file_put_queue(); + return result; +} + +/* Put packet to the user space buffer */ +static ssize_t macvtap_put_user(struct macvtap_queue *q, + const struct sk_buff *skb, + const struct iovec *iv, int len) +{ + struct macvlan_dev *vlan = q->vlan; + int ret; + + len = min_t(int, skb->len, len); + + ret = skb_copy_datagram_const_iovec(skb, 0, iv, 0, len); + + macvlan_count_rx(vlan, len, ret == 0, 0); + + return ret ? 
ret : len; +} + +static ssize_t macvtap_aio_read(struct kiocb *iocb, const struct iovec *iv, + unsigned long count, loff_t pos) +{ + struct file *file = iocb->ki_filp; + struct macvtap_queue *q = macvtap_file_get_queue(file); + + DECLARE_WAITQUEUE(wait, current); + struct sk_buff *skb; + ssize_t len, ret = 0; + + if (!q) { + ret = -ENOLINK; + goto out; + } + + len = iov_length(iv, count); + if (len < 0) { + ret = -EINVAL; + goto out; + } + + add_wait_queue(q->sk.sk_sleep, &wait); + while (len) { + current->state = TASK_INTERRUPTIBLE; + + /* Read frames from the queue */ + skb = skb_dequeue(&q->sk.sk_receive_queue); + if (!skb) { + if (file->f_flags & O_NONBLOCK) { + ret = -EAGAIN; + break; + } + if (signal_pending(current)) { + ret = -ERESTARTSYS; + break; + } + /* Nothing to read, let's sleep */ + schedule(); + continue; + } + ret = macvtap_put_user(q, skb, iv, len); + kfree_skb(skb); + break; + } + + current->state = TASK_RUNNING; + remove_wait_queue(q->sk.sk_sleep, &wait); + +out: + macvtap_file_put_queue(); + return ret; +} + +/* + * provide compatibility with generic tun/tap interface + */ +static long macvtap_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + struct macvtap_queue *q; + void __user *argp = (void __user *)arg; + struct ifreq __user *ifr = argp; + unsigned int __user *up = argp; + unsigned int u; + char devname[IFNAMSIZ]; + + switch (cmd) { + case TUNSETIFF: + /* ignore the name, just look at flags */ + if (get_user(u, &ifr->ifr_flags)) + return -EFAULT; + if (u != (IFF_TAP | IFF_NO_PI)) + return -EINVAL; + return 0; + + case TUNGETIFF: + q = macvtap_file_get_queue(file); + if (!q) + return -ENOLINK; + memcpy(devname, q->vlan->dev->name, sizeof(devname)); + macvtap_file_put_queue(); + + if (copy_to_user(&ifr->ifr_name, q->vlan->dev->name, IFNAMSIZ) || + put_user((TUN_TAP_DEV | TUN_NO_PI), &ifr->ifr_flags)) + return -EFAULT; + return 0; + + case TUNGETFEATURES: + if (put_user((IFF_TAP | IFF_NO_PI), up)) + return -EFAULT; + return 0; + + case TUNSETSNDBUF: + if (get_user(u, up)) + return -EFAULT; + + q = macvtap_file_get_queue(file); + q->sk.sk_sndbuf = u; + macvtap_file_put_queue(); + return 0; + + case TUNSETOFFLOAD: + /* let the user check for future flags */ + if (arg & ~(TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | + TUN_F_TSO_ECN | TUN_F_UFO)) + return -EINVAL; + + /* TODO: add support for these, so far we don't + support any offload */ + if (arg & (TUN_F_CSUM | TUN_F_TSO4 | TUN_F_TSO6 | + TUN_F_TSO_ECN | TUN_F_UFO)) + return -EINVAL; + + return 0; + + default: + return -EINVAL; + } +} + +#ifdef CONFIG_COMPAT +static long macvtap_compat_ioctl(struct file *file, unsigned int cmd, + unsigned long arg) +{ + return macvtap_ioctl(file, cmd, (unsigned long)compat_ptr(arg)); +} +#endif + +static const struct file_operations macvtap_fops = { + .owner = THIS_MODULE, + .open = macvtap_open, + .release = macvtap_release, + .aio_read = macvtap_aio_read, + .aio_write = macvtap_aio_write, + .poll = macvtap_poll, + .llseek = no_llseek, + .unlocked_ioctl = macvtap_ioctl, +#ifdef CONFIG_COMPAT + .compat_ioctl = macvtap_compat_ioctl, +#endif +}; + +static int macvtap_init(void) +{ + int err; + + err = alloc_chrdev_region(&macvtap_major, 0, + MACVTAP_NUM_DEVS, "macvtap"); + if (err) + goto out1; + + cdev_init(&macvtap_cdev, &macvtap_fops); + err = cdev_add(&macvtap_cdev, macvtap_major, MACVTAP_NUM_DEVS); + if (err) + goto out2; + + macvtap_class = class_create(THIS_MODULE, "macvtap"); + if (IS_ERR(macvtap_class)) { + err = PTR_ERR(macvtap_class); + goto out3; + } + + err = 
macvlan_link_register(&macvtap_link_ops); + if (err) + goto out4; + + return 0; + +out4: + class_unregister(macvtap_class); +out3: + cdev_del(&macvtap_cdev); +out2: + unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); +out1: + return err; +} +module_init(macvtap_init); + +static void macvtap_exit(void) +{ + rtnl_link_unregister(&macvtap_link_ops); + class_unregister(macvtap_class); + cdev_del(&macvtap_cdev); + unregister_chrdev_region(macvtap_major, MACVTAP_NUM_DEVS); +} +module_exit(macvtap_exit); + +MODULE_ALIAS_RTNL_LINK("macvtap"); +MODULE_AUTHOR("Arnd Bergmann "); +MODULE_LICENSE("GPL"); diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 9a11544bb0b1..51f1512045e9 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -34,6 +34,7 @@ struct macvlan_dev { enum macvlan_mode mode; int (*receive)(struct sk_buff *skb); int (*forward)(struct net_device *dev, struct sk_buff *skb); + struct macvtap_queue *tap; }; static inline void macvlan_count_rx(const struct macvlan_dev *vlan, -- cgit v1.2.3-70-g09d2 From 1621e0940294c20e302faf401f41204de7252e22 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 1 Feb 2010 09:44:19 +0000 Subject: net: CONFIG_COMPAT redux Ifdef out struct proto_ops::compat_ioctl struct proto_ops::compat_setsockopt struct proto_ops::compat_getsockopt to make structures smaller on COMPAT=n kernels. Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/net.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 5e8083cacc8b..4157b5d42bd6 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -174,18 +174,22 @@ struct proto_ops { struct poll_table_struct *wait); int (*ioctl) (struct socket *sock, unsigned int cmd, unsigned long arg); +#ifdef CONFIG_COMPAT int (*compat_ioctl) (struct socket *sock, unsigned int cmd, unsigned long arg); +#endif int (*listen) (struct socket *sock, int len); int (*shutdown) (struct socket *sock, int flags); int (*setsockopt)(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen); int (*getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); +#ifdef CONFIG_COMPAT int (*compat_setsockopt)(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen); int (*compat_getsockopt)(struct socket *sock, int level, int optname, char __user *optval, int __user *optlen); +#endif int (*sendmsg) (struct kiocb *iocb, struct socket *sock, struct msghdr *m, size_t total_len); int (*recvmsg) (struct kiocb *iocb, struct socket *sock, -- cgit v1.2.3-70-g09d2 From 6683ece36e3531fc8c75f69e7165c5f20930be88 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 4 Feb 2010 10:22:25 -0800 Subject: net: use helpers to access mc list V2 This patch introduces the similar helpers as those already done for uc list. However multicast lists are no list_head lists but "mademanually". The three macros added by this patch will make the transition of mc_list to list_head smooth in two steps: 1) convert all drivers to use these macros (with the original iterator of type "struct dev_mc_list") 2) once all drivers are converted, convert list type and iterators to "struct netdev_hw_addr" in one patch. >From now on, drivers can (and should) use "netdev_for_each_mc_addr" to iterate over the addresses with iterator of type "struct netdev_hw_addr". Also macros "netdev_mc_count" and "netdev_mc_empty" to read list's length. 
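For illustration only (this sketch is not part of the patch), the end state in a driver's multicast setup looks roughly like the following; example_set_rx_mode() and example_hw_add_mc_filter() are hypothetical stand-ins for a driver's multicast callback and its device-specific filter programming, and during step 1 the iterator is still a struct dev_mc_list whose address field is dmi_addr:

/* Editor's sketch, assuming the step-2 list conversion has happened:
 * walk the multicast list with the new helpers and a struct
 * netdev_hw_addr iterator.  example_hw_add_mc_filter() is hypothetical.
 */
static void example_set_rx_mode(struct net_device *dev)
{
	struct netdev_hw_addr *ha;

	if (netdev_mc_empty(dev))
		return;				/* nothing to program */

	netdev_for_each_mc_addr(ha, dev)
		example_hw_add_mc_filter(dev, ha->addr);
}
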
This is the state which should be reached in all drivers. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 622ba5aa93c4..e535700a3b72 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -268,6 +268,12 @@ struct netdev_hw_addr_list { #define netdev_for_each_uc_addr(ha, dev) \ list_for_each_entry(ha, &dev->uc.list, list) +#define netdev_mc_count(dev) ((dev)->mc_count) +#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0) + +#define netdev_for_each_mc_addr(mclist, dev) \ + for (mclist = dev->mc_list; mclist; mclist = mclist->next) + struct hh_cache { struct hh_cache *hh_next; /* Next entry */ atomic_t hh_refcnt; /* number of users */ -- cgit v1.2.3-70-g09d2 From f8f76db1db369f3a130ac3fd33e2eee5f1610d9c Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 4 Feb 2010 10:23:02 -0800 Subject: libphy: add phy_find_first function Many drivers do this in them manually. Now they can use this function. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- drivers/net/phy/phy_device.c | 16 ++++++++++++++++ include/linux/phy.h | 1 + 2 files changed, 17 insertions(+) (limited to 'include') diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index adbc0fded130..db1794546c56 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -276,6 +276,22 @@ int phy_device_register(struct phy_device *phydev) } EXPORT_SYMBOL(phy_device_register); +/** + * phy_find_first - finds the first PHY device on the bus + * @bus: the target MII bus + */ +struct phy_device *phy_find_first(struct mii_bus *bus) +{ + int addr; + + for (addr = 0; addr < PHY_MAX_ADDR; addr++) { + if (bus->phy_map[addr]) + return bus->phy_map[addr]; + } + return NULL; +} +EXPORT_SYMBOL(phy_find_first); + /** * phy_prepare_link - prepares the PHY layer to monitor link status * @phydev: target phy_device struct diff --git a/include/linux/phy.h b/include/linux/phy.h index 6a7eb402165d..14d7fdf6a90a 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -452,6 +452,7 @@ int phy_attach_direct(struct net_device *dev, struct phy_device *phydev, u32 flags, phy_interface_t interface); struct phy_device * phy_attach(struct net_device *dev, const char *bus_id, u32 flags, phy_interface_t interface); +struct phy_device *phy_find_first(struct mii_bus *bus); int phy_connect_direct(struct net_device *dev, struct phy_device *phydev, void (*handler)(struct net_device *), u32 flags, phy_interface_t interface); -- cgit v1.2.3-70-g09d2 From bfd5f4a3d605e0f6054df0b59fe0907ff7e696d3 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Thu, 4 Feb 2010 20:24:10 -0800 Subject: packet: Add GSO/csum offload support. This patch adds GSO/checksum offload to af_packet sockets using virtio_net_hdr. Based on Rusty's patch to add this support to tun. It allows GSO/checksum offload to be enabled when using raw socket backend with virtio_net. Adds PACKET_VNET_HDR socket option to prepend virtio_net_hdr in the receive path and process/skip virtio_net_hdr in the send path. This option is only allowed with SOCK_RAW sockets attached to ethernet type devices. v2 updates ---------- Michael's Comments - Perform length check in packet_snd() when GSO is off even when vnet_hdr is present. - Check for SKB_GSO_FCOE type and return -EINVAL - don't allow tx/rx ring when vnet_hdr is enabled. Herbert's Comments - Removed ethernet specific code. 
- protocol value is assumed to be passed in by the caller. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/linux/if_packet.h | 1 + net/packet/af_packet.c | 187 +++++++++++++++++++++++++++++++++++++++++++--- 2 files changed, 177 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index 4021d47cc437..aa57a5f993fc 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -46,6 +46,7 @@ struct sockaddr_ll { #define PACKET_RESERVE 12 #define PACKET_TX_RING 13 #define PACKET_LOSS 14 +#define PACKET_VNET_HDR 15 struct tpacket_stats { unsigned int tp_packets; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 53633c5fdb1d..178e2937bbaa 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -80,6 +80,7 @@ #include #include #include +#include #ifdef CONFIG_INET #include @@ -193,7 +194,8 @@ struct packet_sock { struct mutex pg_vec_lock; unsigned int running:1, /* prot_hook is attached*/ auxdata:1, - origdev:1; + origdev:1, + has_vnet_hdr:1; int ifindex; /* bound device */ __be16 num; struct packet_mclist *mclist; @@ -1056,6 +1058,30 @@ out: } #endif +static inline struct sk_buff *packet_alloc_skb(struct sock *sk, size_t prepad, + size_t reserve, size_t len, + size_t linear, int noblock, + int *err) +{ + struct sk_buff *skb; + + /* Under a page? Don't bother with paged skb. */ + if (prepad + len < PAGE_SIZE || !linear) + linear = len; + + skb = sock_alloc_send_pskb(sk, prepad + linear, len - linear, noblock, + err); + if (!skb) + return NULL; + + skb_reserve(skb, reserve); + skb_put(skb, linear); + skb->data_len = len - linear; + skb->len += len - linear; + + return skb; +} + static int packet_snd(struct socket *sock, struct msghdr *msg, size_t len) { @@ -1066,14 +1092,17 @@ static int packet_snd(struct socket *sock, __be16 proto; unsigned char *addr; int ifindex, err, reserve = 0; + struct virtio_net_hdr vnet_hdr = { 0 }; + int offset = 0; + int vnet_hdr_len; + struct packet_sock *po = pkt_sk(sk); + unsigned short gso_type = 0; /* * Get and verify the address. 
*/ if (saddr == NULL) { - struct packet_sock *po = pkt_sk(sk); - ifindex = po->ifindex; proto = po->num; addr = NULL; @@ -1100,25 +1129,74 @@ static int packet_snd(struct socket *sock, if (!(dev->flags & IFF_UP)) goto out_unlock; + if (po->has_vnet_hdr) { + vnet_hdr_len = sizeof(vnet_hdr); + + err = -EINVAL; + if (len < vnet_hdr_len) + goto out_unlock; + + len -= vnet_hdr_len; + + err = memcpy_fromiovec((void *)&vnet_hdr, msg->msg_iov, + vnet_hdr_len); + if (err < 0) + goto out_unlock; + + if ((vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) && + (vnet_hdr.csum_start + vnet_hdr.csum_offset + 2 > + vnet_hdr.hdr_len)) + vnet_hdr.hdr_len = vnet_hdr.csum_start + + vnet_hdr.csum_offset + 2; + + err = -EINVAL; + if (vnet_hdr.hdr_len > len) + goto out_unlock; + + if (vnet_hdr.gso_type != VIRTIO_NET_HDR_GSO_NONE) { + switch (vnet_hdr.gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + gso_type = SKB_GSO_TCPV4; + break; + case VIRTIO_NET_HDR_GSO_TCPV6: + gso_type = SKB_GSO_TCPV6; + break; + case VIRTIO_NET_HDR_GSO_UDP: + gso_type = SKB_GSO_UDP; + break; + default: + goto out_unlock; + } + + if (vnet_hdr.gso_type & VIRTIO_NET_HDR_GSO_ECN) + gso_type |= SKB_GSO_TCP_ECN; + + if (vnet_hdr.gso_size == 0) + goto out_unlock; + + } + } + err = -EMSGSIZE; - if (len > dev->mtu+reserve) + if (!gso_type && (len > dev->mtu+reserve)) goto out_unlock; - skb = sock_alloc_send_skb(sk, len + LL_ALLOCATED_SPACE(dev), - msg->msg_flags & MSG_DONTWAIT, &err); + err = -ENOBUFS; + skb = packet_alloc_skb(sk, LL_ALLOCATED_SPACE(dev), + LL_RESERVED_SPACE(dev), len, vnet_hdr.hdr_len, + msg->msg_flags & MSG_DONTWAIT, &err); if (skb == NULL) goto out_unlock; - skb_reserve(skb, LL_RESERVED_SPACE(dev)); - skb_reset_network_header(skb); + skb_set_network_header(skb, reserve); err = -EINVAL; if (sock->type == SOCK_DGRAM && - dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len) < 0) + (offset = dev_hard_header(skb, dev, ntohs(proto), addr, NULL, len)) < 0) goto out_free; /* Returns -EFAULT on error */ - err = memcpy_fromiovec(skb_put(skb, len), msg->msg_iov, len); + err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); if (err) goto out_free; @@ -1127,6 +1205,25 @@ static int packet_snd(struct socket *sock, skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; + if (po->has_vnet_hdr) { + if (vnet_hdr.flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) { + if (!skb_partial_csum_set(skb, vnet_hdr.csum_start, + vnet_hdr.csum_offset)) { + err = -EINVAL; + goto out_free; + } + } + + skb_shinfo(skb)->gso_size = vnet_hdr.gso_size; + skb_shinfo(skb)->gso_type = gso_type; + + /* Header must be checked, and gso_segs computed. */ + skb_shinfo(skb)->gso_type |= SKB_GSO_DODGY; + skb_shinfo(skb)->gso_segs = 0; + + len += vnet_hdr_len; + } + /* * Now send it */ @@ -1420,6 +1517,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, struct sk_buff *skb; int copied, err; struct sockaddr_ll *sll; + int vnet_hdr_len = 0; err = -EINVAL; if (flags & ~(MSG_PEEK|MSG_DONTWAIT|MSG_TRUNC|MSG_CMSG_COMPAT)) @@ -1451,6 +1549,48 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, if (skb == NULL) goto out; + if (pkt_sk(sk)->has_vnet_hdr) { + struct virtio_net_hdr vnet_hdr = { 0 }; + + err = -EINVAL; + vnet_hdr_len = sizeof(vnet_hdr); + if ((len -= vnet_hdr_len) < 0) + goto out_free; + + if (skb_is_gso(skb)) { + struct skb_shared_info *sinfo = skb_shinfo(skb); + + /* This is a hint as to how much should be linear. 
*/ + vnet_hdr.hdr_len = skb_headlen(skb); + vnet_hdr.gso_size = sinfo->gso_size; + if (sinfo->gso_type & SKB_GSO_TCPV4) + vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + else if (sinfo->gso_type & SKB_GSO_TCPV6) + vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + else if (sinfo->gso_type & SKB_GSO_UDP) + vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_UDP; + else if (sinfo->gso_type & SKB_GSO_FCOE) + goto out_free; + else + BUG(); + if (sinfo->gso_type & SKB_GSO_TCP_ECN) + vnet_hdr.gso_type |= VIRTIO_NET_HDR_GSO_ECN; + } else + vnet_hdr.gso_type = VIRTIO_NET_HDR_GSO_NONE; + + if (skb->ip_summed == CHECKSUM_PARTIAL) { + vnet_hdr.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + vnet_hdr.csum_start = skb->csum_start - + skb_headroom(skb); + vnet_hdr.csum_offset = skb->csum_offset; + } /* else everything is zero */ + + err = memcpy_toiovec(msg->msg_iov, (void *)&vnet_hdr, + vnet_hdr_len); + if (err < 0) + goto out_free; + } + /* * If the address length field is there to be filled in, we fill * it in now. @@ -1502,7 +1642,7 @@ static int packet_recvmsg(struct kiocb *iocb, struct socket *sock, * Free or return the buffer as appropriate. Again this * hides all the races and re-entrancy issues from us. */ - err = (flags&MSG_TRUNC) ? skb->len : copied; + err = vnet_hdr_len + ((flags&MSG_TRUNC) ? skb->len : copied); out_free: skb_free_datagram(sk, skb); @@ -1740,6 +1880,8 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv if (optlen < sizeof(req)) return -EINVAL; + if (pkt_sk(sk)->has_vnet_hdr) + return -EINVAL; if (copy_from_user(&req, optval, sizeof(req))) return -EFAULT; return packet_set_ring(sk, &req, 0, optname == PACKET_TX_RING); @@ -1826,6 +1968,22 @@ packet_setsockopt(struct socket *sock, int level, int optname, char __user *optv po->origdev = !!val; return 0; } + case PACKET_VNET_HDR: + { + int val; + + if (sock->type != SOCK_RAW) + return -EINVAL; + if (po->rx_ring.pg_vec || po->tx_ring.pg_vec) + return -EBUSY; + if (optlen < sizeof(val)) + return -EINVAL; + if (copy_from_user(&val, optval, sizeof(val))) + return -EFAULT; + + po->has_vnet_hdr = !!val; + return 0; + } default: return -ENOPROTOOPT; } @@ -1874,6 +2032,13 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, len = sizeof(int); val = po->origdev; + data = &val; + break; + case PACKET_VNET_HDR: + if (len > sizeof(int)) + len = sizeof(int); + val = po->has_vnet_hdr; + data = &val; break; #ifdef CONFIG_PACKET_MMAP -- cgit v1.2.3-70-g09d2 From 5b3501faa8741d50617ce4191c20061c6ef36cb3 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 8 Feb 2010 11:16:56 -0800 Subject: netfilter: nf_conntrack: per netns nf_conntrack_cachep nf_conntrack_cachep is currently shared by all netns instances, but because of SLAB_DESTROY_BY_RCU special semantics, this is wrong. If we use a shared slab cache, one object can instantly flight between one hash table (netns ONE) to another one (netns TWO), and concurrent reader (doing a lookup in netns ONE, 'finding' an object of netns TWO) can be fooled without notice, because no RCU grace period has to be observed between object freeing and its reuse. We dont have this problem with UDP/TCP slab caches because TCP/UDP hashtables are global to the machine (and each object has a pointer to its netns). If we use per netns conntrack hash tables, we also *must* use per netns conntrack slab caches, to guarantee an object can not escape from one namespace to another one. 
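As an editorial aside (not part of the patch), the hazard follows from the lookup discipline that SLAB_DESTROY_BY_RCU imposes: a freed object can be reused for a new entry before any RCU grace period, so a reader must take a reference and then re-check the key. The sketch below uses simplified, hypothetical names (example_entry, example_lookup, example_put) rather than the real nf_conn code; with one slab cache shared by every namespace, the reused object may now belong to a different netns, and nothing in this re-check can notice that, which is exactly the escape the per-netns caches close:

/* Minimal sketch of a SLAB_DESTROY_BY_RCU lookup (hypothetical names). */
struct example_entry {
	struct hlist_nulls_node	node;
	atomic_t		refcnt;
	u32			key;
};

static struct example_entry *example_lookup(struct hlist_nulls_head *head, u32 key)
{
	struct example_entry *e;
	struct hlist_nulls_node *n;

	rcu_read_lock();
	hlist_nulls_for_each_entry_rcu(e, n, head, node) {
		if (e->key != key)
			continue;
		if (!atomic_inc_not_zero(&e->refcnt))
			continue;		/* object is being freed */
		if (unlikely(e->key != key)) {	/* reused for another entry? */
			example_put(e);		/* hypothetical refcount drop */
			continue;
		}
		rcu_read_unlock();
		return e;			/* caller owns a reference */
	}
	rcu_read_unlock();
	return NULL;
}
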
Signed-off-by: Eric Dumazet [Patrick: added unique slab name allocation] Cc: stable@kernel.org Signed-off-by: Patrick McHardy --- include/net/netns/conntrack.h | 2 ++ net/netfilter/nf_conntrack_core.c | 39 +++++++++++++++++++++++---------------- 2 files changed, 25 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index ba1ba0c5efd1..aed23b6c8478 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -11,6 +11,7 @@ struct nf_conntrack_ecache; struct netns_ct { atomic_t count; unsigned int expect_count; + struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; struct hlist_nulls_head unconfirmed; @@ -28,5 +29,6 @@ struct netns_ct { #endif int hash_vmalloc; int expect_vmalloc; + char *slabname; }; #endif diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 37e2b88313f2..9de4bd4c0dd7 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -63,8 +63,6 @@ EXPORT_SYMBOL_GPL(nf_conntrack_max); struct nf_conn nf_conntrack_untracked __read_mostly; EXPORT_SYMBOL_GPL(nf_conntrack_untracked); -static struct kmem_cache *nf_conntrack_cachep __read_mostly; - static int nf_conntrack_hash_rnd_initted; static unsigned int nf_conntrack_hash_rnd; @@ -572,7 +570,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, * Do not use kmem_cache_zalloc(), as this cache uses * SLAB_DESTROY_BY_RCU. */ - ct = kmem_cache_alloc(nf_conntrack_cachep, gfp); + ct = kmem_cache_alloc(net->ct.nf_conntrack_cachep, gfp); if (ct == NULL) { pr_debug("nf_conntrack_alloc: Can't alloc conntrack.\n"); atomic_dec(&net->ct.count); @@ -611,7 +609,7 @@ void nf_conntrack_free(struct nf_conn *ct) nf_ct_ext_destroy(ct); atomic_dec(&net->ct.count); nf_ct_ext_free(ct); - kmem_cache_free(nf_conntrack_cachep, ct); + kmem_cache_free(net->ct.nf_conntrack_cachep, ct); } EXPORT_SYMBOL_GPL(nf_conntrack_free); @@ -1119,7 +1117,6 @@ static void nf_conntrack_cleanup_init_net(void) nf_conntrack_helper_fini(); nf_conntrack_proto_fini(); - kmem_cache_destroy(nf_conntrack_cachep); } static void nf_conntrack_cleanup_net(struct net *net) @@ -1137,6 +1134,8 @@ static void nf_conntrack_cleanup_net(struct net *net) nf_conntrack_ecache_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); + kmem_cache_destroy(net->ct.nf_conntrack_cachep); + kfree(net->ct.slabname); free_percpu(net->ct.stat); } @@ -1272,15 +1271,6 @@ static int nf_conntrack_init_init_net(void) NF_CONNTRACK_VERSION, nf_conntrack_htable_size, nf_conntrack_max); - nf_conntrack_cachep = kmem_cache_create("nf_conntrack", - sizeof(struct nf_conn), - 0, SLAB_DESTROY_BY_RCU, NULL); - if (!nf_conntrack_cachep) { - printk(KERN_ERR "Unable to create nf_conn slab cache\n"); - ret = -ENOMEM; - goto err_cache; - } - ret = nf_conntrack_proto_init(); if (ret < 0) goto err_proto; @@ -1302,8 +1292,6 @@ static int nf_conntrack_init_init_net(void) err_helper: nf_conntrack_proto_fini(); err_proto: - kmem_cache_destroy(nf_conntrack_cachep); -err_cache: return ret; } @@ -1325,6 +1313,21 @@ static int nf_conntrack_init_net(struct net *net) ret = -ENOMEM; goto err_stat; } + + net->ct.slabname = kasprintf(GFP_KERNEL, "nf_conntrack_%p", net); + if (!net->ct.slabname) { + ret = -ENOMEM; + goto err_slabname; + } + + net->ct.nf_conntrack_cachep = kmem_cache_create(net->ct.slabname, + sizeof(struct nf_conn), 0, + SLAB_DESTROY_BY_RCU, NULL); + if (!net->ct.nf_conntrack_cachep) { + 
printk(KERN_ERR "Unable to create nf_conn slab cache\n"); + ret = -ENOMEM; + goto err_cache; + } net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, &net->ct.hash_vmalloc, 1); if (!net->ct.hash) { @@ -1352,6 +1355,10 @@ err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, nf_conntrack_htable_size); err_hash: + kmem_cache_destroy(net->ct.nf_conntrack_cachep); +err_cache: + kfree(net->ct.slabname); +err_slabname: free_percpu(net->ct.stat); err_stat: return ret; -- cgit v1.2.3-70-g09d2 From d696c7bdaa55e2208e56c6f98e6bc1599f34286d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 8 Feb 2010 11:18:07 -0800 Subject: netfilter: nf_conntrack: fix hash resizing with namespaces As noticed by Jon Masters , the conntrack hash size is global and not per namespace, but modifiable at runtime through /sys/module/nf_conntrack/hashsize. Changing the hash size will only resize the hash in the current namespace however, so other namespaces will use an invalid hash size. This can cause crashes when enlarging the hashsize, or false negative lookups when shrinking it. Move the hash size into the per-namespace data and only use the global hash size to initialize the per-namespace value when instanciating a new namespace. Additionally restrict hash resizing to init_net for now as other namespaces are not handled currently. Cc: stable@kernel.org Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/netns/conntrack.h | 1 + include/net/netns/ipv4.h | 1 + net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c | 2 +- .../netfilter/nf_conntrack_l3proto_ipv4_compat.c | 4 +- net/ipv4/netfilter/nf_nat_core.c | 22 ++++----- net/netfilter/nf_conntrack_core.c | 53 ++++++++++++---------- net/netfilter/nf_conntrack_expect.c | 2 +- net/netfilter/nf_conntrack_helper.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 2 +- net/netfilter/nf_conntrack_standalone.c | 7 +-- 10 files changed, 49 insertions(+), 47 deletions(-) (limited to 'include') diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index aed23b6c8478..63d449807d9b 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -11,6 +11,7 @@ struct nf_conntrack_ecache; struct netns_ct { atomic_t count; unsigned int expect_count; + unsigned int htable_size; struct kmem_cache *nf_conntrack_cachep; struct hlist_nulls_head *hash; struct hlist_head *expect_hash; diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 2eb3814d6258..9a4b8b714079 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -40,6 +40,7 @@ struct netns_ipv4 { struct xt_table *iptable_security; struct xt_table *nat_table; struct hlist_head *nat_bysource; + unsigned int nat_htable_size; int nat_vmalloced; #endif diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c index d171b123a656..d1ea38a7c490 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c +++ b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4.c @@ -210,7 +210,7 @@ static ctl_table ip_ct_sysctl_table[] = { }, { .procname = "ip_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &init_net.ct.htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, diff --git a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c index 8668a3defda6..2fb7b76da94f 100644 --- a/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c +++ 
b/net/ipv4/netfilter/nf_conntrack_l3proto_ipv4_compat.c @@ -32,7 +32,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < net->ct.htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); if (!is_a_nulls(n)) @@ -50,7 +50,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(head->next); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= net->ct.htable_size) return NULL; } head = rcu_dereference(net->ct.hash[st->bucket].first); diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index fe1a64479dd0..26066a2327ad 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -35,9 +35,6 @@ static DEFINE_SPINLOCK(nf_nat_lock); static struct nf_conntrack_l3proto *l3proto __read_mostly; -/* Calculated at init based on memory size */ -static unsigned int nf_nat_htable_size __read_mostly; - #define MAX_IP_NAT_PROTO 256 static const struct nf_nat_protocol *nf_nat_protos[MAX_IP_NAT_PROTO] __read_mostly; @@ -72,7 +69,7 @@ EXPORT_SYMBOL_GPL(nf_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int -hash_by_src(const struct nf_conntrack_tuple *tuple) +hash_by_src(const struct net *net, const struct nf_conntrack_tuple *tuple) { unsigned int hash; @@ -80,7 +77,7 @@ hash_by_src(const struct nf_conntrack_tuple *tuple) hash = jhash_3words((__force u32)tuple->src.u3.ip, (__force u32)tuple->src.u.all, tuple->dst.protonum, 0); - return ((u64)hash * nf_nat_htable_size) >> 32; + return ((u64)hash * net->ipv4.nat_htable_size) >> 32; } /* Is this tuple already taken? (not by us) */ @@ -147,7 +144,7 @@ find_appropriate_src(struct net *net, struct nf_conntrack_tuple *result, const struct nf_nat_range *range) { - unsigned int h = hash_by_src(tuple); + unsigned int h = hash_by_src(net, tuple); const struct nf_conn_nat *nat; const struct nf_conn *ct; const struct hlist_node *n; @@ -330,7 +327,7 @@ nf_nat_setup_info(struct nf_conn *ct, if (have_to_hash) { unsigned int srchash; - srchash = hash_by_src(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + srchash = hash_by_src(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); spin_lock_bh(&nf_nat_lock); /* nf_conntrack_alter_reply might re-allocate exntension aera */ nat = nfct_nat(ct); @@ -679,8 +676,10 @@ nfnetlink_parse_nat_setup(struct nf_conn *ct, static int __net_init nf_nat_net_init(struct net *net) { - net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&nf_nat_htable_size, - &net->ipv4.nat_vmalloced, 0); + /* Leave them the same for the moment. */ + net->ipv4.nat_htable_size = net->ct.htable_size; + net->ipv4.nat_bysource = nf_ct_alloc_hashtable(&net->ipv4.nat_htable_size, + &net->ipv4.nat_vmalloced, 0); if (!net->ipv4.nat_bysource) return -ENOMEM; return 0; @@ -703,7 +702,7 @@ static void __net_exit nf_nat_net_exit(struct net *net) nf_ct_iterate_cleanup(net, &clean_nat, NULL); synchronize_rcu(); nf_ct_free_hashtable(net->ipv4.nat_bysource, net->ipv4.nat_vmalloced, - nf_nat_htable_size); + net->ipv4.nat_htable_size); } static struct pernet_operations nf_nat_net_ops = { @@ -724,9 +723,6 @@ static int __init nf_nat_init(void) return ret; } - /* Leave them the same for the moment. 
*/ - nf_nat_htable_size = nf_conntrack_htable_size; - ret = register_pernet_subsys(&nf_nat_net_ops); if (ret < 0) goto cleanup_extend; diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 9de4bd4c0dd7..4d79e3c1616c 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include @@ -84,9 +85,10 @@ static u_int32_t __hash_conntrack(const struct nf_conntrack_tuple *tuple, return ((u64)h * size) >> 32; } -static inline u_int32_t hash_conntrack(const struct nf_conntrack_tuple *tuple) +static inline u_int32_t hash_conntrack(const struct net *net, + const struct nf_conntrack_tuple *tuple) { - return __hash_conntrack(tuple, nf_conntrack_htable_size, + return __hash_conntrack(tuple, net->ct.htable_size, nf_conntrack_hash_rnd); } @@ -294,7 +296,7 @@ __nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple) { struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(tuple); + unsigned int hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we normally need to disable them * at least once for the stats anyway. @@ -364,10 +366,11 @@ static void __nf_conntrack_hash_insert(struct nf_conn *ct, void nf_conntrack_hash_insert(struct nf_conn *ct) { + struct net *net = nf_ct_net(ct); unsigned int hash, repl_hash; - hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); __nf_conntrack_hash_insert(ct, hash, repl_hash); } @@ -395,8 +398,8 @@ __nf_conntrack_confirm(struct sk_buff *skb) if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) return NF_ACCEPT; - hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); - repl_hash = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple); + hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple); + repl_hash = hash_conntrack(net, &ct->tuplehash[IP_CT_DIR_REPLY].tuple); /* We're not in hash table, and we refuse to set up related connections for unconfirmed conns. But packet copies and @@ -466,7 +469,7 @@ nf_conntrack_tuple_taken(const struct nf_conntrack_tuple *tuple, struct net *net = nf_ct_net(ignored_conntrack); struct nf_conntrack_tuple_hash *h; struct hlist_nulls_node *n; - unsigned int hash = hash_conntrack(tuple); + unsigned int hash = hash_conntrack(net, tuple); /* Disable BHs the entire time since we need to disable them at * least once for the stats anyway. 
@@ -501,7 +504,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) int dropped = 0; rcu_read_lock(); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { hlist_nulls_for_each_entry_rcu(h, n, &net->ct.hash[hash], hnnode) { tmp = nf_ct_tuplehash_to_ctrack(h); @@ -521,7 +524,7 @@ static noinline int early_drop(struct net *net, unsigned int hash) if (cnt >= NF_CT_EVICTION_RANGE) break; - hash = (hash + 1) % nf_conntrack_htable_size; + hash = (hash + 1) % net->ct.htable_size; } rcu_read_unlock(); @@ -555,7 +558,7 @@ struct nf_conn *nf_conntrack_alloc(struct net *net, if (nf_conntrack_max && unlikely(atomic_read(&net->ct.count) > nf_conntrack_max)) { - unsigned int hash = hash_conntrack(orig); + unsigned int hash = hash_conntrack(net, orig); if (!early_drop(net, hash)) { atomic_dec(&net->ct.count); if (net_ratelimit()) @@ -1012,7 +1015,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), struct hlist_nulls_node *n; spin_lock_bh(&nf_conntrack_lock); - for (; *bucket < nf_conntrack_htable_size; (*bucket)++) { + for (; *bucket < net->ct.htable_size; (*bucket)++) { hlist_nulls_for_each_entry(h, n, &net->ct.hash[*bucket], hnnode) { ct = nf_ct_tuplehash_to_ctrack(h); if (iter(ct, data)) @@ -1130,7 +1133,7 @@ static void nf_conntrack_cleanup_net(struct net *net) } nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, - nf_conntrack_htable_size); + net->ct.htable_size); nf_conntrack_ecache_fini(net); nf_conntrack_acct_fini(net); nf_conntrack_expect_fini(net); @@ -1190,10 +1193,12 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) { int i, bucket, vmalloced, old_vmalloced; unsigned int hashsize, old_size; - int rnd; struct hlist_nulls_head *hash, *old_hash; struct nf_conntrack_tuple_hash *h; + if (current->nsproxy->net_ns != &init_net) + return -EOPNOTSUPP; + /* On boot, we can set this without any fancy locking. */ if (!nf_conntrack_htable_size) return param_set_uint(val, kp); @@ -1206,33 +1211,29 @@ int nf_conntrack_set_hashsize(const char *val, struct kernel_param *kp) if (!hash) return -ENOMEM; - /* We have to rehahs for the new table anyway, so we also can - * use a newrandom seed */ - get_random_bytes(&rnd, sizeof(rnd)); - /* Lookups in the old hash might happen in parallel, which means we * might get false negatives during connection lookup. New connections * created because of a false negative won't make it into the hash * though since that required taking the lock. 
*/ spin_lock_bh(&nf_conntrack_lock); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < init_net.ct.htable_size; i++) { while (!hlist_nulls_empty(&init_net.ct.hash[i])) { h = hlist_nulls_entry(init_net.ct.hash[i].first, struct nf_conntrack_tuple_hash, hnnode); hlist_nulls_del_rcu(&h->hnnode); - bucket = __hash_conntrack(&h->tuple, hashsize, rnd); + bucket = __hash_conntrack(&h->tuple, hashsize, + nf_conntrack_hash_rnd); hlist_nulls_add_head_rcu(&h->hnnode, &hash[bucket]); } } - old_size = nf_conntrack_htable_size; + old_size = init_net.ct.htable_size; old_vmalloced = init_net.ct.hash_vmalloc; old_hash = init_net.ct.hash; - nf_conntrack_htable_size = hashsize; + init_net.ct.htable_size = nf_conntrack_htable_size = hashsize; init_net.ct.hash_vmalloc = vmalloced; init_net.ct.hash = hash; - nf_conntrack_hash_rnd = rnd; spin_unlock_bh(&nf_conntrack_lock); nf_ct_free_hashtable(old_hash, old_vmalloced, old_size); @@ -1328,7 +1329,9 @@ static int nf_conntrack_init_net(struct net *net) ret = -ENOMEM; goto err_cache; } - net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size, + + net->ct.htable_size = nf_conntrack_htable_size; + net->ct.hash = nf_ct_alloc_hashtable(&net->ct.htable_size, &net->ct.hash_vmalloc, 1); if (!net->ct.hash) { ret = -ENOMEM; @@ -1353,7 +1356,7 @@ err_acct: nf_conntrack_expect_fini(net); err_expect: nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc, - nf_conntrack_htable_size); + net->ct.htable_size); err_hash: kmem_cache_destroy(net->ct.nf_conntrack_cachep); err_cache: diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index 4ad7d1d809af..2f25ff610982 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -577,7 +577,7 @@ int nf_conntrack_expect_init(struct net *net) if (net_eq(net, &init_net)) { if (!nf_ct_expect_hsize) { - nf_ct_expect_hsize = nf_conntrack_htable_size / 256; + nf_ct_expect_hsize = net->ct.htable_size / 256; if (!nf_ct_expect_hsize) nf_ct_expect_hsize = 1; } diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 65c2a7bc3afc..4b1a56bd074c 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -192,7 +192,7 @@ static void __nf_conntrack_helper_unregister(struct nf_conntrack_helper *me, /* Get rid of expecteds, set helpers to NULL. 
*/ hlist_nulls_for_each_entry(h, nn, &net->ct.unconfirmed, hnnode) unhelp(h, me); - for (i = 0; i < nf_conntrack_htable_size; i++) { + for (i = 0; i < net->ct.htable_size; i++) { hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) unhelp(h, me); } diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 42f21c01a93e..0ffe689dfe97 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -594,7 +594,7 @@ ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb) rcu_read_lock(); last = (struct nf_conn *)cb->args[1]; - for (; cb->args[0] < nf_conntrack_htable_size; cb->args[0]++) { + for (; cb->args[0] < init_net.ct.htable_size; cb->args[0]++) { restart: hlist_nulls_for_each_entry_rcu(h, n, &init_net.ct.hash[cb->args[0]], hnnode) { diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 028aba667ef7..e310f1561bb2 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -51,7 +51,7 @@ static struct hlist_nulls_node *ct_get_first(struct seq_file *seq) struct hlist_nulls_node *n; for (st->bucket = 0; - st->bucket < nf_conntrack_htable_size; + st->bucket < net->ct.htable_size; st->bucket++) { n = rcu_dereference(net->ct.hash[st->bucket].first); if (!is_a_nulls(n)) @@ -69,7 +69,7 @@ static struct hlist_nulls_node *ct_get_next(struct seq_file *seq, head = rcu_dereference(head->next); while (is_a_nulls(head)) { if (likely(get_nulls_value(head) == st->bucket)) { - if (++st->bucket >= nf_conntrack_htable_size) + if (++st->bucket >= net->ct.htable_size) return NULL; } head = rcu_dereference(net->ct.hash[st->bucket].first); @@ -355,7 +355,7 @@ static ctl_table nf_ct_sysctl_table[] = { }, { .procname = "nf_conntrack_buckets", - .data = &nf_conntrack_htable_size, + .data = &init_net.ct.htable_size, .maxlen = sizeof(unsigned int), .mode = 0444, .proc_handler = proc_dointvec, @@ -421,6 +421,7 @@ static int nf_conntrack_standalone_init_sysctl(struct net *net) goto out_kmemdup; table[1].data = &net->ct.count; + table[2].data = &net->ct.htable_size; table[3].data = &net->ct.sysctl_checksum; table[4].data = &net->ct.sysctl_log_invalid; -- cgit v1.2.3-70-g09d2
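To pull the scattered hunks together, an editor's consolidation of the runtime-resize path after this change is sketched below; the function name is hypothetical and error handling is trimmed, so treat it as a summary under those assumptions rather than the literal upstream code:

/* Simplified sketch of hashsize resizing after this patch: only init_net
 * may be resized, and the existing hash seed is reused so entries simply
 * move to their new buckets.
 */
static int example_set_hashsize(const char *val, struct kernel_param *kp)
{
	unsigned int hashsize, old_size, i;
	int vmalloced, old_vmalloced;
	struct hlist_nulls_head *hash, *old_hash;
	struct nf_conntrack_tuple_hash *h;

	/* other namespaces keep the size they were created with */
	if (current->nsproxy->net_ns != &init_net)
		return -EOPNOTSUPP;

	hashsize = simple_strtoul(val, NULL, 0);
	if (!hashsize)
		return -EINVAL;

	hash = nf_ct_alloc_hashtable(&hashsize, &vmalloced, 1);
	if (!hash)
		return -ENOMEM;

	spin_lock_bh(&nf_conntrack_lock);
	for (i = 0; i < init_net.ct.htable_size; i++) {
		while (!hlist_nulls_empty(&init_net.ct.hash[i])) {
			h = hlist_nulls_entry(init_net.ct.hash[i].first,
					      struct nf_conntrack_tuple_hash,
					      hnnode);
			hlist_nulls_del_rcu(&h->hnnode);
			hlist_nulls_add_head_rcu(&h->hnnode,
				&hash[__hash_conntrack(&h->tuple, hashsize,
						       nf_conntrack_hash_rnd)]);
		}
	}
	old_size      = init_net.ct.htable_size;
	old_vmalloced = init_net.ct.hash_vmalloc;
	old_hash      = init_net.ct.hash;

	init_net.ct.htable_size = nf_conntrack_htable_size = hashsize;
	init_net.ct.hash_vmalloc = vmalloced;
	init_net.ct.hash = hash;
	spin_unlock_bh(&nf_conntrack_lock);

	nf_ct_free_hashtable(old_hash, old_vmalloced, old_size);
	return 0;
}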